From 74f1ca7158e6418273ed7c02561bca6b66ad42ad Mon Sep 17 00:00:00 2001
From: ChopinF
Date: Wed, 17 Sep 2025 11:53:11 +0000
Subject: [PATCH] html feature: display or save fetched html from the url

---
 main.py                    | 20 +++++++++++++++-----
 src/torbot/modules/info.py | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/main.py b/main.py
index d8c2ad04..bf9207c9 100755
--- a/main.py
+++ b/main.py
@@ -10,7 +10,7 @@
 from torbot.modules.api import get_ip
 from torbot.modules.color import color
 from torbot.modules.updater import check_version
-from torbot.modules.info import execute_all
+from torbot.modules.info import execute_all, fetch_html
 from torbot.modules.linktree import LinkTree
 
 
@@ -35,9 +35,7 @@ def print_header(version: str) -> None:
                             / __/ / / / /_/ / __ \/ __ \/ /
                            / /_/ /_/ / _, _/ /_/ / /_/ / /
                            \__/\____/_/ |_/_____/\____/_/  v{VERSION}
-            """.format(
-        VERSION=version
-    )
+            """.format(VERSION=version)
     banner = color(banner, "red")
 
     title = r"""
@@ -101,6 +99,11 @@ def run(arg_parser: argparse.ArgumentParser, version: str) -> None:
         elif args.save == "json":
             tree.saveJSON()
 
+        if args.html == "display":
+            fetch_html(client, args.url, tree)
+        elif args.html == "save":
+            fetch_html(client, args.url, tree, save_html=True)
+
         # always print something, table is the default
         if args.visualize == "table" or not args.visualize:
             tree.showTable()
@@ -158,6 +161,11 @@ def set_arguments() -> argparse.ArgumentParser:
         action="store_true",
         help="Executes HTTP requests without using SOCKS5 proxy",
     )
+    parser.add_argument(
+        "--html",
+        choices=["save", "display"],
+        help="Saves / Displays the html of the onion link",
+    )
 
     return parser
 
@@ -165,7 +173,9 @@ def set_arguments() -> argparse.ArgumentParser:
 if __name__ == "__main__":
     try:
         arg_parser = set_arguments()
-        config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "pyproject.toml")
+        config_file_path = os.path.join(
+            os.path.dirname(os.path.realpath(__file__)), "pyproject.toml"
+        )
         try:
             with open(config_file_path, "r") as f:
                 data = toml.load(f)
diff --git a/src/torbot/modules/info.py b/src/torbot/modules/info.py
index d5c2a17d..3f745cd1 100644
--- a/src/torbot/modules/info.py
+++ b/src/torbot/modules/info.py
@@ -2,6 +2,7 @@
 Module that contains methods for collecting all relevant data from links,
 and saving data to file.
 """
+
 import re
 import httpx
 import logging
@@ -10,6 +11,8 @@
 from bs4 import BeautifulSoup
 from termcolor import cprint
 
+from torbot.modules.linktree import LinkTree
+
 
 keys = set()  # high entropy strings, prolly secret keys
 files = set()  # pdf, css, png etc.
@@ -86,6 +89,37 @@ def execute_all(
     # display_headers(response)
 
 
+def fetch_html(
+    client: httpx.Client, link: str, tree: LinkTree, save_html: bool = False
+) -> None:
+    """Fetch the page at ``link`` and print its HTML or save it to a file.
+
+    Args:
+        client: HTTP client used to issue the GET request.
+        link: URL of the page to fetch.
+        tree: Link tree whose file name is reused (with an ``.html``
+            suffix) as the destination when saving.
+        save_html: If True, write the HTML to disk instead of printing it.
+    """
+    resp = client.get(url=link)
+    soup = BeautifulSoup(resp.text, "html.parser")
+
+    if not save_html:
+        print(f"""
+    HTML file
+    {soup}
+    """)
+        return
+
+    file_name = f"{tree._get_tree_file_name()}.html"
+    # Fetched pages may contain characters outside the platform's default
+    # encoding, so write UTF-8 explicitly.
+    with open(file_name, "w", encoding="utf-8") as f:
+        f.write(str(soup))
+    # Announce the file only once it has actually been written.
+    print(f"SAVED to {file_name}\n\n")
+
+
 def display_headers(response):
     """Print all headers in response object.
 