From cfa4097df90407ad763365755ba1ae8e1bdb9be0 Mon Sep 17 00:00:00 2001 From: Siddharth Dushantha Date: Wed, 26 Jun 2024 21:57:11 +0200 Subject: [PATCH 01/23] removed support for tor --- sherlock/sherlock.py | 58 +++----------------------------------------- 1 file changed, 4 insertions(+), 54 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index db8e9c2cb..170ea32ab 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -30,7 +30,6 @@ ) from requests_futures.sessions import FuturesSession # noqa: E402 -from torrequest import TorRequest # noqa: E402 from sherlock.result import QueryStatus # noqa: E402 from sherlock.result import QueryResult # noqa: E402 from sherlock.notify import QueryNotify # noqa: E402 @@ -166,8 +165,6 @@ def sherlock( username, site_data, query_notify: QueryNotify, - tor: bool = False, - unique_tor: bool = False, proxy=None, timeout=60, ): @@ -182,8 +179,6 @@ def sherlock( query_notify -- Object with base type of QueryNotify(). This will be used to notify the caller about query results. - tor -- Boolean indicating whether to use a tor circuit for the requests. - unique_tor -- Boolean indicating whether to use a new tor circuit for each request. proxy -- String indicating the proxy URL timeout -- Time in seconds to wait before timing out request. Default is 60 seconds. @@ -204,20 +199,10 @@ def sherlock( # Notify caller that we are starting the query. query_notify.start(username) - # Create session based on request methodology - if tor or unique_tor: - # Requests using Tor obfuscation - try: - underlying_request = TorRequest() - except OSError: - print("Tor not found in system path. 
Unable to continue.\n") - sys.exit(query_notify.finish()) - underlying_session = underlying_request.session - else: - # Normal requests - underlying_session = requests.session() - underlying_request = requests.Request() + # Normal requests + underlying_session = requests.session() + underlying_request = requests.Request() # Limit number of workers to 20. # This is probably vastly overkill. @@ -341,15 +326,10 @@ def sherlock( # Store future in data for access later net_info["request_future"] = future - # Reset identify for tor (if needed) - if unique_tor: - underlying_request.reset_identity() - # Add this site's results into final dictionary with all the other results. results_total[social_network] = results_site # Open the file containing account links - # Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses for social_network, net_info in site_data.items(): # Retrieve results again results_site = results_total.get(social_network) @@ -547,23 +527,7 @@ def main(): "-o", dest="output", help="If using single username, the output of the result will be saved to this file.", - ) - parser.add_argument( - "--tor", - "-t", - action="store_true", - dest="tor", - default=False, - help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.", - ) - parser.add_argument( - "--unique-tor", - "-u", - action="store_true", - dest="unique_tor", - default=False, - help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.", - ) + ), parser.add_argument( "--csv", action="store_true", @@ -687,22 +651,10 @@ def main(): except Exception as error: print(f"A problem occurred while checking for an update: {error}") - # Argument check - # TODO regex check on args.proxy - if args.tor and (args.proxy is not None): - raise Exception("Tor and Proxy cannot be set at the same time.") - # Make prompts if args.proxy is not None: print("Using the 
proxy: " + args.proxy) - if args.tor or args.unique_tor: - print("Using Tor to make requests") - - print( - "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors." - ) - if args.no_color: # Disable color output. init(strip=True, convert=False) @@ -781,8 +733,6 @@ def main(): username, site_data, query_notify, - tor=args.tor, - unique_tor=args.unique_tor, proxy=args.proxy, timeout=args.timeout, ) From 44ad8f506a3719e25bfd894a90e2952199d405f6 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Fri, 28 Jun 2024 23:38:44 -0400 Subject: [PATCH 02/23] Lint --- sherlock/sherlock.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 170ea32ab..30346bd5a 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -202,7 +202,6 @@ def sherlock( # Normal requests underlying_session = requests.session() - underlying_request = requests.Request() # Limit number of workers to 20. # This is probably vastly overkill. @@ -527,7 +526,7 @@ def main(): "-o", dest="output", help="If using single username, the output of the result will be saved to this file.", - ), + ) parser.add_argument( "--csv", action="store_true", From 2016892e648c5bc5674d2b3b910cd9f36988bcff Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Fri, 28 Jun 2024 23:39:38 -0400 Subject: [PATCH 03/23] Remove torrequest dep Not sure why it's not in my patch file, but I was removing via sed in my spec instead. 
--- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fafa9f85f..5674f016e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,6 @@ PySocks = "^1.7.0" requests = "^2.22.0" requests-futures = "^1.0.0" stem = "^1.8.0" -torrequest = "^0.1.0" # pandas can likely be bumped up to ^2.0.0 after fc39 EOL pandas = ">=1.0.0,<3.0.0" openpyxl = "^3.0.10" From dc89f1cd27a358a6771877cc0b597b3db822c06c Mon Sep 17 00:00:00 2001 From: JongMyeong HAN Date: Wed, 1 Oct 2025 00:41:23 +0900 Subject: [PATCH 04/23] feat: Add dcinside --- sherlock_project/resources/data.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 4c84ac524..ff8af0751 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -600,6 +600,12 @@ "urlMain": "https://www.dailymotion.com/", "username_claimed": "blue" }, + "dcinside": { + "errorType": "status_code", + "url": "https://gallog.dcinside.com/{}", + "urlMain": "https://www.dcinside.com/", + "username_claimed": "anrbrb" + }, "Dealabs": { "errorMsg": "La page que vous essayez", "errorType": "message", From e5cd5e5bfe7df4ebd93e220a69496a4fdfe7b39f Mon Sep 17 00:00:00 2001 From: JongMyeong HAN Date: Wed, 1 Oct 2025 00:43:21 +0900 Subject: [PATCH 05/23] feat: Add namuwiki --- sherlock_project/resources/data.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index ff8af0751..2b5dbf6b1 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1465,6 +1465,12 @@ "urlMain": "https://www.native-instruments.com/forum/", "username_claimed": "jambert" }, + "namuwiki": { + "errorType": "status_code", + "url": "https://namu.wiki/w/%EC%82%AC%EC%9A%A9%EC%9E%90:{}", + "urlMain": "https://namu.wiki/", + "username_claimed": "namu" + }, "NationStates Nation": { "errorMsg": 
"Was this your nation? It may have ceased to exist due to inactivity, but can rise again!", "errorType": "message", From 86140af50e6a2aae642ff38b1cab365a980fa283 Mon Sep 17 00:00:00 2001 From: JongMyeong HAN Date: Wed, 1 Oct 2025 00:44:02 +0900 Subject: [PATCH 06/23] feat: Add SOOP --- sherlock_project/resources/data.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 2b5dbf6b1..eaf3e6708 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1964,6 +1964,13 @@ "urlMain": "https://www.snapchat.com", "username_claimed": "teamsnapchat" }, + "SOOP": { + "errorType": "status_code", + "url": "https://www.sooplive.co.kr/station/{}", + "urlMain": "https://www.sooplive.co.kr/", + "urlProbe": "https://api-channel.sooplive.co.kr/v1.1/channel/{}/station", + "username_claimed": "udkn" + }, "SoundCloud": { "errorType": "status_code", "url": "https://soundcloud.com/{}", From cd7c52e4fae2dc81bc3fd75d098498e430d8bec9 Mon Sep 17 00:00:00 2001 From: JongMyeong HAN Date: Wed, 1 Oct 2025 00:44:55 +0900 Subject: [PATCH 07/23] Feat: Add tistory --- sherlock_project/resources/data.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index eaf3e6708..c4efcbe39 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -2138,6 +2138,12 @@ "urlMain": "https://themeforest.net/", "username_claimed": "user" }, + "tistory": { + "errorType": "status_code", + "url": "https://{}.tistory.com/", + "urlMain": "https://www.tistory.com/", + "username_claimed": "notice" + }, "TnAFlix": { "errorType": "status_code", "isNSFW": true, From 7b3632bdadd4eba3473a1c0a728df522631d4654 Mon Sep 17 00:00:00 2001 From: JongMyeong HAN Date: Fri, 3 Oct 2025 04:00:41 +0900 Subject: [PATCH 08/23] Add comment to site 'namuwiki' Co-authored-by: Paul Pfeister --- 
sherlock_project/resources/data.json | 1 + 1 file changed, 1 insertion(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index c4efcbe39..f019000fb 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1466,6 +1466,7 @@ "username_claimed": "jambert" }, "namuwiki": { + "__comment__": "This is a Korean site and it's expected to return false negatives in certain other regions.", "errorType": "status_code", "url": "https://namu.wiki/w/%EC%82%AC%EC%9A%A9%EC%9E%90:{}", "urlMain": "https://namu.wiki/", From 355bfbd328c31144983904a65e6ad3aa8c003d9c Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Fri, 3 Oct 2025 00:42:07 +0530 Subject: [PATCH 09/23] fix(sites): Remediate false positive for DeviantArt --- sherlock_project/resources/data.json | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 4c84ac524..9738699b4 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -608,13 +608,15 @@ "urlMain": "https://www.dealabs.com/", "username_claimed": "blue" }, - "DeviantART": { - "errorType": "status_code", - "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", - "url": "https://{}.deviantart.com", - "urlMain": "https://deviantart.com", - "username_claimed": "blue" - }, + "DeviantArt": { + "errorType": "message", + "errorMsg": "Llama Not Found", + "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", + "url": "https://www.deviantart.com/{}", + "urlMain": "https://www.deviantart.com/", + "username_claimed": "blue", + "username_unclaimed": "noonewouldeverusethis" +}, "DigitalSpy": { "errorMsg": "The page you were looking for could not be found.", "errorType": "message", From b811b2bd47f0b45ac1cdffa9518470fff91a253e Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Thu, 2 Oct 2025 18:21:20 -0400 Subject: [PATCH 10/23] chore: update code owners --- .github/CODEOWNERS 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 0f2eadf28..b9af7fda3 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,5 +1,5 @@ ### REPOSITORY -/.github/CODEOWNERS @sdushantha +/.github/CODEOWNERS @sdushantha @ppfeister /.github/FUNDING.yml @sdushantha /LICENSE @sdushantha From 779d4c33f4a88421a443695931d7041e55a51c7e Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Fri, 3 Oct 2025 03:55:03 +0530 Subject: [PATCH 11/23] fix: Remove username_unclaimed as requested --- sherlock_project/resources/data.json | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 9738699b4..dc422754c 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -608,14 +608,13 @@ "urlMain": "https://www.dealabs.com/", "username_claimed": "blue" }, - "DeviantArt": { + "DeviantArt": { "errorType": "message", "errorMsg": "Llama Not Found", "regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$", "url": "https://www.deviantart.com/{}", "urlMain": "https://www.deviantart.com/", - "username_claimed": "blue", - "username_unclaimed": "noonewouldeverusethis" + "username_claimed": "blue" }, "DigitalSpy": { "errorMsg": "The page you were looking for could not be found.", From c89a52caf7f55d36265866ffc2c9d390957a7734 Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Fri, 3 Oct 2025 04:25:46 +0530 Subject: [PATCH 12/23] fix(sites): Remediate false positive for AllMyLinks --- sherlock_project/resources/data.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 4c84ac524..091e2e9a1 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -79,13 +79,13 @@ "username_claimed": "pink" }, "AllMyLinks": { - "errorMsg": "Not Found", - "errorType": "message", - 
"regexCheck": "^[a-z0-9][a-z0-9-]{2,32}$", - "url": "https://allmylinks.com/{}", - "urlMain": "https://allmylinks.com/", - "username_claimed": "blue" - }, + "errorMsg": "Page not found", + "errorType": "message", + "regexCheck": "^[a-z0-9][a-z0-9-]{2,32}$", + "url": "https://allmylinks.com/{}", + "urlMain": "https://allmylinks.com/", + "username_claimed": "blue" +}, "AniWorld": { "errorMsg": "Dieses Profil ist nicht verf\u00fcgbar", "errorType": "message", From d314d75db1636b14511997fe2d19a9b8bc6ef9b6 Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Fri, 3 Oct 2025 04:43:05 +0530 Subject: [PATCH 13/23] fix(sites): Remediate false positive for Mydramalist --- sherlock_project/resources/data.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 4c84ac524..dd1c2f393 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1440,12 +1440,12 @@ "username_claimed": "blue" }, "Mydramalist": { - "errorMsg": "Sign in - MyDramaList", - "errorType": "message", - "url": "https://www.mydramalist.com/profile/{}", - "urlMain": "https://mydramalist.com", - "username_claimed": "elhadidy12398" - }, + "errorMsg": "The requested page was not found", + "errorType": "message", + "url": "https://www.mydramalist.com/profile/{}", + "urlMain": "https://mydramalist.com", + "username_claimed": "elhadidy12398" +}, "Myspace": { "errorType": "status_code", "url": "https://myspace.com/{}", From b245c462c92bf1655b3c871217f9683c1544554e Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Fri, 3 Oct 2025 05:56:52 +0530 Subject: [PATCH 14/23] fix(sites): Remediate false positive for Apple Discussions --- sherlock_project/resources/data.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 4c84ac524..cd081b002 100644 --- 
a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -115,7 +115,7 @@ "username_claimed": "lio24d" }, "Apple Discussions": { - "errorMsg": "The page you tried was not found. You may have used an outdated link or may have typed the address (URL) incorrectly.", + "errorMsg": "Looking for something in Apple Support Communities?", "errorType": "message", "url": "https://discussions.apple.com/profile/{}", "urlMain": "https://discussions.apple.com", From 0e7219b191d36b1ba06c16066c450377863ea571 Mon Sep 17 00:00:00 2001 From: dollaransh17 Date: Fri, 3 Oct 2025 13:41:43 +0530 Subject: [PATCH 15/23] Security Fix: Add timeout parameters to HTTP requests This fix addresses a critical security vulnerability where HTTP requests could hang indefinitely, potentially causing denial of service. Changes: - Added 10-second timeout to version check API call - Added 10-second timeout to GitHub pull request API call - Added 30-second timeout to data file downloads (larger timeout for data) - Added 10-second timeout to exclusions list download Impact: - Prevents infinite hangs that could freeze the application - Improves user experience with predictable response times - Fixes security issue flagged by Bandit static analysis (B113) - Makes the application more robust in poor network conditions The timeouts are conservative enough to work with slow connections while preventing indefinite blocking that could be exploited. --- sherlock_project/sherlock.py | 4 ++-- sherlock_project/sites.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index 250175a57..ba630c738 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -742,7 +742,7 @@ def main(): # Check for newer version of Sherlock. 
If it exists, let the user know about it try: - latest_release_raw = requests.get(forge_api_latest_release).text + latest_release_raw = requests.get(forge_api_latest_release, timeout=10).text latest_release_json = json_loads(latest_release_raw) latest_remote_tag = latest_release_json["tag_name"] @@ -802,7 +802,7 @@ def main(): if args.json_file.isnumeric(): pull_number = args.json_file pull_url = f"https://api.github.com/repos/sherlock-project/sherlock/pulls/{pull_number}" - pull_request_raw = requests.get(pull_url).text + pull_request_raw = requests.get(pull_url, timeout=10).text pull_request_json = json_loads(pull_request_raw) # Check if it's a valid pull request diff --git a/sherlock_project/sites.py b/sherlock_project/sites.py index 2ba811d77..b7aaf4c58 100644 --- a/sherlock_project/sites.py +++ b/sherlock_project/sites.py @@ -129,7 +129,7 @@ def __init__( if data_file_path.lower().startswith("http"): # Reference is to a URL. try: - response = requests.get(url=data_file_path) + response = requests.get(url=data_file_path, timeout=30) except Exception as error: raise FileNotFoundError( f"Problem while attempting to access data file URL '{data_file_path}': {error}" @@ -166,7 +166,7 @@ def __init__( if honor_exclusions: try: - response = requests.get(url=EXCLUSIONS_URL) + response = requests.get(url=EXCLUSIONS_URL, timeout=10) if response.status_code == 200: exclusions = response.text.splitlines() exclusions = [exclusion.strip() for exclusion in exclusions] From 57a0ccef38066b769061736bc165fb0d94a4a516 Mon Sep 17 00:00:00 2001 From: Abhyuday K Hegde <66260177+akh7177@users.noreply.github.com> Date: Sat, 4 Oct 2025 14:30:40 +0530 Subject: [PATCH 16/23] Remediate False Positive for Roblox --- sherlock_project/resources/data.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index b30ec9298..2d9651763 100644 --- a/sherlock_project/resources/data.json +++ 
b/sherlock_project/resources/data.json @@ -1823,8 +1823,7 @@ "username_claimed": "blue" }, "Roblox": { - "errorMsg": "Page cannot be found or no longer exists", - "errorType": "message", + "errorType": "status_code", "url": "https://www.roblox.com/user.aspx?username={}", "urlMain": "https://www.roblox.com/", "username_claimed": "bluewolfekiller" From 977ad5c1a48e93cce720941d6777e150099ac183 Mon Sep 17 00:00:00 2001 From: Abhyuday K Hegde <66260177+akh7177@users.noreply.github.com> Date: Sat, 4 Oct 2025 14:48:37 +0530 Subject: [PATCH 17/23] Remediate False Positive for SlideShare --- sherlock_project/resources/data.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index b30ec9298..7e9842734 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -1932,7 +1932,7 @@ }, "SlideShare": { "errorType": "message", - "errorMsg": "Username available", + "errorMsg": "Page no longer exists", "url": "https://slideshare.net/{}", "urlMain": "https://slideshare.net/", "username_claimed": "blue" From 5cd769c2f46e9615fdc3d6e43341e3f868256597 Mon Sep 17 00:00:00 2001 From: Abhyuday K Hegde <66260177+akh7177@users.noreply.github.com> Date: Sat, 4 Oct 2025 15:12:20 +0530 Subject: [PATCH 18/23] Remediate False Positives for CyberDefenders --- sherlock_project/resources/data.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index b30ec9298..4787ffeb6 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -572,8 +572,7 @@ "username_claimed": "brown" }, "CyberDefenders": { - "errorMsg": "Blue Team Training for SOC analysts and DFIR - CyberDefenders", - "errorType": "message", + "errorType": "status_code", "regexCheck": "^[^\\/:*?\"<>|@]{3,50}$", "request_method": "GET", "url": "https://cyberdefenders.org/p/{}", From 
dc869852bc5674f158db79bb2b4a3ad42b879f0e Mon Sep 17 00:00:00 2001 From: dollaransh17 Date: Sat, 4 Oct 2025 17:22:50 +0530 Subject: [PATCH 19/23] fix(sites): Fix Threads false positive detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Threads was showing false positives for non-existent users because the error message detection was incorrect. Updated errorMsg: - Old: "Threads" (generic, matches valid pages too) - New: "Threads • Log in" (specific to non-existent users) When a user doesn't exist, Threads redirects to a login page with the title "Threads • Log in". Valid user profiles have titles like "Username (@username) • Threads, Say more". Tested with: - Invalid user (impossibleuser12345): Correctly not found - Valid user (zuck): Correctly found This fixes the false positive issue where non-existent Threads profiles were being reported as found. --- sherlock_project/resources/data.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index b30ec9298..1f6b3d9e2 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -2820,7 +2820,7 @@ "username_claimed": "green" }, "threads": { - "errorMsg": "Threads", + "errorMsg": "Threads • Log in", "errorType": "message", "headers": { "Sec-Fetch-Mode": "navigate" From b99719ce6014312445614d856df95dbae37b5991 Mon Sep 17 00:00:00 2001 From: obiwan04kanobi Date: Sun, 5 Oct 2025 00:22:12 +0530 Subject: [PATCH 20/23] Add Docker build test to CI workflow - Adds docker-build-test job to regression.yml - Runs on push/merge to master and release branches - Extracts VERSION_TAG from pyproject.toml for build - Tests that Docker image builds and runs successfully - Resolves dockerfile syntax warnings - Resolves #2196 --- .github/workflows/regression.yml | 27 +++++++++++++++++++++++++-- Dockerfile | 2 +- 2 files changed, 26 insertions(+), 3 deletions(-) diff
--git a/.github/workflows/regression.yml b/.github/workflows/regression.yml index e366f29d7..5029b8704 100644 --- a/.github/workflows/regression.yml +++ b/.github/workflows/regression.yml @@ -11,6 +11,7 @@ on: - '**/*.py' - '**/*.ini' - '**/*.toml' + - 'Dockerfile' push: branches: - master @@ -21,11 +22,13 @@ on: - '**/*.py' - '**/*.ini' - '**/*.toml' + - 'Dockerfile' jobs: tox-lint: - # Linting is ran through tox to ensure that the same linter is used by local runners runs-on: ubuntu-latest + # Linting is run through tox to ensure that the same linter + # is used by local runners steps: - uses: actions/checkout@v4 - name: Set up linting environment @@ -41,7 +44,8 @@ jobs: tox-matrix: runs-on: ${{ matrix.os }} strategy: - fail-fast: false # We want to know what specicic versions it fails on + # We want to know what specific versions it fails on + fail-fast: false matrix: os: [ ubuntu-latest, @@ -67,3 +71,22 @@ jobs: pip install tox-gh-actions - name: Run tox run: tox + docker-build-test: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Get version from pyproject.toml + id: get-version + run: | + VERSION=$(grep -m1 'version = ' pyproject.toml | cut -d'"' -f2) + echo "version=$VERSION" >> $GITHUB_OUTPUT + - name: Build Docker image + run: | + docker build \ + --build-arg VERSION_TAG=${{ steps.get-version.outputs.version }} \ + -t sherlock-test:latest . + - name: Test Docker image runs + run: docker run --rm sherlock-test:latest --version diff --git a/Dockerfile b/Dockerfile index 361530abc..ccdfbf230 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ # 3. Build image with BOTH latest and version tags # i.e.
`docker build -t sherlock/sherlock:0.16.0 -t sherlock/sherlock:latest .` -FROM python:3.12-slim-bullseye as build +FROM python:3.12-slim-bullseye AS build WORKDIR /sherlock RUN pip3 install --no-cache-dir --upgrade pip From 0794e02b525a2bf5c9222c3da51a714f96b42d64 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 4 Oct 2025 16:53:30 -0400 Subject: [PATCH 21/23] feat: support multiple errorTypes --- sherlock_project/resources/data.schema.json | 217 +++++++++++++------- sherlock_project/sherlock.py | 107 +++++----- 2 files changed, 200 insertions(+), 124 deletions(-) diff --git a/sherlock_project/resources/data.schema.json b/sherlock_project/resources/data.schema.json index 216ffb62c..c717cb256 100644 --- a/sherlock_project/resources/data.schema.json +++ b/sherlock_project/resources/data.schema.json @@ -1,80 +1,149 @@ { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "title": "Sherlock Target Manifest", - "description": "Social media targets to probe for the existence of known usernames", - "type": "object", - "properties": { - "$schema": { "type": "string" } - }, - "patternProperties": { - "^(?!\\$).*?$": { - "type": "object", - "description": "Target name and associated information (key should be human readable name)", - "required": [ "url", "urlMain", "errorType", "username_claimed" ], - "properties": { - "url": { "type": "string" }, - "urlMain": { "type": "string" }, - "urlProbe": { "type": "string" }, - "username_claimed": { "type": "string" }, - "regexCheck": { "type": "string" }, - "isNSFW": { "type": "boolean" }, - "headers": { "type": "object" }, - "request_payload": { "type": "object" }, - "__comment__": { - "type": "string", - "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock." 
- }, - "tags": { - "oneOf": [ - { "$ref": "#/$defs/tag" }, - { "type": "array", "items": { "$ref": "#/$defs/tag" } } - ] - }, - "request_method": { - "type": "string", - "enum": [ "GET", "POST", "HEAD", "PUT" ] - }, - "errorType": { - "type": "string", - "enum": [ "message", "response_url", "status_code" ] - }, - "errorMsg": { - "oneOf": [ - { "type": "string" }, - { "type": "array", "items": { "type": "string" } } - ] - }, - "errorCode": { - "oneOf": [ - { "type": "integer" }, - { "type": "array", "items": { "type": "integer" } } - ] - }, - "errorUrl": { "type": "string" }, - "response_url": { "type": "string" } + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Sherlock Target Manifest", + "description": "Social media targets to probe for the existence of known usernames", + "type": "object", + "properties": { + "$schema": { "type": "string" } + }, + "patternProperties": { + "^(?!\\$).*?$": { + "type": "object", + "description": "Target name and associated information (key should be human readable name)", + "required": ["url", "urlMain", "errorType", "username_claimed"], + "properties": { + "url": { "type": "string" }, + "urlMain": { "type": "string" }, + "urlProbe": { "type": "string" }, + "username_claimed": { "type": "string" }, + "regexCheck": { "type": "string" }, + "isNSFW": { "type": "boolean" }, + "headers": { "type": "object" }, + "request_payload": { "type": "object" }, + "__comment__": { + "type": "string", + "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock." 
+ }, + "tags": { + "oneOf": [ + { "$ref": "#/$defs/tag" }, + { "type": "array", "items": { "$ref": "#/$defs/tag" } } + ] + }, + "request_method": { + "type": "string", + "enum": ["GET", "POST", "HEAD", "PUT"] + }, + "errorType": { + "oneOf": [ + { + "type": "string", + "enum": ["message", "response_url", "status_code"] }, - "dependencies": { - "errorMsg": { - "properties" : { "errorType": { "const": "message" } } - }, - "errorUrl": { - "properties": { "errorType": { "const": "response_url" } } - }, - "errorCode": { - "properties": { "errorType": { "const": "status_code" } } + { + "type": "array", + "items": { + "type": "string", + "enum": ["message", "response_url", "status_code"] + } + } + ] + }, + "errorMsg": { + "oneOf": [ + { "type": "string" }, + { "type": "array", "items": { "type": "string" } } + ] + }, + "errorCode": { + "oneOf": [ + { "type": "integer" }, + { "type": "array", "items": { "type": "integer" } } + ] + }, + "errorUrl": { "type": "string" }, + "response_url": { "type": "string" } + }, + "dependencies": { + "errorMsg": { + "oneOf": [ + { "properties": { "errorType": { "const": "message" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "message" } } - }, - "if": { "properties": { "errorType": { "const": "message" } } }, - "then": { "required": [ "errorMsg" ] }, - "else": { - "if": { "properties": { "errorType": { "const": "response_url" } } }, - "then": { "required": [ "errorUrl" ] } - }, - "additionalProperties": false + } + } + ] + }, + "errorUrl": { + "oneOf": [ + { "properties": { "errorType": { "const": "response_url" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "response_url" } + } + } + } + ] + }, + "errorCode": { + "oneOf": [ + { "properties": { "errorType": { "const": "status_code" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "status_code" } + } + } + } + ] + } + }, + "allOf": [ + { + "if": { + "anyOf": [ + { 
"properties": { "errorType": { "const": "message" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "message" } + } + } + } + ] + }, + "then": { "required": ["errorMsg"] } + }, + { + "if": { + "anyOf": [ + { "properties": { "errorType": { "const": "response_url" } } }, + { + "properties": { + "errorType": { + "type": "array", + "contains": { "const": "response_url" } + } + } + } + ] + }, + "then": { "required": ["errorUrl"] } } - }, - "additionalProperties": false, - "$defs": { - "tag": { "type": "string", "enum": [ "adult", "gaming" ] } + ], + "additionalProperties": false } + }, + "additionalProperties": false, + "$defs": { + "tag": { "type": "string", "enum": ["adult", "gaming"] } + } } diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index 250175a57..a776d8c3b 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -381,6 +381,8 @@ def sherlock( # Get the expected error type error_type = net_info["errorType"] + if isinstance(error_type, str): + error_type: list[str] = [error_type] # Retrieve future and ensure it has finished future = net_info["request_future"] @@ -425,58 +427,63 @@ def sherlock( elif any(hitMsg in r.text for hitMsg in WAFHitMsgs): query_status = QueryStatus.WAF - elif error_type == "message": - # error_flag True denotes no error found in the HTML - # error_flag False denotes error found in the HTML - error_flag = True - errors = net_info.get("errorMsg") - # errors will hold the error message - # it can be string or list - # by isinstance method we can detect that - # and handle the case for strings as normal procedure - # and if its list we can iterate the errors - if isinstance(errors, str): - # Checks if the error message is in the HTML - # if error is present we will set flag to False - if errors in r.text: - error_flag = False - else: - # If it's list, it will iterate all the error message - for error in errors: - if error in r.text: + else: + if 
any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type): + # It should be impossible to ever get here... + raise ValueError( + f"Unknown Error Type '{error_type}' for " + f"site '{social_network}'" + ) + + if "message" in error_type: + # error_flag True denotes no error found in the HTML + # error_flag False denotes error found in the HTML + error_flag = True + errors = net_info.get("errorMsg") + # errors will hold the error message + # it can be string or list + # by isinstance method we can detect that + # and handle the case for strings as normal procedure + # and if its list we can iterate the errors + if isinstance(errors, str): + # Checks if the error message is in the HTML + # if error is present we will set flag to False + if errors in r.text: error_flag = False - break - if error_flag: - query_status = QueryStatus.CLAIMED - else: - query_status = QueryStatus.AVAILABLE - elif error_type == "status_code": - error_codes = net_info.get("errorCode") - query_status = QueryStatus.CLAIMED - - # Type consistency, allowing for both singlets and lists in manifest - if isinstance(error_codes, int): - error_codes = [error_codes] - - if error_codes is not None and r.status_code in error_codes: - query_status = QueryStatus.AVAILABLE - elif r.status_code >= 300 or r.status_code < 200: - query_status = QueryStatus.AVAILABLE - elif error_type == "response_url": - # For this detection method, we have turned off the redirect. - # So, there is no need to check the response URL: it will always - # match the request. Instead, we will ensure that the response - # code indicates that the request was successful (i.e. no 404, or - # forward to some odd redirect). 
- if 200 <= r.status_code < 300: + else: + # If it's list, it will iterate all the error message + for error in errors: + if error in r.text: + error_flag = False + break + if error_flag: + query_status = QueryStatus.CLAIMED + else: + query_status = QueryStatus.AVAILABLE + + if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE: + error_codes = net_info.get("errorCode") query_status = QueryStatus.CLAIMED - else: - query_status = QueryStatus.AVAILABLE - else: - # It should be impossible to ever get here... - raise ValueError( - f"Unknown Error Type '{error_type}' for " f"site '{social_network}'" - ) + + # Type consistency, allowing for both singlets and lists in manifest + if isinstance(error_codes, int): + error_codes = [error_codes] + + if error_codes is not None and r.status_code in error_codes: + query_status = QueryStatus.AVAILABLE + elif r.status_code >= 300 or r.status_code < 200: + query_status = QueryStatus.AVAILABLE + + if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE: + # For this detection method, we have turned off the redirect. + # So, there is no need to check the response URL: it will always + # match the request. Instead, we will ensure that the response + # code indicates that the request was successful (i.e. no 404, or + # forward to some odd redirect). 
+ if 200 <= r.status_code < 300: + query_status = QueryStatus.CLAIMED + else: + query_status = QueryStatus.AVAILABLE if dump_response: print("+++++++++++++++++++++") From 52cd5fdfc136340b2c88ffe8c1dc953ff8b51cc5 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 4 Oct 2025 20:22:34 -0400 Subject: [PATCH 22/23] feat: gracefully skip sites with invalid errorType --- sherlock_project/sherlock.py | 103 +++++++++++++++++------------------ 1 file changed, 50 insertions(+), 53 deletions(-) diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index dcfbda045..d349c12bc 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -429,61 +429,58 @@ def sherlock( else: if any(errtype not in ["message", "status_code", "response_url"] for errtype in error_type): - # It should be impossible to ever get here... - raise ValueError( - f"Unknown Error Type '{error_type}' for " - f"site '{social_network}'" - ) - - if "message" in error_type: - # error_flag True denotes no error found in the HTML - # error_flag False denotes error found in the HTML - error_flag = True - errors = net_info.get("errorMsg") - # errors will hold the error message - # it can be string or list - # by isinstance method we can detect that - # and handle the case for strings as normal procedure - # and if its list we can iterate the errors - if isinstance(errors, str): - # Checks if the error message is in the HTML - # if error is present we will set flag to False - if errors in r.text: - error_flag = False - else: - # If it's list, it will iterate all the error message - for error in errors: - if error in r.text: + error_context = f"Unknown error type '{error_type}' for {social_network}" + query_status = QueryStatus.UNKNOWN + else: + if "message" in error_type: + # error_flag True denotes no error found in the HTML + # error_flag False denotes error found in the HTML + error_flag = True + errors = net_info.get("errorMsg") + # errors will hold the error message + # 
it can be string or list + # by isinstance method we can detect that + # and handle the case for strings as normal procedure + # and if its list we can iterate the errors + if isinstance(errors, str): + # Checks if the error message is in the HTML + # if error is present we will set flag to False + if errors in r.text: error_flag = False - break - if error_flag: + else: + # If it's list, it will iterate all the error message + for error in errors: + if error in r.text: + error_flag = False + break + if error_flag: + query_status = QueryStatus.CLAIMED + else: + query_status = QueryStatus.AVAILABLE + + if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE: + error_codes = net_info.get("errorCode") query_status = QueryStatus.CLAIMED - else: - query_status = QueryStatus.AVAILABLE - - if "status_code" in error_type and query_status is not QueryStatus.AVAILABLE: - error_codes = net_info.get("errorCode") - query_status = QueryStatus.CLAIMED - - # Type consistency, allowing for both singlets and lists in manifest - if isinstance(error_codes, int): - error_codes = [error_codes] - - if error_codes is not None and r.status_code in error_codes: - query_status = QueryStatus.AVAILABLE - elif r.status_code >= 300 or r.status_code < 200: - query_status = QueryStatus.AVAILABLE - - if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE: - # For this detection method, we have turned off the redirect. - # So, there is no need to check the response URL: it will always - # match the request. Instead, we will ensure that the response - # code indicates that the request was successful (i.e. no 404, or - # forward to some odd redirect). 
- if 200 <= r.status_code < 300: - query_status = QueryStatus.CLAIMED - else: - query_status = QueryStatus.AVAILABLE + + # Type consistency, allowing for both singlets and lists in manifest + if isinstance(error_codes, int): + error_codes = [error_codes] + + if error_codes is not None and r.status_code in error_codes: + query_status = QueryStatus.AVAILABLE + elif r.status_code >= 300 or r.status_code < 200: + query_status = QueryStatus.AVAILABLE + + if "response_url" in error_type and query_status is not QueryStatus.AVAILABLE: + # For this detection method, we have turned off the redirect. + # So, there is no need to check the response URL: it will always + # match the request. Instead, we will ensure that the response + # code indicates that the request was successful (i.e. no 404, or + # forward to some odd redirect). + if 200 <= r.status_code < 300: + query_status = QueryStatus.CLAIMED + else: + query_status = QueryStatus.AVAILABLE if dump_response: print("+++++++++++++++++++++") From 4246a7b16fb399967d766aac9d677c7d48b60aa5 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 4 Oct 2025 20:32:16 -0400 Subject: [PATCH 23/23] chore: make default --no-txt Workflows where a txt file is still required should use --txt --- sherlock_project/sherlock.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index d349c12bc..07b19af7d 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -723,12 +723,22 @@ def main(): help="Include checking of NSFW sites from default list.", ) + # TODO deprecated in favor of --txt, retained for workflow compatibility, to be removed + # in future release parser.add_argument( "--no-txt", action="store_true", dest="no_txt", default=False, - help="Disable creation of a txt file", + help="Disable creation of a txt file - WILL BE DEPRECATED", + ) + + parser.add_argument( + "--txt", + action="store_true", + dest="output_txt", + 
default=False, + help="Enable creation of a txt file", ) parser.add_argument( @@ -892,7 +902,7 @@ def main(): else: result_file = f"{username}.txt" - if not args.no_txt: + if args.output_txt: with open(result_file, "w", encoding="utf-8") as file: exists_counter = 0 for website_name in results: