From 4fe41f09ff9868ee77080a6640da93fb89f2fae9 Mon Sep 17 00:00:00 2001 From: Ethan Zhang Date: Thu, 2 Oct 2025 12:42:47 +1000 Subject: [PATCH 01/11] Removed duplicate Bluesky entry in data.json --- sherlock_project/resources/data.json | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 4c84ac524..74ac5698d 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -2840,13 +2840,6 @@ "urlMain": "https://znanylekarz.pl", "username_claimed": "janusz-nowak" }, - "Bluesky": { - "errorType": "status_code", - "url": "https://bsky.app/profile/{}.bsky.social", - "urlProbe": "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor={}.bsky.social", - "urlMain": "https://bsky.app/", - "username_claimed": "mcuban" - }, "Platzi": { "errorType": "status_code", "errorCode": 404, From 876e58b159e71d13a5b63524c08011108d3473a8 Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Fri, 3 Oct 2025 05:45:43 +0530 Subject: [PATCH 02/11] fix(sites): Remediate false positive for Blitz Tactics --- sherlock_project/resources/data.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 4c84ac524..19395b4dd 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -259,7 +259,8 @@ "username_claimed": "blue" }, "Blitz Tactics": { - "errorType": "status_code", + "errorMsg": "That page doesn't exist", + "errorType": "message", "url": "https://blitztactics.com/{}", "urlMain": "https://blitztactics.com/", "username_claimed": "Lance5500" @@ -279,7 +280,7 @@ "username_claimed": "mcuban" }, "BoardGameGeek": { - "errorType": "message", + "errorType": "status_code", "regexCheck": "^[a-zA-Z0-9_]*$", "errorMsg": "User not found", "url": "https://boardgamegeek.com/user/{}", From 91f3b16993f2f1dc70d3750d84249ebff8d24038 Mon Sep 17 00:00:00 
2001 From: dollaransh17 Date: Sat, 4 Oct 2025 02:55:57 +0530 Subject: [PATCH 03/11] fix(sites): Update BoardGameGeek URL structure and detection method BoardGameGeek changed from /user/{} to /profile/{} URL structure. Also updated from message to status_code detection as the site no longer returns clear error messages for non-existent users. --- sherlock_project/resources/data.json | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index b30ec9298..3f7f5ac30 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -279,10 +279,9 @@ "username_claimed": "mcuban" }, "BoardGameGeek": { - "errorType": "message", + "errorType": "status_code", "regexCheck": "^[a-zA-Z0-9_]*$", - "errorMsg": "User not found", - "url": "https://boardgamegeek.com/user/{}", + "url": "https://boardgamegeek.com/profile/{}", "urlMain": "https://boardgamegeek.com", "username_claimed": "blue" }, From 3e653c46b07c858811619517b28a17742cb4847a Mon Sep 17 00:00:00 2001 From: dollaransh17 Date: Sat, 4 Oct 2025 03:12:47 +0530 Subject: [PATCH 04/11] fix(sites): Remove BoardGameGeek - unreliable detection BoardGameGeek returns identical pages for both existing and non-existing users, making reliable username detection impossible with HTTP-based methods. The site likely uses JavaScript to load user-specific content dynamically. 
--- sherlock_project/resources/data.json | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 3f7f5ac30..891b6245a 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -278,13 +278,6 @@ "urlMain": "https://bsky.app/", "username_claimed": "mcuban" }, - "BoardGameGeek": { - "errorType": "status_code", - "regexCheck": "^[a-zA-Z0-9_]*$", - "url": "https://boardgamegeek.com/profile/{}", - "urlMain": "https://boardgamegeek.com", - "username_claimed": "blue" - }, "BongaCams": { "errorType": "status_code", "isNSFW": true, From c5e209d78e203f931a9e3bc6e51d6b49fdd33d3c Mon Sep 17 00:00:00 2001 From: dollaransh17 Date: Sat, 4 Oct 2025 11:23:55 +0530 Subject: [PATCH 05/11] fix(sites): Implement BoardGameGeek API detection as suggested Using the API endpoint suggested by akh7177: https://api.geekdo.com/api/users?username={} However, there's an edge case: the JSON response for a valid user contains empty arrays (adminBadges[], userMicrobadges[], supportYears[]), which causes Sherlock's substring matching to incorrectly flag such users as 'not found' when looking for the '[]' error pattern. The API correctly returns: - Valid user: JSON object with user data (but contains [] substrings) - Invalid user: Exactly '[]' (2 characters total) This needs further refinement to distinguish between the exact '[]' response and JSON containing '[]' substrings. 
--- sherlock_project/resources/data.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 891b6245a..09168d173 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -278,6 +278,15 @@ "urlMain": "https://bsky.app/", "username_claimed": "mcuban" }, + "BoardGameGeek": { + "errorMsg": "[]", + "errorType": "message", + "regexCheck": "^[a-zA-Z0-9_]*$", + "url": "https://boardgamegeek.com/profile/{}", + "urlMain": "https://boardgamegeek.com", + "urlProbe": "https://api.geekdo.com/api/users?username={}", + "username_claimed": "blue" + }, "BongaCams": { "errorType": "status_code", "isNSFW": true, From 94c013886a677df9b7e1192267d548b4520f2958 Mon Sep 17 00:00:00 2001 From: dollaransh17 Date: Sat, 4 Oct 2025 11:33:27 +0530 Subject: [PATCH 06/11] fix(sites): Remove BoardGameGeek due to incompatible detection BoardGameGeek cannot be reliably detected with Sherlock's current capabilities: - Original HTML detection: Returns false positives - API endpoint approach: The API returns status 200 for both valid and invalid users - Invalid user: Returns exactly '[]' - Valid user: Returns JSON containing '[]' substrings (e.g., "adminBadges":[]) Since Sherlock's 'message' errorType uses substring matching, it incorrectly identifies valid users as "not found" when checking for '[]' in the response. The site's API response format is fundamentally incompatible with Sherlock's detection methods (message/status_code/response_url), so removal is the only viable solution to prevent false positives and false negatives. Addresses false positive issue originally reported in testing. 
--- sherlock_project/resources/data.json | 9 --------- 1 file changed, 9 deletions(-) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 09168d173..891b6245a 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -278,15 +278,6 @@ "urlMain": "https://bsky.app/", "username_claimed": "mcuban" }, - "BoardGameGeek": { - "errorMsg": "[]", - "errorType": "message", - "regexCheck": "^[a-zA-Z0-9_]*$", - "url": "https://boardgamegeek.com/profile/{}", - "urlMain": "https://boardgamegeek.com", - "urlProbe": "https://api.geekdo.com/api/users?username={}", - "username_claimed": "blue" - }, "BongaCams": { "errorType": "status_code", "isNSFW": true, From 3079e7a218dcb1e25373e0ca73b43d8782ee5906 Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Sat, 4 Oct 2025 15:25:30 +0530 Subject: [PATCH 07/11] fix(ci): Use merge-base for correct target validation --- .../workflows/validate_modified_targets.yml | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/.github/workflows/validate_modified_targets.yml b/.github/workflows/validate_modified_targets.yml index de0240907..3e5fde311 100644 --- a/.github/workflows/validate_modified_targets.yml +++ b/.github/workflows/validate_modified_targets.yml @@ -14,33 +14,43 @@ jobs: contents: read pull-requests: write steps: - - name: Checkout repository + - name: Checkout PR branch uses: actions/checkout@v5 with: - ref: ${{ github.base_ref }} - fetch-depth: 1 + # Check out the actual PR code, not the base branch + ref: ${{ github.event.pull_request.head.sha }} + # Fetch all history so we can find the common ancestor (merge-base) + fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v6 with: - python-version: '3.13' + python-version: "3.13" - name: Install Poetry uses: abatilo/actions-poetry@v4 with: - poetry-version: 'latest' + poetry-version: "latest" - name: Install dependencies run: | poetry install --no-interaction 
--with dev - - name: Drop in place updated manifest from base + - name: Prepare JSON versions for comparison run: | - cp sherlock_project/resources/data.json data.json.base - git fetch origin pull/${{ github.event.pull_request.number }}/head:pr --depth=1 - git show pr:sherlock_project/resources/data.json > sherlock_project/resources/data.json + # Fetch the target branch to ensure we can compare against it + git fetch origin ${{ github.base_ref }} + + # Find the exact commit where this branch split from the target branch + MERGE_BASE=$(git merge-base origin/${{ github.base_ref }} HEAD) + echo "Comparing HEAD against merge-base commit: $MERGE_BASE" + + # Copy the version of the file from the current PR branch (HEAD) cp sherlock_project/resources/data.json data.json.head + # Extract the version of the file from the merge-base commit + git show $MERGE_BASE:sherlock_project/resources/data.json > data.json.base + - name: Discover modified targets id: discover-modified run: | From 4d00884d8c9689bce722ef64fb5e0a5bb4238f8c Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Sun, 5 Oct 2025 03:00:21 +0530 Subject: [PATCH 08/11] fix(ci): Implement secure diff logic per feedback --- .../workflows/validate_modified_targets.yml | 40 ++++++++++++------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/.github/workflows/validate_modified_targets.yml b/.github/workflows/validate_modified_targets.yml index 3e5fde311..4738ae2b1 100644 --- a/.github/workflows/validate_modified_targets.yml +++ b/.github/workflows/validate_modified_targets.yml @@ -14,13 +14,11 @@ jobs: contents: read pull-requests: write steps: - - name: Checkout PR branch + - name: Checkout repository uses: actions/checkout@v5 with: - # Check out the actual PR code, not the base branch - ref: ${{ github.event.pull_request.head.sha }} - # Fetch all history so we can find the common ancestor (merge-base) - fetch-depth: 0 + # This is the original, secure checkout of the base branch. 
+ ref: ${{ github.base_ref }} - name: Set up Python uses: actions/setup-python@v6 @@ -38,17 +36,21 @@ jobs: - name: Prepare JSON versions for comparison run: | - # Fetch the target branch to ensure we can compare against it - git fetch origin ${{ github.base_ref }} + # Fetch the PR's branch head and give it a local name 'pr' + git fetch origin pull/${{ github.event.pull_request.number }}/head:pr - # Find the exact commit where this branch split from the target branch - MERGE_BASE=$(git merge-base origin/${{ github.base_ref }} HEAD) - echo "Comparing HEAD against merge-base commit: $MERGE_BASE" + # The initial checkout may be shallow. To find a merge-base, + # we need more history. We can 'unshallow' the repository if needed. + git fetch --unshallow || true - # Copy the version of the file from the current PR branch (HEAD) - cp sherlock_project/resources/data.json data.json.head + # Find the merge-base commit between the target branch (master) and the PR branch (pr) + MERGE_BASE=$(git merge-base origin/${{ github.base_ref }} pr) + echo "Comparing PR head against merge-base commit: $MERGE_BASE" - # Extract the version of the file from the merge-base commit + # Safely extract the version of the file from the PR's head without checking it out + git show pr:sherlock_project/resources/data.json > data.json.head + + # Safely extract the version of the file from the merge-base commit git show $MERGE_BASE:sherlock_project/resources/data.json > data.json.base - name: Discover modified targets @@ -57,8 +59,16 @@ jobs: CHANGED=$( python - <<'EOF' import json - with open("data.json.base") as f: base = json.load(f) - with open("data.json.head") as f: head = json.load(f) + import sys + try: + with open("data.json.base") as f: base = json.load(f) + with open("data.json.head") as f: head = json.load(f) + except FileNotFoundError as e: + print(f"Error: Could not find {e.filename}", file=sys.stderr) + sys.exit(1) + except json.JSONDecodeError as e: + print(f"Error: Could not decode 
JSON from a file - {e}", file=sys.stderr) + sys.exit(1) changed = [] for k, v in head.items(): From 70e3c0ddd8fd162d162bdace19c296da96be861b Mon Sep 17 00:00:00 2001 From: shreyasNaik0101 Date: Sun, 5 Oct 2025 11:00:14 +0530 Subject: [PATCH 09/11] fix(ci): Address review feedback for correctness and efficiency --- .../workflows/validate_modified_targets.yml | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/validate_modified_targets.yml b/.github/workflows/validate_modified_targets.yml index 4738ae2b1..bb2445117 100644 --- a/.github/workflows/validate_modified_targets.yml +++ b/.github/workflows/validate_modified_targets.yml @@ -17,8 +17,9 @@ jobs: - name: Checkout repository uses: actions/checkout@v5 with: - # This is the original, secure checkout of the base branch. + # Checkout the base branch but fetch all history to avoid a second fetch call ref: ${{ github.base_ref }} + fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v6 @@ -36,23 +37,21 @@ jobs: - name: Prepare JSON versions for comparison run: | - # Fetch the PR's branch head and give it a local name 'pr' + # Fetch only the PR's branch head (single network call in this step) git fetch origin pull/${{ github.event.pull_request.number }}/head:pr - # The initial checkout may be shallow. To find a merge-base, - # we need more history. We can 'unshallow' the repository if needed. 
- git fetch --unshallow || true - - # Find the merge-base commit between the target branch (master) and the PR branch (pr) + # Find the merge-base commit between the target branch and the PR branch MERGE_BASE=$(git merge-base origin/${{ github.base_ref }} pr) echo "Comparing PR head against merge-base commit: $MERGE_BASE" - # Safely extract the version of the file from the PR's head without checking it out + # Safely extract the file from the PR's head and the merge-base commit git show pr:sherlock_project/resources/data.json > data.json.head - - # Safely extract the version of the file from the merge-base commit git show $MERGE_BASE:sherlock_project/resources/data.json > data.json.base + # CRITICAL FIX: Overwrite the checked-out data.json with the one from the PR + # This ensures that pytest runs against the new, updated file. + cp data.json.head sherlock_project/resources/data.json + - name: Discover modified targets id: discover-modified run: | @@ -83,6 +82,8 @@ jobs: echo -e ">>> Changed targets: \n$(echo $CHANGED | tr ',' '\n')" echo "changed_targets=$CHANGED" >> "$GITHUB_OUTPUT" + # --- The rest of the steps below are unchanged --- + - name: Validate modified targets if: steps.discover-modified.outputs.changed_targets != '' continue-on-error: true From 9e3448d9923fecec7504ef67cc5d0f0892494dcb Mon Sep 17 00:00:00 2001 From: dollaransh17 Date: Sun, 5 Oct 2025 11:59:41 +0530 Subject: [PATCH 10/11] fix(sites): Implement BoardGameGeek using username validation API - Added BoardGameGeek back using the new API endpoint suggested by @ppfeister - Uses https://api.geekdo.com/api/accounts/validate/username?username={} for detection - errorMsg matches '"isValid":true', which the API returns when the name is still free to register, i.e. the account does not exist - This approach avoids the previous issues with: * HTML parsing returning false positives * User API returning JSON with '[]' substrings that caused detection problems - Successfully tested with both an existing username (blue) and non-existent usernames Thanks @ppfeister for the API 
suggestion and @akh7177 for the initial guidance --- sherlock_project/resources/data.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 891b6245a..6c09c39c6 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -291,6 +291,14 @@ "urlMain": "https://www.bookcrossing.com/", "username_claimed": "blue" }, + "BoardGameGeek": { + "errorMsg": "\"isValid\":true", + "errorType": "message", + "url": "https://boardgamegeek.com/user/{}", + "urlMain": "https://boardgamegeek.com/", + "urlProbe": "https://api.geekdo.com/api/accounts/validate/username?username={}", + "username_claimed": "blue" + }, "BraveCommunity": { "errorType": "status_code", "url": "https://community.brave.com/u/{}/", From f0510a169ac3960171841d240dc52de7fe406b02 Mon Sep 17 00:00:00 2001 From: Abhyuday K Hegde <66260177+akh7177@users.noreply.github.com> Date: Sun, 5 Oct 2025 15:52:56 +0530 Subject: [PATCH 11/11] Add support for WakaTime --- sherlock_project/resources/data.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherlock_project/resources/data.json b/sherlock_project/resources/data.json index 7837e5f6c..9c1602239 100644 --- a/sherlock_project/resources/data.json +++ b/sherlock_project/resources/data.json @@ -2320,6 +2320,12 @@ "urlMain": "https://discourse.wicg.io/", "username_claimed": "stefano" }, + "Wakatime": { + "errorType": "status_code", + "url": "https://wakatime.com/@{}", + "urlMain": "https://wakatime.com/", + "username_claimed": "blue" + }, "Warrior Forum": { "errorType": "status_code", "url": "https://www.warriorforum.com/members/{}.html",