CircleCI failed test summary (huggingface#42240)

ArthurZucker · ydshieh · leaderofARS · commit ada5cae47e3b · 2025-12-09T11:53:24.000+05:30
* clean

* final run

* final run

* final

* final

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
diff --git a/.github/workflows/circleci-failure-summary-comment.yml b/.github/workflows/circleci-failure-summary-comment.yml
@@ -0,0 +1,206 @@
+name: CircleCI Failure Summary Comment
+# Triggered when a pull request is opened, updated (synchronize), or reopened.
+on:
+  pull_request_target:
+    types: [opened, synchronize, reopened]
+
+jobs:
+  comment:
+    runs-on: ubuntu-22.04
+    permissions:
+      pull-requests: write  # the only permission requested; the last step posts a PR comment
+    env:
+      TARGET_BRANCH: ${{ github.event.pull_request.head.ref }}  # NOTE(review): not referenced by any step in this workflow
+      TARGET_SHA: ${{ github.event.pull_request.head.sha }}  # NOTE(review): not referenced by any step in this workflow
+      PR_NUMBER: ${{ github.event.pull_request.number }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4  # no `ref` is passed — NOTE(review): presumably checks out the base branch under pull_request_target; confirm
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+      # huggingface_hub is imported by the inline upload script; NOTE(review): no step here imports requests directly
+      - name: Install dependencies
+        run: python -m pip install requests huggingface_hub
+
+      - name: Wait for CircleCI check suite completion
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
+          GITHUB_REPOSITORY: ${{ github.repository }}
+        run: |
+          # Exit on error, undefined variables, or pipe failures
+          set -euo pipefail
+          
+          echo "Waiting for CircleCI check suite to complete..."
+          # Give up after 30 minutes (1800 seconds); SECONDS is bash's elapsed-time counter
+          end=$((SECONDS + 1800))
+          
+          while [ $SECONDS -lt $end ]; do
+            # Query GitHub API for check suites associated with this commit
+            # || echo "" allows retry on transient API failures instead of exiting
+            suite_json=$(gh api "repos/${GITHUB_REPOSITORY}/commits/${COMMIT_SHA}/check-suites" \
+              --jq '.check_suites[] | select(.app.slug == "circleci-checks")' || echo "")
+            
+            if [ -z "$suite_json" ]; then
+              echo "CircleCI check suite not found yet, retrying..."
+            else
+              status=$(echo "$suite_json" | jq -r '.status')
+              conclusion=$(echo "$suite_json" | jq -r '.conclusion // empty')
+              echo "CircleCI status: $status, conclusion: $conclusion"
+              
+              # Done when status is "completed" AND a conclusion is set; any conclusion (including failure) ends the wait
+              if [ "$status" = "completed" ] && [ -n "$conclusion" ]; then
+                echo "Check suite completed with conclusion: $conclusion"
+                exit 0
+              fi
+            fi
+            
+            # Poll every 20 seconds
+            sleep 20
+          done
+    
+          echo "ERROR: Timed out waiting for CircleCI check suite"
+          exit 1
+
+      - name: Get CircleCI run's failure summary artifact
+        id: circleci
+        env:
+          COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
+          REPO: ${{ github.repository }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Step 1: Get CircleCI check suite ID
+          echo "Getting check suites for commit ${COMMIT_SHA}..."
+          check_suites=$(curl -fsS -H "Authorization: token ${GITHUB_TOKEN}" \
+            "https://api.github.com/repos/${REPO}/commits/${COMMIT_SHA}/check-suites")
+          
+          circleci_suite_id=$(echo "$check_suites" | jq -r '.check_suites[] | select(.app.slug == "circleci-checks") | .id' | head -n 1)
+          echo "CircleCI check suite ID: ${circleci_suite_id}"
+          
+          # Step 2: Get check runs from the CircleCI suite
+          echo "Getting check runs for suite ${circleci_suite_id}..."
+          check_runs=$(curl -fsS -H "Authorization: token ${GITHUB_TOKEN}" \
+            "https://api.github.com/repos/${REPO}/check-suites/${circleci_suite_id}/check-runs")
+          
+          # Step 3: Extract workflow ID from the "run_tests" check run
+          workflow_id=$(echo "$check_runs" | jq -r '.check_runs[] | select(.name == "run_tests") | .details_url' | grep -oP 'workflows/\K[a-f0-9-]+')
+          echo "CircleCI Workflow ID: ${workflow_id}"
+          
+          # Step 4: Get all jobs in the workflow
+          echo "Getting jobs for workflow ${workflow_id}..."
+          jobs=$(curl -fsS \
+            "https://circleci.com/api/v2/workflow/${workflow_id}/job")
+          
+          # Step 5: Extract collection_job details
+          collection_job_number=$(echo "$jobs" | jq -r '.items[] | select(.name == "collection_job") | .job_number')
+          collection_job_id=$(echo "$jobs" | jq -r '.items[] | select(.name == "collection_job") | .id')
+          echo "CircleCI Collection job number: ${collection_job_number}"
+          echo "CircleCI Collection job ID: ${collection_job_id}"
+          
+          # Step 6: Get artifacts list
+          echo "Getting artifacts for job ${collection_job_number}..."
+          artifacts=$(curl -fsS \
+            "https://circleci.com/api/v2/project/gh/${REPO}/${collection_job_number}/artifacts")
+          
+          echo "$artifacts" | jq '.'
+          
+          # Step 7: Download failure_summary.json specifically
+          failure_summary_url=$(echo "$artifacts" | jq -r '.items[] | select(.path == "outputs/failure_summary.json") | .url')
+          
+          if [ -z "$failure_summary_url" ]; then
+            echo "failure_summary.json not found in artifacts"
+            exit 1
+          fi
+          
+          echo "Downloading failure_summary.json from: ${failure_summary_url}"
+          mkdir -p outputs
+          # curl -f (used throughout this step) fails on HTTP errors instead of parsing/saving the error body as if it were data
+          curl -fsSL "${failure_summary_url}" -o outputs/failure_summary.json
+          ls -la outputs
+          
+          # Verify the file was downloaded and is non-empty before reporting success
+          if [ -s outputs/failure_summary.json ]; then
+            echo "Downloaded failure_summary.json successfully"
+            echo "File size: $(wc -c < outputs/failure_summary.json) bytes"
+          else
+            echo "Failed to download failure_summary.json"
+            exit 1
+          fi
+          
+          # Export variables for next steps
+          echo "workflow_id=${workflow_id}" >> "$GITHUB_OUTPUT"
+          echo "collection_job_number=${collection_job_number}" >> "$GITHUB_OUTPUT"
+
+      - name: Upload summaries to Hub
+        env:
+          HF_TOKEN: ${{ secrets.HF_CI_WRITE_TOKEN }}
+          CIRCLECI_RESULTS_DATASET_ID: "transformers-community/circleci-test-results"
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
+        run: |
+          python << 'EOF'
+          import os
+          from pathlib import Path
+          from huggingface_hub import HfApi
+          
+          # Staging folder mirrors the dataset layout: pr-<number>/sha-<first 12 chars of the commit SHA>
+          env = os.environ
+          pr_number, commit_short = env["PR_NUMBER"], env["COMMIT_SHA"][:12]
+          folder_path = f"pr-{pr_number}/sha-{commit_short}"
+          
+          # Move the downloaded summary into the staging folder (created on demand)
+          summary_dst = Path(f"{folder_path}/failure_summary.json")
+          summary_dst.parent.mkdir(parents=True, exist_ok=True)
+          Path("outputs/failure_summary.json").rename(summary_dst)
+          
+          # Push the staging folder to the same relative path inside the dataset repo
+          dataset_id = env["CIRCLECI_RESULTS_DATASET_ID"]
+          HfApi(token=env["HF_TOKEN"]).upload_folder(
+              repo_id=dataset_id,
+              repo_type="dataset",
+              folder_path=folder_path,
+              path_in_repo=folder_path,
+              commit_message=f"Update CircleCI artifacts for PR {pr_number} ({commit_short})",
+          )
+          
+          print(f"Uploaded {folder_path} to {dataset_id}")
+          EOF
+
+      - name: Post comment with helper link
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_REPOSITORY: ${{ github.repository }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          PR_SHA: ${{ github.event.pull_request.head.sha }}
+        run: |
+          COMMIT_SHORT="${PR_SHA:0:12}"
+          SUMMARY_FILE="pr-${PR_NUMBER}/sha-${COMMIT_SHORT}/failure_summary.json"
+          
+          if [ ! -f "$SUMMARY_FILE" ]; then
+            echo "failure_summary.json missing, skipping comment."
+            exit 0
+          fi
+          
+          failures=$(jq '.failures | length' "$SUMMARY_FILE")
+          if [ "$failures" -eq 0 ]; then
+            echo "No failures detected, skipping PR comment."
+            exit 0
+          fi
+          
+          # Build Space URL with URI-encoded query parameters (only pr and sha are part of the URL)
+          # NOTE(review): the URL carries a 6-char SHA prefix while the summary folder uses 12 chars — confirm the Space resolves it
+          pr_enc=$(jq -rn --arg v "$PR_NUMBER" '$v|@uri')
+          sha_short="${PR_SHA:0:6}"
+          sha_enc=$(jq -rn --arg v "$sha_short" '$v|@uri')
+          SPACE_URL="https://huggingface.co/spaces/transformers-community/circle-ci-viz?pr=${pr_enc}&sha=${sha_enc}"
+
+          # Post comment (using printf for proper newlines)
+          gh api \
+            --method POST \
+            -H "Accept: application/vnd.github+json" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \
+            -f body="$(printf "View the CircleCI Test Summary for this PR:\n\n%s" "$SPACE_URL")"
diff --git a/utils/process_circleci_workflow_test_reports.py b/utils/process_circleci_workflow_test_reports.py
@@ -14,6 +14,8 @@
 import argparse
 import json
 import os
+import re
+from collections import Counter
 
 import requests
 
@@ -22,64 +24,123 @@
     parser = argparse.ArgumentParser()
     parser.add_argument("--workflow_id", type=str, required=True)
     args = parser.parse_args()
-    workflow_id = args.workflow_id
 
     r = requests.get(
-        f"https://circleci.com/api/v2/workflow/{workflow_id}/job",
+        f"https://circleci.com/api/v2/workflow/{args.workflow_id}/job",
         headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")},
     )
     jobs = r.json()["items"]
 
     os.makedirs("outputs", exist_ok=True)
-
     workflow_summary = {}
-    # for each job, download artifacts
+    failure_entries = []
+
     for job in jobs:
-        project_slug = job["project_slug"]
         if job["name"].startswith(("tests_", "examples_", "pipelines_")):
-            url = f"https://circleci.com/api/v2/project/{project_slug}/{job['job_number']}/artifacts"
+            url = f"https://circleci.com/api/v2/project/{job['project_slug']}/{job['job_number']}/artifacts"
             r = requests.get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
             job_artifacts = r.json()["items"]
 
-            os.makedirs(job["name"], exist_ok=True)
             os.makedirs(f"outputs/{job['name']}", exist_ok=True)
 
             job_test_summaries = {}
+            job_failure_lines = {}
+
             for artifact in job_artifacts:
-                if artifact["path"].startswith("reports/") and artifact["path"].endswith("/summary_short.txt"):
-                    node_index = artifact["node_index"]
-                    url = artifact["url"]
+                url = artifact["url"]
+                if artifact["path"].endswith("/summary_short.txt"):
+                    r = requests.get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
+                    job_test_summaries[artifact["node_index"]] = r.text
+                elif artifact["path"].endswith("/failures_line.txt"):
                     r = requests.get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
-                    test_summary = r.text
-                    job_test_summaries[node_index] = test_summary
+                    job_failure_lines[artifact["node_index"]] = r.text
 
             summary = {}
             for node_index, node_test_summary in job_test_summaries.items():
                 for line in node_test_summary.splitlines():
                     if line.startswith("PASSED "):
-                        test = line[len("PASSED ") :]
-                        summary[test] = "passed"
+                        summary[line[7:]] = "passed"
                     elif line.startswith("FAILED "):
-                        test = line[len("FAILED ") :].split()[0]
-                        summary[test] = "failed"
-            # failed before passed
+                        summary[line[7:].split()[0]] = "failed"
+
             summary = dict(sorted(summary.items(), key=lambda x: (x[1], x[0])))
             workflow_summary[job["name"]] = summary
 
-            # collected version
             with open(f"outputs/{job['name']}/test_summary.json", "w") as fp:
                 json.dump(summary, fp, indent=4)
 
+            # Collect failure details
+            for node_index, summary_text in job_test_summaries.items():
+                failure_lines_list = [
+                    l.strip()
+                    for l in job_failure_lines.get(node_index, "").splitlines()
+                    if l.strip() and not l.strip().startswith(("=", "_", "short test summary")) and ": " in l
+                ]
+
+                failure_idx = 0
+                for line in summary_text.splitlines():
+                    if line.startswith("FAILED ") and " - Failed: (subprocess)" not in line:
+                        test_name, _, short_error = line[7:].strip().partition(" - ")
+                        test_name = test_name.strip()
+                        parts = test_name.split("::", 1)[0].split("/")
+                        model_name = parts[2] if len(parts) >= 3 and test_name.startswith("tests/models/") else None
+                        full_error = (
+                            failure_lines_list[failure_idx] if failure_idx < len(failure_lines_list) else short_error
+                        )
+
+                        failure_entries.append(
+                            {
+                                "job_name": job["name"],
+                                "test_name": test_name,
+                                "short_error": short_error,
+                                "error": full_error,
+                                "model_name": model_name,
+                            }
+                        )
+                        failure_idx += 1
+
+    # Build workflow summary
     new_workflow_summary = {}
     for job_name, job_summary in workflow_summary.items():
         for test, status in job_summary.items():
-            if test not in new_workflow_summary:
-                new_workflow_summary[test] = {}
-            new_workflow_summary[test][job_name] = status
+            new_workflow_summary.setdefault(test, {})[job_name] = status
 
-    for test, result in new_workflow_summary.items():
-        new_workflow_summary[test] = dict(sorted(result.items()))
-    new_workflow_summary = dict(sorted(new_workflow_summary.items()))
+    new_workflow_summary = {
+        test: dict(sorted(result.items())) for test, result in sorted(new_workflow_summary.items())
+    }
 
     with open("outputs/test_summary.json", "w") as fp:
         json.dump(new_workflow_summary, fp, indent=4)
+
+    # Aggregate failures by test and model
+    by_test, by_model = {}, {}
+
+    for entry in failure_entries:
+        # Normalize test name
+        normalized = entry["test_name"].split("[", 1)[0]
+        parts = normalized.split("::")
+        normalized = "::".join(parts[:-1] + [re.sub(r"_\d{2,}.*$", "", parts[-1])])
+
+        by_test.setdefault(normalized, {"count": 0, "errors": Counter(), "jobs": set(), "variants": set()})
+        by_test[normalized]["count"] += 1
+        by_test[normalized]["errors"][entry["error"]] += 1
+        by_test[normalized]["jobs"].add(entry["job_name"])
+        by_test[normalized]["variants"].add(entry["test_name"])
+
+        if entry["model_name"]:
+            by_model.setdefault(entry["model_name"], {"count": 0, "errors": Counter(), "tests": set()})
+            by_model[entry["model_name"]]["count"] += 1
+            by_model[entry["model_name"]]["errors"][entry["error"]] += 1
+            by_model[entry["model_name"]]["tests"].add(entry["test_name"])
+
+    # Convert Counter and sets to dicts/lists for JSON serialization
+    for info in by_test.values():
+        info["errors"] = dict(info["errors"].most_common())
+        info["jobs"] = sorted(info["jobs"])
+        info["variants"] = sorted(info["variants"])
+    for info in by_model.values():
+        info["errors"] = dict(info["errors"].most_common())
+        info["tests"] = sorted(info["tests"])
+
+    with open("outputs/failure_summary.json", "w") as fp:
+        json.dump({"failures": failure_entries, "by_test": by_test, "by_model": by_model}, fp, indent=4)