Skip to content

Commit ada5cae

Browse files
ArthurZucker authored and ydshieh committed
CircleCI failed test summary (huggingface#42240)
* clean
* final run
* final run
* final
* final

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
1 parent cdf41c9 commit ada5cae

File tree

2 files changed

+291
-24
lines changed

2 files changed

+291
-24
lines changed
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
# Waits for the CircleCI check suite of a pull request's head commit, downloads
# the `outputs/failure_summary.json` artifact of the CircleCI `collection_job`,
# uploads it to a Hub dataset, and posts a PR comment linking to a Space when
# the summary lists at least one failure.
name: CircleCI Failure Summary Comment

on:
  pull_request_target:
    types: [opened, synchronize, reopened]

jobs:
  comment:
    runs-on: ubuntu-22.04
    permissions:
      # Declaring `permissions` drops every scope that is not listed, so the
      # read scopes used by the checkout and the check-suite/check-run API
      # calls are spelled out next to the write scope needed for the comment.
      contents: read
      checks: read
      pull-requests: write
    env:
      TARGET_BRANCH: ${{ github.event.pull_request.head.ref }}
      TARGET_SHA: ${{ github.event.pull_request.head.sha }}
      PR_NUMBER: ${{ github.event.pull_request.number }}
    steps:
      # NOTE(review): no `ref` is passed, so under `pull_request_target` this
      # presumably checks out the base branch rather than the PR's code.
      # Confirm before adding any step that executes files from the checkout.
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      - name: Install dependencies
        run: python -m pip install requests huggingface_hub

      - name: Wait for CircleCI check suite completion
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
          GITHUB_REPOSITORY: ${{ github.repository }}
        run: |
          # Exit on error, undefined variables, or pipe failures
          set -euo pipefail

          echo "Waiting for CircleCI check suite to complete..."
          # Timeout after 30 minutes (1800 seconds)
          end=$((SECONDS + 1800))

          while [ "$SECONDS" -lt "$end" ]; do
            # Query GitHub API for check suites associated with this commit.
            # first(...) keeps a single suite object, so the status/conclusion
            # extraction below never sees multi-line output.
            # || echo "" allows retry on transient API failures instead of exiting
            suite_json=$(gh api "repos/${GITHUB_REPOSITORY}/commits/${COMMIT_SHA}/check-suites" \
              --jq 'first(.check_suites[] | select(.app.slug == "circleci-checks"))' || echo "")

            if [ -z "$suite_json" ]; then
              echo "CircleCI check suite not found yet, retrying..."
            else
              status=$(echo "$suite_json" | jq -r '.status')
              conclusion=$(echo "$suite_json" | jq -r '.conclusion // empty')
              echo "CircleCI status: $status, conclusion: $conclusion"

              # Check suite is done when status is "completed" AND conclusion is set.
              # The conclusion may be a failure; this step only waits for the end
              # of the run, it does not judge the result.
              if [ "$status" = "completed" ] && [ -n "$conclusion" ]; then
                echo "Check suite completed with conclusion: $conclusion"
                exit 0
              fi
            fi

            # Poll every 20 seconds
            sleep 20
          done

          echo "ERROR: Timed out waiting for CircleCI check suite"
          exit 1

      - name: Download failure summary from CircleCI artifacts
        id: circleci
        env:
          COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
          REPO: ${{ github.repository }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Exit on error, undefined variables, or pipe failures
          set -euo pipefail

          # Step 1: Get CircleCI check suite ID
          # (curl --fail turns HTTP errors into a non-zero exit instead of
          # handing an error body to jq)
          echo "Getting check suites for commit ${COMMIT_SHA}..."
          check_suites=$(curl -fsS -H "Authorization: token ${GITHUB_TOKEN}" \
            "https://api.github.com/repos/${REPO}/commits/${COMMIT_SHA}/check-suites")

          circleci_suite_id=$(echo "$check_suites" \
            | jq -r 'first(.check_suites[]? | select(.app.slug == "circleci-checks") | .id)')
          if [ -z "$circleci_suite_id" ]; then
            echo "ERROR: CircleCI check suite not found for commit ${COMMIT_SHA}"
            exit 1
          fi
          echo "CircleCI check suite ID: ${circleci_suite_id}"

          # Step 2: Get check runs from the CircleCI suite
          echo "Getting check runs for suite ${circleci_suite_id}..."
          check_runs=$(curl -fsS -H "Authorization: token ${GITHUB_TOKEN}" \
            "https://api.github.com/repos/${REPO}/check-suites/${circleci_suite_id}/check-runs")

          # Step 3: Extract workflow ID from the "run_tests" check run
          # (|| true: a missing match is reported by the explicit check below
          # rather than by a silent `set -e` exit)
          workflow_id=$(echo "$check_runs" \
            | jq -r '.check_runs[]? | select(.name == "run_tests") | .details_url' \
            | grep -oP 'workflows/\K[a-f0-9-]+' \
            | head -n 1 || true)
          if [ -z "$workflow_id" ]; then
            echo "ERROR: could not extract a CircleCI workflow ID from the run_tests check run"
            exit 1
          fi
          echo "CircleCI Workflow ID: ${workflow_id}"

          # Step 4: Get all jobs in the workflow
          echo "Getting jobs for workflow ${workflow_id}..."
          jobs=$(curl -fsS \
            "https://circleci.com/api/v2/workflow/${workflow_id}/job")

          # Step 5: Extract collection_job details
          collection_job_number=$(echo "$jobs" \
            | jq -r 'first(.items[]? | select(.name == "collection_job") | .job_number // empty)')
          collection_job_id=$(echo "$jobs" \
            | jq -r 'first(.items[]? | select(.name == "collection_job") | .id // empty)')
          if [ -z "$collection_job_number" ]; then
            echo "ERROR: collection_job (or its job number) not found in workflow ${workflow_id}"
            exit 1
          fi
          echo "CircleCI Collection job number: ${collection_job_number}"
          echo "CircleCI Collection job ID: ${collection_job_id}"

          # Step 6: Get artifacts list
          echo "Getting artifacts for job ${collection_job_number}..."
          artifacts=$(curl -fsS \
            "https://circleci.com/api/v2/project/gh/${REPO}/${collection_job_number}/artifacts")

          echo "$artifacts" | jq '.'

          # Step 7: Download failure_summary.json specifically
          failure_summary_url=$(echo "$artifacts" \
            | jq -r 'first(.items[]? | select(.path == "outputs/failure_summary.json") | .url)')

          if [ -z "$failure_summary_url" ]; then
            echo "failure_summary.json not found in artifacts"
            exit 1
          fi

          echo "Downloading failure_summary.json from: ${failure_summary_url}"
          mkdir -p outputs
          curl -fsSL "${failure_summary_url}" -o outputs/failure_summary.json
          ls -la outputs

          # Verify the file was downloaded, is non-empty and parses as JSON
          # before anything is uploaded or reported as successful
          if [ -s outputs/failure_summary.json ] && jq empty outputs/failure_summary.json; then
            echo "File size: $(wc -c < outputs/failure_summary.json) bytes"
          else
            echo "Failed to download failure_summary.json"
            exit 1
          fi

          echo "Downloaded failure_summary.json successfully"

          # Export variables for next steps
          echo "workflow_id=${workflow_id}" >> "$GITHUB_OUTPUT"
          echo "collection_job_number=${collection_job_number}" >> "$GITHUB_OUTPUT"

      - name: Upload summaries to Hub
        env:
          HF_TOKEN: ${{ secrets.HF_CI_WRITE_TOKEN }}
          CIRCLECI_RESULTS_DATASET_ID: "transformers-community/circleci-test-results"
          PR_NUMBER: ${{ github.event.pull_request.number }}
          COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
        run: |
          python << 'EOF'
          import os
          from pathlib import Path
          from huggingface_hub import HfApi

          # Setup paths: pr-<number>/sha-<first 12 characters of the commit SHA>
          pr_number = os.environ["PR_NUMBER"]
          commit_short = os.environ["COMMIT_SHA"][:12]
          folder_path = f"pr-{pr_number}/sha-{commit_short}"

          # Create folder and move file (the comment step reads it from this location)
          Path(folder_path).mkdir(parents=True, exist_ok=True)
          Path("outputs/failure_summary.json").rename(f"{folder_path}/failure_summary.json")

          # Upload to Hub
          dataset_id = os.environ["CIRCLECI_RESULTS_DATASET_ID"]
          api = HfApi(token=os.environ["HF_TOKEN"])
          api.upload_folder(
              commit_message=f"Update CircleCI artifacts for PR {pr_number} ({commit_short})",
              folder_path=folder_path,
              path_in_repo=folder_path,
              repo_id=dataset_id,
              repo_type="dataset",
          )

          print(f"Uploaded {folder_path} to {dataset_id}")
          EOF

      - name: Post comment with helper link
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_REPOSITORY: ${{ github.repository }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          PR_SHA: ${{ github.event.pull_request.head.sha }}
        run: |
          # Exit on error, undefined variables, or pipe failures
          set -euo pipefail

          # Same location the upload step moved the summary to
          COMMIT_SHORT="${PR_SHA:0:12}"
          SUMMARY_FILE="pr-${PR_NUMBER}/sha-${COMMIT_SHORT}/failure_summary.json"

          if [ ! -f "$SUMMARY_FILE" ]; then
            echo "failure_summary.json missing, skipping comment."
            exit 0
          fi

          failures=$(jq '.failures | length' "$SUMMARY_FILE")
          if [ "$failures" -eq 0 ]; then
            echo "No failures detected, skipping PR comment."
            exit 0
          fi

          # Build Space URL with encoded parameters
          # NOTE(review): the uploaded folder is named after the first 12
          # characters of the SHA while this link passes only the first 6 —
          # confirm the Space resolves the shorter prefix.
          pr_enc=$(jq -rn --arg v "$PR_NUMBER" '$v|@uri')
          sha_short="${PR_SHA:0:6}"
          sha_enc=$(jq -rn --arg v "$sha_short" '$v|@uri')
          SPACE_URL="https://huggingface.co/spaces/transformers-community/circle-ci-viz?pr=${pr_enc}&sha=${sha_enc}"

          # Post comment (using printf for proper newlines)
          gh api \
            --method POST \
            -H "Accept: application/vnd.github+json" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \
            -f body="$(printf "View the CircleCI Test Summary for this PR:\n\n%s" "$SPACE_URL")"

utils/process_circleci_workflow_test_reports.py

Lines changed: 85 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
import argparse
1515
import json
1616
import os
17+
import re
18+
from collections import Counter
1719

1820
import requests
1921

@@ -22,64 +24,123 @@
2224
parser = argparse.ArgumentParser()
2325
parser.add_argument("--workflow_id", type=str, required=True)
2426
args = parser.parse_args()
25-
workflow_id = args.workflow_id
2627

2728
r = requests.get(
28-
f"https://circleci.com/api/v2/workflow/{workflow_id}/job",
29+
f"https://circleci.com/api/v2/workflow/{args.workflow_id}/job",
2930
headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")},
3031
)
3132
jobs = r.json()["items"]
3233

3334
os.makedirs("outputs", exist_ok=True)
34-
3535
workflow_summary = {}
36-
# for each job, download artifacts
36+
failure_entries = []
37+
3738
for job in jobs:
38-
project_slug = job["project_slug"]
3939
if job["name"].startswith(("tests_", "examples_", "pipelines_")):
40-
url = f"https://circleci.com/api/v2/project/{project_slug}/{job['job_number']}/artifacts"
40+
url = f"https://circleci.com/api/v2/project/{job['project_slug']}/{job['job_number']}/artifacts"
4141
r = requests.get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
4242
job_artifacts = r.json()["items"]
4343

44-
os.makedirs(job["name"], exist_ok=True)
4544
os.makedirs(f"outputs/{job['name']}", exist_ok=True)
4645

4746
job_test_summaries = {}
47+
job_failure_lines = {}
48+
4849
for artifact in job_artifacts:
49-
if artifact["path"].startswith("reports/") and artifact["path"].endswith("/summary_short.txt"):
50-
node_index = artifact["node_index"]
51-
url = artifact["url"]
50+
url = artifact["url"]
51+
if artifact["path"].endswith("/summary_short.txt"):
52+
r = requests.get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
53+
job_test_summaries[artifact["node_index"]] = r.text
54+
elif artifact["path"].endswith("/failures_line.txt"):
5255
r = requests.get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
53-
test_summary = r.text
54-
job_test_summaries[node_index] = test_summary
56+
job_failure_lines[artifact["node_index"]] = r.text
5557

5658
summary = {}
5759
for node_index, node_test_summary in job_test_summaries.items():
5860
for line in node_test_summary.splitlines():
5961
if line.startswith("PASSED "):
60-
test = line[len("PASSED ") :]
61-
summary[test] = "passed"
62+
summary[line[7:]] = "passed"
6263
elif line.startswith("FAILED "):
63-
test = line[len("FAILED ") :].split()[0]
64-
summary[test] = "failed"
65-
# failed before passed
64+
summary[line[7:].split()[0]] = "failed"
65+
6666
summary = dict(sorted(summary.items(), key=lambda x: (x[1], x[0])))
6767
workflow_summary[job["name"]] = summary
6868

69-
# collected version
7069
with open(f"outputs/{job['name']}/test_summary.json", "w") as fp:
7170
json.dump(summary, fp, indent=4)
7271

72+
# Collect failure details
73+
for node_index, summary_text in job_test_summaries.items():
74+
failure_lines_list = [
75+
l.strip()
76+
for l in job_failure_lines.get(node_index, "").splitlines()
77+
if l.strip() and not l.strip().startswith(("=", "_", "short test summary")) and ": " in l
78+
]
79+
80+
failure_idx = 0
81+
for line in summary_text.splitlines():
82+
if line.startswith("FAILED ") and " - Failed: (subprocess)" not in line:
83+
test_name, _, short_error = line[7:].strip().partition(" - ")
84+
test_name = test_name.strip()
85+
parts = test_name.split("::", 1)[0].split("/")
86+
model_name = parts[2] if len(parts) >= 3 and test_name.startswith("tests/models/") else None
87+
full_error = (
88+
failure_lines_list[failure_idx] if failure_idx < len(failure_lines_list) else short_error
89+
)
90+
91+
failure_entries.append(
92+
{
93+
"job_name": job["name"],
94+
"test_name": test_name,
95+
"short_error": short_error,
96+
"error": full_error,
97+
"model_name": model_name,
98+
}
99+
)
100+
failure_idx += 1
101+
102+
# Build workflow summary
73103
new_workflow_summary = {}
74104
for job_name, job_summary in workflow_summary.items():
75105
for test, status in job_summary.items():
76-
if test not in new_workflow_summary:
77-
new_workflow_summary[test] = {}
78-
new_workflow_summary[test][job_name] = status
106+
new_workflow_summary.setdefault(test, {})[job_name] = status
79107

80-
for test, result in new_workflow_summary.items():
81-
new_workflow_summary[test] = dict(sorted(result.items()))
82-
new_workflow_summary = dict(sorted(new_workflow_summary.items()))
108+
new_workflow_summary = {
109+
test: dict(sorted(result.items())) for test, result in sorted(new_workflow_summary.items())
110+
}
83111

84112
with open("outputs/test_summary.json", "w") as fp:
85113
json.dump(new_workflow_summary, fp, indent=4)
114+
115+
# Aggregate failures by test and model
116+
by_test, by_model = {}, {}
117+
118+
for entry in failure_entries:
119+
# Normalize test name
120+
normalized = entry["test_name"].split("[", 1)[0]
121+
parts = normalized.split("::")
122+
normalized = "::".join(parts[:-1] + [re.sub(r"_\d{2,}.*$", "", parts[-1])])
123+
124+
by_test.setdefault(normalized, {"count": 0, "errors": Counter(), "jobs": set(), "variants": set()})
125+
by_test[normalized]["count"] += 1
126+
by_test[normalized]["errors"][entry["error"]] += 1
127+
by_test[normalized]["jobs"].add(entry["job_name"])
128+
by_test[normalized]["variants"].add(entry["test_name"])
129+
130+
if entry["model_name"]:
131+
by_model.setdefault(entry["model_name"], {"count": 0, "errors": Counter(), "tests": set()})
132+
by_model[entry["model_name"]]["count"] += 1
133+
by_model[entry["model_name"]]["errors"][entry["error"]] += 1
134+
by_model[entry["model_name"]]["tests"].add(entry["test_name"])
135+
136+
# Convert Counter and sets to dicts/lists for JSON serialization
137+
for info in by_test.values():
138+
info["errors"] = dict(info["errors"].most_common())
139+
info["jobs"] = sorted(info["jobs"])
140+
info["variants"] = sorted(info["variants"])
141+
for info in by_model.values():
142+
info["errors"] = dict(info["errors"].most_common())
143+
info["tests"] = sorted(info["tests"])
144+
145+
with open("outputs/failure_summary.json", "w") as fp:
146+
json.dump({"failures": failure_entries, "by_test": by_test, "by_model": by_model}, fp, indent=4)

0 commit comments

Comments
 (0)