From 48b45ed14007a0df0c187b9f90f63f432cc1d0fa Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 5 Jan 2026 13:44:13 -0500 Subject: [PATCH 01/15] --mem-monitoring-filepath --- .github/workflows/production.yml | 4 +++- tests/conftest.py | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/.github/workflows/production.yml b/.github/workflows/production.yml index c50aefd5af..b594a083b9 100644 --- a/.github/workflows/production.yml +++ b/.github/workflows/production.yml @@ -150,7 +150,7 @@ jobs: run: | srun ${SRUN_COMMON} bash -e -s <<'EOF' source /venv/bin/activate - pytest --print -x -m "benchmarks" ./tests + pytest --mem-monitoring-filepath mem_results_${SLURM_JOB_NAME}.txt --print -x -m "benchmarks" ./tests cat speed_test*.txt > "/mnt/data/artifacts/speed_test_${SLURM_JOB_NAME}.txt" EOF @@ -162,6 +162,8 @@ jobs: fi - name: Display benchmark stats run: | + ls -lh mem_results_*.txt + cat mem_results_*.txt cat "/mnt/data/artifacts/speed_test_${SLURM_JOB_NAME}.txt" - name: Upload benchmark stats as artifact uses: actions/upload-artifact@v4 diff --git a/tests/conftest.py b/tests/conftest.py index a87ec77738..7d529ebdd6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -66,6 +66,10 @@ def pytest_make_parametrize_id(config, val, argname): @pytest.hookimpl def pytest_cmdline_main(config: pytest.Config) -> None: + import random + id = random.randint(0, 1000) + with open(f"logs/pytest_cmdline_main_{id}.txt", "w") as f: + f.write("pytest_cmdline_main\n") # Make sure that no unsupported markers have been specified in CLI declared_markers = set(name for spec in config.getini("markers") if (name := spec.split(":")[0]) != "forked") try: @@ -87,6 +91,12 @@ def pytest_cmdline_main(config: pytest.Config) -> None: if not sys.platform.startswith("linux"): config.option.forked = False + mem_monitoring = config.getoption("--mem-monitoring-filepath") + with open(mem_monitoring, "a") as f: + f.write("foo\n") + with open(f"logs/pytest_cmdline_main_{id}.txt", "a") as f: + f.write(f"mem monitoring filepath {mem_monitoring}\n") + # Force disabling distributed framework if interactive viewer is enabled show_viewer = config.getoption("--vis") if show_viewer: @@ -342,12 +352,17 @@ def pytest_runtest_setup(item): def pytest_addoption(parser): + import random + id = random.randint(0, 1000) + with open(f"logs/pytest_addoption_{id}.txt", "w") as f: + f.write("pytest_addoption\n") parser.addoption("--backend", action="store", default=None, help="Default simulation backend.") parser.addoption( "--logical", action="store_true", default=False, help="Consider logical cores in default number of workers." ) parser.addoption("--vis", action="store_true", default=False, help="Enable interactive viewer.") parser.addoption("--dev", action="store_true", default=False, help="Enable genesis debug mode.") + parser.addoption("--mem-monitoring-filepath", type=str, help="Run memory monitoring, and output results to this filepath.") @pytest.fixture(scope="session") From 4506521e63515c90805daa17aca964770d96cfd4 Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 5 Jan 2026 13:44:42 -0500 Subject: [PATCH 02/15] comment out other tests --- .github/workflows/alarm.yml | 970 +++++++++++++++---------------- .github/workflows/examples.yml | 138 ++--- .github/workflows/generic.yml | 418 ++++++------- .github/workflows/production.yml | 118 ++-- 4 files changed, 822 insertions(+), 822 deletions(-) diff --git a/.github/workflows/alarm.yml b/.github/workflows/alarm.yml index 571a82f4b0..7f159323d3 100644 --- a/.github/workflows/alarm.yml +++ b/.github/workflows/alarm.yml @@ -1,485 +1,485 @@ -name: Benchmark Comparison & Alarm Regression - -on: - workflow_run: - workflows: ["Production"] - types: [completed] - -permissions: - contents: read - actions: read - pull-requests: write - checks: write - -jobs: - comment-if-regressed: - runs-on: ubuntu-latest - if: > - github.event.workflow_run.event == 'pull_request' && - contains(fromJson('["success","neutral"]'), github.event.workflow_run.conclusion) - - steps: - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - - name: Install deps - run: | - python -m pip install --quiet --upgrade wandb frozendict - - - name: Download artifacts from triggering run - id: dl - uses: actions/download-artifact@v4 - with: - pattern: speed-test-* - run-id: ${{ github.event.workflow_run.id }} - github-token: ${{ secrets.GITHUB_TOKEN }} - path: ./artifacts - - - name: Show downloaded files - run: | - echo "Downloaded into ${{ steps.dl.outputs.download-path }}" - ls -la ${{ steps.dl.outputs.download-path }} || true - (command -v tree >/dev/null && tree -a ${{ steps.dl.outputs.download-path }}) || true - - - name: Check regressions + build outputs - id: analyze - env: - # Note that secrets are not passed to workflows that are triggered by a pull request from a fork - # --- W&B --- - WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }} - WANDB_ENTITY: genesis-ai-company - WANDB_PROJECT: genesis-benchmarks - WANDB_SILENT: "true" - - # --- Parameters --- - MAX_VALID_REVISIONS: 5 - MAX_FETCH_REVISIONS: 40 - RUNTIME_REGRESSION_TOLERANCE_PCT: 8 - COMPILE_REGRESSION_TOLERANCE_PCT: 16 - - # Input/Output paths - ARTIFACTS_DIR: ${{ steps.dl.outputs.download-path }} - CHECK_BODY_PATH: check_output.md - CSV_RUNTIME_PATH: runtime_fps.csv - CSV_COMPILE_PATH: compile_time.csv - EXIT_CODE_REGRESSION: 42 - EXIT_CODE_ALERT: 43 - run: | - { python - << 'PY'; EXIT_CODE=$?; } || true - - import os, sys, json, re, math, statistics - import wandb - from frozendict import frozendict - from pathlib import Path - import csv - - # ----- arguments ----- - - MAX_VALID_REVISIONS = int(os.environ["MAX_VALID_REVISIONS"]) - MAX_FETCH_REVISIONS = int(os.environ["MAX_FETCH_REVISIONS"]) - - METRICS_TOL = { - "runtime_fps": float(os.environ["RUNTIME_REGRESSION_TOLERANCE_PCT"]), - "compile_time": float(os.environ["COMPILE_REGRESSION_TOLERANCE_PCT"]), - } - - artifacts_dir = Path(os.environ["ARTIFACTS_DIR"]).expanduser().resolve() - check_body_path = Path(os.environ["CHECK_BODY_PATH"]).expanduser() - - csv_files = { - "runtime_fps": Path(os.environ["CSV_RUNTIME_PATH"]).expanduser().resolve(), - "compile_time": Path(os.environ["CSV_COMPILE_PATH"]).expanduser().resolve(), - } - - # ---------- helpers ---------- - - METRIC_KEYS = ("compile_time", "runtime_fps", "realtime_factor") - - def parse_benchmark_id(bid: str) -> dict: - kv = {} - if bid: - for token in bid.split("-"): - token = token.strip() - if token and "=" in token: - k, v = token.split("=", 1) - kv[k.strip()] = v.strip() - return kv - - def normalize_benchmark_id(bid: str) -> frozendict[str, str]: - return frozendict(parse_benchmark_id(bid)) - - def get_param_names(bids: tuple[frozendict]) -> tuple[str, ...]: - """ - Merge a list of tuples into a single tuple of keys that: - - Preserves the relative order of keys within each tuple - - Gives precedence to later tuples when conflicts arise - """ - merged = list(bids[-1]) - merged_set = set(merged) - for tup in bids[:-1]: - for key in tup: - if key not in merged_set: - merged.append(key) - merged_set.add(key) - return tuple(merged) - - def sort_key(d): - key_list = [] - for col in params_name: - if col in d: - val = d[col] - key_list.append((0, val)) - else: - key_list.append((1, None)) - return key_list - - def artifacts_parse_csv_summary(current_txt_path): - out = {} - for line in current_txt_path.read_text().splitlines(): - kv = dict(map(str.strip, p.split("=", 1)) for p in line.split("|") if "=" in p) - record = {} - for k in METRIC_KEYS: - try: - record[k] = float(kv.pop(k)) - except (ValueError, TypeError, KeyError): - pass - nbid = frozendict(kv) - out[nbid] = record - return out - - def fmt_num(v, is_int: bool): - return f"{int(v):,}" if is_int else f"{v:.2f}" - - # ----- load artifacts (current results) ----- - - current_csv_paths = list(artifacts_dir.rglob("speed_test*.txt")) - if not current_csv_paths: - check_body_path.touch() - sys.exit(0) - - current_bm = {} - for csv_path in current_csv_paths: - current_bm |= artifacts_parse_csv_summary(csv_path) - bids_set = frozenset(current_bm.keys()) - assert bids_set - - # ----- W&B baselines ----- - - if not "WANDB_API_KEY" in os.environ: - print("WANDB_API_KEY is not set") - sys.exit(0) - ENTITY = os.environ["WANDB_ENTITY"] - PROJECT = os.environ["WANDB_PROJECT"] - - api = wandb.Api() - runs_iter = api.runs(f"{ENTITY}/{PROJECT}", order="-created_at") - - revs = set() - records_by_rev = {} - for i, run in enumerate(runs_iter): - # Abort if still not complete after checking enough runs. - # This would happen if a new benchmark has been added, and not enough past data is available yet. - if len(revs) == MAX_FETCH_REVISIONS: - break - - # Early return if enough complete records have been collected - records_is_complete = [bids_set.issubset(record.keys()) for record in records_by_rev.values()] - if sum(records_is_complete) == MAX_VALID_REVISIONS: - break - - # Load config and summary, with support of legacy runs - config, summary = run.config, run.summary - if isinstance(config, str): - config = {k: v["value"] for k, v in json.loads(run.config).items() if not k.startswith("_")} - if isinstance(summary._json_dict, str): - summary = json.loads(summary._json_dict) - - # Extract revision commit and branch - try: - rev, branch = config["revision"].split("@", 1) - revs.add(rev) - except ValueError: - # Ignore this run if the revision has been corrupted for some unknown reason - continue - # Ignore runs associated with a commit that is not part of the official repository - if not branch.startswith('Genesis-Embodied-AI/'): - continue - - # Skip runs did not finish for some reason - if run.state != "finished": - continue - - # Do not store new records if the desired number of revision is already reached - if len(records_by_rev) == MAX_VALID_REVISIONS and rev not in records_by_rev: - continue - - # Extract benchmark ID and normalize it to make sure it does not depends on key ordering. - # Note that the rigid body benchmark suite is the only one being supported for now. - sid, bid = config["benchmark_id"].split("-", 1) - if sid != "rigid_body": - continue - - # Make sure that stats are valid - try: - is_valid = True - for k in METRIC_KEYS: - v = summary[k] - if not isinstance(v, (float, int)) or math.isnan(v): - is_valid = False - break - if not is_valid: - continue - except KeyError: - continue - - # Store all the records into a dict - nbid = normalize_benchmark_id(bid) - records_by_rev.setdefault(rev, {})[nbid] = { - metric: summary[metric] for metric in METRIC_KEYS - } - - # ----- build TWO tables ----- - - # Parse benchmark IDs into key-value dicts while preserving order - params_name = get_param_names(tuple((tuple(kv.keys())) for kv in current_bm.keys())) - - reg_found, alert_found = False, False - tables = {} - rows_for_csv = {"runtime_fps": [], "compile_time": []} - info = {} - for metric, alias, sign in (("runtime_fps", "FPS", 1), ("compile_time", "compile", -1)): - rows_md = [] - - header_cells = ( - "status", - *params_name, - f"current {alias}", - f"baseline {alias} [last (mean ± std)] (*1)", - f"Δ {alias} (*2)" - ) - header = "| " + " | ".join(header_cells) + " |" - align = "|:------:|" + "|".join([":---" for _ in params_name]) + "|---:|---:|---:|" - - for bid in sorted(current_bm.keys(), key=sort_key): - value_cur = current_bm[bid][metric] - is_int = isinstance(value_cur, int) or value_cur.is_integer() - value_repr = fmt_num(value_cur, is_int) - - params_repr = [bid.get(k, "-") for k in params_name] - info = { - **dict(zip(params_name, params_repr)), - "current": value_cur, - "baseline_last": None, - "baseline_min": None, - "baseline_max": None, - } - - values_prev = [ - record[bid][metric] - for record in records_by_rev.values() - if bid in record - ] - if values_prev: - value_last = values_prev[0] - value_ref = statistics.fmean(values_prev) - delta = (value_cur - value_last) / value_last * 100.0 - - info["baseline_last"] = int(value_last) if is_int else float(value_last) - - stats_repr = f"{fmt_num(value_last, is_int)}" - delta_repr = f"{delta:+.1f}%" - if len(values_prev) == MAX_VALID_REVISIONS: - info["baseline_mean"] = int(value_ref) if is_int else float(value_ref) - info["baseline_min"] = int(min(values_prev)) if is_int else float(min(values_prev)) - info["baseline_max"] = int(max(values_prev)) if is_int else float(max(values_prev)) - - value_std = statistics.stdev(values_prev) - stats_repr += f" ({fmt_num(value_ref, is_int)} ± {fmt_num(value_std, is_int)})" - if sign * delta < - METRICS_TOL[metric]: - info["status"] = "regression" - - delta_repr = f"**{delta_repr}**" - picto = "🔴" - reg_found = True - elif sign * delta > METRICS_TOL[metric]: - info["status"] = "alert" - - delta_repr = f"**{delta_repr}**" - picto = "⚠️" - alert_found = True - else: - info["status"] = "ok" - - picto = "✅" - else: - info["status"] = "n/a" - - picto = "ℹ️" - else: - picto, stats_repr, delta_repr = "ℹ️", "---", "---" - - rows_md.append("| " + " | ".join((picto, *params_repr, value_repr, stats_repr, delta_repr)) + " |") - rows_for_csv[metric].append(info) - - tables[metric] = [header, align] + rows_md - - # ----- baseline commit list (MD) ----- - blist = [f"- Commit {i}: {sha}" for i, sha in enumerate(records_by_rev.keys(), 1)] - baseline_block = ["**Baselines considered:** " + f"**{len(records_by_rev)}** commits"] + blist - - # ----- CHECK body (always) ----- - - thr_repr = ", ".join( - f"{alias} ± {METRICS_TOL[metric]:.0f}%" - for metric, alias in (("runtime_fps", "runtime"), ("compile_time", "compile")) - ) - - check_body = "\n".join( - [ - *baseline_block, - "", - f"Thresholds: {thr_repr}", - "", - "### Runtime FPS", - *tables["runtime_fps"], - "", - "### Compile Time", - *tables["compile_time"], - "", - f"- (*1) last: last commit on main, mean/std: stats over revs {MAX_VALID_REVISIONS} commits if available.", - f"- (*2) Δ: relative difference between PR and last commit on main, i.e. (PR - main) / main * 100%.", - ] - ) - - # ----- COMMENT body (only if regressions) ----- - - if reg_found: - comment_body = "\n".join([":warning: **Benchmark Regression Detected**", *check_body]) - else: - comment_body = "" - - # CSV file - for metric in ("runtime_fps", "compile_time"): - with csv_files[metric].open("w", newline="", encoding="utf-8") as f: - w = csv.DictWriter(f, fieldnames=info.keys()) - w.writeheader() - for rec in rows_for_csv[metric]: - w.writerow(rec) - - # write md results - check_body_path.write_text(check_body + "\n", encoding="utf-8") - - # Exit with error code - if reg_found: - exit_code = int(os.environ["EXIT_CODE_REGRESSION"]) - elif alert_found: - exit_code = int(os.environ["EXIT_CODE_ALERT"]) - else: - exit_code = 0 - sys.exit(exit_code) - PY - - # Enable command trace to ease debugging - set -o xtrace - - # Expose outputs to later steps - if [ -f "$CHECK_BODY_PATH" ]; then - { - echo 'CHECK_OUTPUT<<__EOF__' - cat "$CHECK_BODY_PATH" - echo '__EOF__' - } >> "$GITHUB_ENV" - else - echo "CHECK_OUTPUT=" >> "$GITHUB_ENV" - fi - - # Export status - echo "HAS_REGRESSIONS=$([ "$EXIT_CODE" = "$EXIT_CODE_REGRESSION" ] && echo 1 || echo 0)" >> "$GITHUB_ENV" - echo "HAS_ALERTS=$([ "$EXIT_CODE" = "$EXIT_CODE_ALERT" ] && echo 1 || echo 0)" >> "$GITHUB_ENV" - - - name: Upload benchmark comparisons in CSV - id: upload - uses: actions/upload-artifact@v4 - with: - name: benchmark-comparison-tables - path: | - runtime_fps.csv - compile_time.csv - if-no-files-found: warn - - - name: Publish PR check - id: publish_check - uses: actions/github-script@v8 - env: - CHECK_NAME: Benchmark Comparison - CHECK_OUTPUT: ${{ env.CHECK_OUTPUT }} - HAS_REGRESSIONS: ${{ env.HAS_REGRESSIONS }} - HAS_ALERTS: ${{ env.HAS_ALERTS }} - ARTIFACT_URL: ${{ steps.upload.outputs.artifact-url }} - with: - script: | - const artifactUrl = process.env.ARTIFACT_URL || ''; - let body = process.env.CHECK_OUTPUT || ''; - if (body && artifactUrl) { - body += `\n\n**Artifact:** [Download raw data](${artifactUrl})`; - } - - let summary; - let conclusion = 'success'; - if ((process.env.HAS_REGRESSIONS || '0') === '1') { - summary = '🔴 Regressions detected. See tables below.'; - conclusion = 'failure'; - } else if ((process.env.HAS_ALERTS || '0') === '1') { - summary = '⚠️ Large deviation detected. See tables below.'; - } else { - summary = '✅ No regressions detected. See tables below.'; - } - - const check = await github.rest.checks.create({ - owner: context.repo.owner, - repo: context.repo.repo, - head_sha: context.payload.workflow_run.head_sha, - name: process.env.CHECK_NAME, - status: 'completed', - conclusion: conclusion, - output: { - title: process.env.CHECK_NAME, - summary, - text: body || undefined - } - }); - core.setOutput("check-url", check.data.html_url); - - - name: Add PR comment - if: ${{ env.HAS_REGRESSIONS == '1' || env.HAS_ALERTS == '1' }} - uses: actions/github-script@v8 - env: - HAS_REGRESSIONS: ${{ env.HAS_REGRESSIONS }} - REPORT_URL: ${{ steps.publish_check.outputs.check-url }} - with: - script: | - // Getting PR number when using 'workflow_run' is tricky. For reference, see: - // * https://docs.github.com/en/webhooks/webhook-events-and-payloads#workflow_run - // * https://stackoverflow.com/a/75420270/4820605 - const { data } = await github.rest.repos.listPullRequestsAssociatedWithCommit({ - owner: context.payload.workflow_run.head_repository.owner.login, - repo: context.payload.workflow_run.head_repository.name, - commit_sha: context.payload.workflow_run.head_sha, - }); - if (!data || !data.length) { - core.info('No associated PR; skipping comment.'); - return; - } - - const title = (process.env.HAS_REGRESSIONS || '0') === '1' - ? '🔴 Benchmark Regression Detected' : '⚠️ Abnormal Benchmark Result Detected'; - const comment = `**${title} ➡️ [Report](${process.env.REPORT_URL})**`; - - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: data[0].number, - body: comment - }); +# name: Benchmark Comparison & Alarm Regression + +# on: +# workflow_run: +# workflows: ["Production"] +# types: [completed] + +# permissions: +# contents: read +# actions: read +# pull-requests: write +# checks: write + +# jobs: +# comment-if-regressed: +# runs-on: ubuntu-latest +# if: > +# github.event.workflow_run.event == 'pull_request' && +# contains(fromJson('["success","neutral"]'), github.event.workflow_run.conclusion) + +# steps: +# - name: Setup Python +# uses: actions/setup-python@v5 +# with: +# python-version: '3.10' + +# - name: Install deps +# run: | +# python -m pip install --quiet --upgrade wandb frozendict + +# - name: Download artifacts from triggering run +# id: dl +# uses: actions/download-artifact@v4 +# with: +# pattern: speed-test-* +# run-id: ${{ github.event.workflow_run.id }} +# github-token: ${{ secrets.GITHUB_TOKEN }} +# path: ./artifacts + +# - name: Show downloaded files +# run: | +# echo "Downloaded into ${{ steps.dl.outputs.download-path }}" +# ls -la ${{ steps.dl.outputs.download-path }} || true +# (command -v tree >/dev/null && tree -a ${{ steps.dl.outputs.download-path }}) || true + +# - name: Check regressions + build outputs +# id: analyze +# env: +# # Note that secrets are not passed to workflows that are triggered by a pull request from a fork +# # --- W&B --- +# WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }} +# WANDB_ENTITY: genesis-ai-company +# WANDB_PROJECT: genesis-benchmarks +# WANDB_SILENT: "true" + +# # --- Parameters --- +# MAX_VALID_REVISIONS: 5 +# MAX_FETCH_REVISIONS: 40 +# RUNTIME_REGRESSION_TOLERANCE_PCT: 8 +# COMPILE_REGRESSION_TOLERANCE_PCT: 16 + +# # Input/Output paths +# ARTIFACTS_DIR: ${{ steps.dl.outputs.download-path }} +# CHECK_BODY_PATH: check_output.md +# CSV_RUNTIME_PATH: runtime_fps.csv +# CSV_COMPILE_PATH: compile_time.csv +# EXIT_CODE_REGRESSION: 42 +# EXIT_CODE_ALERT: 43 +# run: | +# { python - << 'PY'; EXIT_CODE=$?; } || true + +# import os, sys, json, re, math, statistics +# import wandb +# from frozendict import frozendict +# from pathlib import Path +# import csv + +# # ----- arguments ----- + +# MAX_VALID_REVISIONS = int(os.environ["MAX_VALID_REVISIONS"]) +# MAX_FETCH_REVISIONS = int(os.environ["MAX_FETCH_REVISIONS"]) + +# METRICS_TOL = { +# "runtime_fps": float(os.environ["RUNTIME_REGRESSION_TOLERANCE_PCT"]), +# "compile_time": float(os.environ["COMPILE_REGRESSION_TOLERANCE_PCT"]), +# } + +# artifacts_dir = Path(os.environ["ARTIFACTS_DIR"]).expanduser().resolve() +# check_body_path = Path(os.environ["CHECK_BODY_PATH"]).expanduser() + +# csv_files = { +# "runtime_fps": Path(os.environ["CSV_RUNTIME_PATH"]).expanduser().resolve(), +# "compile_time": Path(os.environ["CSV_COMPILE_PATH"]).expanduser().resolve(), +# } + +# # ---------- helpers ---------- + +# METRIC_KEYS = ("compile_time", "runtime_fps", "realtime_factor") + +# def parse_benchmark_id(bid: str) -> dict: +# kv = {} +# if bid: +# for token in bid.split("-"): +# token = token.strip() +# if token and "=" in token: +# k, v = token.split("=", 1) +# kv[k.strip()] = v.strip() +# return kv + +# def normalize_benchmark_id(bid: str) -> frozendict[str, str]: +# return frozendict(parse_benchmark_id(bid)) + +# def get_param_names(bids: tuple[frozendict]) -> tuple[str, ...]: +# """ +# Merge a list of tuples into a single tuple of keys that: +# - Preserves the relative order of keys within each tuple +# - Gives precedence to later tuples when conflicts arise +# """ +# merged = list(bids[-1]) +# merged_set = set(merged) +# for tup in bids[:-1]: +# for key in tup: +# if key not in merged_set: +# merged.append(key) +# merged_set.add(key) +# return tuple(merged) + +# def sort_key(d): +# key_list = [] +# for col in params_name: +# if col in d: +# val = d[col] +# key_list.append((0, val)) +# else: +# key_list.append((1, None)) +# return key_list + +# def artifacts_parse_csv_summary(current_txt_path): +# out = {} +# for line in current_txt_path.read_text().splitlines(): +# kv = dict(map(str.strip, p.split("=", 1)) for p in line.split("|") if "=" in p) +# record = {} +# for k in METRIC_KEYS: +# try: +# record[k] = float(kv.pop(k)) +# except (ValueError, TypeError, KeyError): +# pass +# nbid = frozendict(kv) +# out[nbid] = record +# return out + +# def fmt_num(v, is_int: bool): +# return f"{int(v):,}" if is_int else f"{v:.2f}" + +# # ----- load artifacts (current results) ----- + +# current_csv_paths = list(artifacts_dir.rglob("speed_test*.txt")) +# if not current_csv_paths: +# check_body_path.touch() +# sys.exit(0) + +# current_bm = {} +# for csv_path in current_csv_paths: +# current_bm |= artifacts_parse_csv_summary(csv_path) +# bids_set = frozenset(current_bm.keys()) +# assert bids_set + +# # ----- W&B baselines ----- + +# if not "WANDB_API_KEY" in os.environ: +# print("WANDB_API_KEY is not set") +# sys.exit(0) +# ENTITY = os.environ["WANDB_ENTITY"] +# PROJECT = os.environ["WANDB_PROJECT"] + +# api = wandb.Api() +# runs_iter = api.runs(f"{ENTITY}/{PROJECT}", order="-created_at") + +# revs = set() +# records_by_rev = {} +# for i, run in enumerate(runs_iter): +# # Abort if still not complete after checking enough runs. +# # This would happen if a new benchmark has been added, and not enough past data is available yet. +# if len(revs) == MAX_FETCH_REVISIONS: +# break + +# # Early return if enough complete records have been collected +# records_is_complete = [bids_set.issubset(record.keys()) for record in records_by_rev.values()] +# if sum(records_is_complete) == MAX_VALID_REVISIONS: +# break + +# # Load config and summary, with support of legacy runs +# config, summary = run.config, run.summary +# if isinstance(config, str): +# config = {k: v["value"] for k, v in json.loads(run.config).items() if not k.startswith("_")} +# if isinstance(summary._json_dict, str): +# summary = json.loads(summary._json_dict) + +# # Extract revision commit and branch +# try: +# rev, branch = config["revision"].split("@", 1) +# revs.add(rev) +# except ValueError: +# # Ignore this run if the revision has been corrupted for some unknown reason +# continue +# # Ignore runs associated with a commit that is not part of the official repository +# if not branch.startswith('Genesis-Embodied-AI/'): +# continue + +# # Skip runs did not finish for some reason +# if run.state != "finished": +# continue + +# # Do not store new records if the desired number of revision is already reached +# if len(records_by_rev) == MAX_VALID_REVISIONS and rev not in records_by_rev: +# continue + +# # Extract benchmark ID and normalize it to make sure it does not depends on key ordering. +# # Note that the rigid body benchmark suite is the only one being supported for now. +# sid, bid = config["benchmark_id"].split("-", 1) +# if sid != "rigid_body": +# continue + +# # Make sure that stats are valid +# try: +# is_valid = True +# for k in METRIC_KEYS: +# v = summary[k] +# if not isinstance(v, (float, int)) or math.isnan(v): +# is_valid = False +# break +# if not is_valid: +# continue +# except KeyError: +# continue + +# # Store all the records into a dict +# nbid = normalize_benchmark_id(bid) +# records_by_rev.setdefault(rev, {})[nbid] = { +# metric: summary[metric] for metric in METRIC_KEYS +# } + +# # ----- build TWO tables ----- + +# # Parse benchmark IDs into key-value dicts while preserving order +# params_name = get_param_names(tuple((tuple(kv.keys())) for kv in current_bm.keys())) + +# reg_found, alert_found = False, False +# tables = {} +# rows_for_csv = {"runtime_fps": [], "compile_time": []} +# info = {} +# for metric, alias, sign in (("runtime_fps", "FPS", 1), ("compile_time", "compile", -1)): +# rows_md = [] + +# header_cells = ( +# "status", +# *params_name, +# f"current {alias}", +# f"baseline {alias} [last (mean ± std)] (*1)", +# f"Δ {alias} (*2)" +# ) +# header = "| " + " | ".join(header_cells) + " |" +# align = "|:------:|" + "|".join([":---" for _ in params_name]) + "|---:|---:|---:|" + +# for bid in sorted(current_bm.keys(), key=sort_key): +# value_cur = current_bm[bid][metric] +# is_int = isinstance(value_cur, int) or value_cur.is_integer() +# value_repr = fmt_num(value_cur, is_int) + +# params_repr = [bid.get(k, "-") for k in params_name] +# info = { +# **dict(zip(params_name, params_repr)), +# "current": value_cur, +# "baseline_last": None, +# "baseline_min": None, +# "baseline_max": None, +# } + +# values_prev = [ +# record[bid][metric] +# for record in records_by_rev.values() +# if bid in record +# ] +# if values_prev: +# value_last = values_prev[0] +# value_ref = statistics.fmean(values_prev) +# delta = (value_cur - value_last) / value_last * 100.0 + +# info["baseline_last"] = int(value_last) if is_int else float(value_last) + +# stats_repr = f"{fmt_num(value_last, is_int)}" +# delta_repr = f"{delta:+.1f}%" +# if len(values_prev) == MAX_VALID_REVISIONS: +# info["baseline_mean"] = int(value_ref) if is_int else float(value_ref) +# info["baseline_min"] = int(min(values_prev)) if is_int else float(min(values_prev)) +# info["baseline_max"] = int(max(values_prev)) if is_int else float(max(values_prev)) + +# value_std = statistics.stdev(values_prev) +# stats_repr += f" ({fmt_num(value_ref, is_int)} ± {fmt_num(value_std, is_int)})" +# if sign * delta < - METRICS_TOL[metric]: +# info["status"] = "regression" + +# delta_repr = f"**{delta_repr}**" +# picto = "🔴" +# reg_found = True +# elif sign * delta > METRICS_TOL[metric]: +# info["status"] = "alert" + +# delta_repr = f"**{delta_repr}**" +# picto = "⚠️" +# alert_found = True +# else: +# info["status"] = "ok" + +# picto = "✅" +# else: +# info["status"] = "n/a" + +# picto = "ℹ️" +# else: +# picto, stats_repr, delta_repr = "ℹ️", "---", "---" + +# rows_md.append("| " + " | ".join((picto, *params_repr, value_repr, stats_repr, delta_repr)) + " |") +# rows_for_csv[metric].append(info) + +# tables[metric] = [header, align] + rows_md + +# # ----- baseline commit list (MD) ----- +# blist = [f"- Commit {i}: {sha}" for i, sha in enumerate(records_by_rev.keys(), 1)] +# baseline_block = ["**Baselines considered:** " + f"**{len(records_by_rev)}** commits"] + blist + +# # ----- CHECK body (always) ----- + +# thr_repr = ", ".join( +# f"{alias} ± {METRICS_TOL[metric]:.0f}%" +# for metric, alias in (("runtime_fps", "runtime"), ("compile_time", "compile")) +# ) + +# check_body = "\n".join( +# [ +# *baseline_block, +# "", +# f"Thresholds: {thr_repr}", +# "", +# "### Runtime FPS", +# *tables["runtime_fps"], +# "", +# "### Compile Time", +# *tables["compile_time"], +# "", +# f"- (*1) last: last commit on main, mean/std: stats over revs {MAX_VALID_REVISIONS} commits if available.", +# f"- (*2) Δ: relative difference between PR and last commit on main, i.e. (PR - main) / main * 100%.", +# ] +# ) + +# # ----- COMMENT body (only if regressions) ----- + +# if reg_found: +# comment_body = "\n".join([":warning: **Benchmark Regression Detected**", *check_body]) +# else: +# comment_body = "" + +# # CSV file +# for metric in ("runtime_fps", "compile_time"): +# with csv_files[metric].open("w", newline="", encoding="utf-8") as f: +# w = csv.DictWriter(f, fieldnames=info.keys()) +# w.writeheader() +# for rec in rows_for_csv[metric]: +# w.writerow(rec) + +# # write md results +# check_body_path.write_text(check_body + "\n", encoding="utf-8") + +# # Exit with error code +# if reg_found: +# exit_code = int(os.environ["EXIT_CODE_REGRESSION"]) +# elif alert_found: +# exit_code = int(os.environ["EXIT_CODE_ALERT"]) +# else: +# exit_code = 0 +# sys.exit(exit_code) +# PY + +# # Enable command trace to ease debugging +# set -o xtrace + +# # Expose outputs to later steps +# if [ -f "$CHECK_BODY_PATH" ]; then +# { +# echo 'CHECK_OUTPUT<<__EOF__' +# cat "$CHECK_BODY_PATH" +# echo '__EOF__' +# } >> "$GITHUB_ENV" +# else +# echo "CHECK_OUTPUT=" >> "$GITHUB_ENV" +# fi + +# # Export status +# echo "HAS_REGRESSIONS=$([ "$EXIT_CODE" = "$EXIT_CODE_REGRESSION" ] && echo 1 || echo 0)" >> "$GITHUB_ENV" +# echo "HAS_ALERTS=$([ "$EXIT_CODE" = "$EXIT_CODE_ALERT" ] && echo 1 || echo 0)" >> "$GITHUB_ENV" + +# - name: Upload benchmark comparisons in CSV +# id: upload +# uses: actions/upload-artifact@v4 +# with: +# name: benchmark-comparison-tables +# path: | +# runtime_fps.csv +# compile_time.csv +# if-no-files-found: warn + +# - name: Publish PR check +# id: publish_check +# uses: actions/github-script@v8 +# env: +# CHECK_NAME: Benchmark Comparison +# CHECK_OUTPUT: ${{ env.CHECK_OUTPUT }} +# HAS_REGRESSIONS: ${{ env.HAS_REGRESSIONS }} +# HAS_ALERTS: ${{ env.HAS_ALERTS }} +# ARTIFACT_URL: ${{ steps.upload.outputs.artifact-url }} +# with: +# script: | +# const artifactUrl = process.env.ARTIFACT_URL || ''; +# let body = process.env.CHECK_OUTPUT || ''; +# if (body && artifactUrl) { +# body += `\n\n**Artifact:** [Download raw data](${artifactUrl})`; +# } + +# let summary; +# let conclusion = 'success'; +# if ((process.env.HAS_REGRESSIONS || '0') === '1') { +# summary = '🔴 Regressions detected. See tables below.'; +# conclusion = 'failure'; +# } else if ((process.env.HAS_ALERTS || '0') === '1') { +# summary = '⚠️ Large deviation detected. See tables below.'; +# } else { +# summary = '✅ No regressions detected. See tables below.'; +# } + +# const check = await github.rest.checks.create({ +# owner: context.repo.owner, +# repo: context.repo.repo, +# head_sha: context.payload.workflow_run.head_sha, +# name: process.env.CHECK_NAME, +# status: 'completed', +# conclusion: conclusion, +# output: { +# title: process.env.CHECK_NAME, +# summary, +# text: body || undefined +# } +# }); +# core.setOutput("check-url", check.data.html_url); + +# - name: Add PR comment +# if: ${{ env.HAS_REGRESSIONS == '1' || env.HAS_ALERTS == '1' }} +# uses: actions/github-script@v8 +# env: +# HAS_REGRESSIONS: ${{ env.HAS_REGRESSIONS }} +# REPORT_URL: ${{ steps.publish_check.outputs.check-url }} +# with: +# script: | +# // Getting PR number when using 'workflow_run' is tricky. For reference, see: +# // * https://docs.github.com/en/webhooks/webhook-events-and-payloads#workflow_run +# // * https://stackoverflow.com/a/75420270/4820605 +# const { data } = await github.rest.repos.listPullRequestsAssociatedWithCommit({ +# owner: context.payload.workflow_run.head_repository.owner.login, +# repo: context.payload.workflow_run.head_repository.name, +# commit_sha: context.payload.workflow_run.head_sha, +# }); +# if (!data || !data.length) { +# core.info('No associated PR; skipping comment.'); +# return; +# } + +# const title = (process.env.HAS_REGRESSIONS || '0') === '1' +# ? '🔴 Benchmark Regression Detected' : '⚠️ Abnormal Benchmark Result Detected'; +# const comment = `**${title} ➡️ [Report](${process.env.REPORT_URL})**`; + +# await github.rest.issues.createComment({ +# owner: context.repo.owner, +# repo: context.repo.repo, +# issue_number: data[0].number, +# body: comment +# }); diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml index 14df548019..23774505cc 100644 --- a/.github/workflows/examples.yml +++ b/.github/workflows/examples.yml @@ -1,81 +1,81 @@ -name: Examples (CPU) +# name: Examples (CPU) -on: - pull_request: - branches: - - main +# on: +# pull_request: +# branches: +# - main -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} - cancel-in-progress: true +# concurrency: +# group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} +# cancel-in-progress: true -jobs: - run-examples: - runs-on: ubuntu-24.04 - name: ubuntu-24.04-3.12-examples +# jobs: +# run-examples: +# runs-on: ubuntu-24.04 +# name: ubuntu-24.04-3.12-examples - env: - HF_HUB_DOWNLOAD_TIMEOUT: 60 - FORCE_COLOR: 1 - PY_COLORS: 1 - GS_CACHE_FILE_PATH: ".cache/genesis" - TI_OFFLINE_CACHE: "1" - TI_OFFLINE_CACHE_CLEANING_POLICY: "never" - TI_OFFLINE_CACHE_FILE_PATH: ".cache/taichi" - TI_ENABLE_CUDA: "0" - TI_ENABLE_METAL: "0" - TI_ENABLE_OPENGL: "0" - TI_ENABLE_VULKAN: "0" - TI_DEBUG: "0" +# env: +# HF_HUB_DOWNLOAD_TIMEOUT: 60 +# FORCE_COLOR: 1 +# PY_COLORS: 1 +# GS_CACHE_FILE_PATH: ".cache/genesis" +# TI_OFFLINE_CACHE: "1" +# TI_OFFLINE_CACHE_CLEANING_POLICY: "never" +# TI_OFFLINE_CACHE_FILE_PATH: ".cache/taichi" +# TI_ENABLE_CUDA: "0" +# TI_ENABLE_METAL: "0" +# TI_ENABLE_OPENGL: "0" +# TI_ENABLE_VULKAN: "0" +# TI_DEBUG: "0" - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: 1 +# steps: +# - name: Checkout code +# uses: actions/checkout@v4 +# with: +# fetch-depth: 1 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.12" +# - name: Set up Python +# uses: actions/setup-python@v5 +# with: +# python-version: "3.12" - - name: Install Mesa OpenGL driver for headless rendering - run: | - sudo apt-get update - sudo apt install -y \ - libglu1-mesa \ - libegl-mesa0 \ - libgl1-mesa-dev +# - name: Install Mesa OpenGL driver for headless rendering +# run: | +# sudo apt-get update +# sudo apt install -y \ +# libglu1-mesa \ +# libegl-mesa0 \ +# libgl1-mesa-dev - - name: Install Python deps - run: | - pip install --upgrade pip setuptools wheel - pip install torch --index-url https://download.pytorch.org/whl/cpu - pip install -e '.[dev]' pynput +# - name: Install Python deps +# run: | +# pip install --upgrade pip setuptools wheel +# pip install torch --index-url https://download.pytorch.org/whl/cpu +# pip install -e '.[dev]' pynput - - name: Get gstaichi version - id: gstaichi_version - shell: bash - run: | - GSTAICHI_VERSION=$(python -c "import importlib.metadata ; print(importlib.metadata.version('gstaichi'))") - echo "GSTAICHI_VERSION=${GSTAICHI_VERSION}" - echo "GSTAICHI_VERSION=${GSTAICHI_VERSION}" >> $GITHUB_OUTPUT +# - name: Get gstaichi version +# id: gstaichi_version +# shell: bash +# run: | +# GSTAICHI_VERSION=$(python -c "import importlib.metadata ; print(importlib.metadata.version('gstaichi'))") +# echo "GSTAICHI_VERSION=${GSTAICHI_VERSION}" +# echo "GSTAICHI_VERSION=${GSTAICHI_VERSION}" >> $GITHUB_OUTPUT - - name: Restore cache - uses: actions/cache/restore@v4 - with: - path: .cache - key: ubuntu-24.04-3.12-examples-${{ steps.gstaichi_version.outputs.GSTAICHI_VERSION }} - restore-keys: | - ubuntu-24.04-3.12-examples-${{ steps.gstaichi_version.outputs.GSTAICHI_VERSION }}- +# - name: Restore cache +# uses: actions/cache/restore@v4 +# with: +# path: .cache +# key: ubuntu-24.04-3.12-examples-${{ steps.gstaichi_version.outputs.GSTAICHI_VERSION }} +# restore-keys: | +# ubuntu-24.04-3.12-examples-${{ steps.gstaichi_version.outputs.GSTAICHI_VERSION }}- - - name: Run examples suite - run: | - pytest -v -m examples tests/test_examples.py +# - name: Run examples suite +# run: | +# pytest -v -m examples tests/test_examples.py - - name: Save cache - if: always() - uses: actions/cache/save@v4 - with: - path: .cache - key: ubuntu-24.04-3.12-examples-${{ steps.gstaichi_version.outputs.GSTAICHI_VERSION }}-${{ github.run_id }}-${{ github.run_attempt }} +# - name: Save cache +# if: always() +# uses: actions/cache/save@v4 +# with: +# path: .cache +# key: ubuntu-24.04-3.12-examples-${{ steps.gstaichi_version.outputs.GSTAICHI_VERSION }}-${{ github.run_id }}-${{ github.run_attempt }} diff --git a/.github/workflows/generic.yml b/.github/workflows/generic.yml index 19c8b4ddbe..cd2d71e523 100644 --- a/.github/workflows/generic.yml +++ b/.github/workflows/generic.yml @@ -1,209 +1,209 @@ -name: Generic - -on: - pull_request: - branches: - - main - release: - branches: - - main - types: [published] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} - cancel-in-progress: ${{ github.event_name == 'pull_request' }} - -jobs: - generic-cpu: - name: ${{ matrix.OS }}-${{ matrix.PYTHON_VERSION }}-${{ matrix.GS_BACKEND }}-${{ matrix.GS_ENABLE_NDARRAY == '0' && 'field' || 'ndarray' }} - - strategy: - fail-fast: false - matrix: - # See official Github documentation for details: https://shorturl.at/NJgsj - OS: ["ubuntu-24.04", "macos-15"] - PYTHON_VERSION: ["3.10", "3.11", "3.12", "3.13"] - GS_BACKEND: ["cpu"] - GS_ENABLE_NDARRAY: ["1"] - include: - # CPU backend - dynamic array (other OSes) - - OS: "ubuntu-22.04" - PYTHON_VERSION: "3.12" - GS_BACKEND: "cpu" - GS_ENABLE_NDARRAY: "1" - - OS: "ubuntu-24.04-arm" - PYTHON_VERSION: "3.12" - GS_BACKEND: "cpu" - GS_ENABLE_NDARRAY: "1" - - OS: "windows-2025" - PYTHON_VERSION: "3.12" - GS_BACKEND: "cpu" - GS_ENABLE_NDARRAY: "1" - # CPU backend - field array - - OS: "ubuntu-24.04" - PYTHON_VERSION: "3.12" - GS_BACKEND: "cpu" - GS_ENABLE_NDARRAY: "0" - - OS: "ubuntu-24.04-arm" - PYTHON_VERSION: "3.12" - GS_BACKEND: "cpu" - GS_ENABLE_NDARRAY: "0" - - OS: "windows-2025" - PYTHON_VERSION: "3.12" - GS_BACKEND: "cpu" - GS_ENABLE_NDARRAY: "0" - - OS: "macos-15" - PYTHON_VERSION: "3.12" - GS_BACKEND: "cpu" - GS_ENABLE_NDARRAY: "0" - # GPU backend - field array - - OS: "macos-15" - PYTHON_VERSION: "3.12" - GS_BACKEND: "gpu" - GS_ENABLE_NDARRAY: "0" - - env: - HF_HUB_DOWNLOAD_TIMEOUT: "60" - FORCE_COLOR: "1" - PY_COLORS: "1" - GS_CACHE_FILE_PATH: ".cache/genesis" - GS_ENABLE_NDARRAY: ${{ matrix.GS_ENABLE_NDARRAY }} - GS_TORCH_FORCE_CPU_DEVICE: ${{ startsWith(matrix.OS, 'macos-') && '1' || '0' }} - TI_OFFLINE_CACHE: "1" - TI_OFFLINE_CACHE_CLEANING_POLICY: "never" - TI_OFFLINE_CACHE_FILE_PATH: ".cache/taichi" - TI_ENABLE_CUDA: ${{ matrix.GS_BACKEND == 'gpu' && '1' || '0' }} - TI_ENABLE_METAL: ${{ matrix.GS_BACKEND == 'gpu' && '1' || '0' }} - TI_ENABLE_OPENGL: "0" - TI_ENABLE_VULKAN: "0" - TI_DEBUG: "0" - - runs-on: ${{ matrix.OS }} - if: github.event_name != 'release' - - steps: - - name: Print system information (Windows) - if: startsWith(matrix.OS, 'windows-') - shell: pwsh - run: | - $cpu = Get-CimInstance -ClassName Win32_Processor - $ram = Get-CimInstance -ClassName Win32_ComputerSystem - [PSCustomObject]@{ - CPU_Name = $cpu.Name - Physical_Cores = ($cpu | Measure-Object -Property NumberOfCores -Sum).Sum - Logical_Processors = ($cpu | Measure-Object -Property NumberOfLogicalProcessors -Sum).Sum - Total_RAM_GB = [math]::Round($ram.TotalPhysicalMemory / 1GB, 2) - } - - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: 1 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.PYTHON_VERSION }} - - - name: Install system dependencies (Windows) - if: startsWith(matrix.OS, 'windows-') - shell: bash - run: | - curl -L -o mesa.7z https://github.com/pal1000/mesa-dist-win/releases/download/25.1.5/mesa3d-25.1.5-release-msvc.7z - 7z x mesa.7z -omesa - mv -v mesa/x64/* /C/Windows/System32/ - - - name: Install Mesa 25 OpenGL driver (Linux) - if: startsWith(matrix.OS, 'ubuntu-') - run: | - sudo add-apt-repository -y ppa:kisak/kisak-mesa - sudo apt-get update - sudo apt install -y \ - libglu1-mesa \ - libegl-mesa0 \ - libgl1-mesa-dev - - name: Install python dependencies - run: | - pip install --upgrade pip setuptools pkg-info wheel - pip3 install torch --index-url https://download.pytorch.org/whl/cpu - - - name: Black Format Check - if: ${{ matrix.OS == 'ubuntu-24.04' && matrix.PYTHON_VERSION == '3.12' && matrix.GS_BACKEND == 'cpu' && matrix.GS_ENABLE_NDARRAY == '1' }} - run: | - pip install black - black --line-length 120 --check . - - - name: Install Genesis - shell: bash - run: | - PYTHON_DEPS="dev" - if [[ "${{ matrix.OS }}" != 'ubuntu-24.04-arm' ]] ; then - PYTHON_DEPS="${PYTHON_DEPS},usd" - fi - pip install -e ".[${PYTHON_DEPS}]" - - - name: Get artifact prefix name - id: artifact_prefix - shell: bash - run: | - OS_FAMILY=$(python -c "import platform; print(platform.system())") - MACHINE_ARCH=$(python -c "import platform; print(platform.machine())") - GSTAICHI_VERSION=$(python -c "import importlib.metadata ; print(importlib.metadata.version('gstaichi'))") - echo "ARTIFACT_PREFIX=${OS_FAMILY}-${MACHINE_ARCH}-${GSTAICHI_VERSION}" >> $GITHUB_OUTPUT - - - name: Restore Taichi Kernel Cache - if: ${{ always() && steps.artifact_prefix.outputs.ARTIFACT_PREFIX != '' }} - uses: actions/cache/restore@v4 - with: - path: .cache - key: ${{ steps.artifact_prefix.outputs.ARTIFACT_PREFIX }} - restore-keys: | - ${{ steps.artifact_prefix.outputs.ARTIFACT_PREFIX }}- - - - name: Run unit tests - run: | - pytest -v --logical --dev --backend ${{ matrix.GS_BACKEND }} -m required --forked ./tests - - - name: Save Updated Taichi Kernel Cache - if: >- - ${{ always() && - (matrix.OS == 'ubuntu-24.04' || matrix.OS == 'ubuntu-24.04-arm' || matrix.OS == 'macos-15' || matrix.OS == 'windows-2025') && - matrix.PYTHON_VERSION == '3.12' && - matrix.GS_BACKEND == 'cpu' && - matrix.GS_ENABLE_NDARRAY == '1' && - steps.artifact_prefix.outputs.ARTIFACT_PREFIX != '' }} - uses: actions/cache/save@v4 - with: - path: .cache - # Note that it is necessary to create a new archive systematically for now: - # See: https://github.com/actions/cache/issues/1594 - key: ${{ steps.artifact_prefix.outputs.ARTIFACT_PREFIX }}-${{ github.run_id }}-${{ github.run_attempt }} - - publish-pypi: - name: Publish on PyPI - runs-on: ubuntu-24.04 - permissions: - id-token: write - environment: - name: advance - - if: github.event_name == 'release' - - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: 1 - - - name: Build wheels - run: | - pip wheel --no-deps . -w wheelhouse - - - name: Publish the wheels on PyPI - uses: pypa/gh-action-pypi-publish@v1.12.4 - with: - packages-dir: wheelhouse - verify-metadata: true - attestations: true - print-hash: true - skip-existing: true +# name: Generic + +# on: +# pull_request: +# branches: +# - main +# release: +# branches: +# - main +# types: [published] + +# concurrency: +# group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} +# cancel-in-progress: ${{ github.event_name == 'pull_request' }} + +# jobs: +# generic-cpu: +# name: ${{ matrix.OS }}-${{ matrix.PYTHON_VERSION }}-${{ matrix.GS_BACKEND }}-${{ matrix.GS_ENABLE_NDARRAY == '0' && 'field' || 'ndarray' }} + +# strategy: +# fail-fast: false +# matrix: +# # See official Github documentation for details: https://shorturl.at/NJgsj +# OS: ["ubuntu-24.04", "macos-15"] +# PYTHON_VERSION: ["3.10", "3.11", "3.12", "3.13"] +# GS_BACKEND: ["cpu"] +# GS_ENABLE_NDARRAY: ["1"] +# include: +# # CPU backend - dynamic array (other OSes) +# - OS: "ubuntu-22.04" +# PYTHON_VERSION: "3.12" +# GS_BACKEND: "cpu" +# GS_ENABLE_NDARRAY: "1" +# - OS: "ubuntu-24.04-arm" +# PYTHON_VERSION: "3.12" +# GS_BACKEND: "cpu" +# GS_ENABLE_NDARRAY: "1" +# - OS: "windows-2025" +# PYTHON_VERSION: "3.12" +# GS_BACKEND: "cpu" +# GS_ENABLE_NDARRAY: "1" +# # CPU backend - field array +# - OS: "ubuntu-24.04" +# PYTHON_VERSION: "3.12" +# GS_BACKEND: "cpu" +# GS_ENABLE_NDARRAY: "0" +# - OS: "ubuntu-24.04-arm" +# PYTHON_VERSION: "3.12" +# GS_BACKEND: "cpu" +# GS_ENABLE_NDARRAY: "0" +# - OS: "windows-2025" +# PYTHON_VERSION: "3.12" +# GS_BACKEND: "cpu" +# GS_ENABLE_NDARRAY: "0" +# - OS: "macos-15" +# PYTHON_VERSION: "3.12" +# GS_BACKEND: "cpu" +# GS_ENABLE_NDARRAY: "0" +# # GPU backend - field array +# - OS: "macos-15" +# PYTHON_VERSION: "3.12" +# GS_BACKEND: "gpu" +# GS_ENABLE_NDARRAY: "0" + +# env: +# HF_HUB_DOWNLOAD_TIMEOUT: "60" +# FORCE_COLOR: "1" +# PY_COLORS: "1" +# GS_CACHE_FILE_PATH: ".cache/genesis" +# GS_ENABLE_NDARRAY: ${{ matrix.GS_ENABLE_NDARRAY }} +# GS_TORCH_FORCE_CPU_DEVICE: ${{ startsWith(matrix.OS, 'macos-') && '1' || '0' }} +# TI_OFFLINE_CACHE: "1" +# TI_OFFLINE_CACHE_CLEANING_POLICY: "never" +# TI_OFFLINE_CACHE_FILE_PATH: ".cache/taichi" +# TI_ENABLE_CUDA: ${{ matrix.GS_BACKEND == 'gpu' && '1' || '0' }} +# TI_ENABLE_METAL: ${{ matrix.GS_BACKEND == 'gpu' && '1' || '0' }} +# TI_ENABLE_OPENGL: "0" +# TI_ENABLE_VULKAN: "0" +# TI_DEBUG: "0" + +# runs-on: ${{ matrix.OS }} +# if: github.event_name != 'release' + +# steps: +# - name: Print system information (Windows) +# if: startsWith(matrix.OS, 'windows-') +# shell: pwsh +# run: | +# $cpu = Get-CimInstance -ClassName Win32_Processor +# $ram = Get-CimInstance -ClassName Win32_ComputerSystem +# [PSCustomObject]@{ +# CPU_Name = $cpu.Name +# Physical_Cores = ($cpu | Measure-Object -Property NumberOfCores -Sum).Sum +# Logical_Processors = ($cpu | Measure-Object -Property NumberOfLogicalProcessors -Sum).Sum +# Total_RAM_GB = [math]::Round($ram.TotalPhysicalMemory / 1GB, 2) +# } + +# - name: Checkout code +# uses: actions/checkout@v4 +# with: +# fetch-depth: 1 + +# - name: Set up Python +# uses: actions/setup-python@v5 +# with: +# python-version: ${{ matrix.PYTHON_VERSION }} + +# - name: Install system dependencies (Windows) +# if: startsWith(matrix.OS, 'windows-') +# shell: bash +# run: | +# curl -L -o mesa.7z https://github.com/pal1000/mesa-dist-win/releases/download/25.1.5/mesa3d-25.1.5-release-msvc.7z +# 7z x mesa.7z -omesa +# mv -v mesa/x64/* /C/Windows/System32/ + +# - name: Install Mesa 25 OpenGL driver (Linux) +# if: startsWith(matrix.OS, 'ubuntu-') +# run: | +# sudo add-apt-repository -y ppa:kisak/kisak-mesa +# sudo apt-get update +# sudo apt install -y \ +# libglu1-mesa \ +# libegl-mesa0 \ +# libgl1-mesa-dev +# - name: Install python dependencies +# run: | +# pip install --upgrade pip setuptools pkg-info wheel +# pip3 install torch --index-url https://download.pytorch.org/whl/cpu + +# - name: Black Format Check +# if: ${{ matrix.OS == 'ubuntu-24.04' && matrix.PYTHON_VERSION == '3.12' && matrix.GS_BACKEND == 'cpu' && matrix.GS_ENABLE_NDARRAY == '1' }} +# run: | +# pip install black +# black --line-length 120 --check . + +# - name: Install Genesis +# shell: bash +# run: | +# PYTHON_DEPS="dev" +# if [[ "${{ matrix.OS }}" != 'ubuntu-24.04-arm' ]] ; then +# PYTHON_DEPS="${PYTHON_DEPS},usd" +# fi +# pip install -e ".[${PYTHON_DEPS}]" + +# - name: Get artifact prefix name +# id: artifact_prefix +# shell: bash +# run: | +# OS_FAMILY=$(python -c "import platform; print(platform.system())") +# MACHINE_ARCH=$(python -c "import platform; print(platform.machine())") +# GSTAICHI_VERSION=$(python -c "import importlib.metadata ; print(importlib.metadata.version('gstaichi'))") +# echo "ARTIFACT_PREFIX=${OS_FAMILY}-${MACHINE_ARCH}-${GSTAICHI_VERSION}" >> $GITHUB_OUTPUT + +# - name: Restore Taichi Kernel Cache +# if: ${{ always() && steps.artifact_prefix.outputs.ARTIFACT_PREFIX != '' }} +# uses: actions/cache/restore@v4 +# with: +# path: .cache +# key: ${{ steps.artifact_prefix.outputs.ARTIFACT_PREFIX }} +# restore-keys: | +# ${{ steps.artifact_prefix.outputs.ARTIFACT_PREFIX }}- + +# - name: Run unit tests +# run: | +# pytest -v --logical --dev --backend ${{ matrix.GS_BACKEND }} -m required --forked ./tests + +# - name: Save Updated Taichi Kernel Cache +# if: >- +# ${{ always() && +# (matrix.OS == 'ubuntu-24.04' || matrix.OS == 'ubuntu-24.04-arm' || matrix.OS == 'macos-15' || matrix.OS == 'windows-2025') && +# matrix.PYTHON_VERSION == '3.12' && +# matrix.GS_BACKEND == 'cpu' && +# matrix.GS_ENABLE_NDARRAY == '1' && +# steps.artifact_prefix.outputs.ARTIFACT_PREFIX != '' }} +# uses: actions/cache/save@v4 +# with: +# path: .cache +# # Note that it is necessary to create a new archive systematically for now: +# # See: https://github.com/actions/cache/issues/1594 +# key: ${{ steps.artifact_prefix.outputs.ARTIFACT_PREFIX }}-${{ github.run_id }}-${{ github.run_attempt }} + +# publish-pypi: +# name: Publish on PyPI +# runs-on: ubuntu-24.04 +# permissions: +# id-token: write +# environment: +# name: advance + +# if: github.event_name == 'release' + +# steps: +# - name: Checkout code +# uses: actions/checkout@v4 +# with: +# fetch-depth: 1 + +# - name: Build wheels +# run: | +# pip wheel --no-deps . -w wheelhouse + +# - name: Publish the wheels on PyPI +# uses: pypa/gh-action-pypi-publish@v1.12.4 +# with: +# packages-dir: wheelhouse +# verify-metadata: true +# attestations: true +# print-hash: true +# skip-existing: true diff --git a/.github/workflows/production.yml b/.github/workflows/production.yml index b594a083b9..7a193b0d78 100644 --- a/.github/workflows/production.yml +++ b/.github/workflows/production.yml @@ -28,69 +28,69 @@ env: OMNI_KIT_ALLOW_ROOT: "1" jobs: - unit-tests: - name: production-unit_tests-${{ matrix.GS_ENABLE_NDARRAY == '0' && 'field' || 'ndarray' }} - - runs-on: [self-hosted, coreweave, genesis-world] - - strategy: - fail-fast: true - max-parallel: 1 - matrix: - GS_ENABLE_NDARRAY: ["0", "1"] - - env: - GS_ENABLE_NDARRAY: ${{ matrix.GS_ENABLE_NDARRAY }} - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: Run unit tests - if: github.event_name == 'pull_request' - run: | - SLURM_JOB_NAME="$(uuidgen)_$(date +%Y%m%d_%H%M%S)" - echo "SLURM_JOB_NAME=${SLURM_JOB_NAME}" >> $GITHUB_ENV - - mkdir -p "${HOME}/.cache" "${HOME}/.venv" - - # TODO: USD baking does not currently support Python 3.11 since - # NVIDIA does not currently release `omniverse-kit==107.3` on PyPI. - # See: https://github.com/Genesis-Embodied-AI/Genesis/pull/1300 - srun \ - --container-image="/mnt/data/images/genesis-v${GENESIS_IMAGE_VER}.sqsh" \ - --container-mounts=\ - "${HOME}/.venv":/root/.venv,\ - "${HOME}/.cache":/root/.cache,\ - "${{ github.workspace }}":/root/workspace \ - --no-container-mount-home --container-workdir=/root/workspace \ - --export=NVIDIA_DRIVER_CAPABILITIES=all,BASH_ENV=/root/.bashrc,HF_TOKEN,GS_ENABLE_NDARRAY=${GS_ENABLE_NDARRAY} \ - --partition=hpc-mid --nodes=1 --gpus=8 --exclusive --time="${TIMEOUT_MINUTES}" \ - --job-name=${SLURM_JOB_NAME} \ - bash -e -s << 'EOF' - if test -n "$(find /root/.venv -maxdepth 0 -empty)"; then - python3 -m venv --system-site-packages /root/.venv - source /root/.venv/bin/activate - pip install --no-input --upgrade pip pkg-info wheel - pip install --no-input --ignore-installed --upgrade blinker pyparsing setuptools - fi - source /root/.venv/bin/activate - - pip install --no-input --extra-index-url https://pypi.nvidia.com/ omniverse-kit - pip install --no-input ".[dev,render,usd]" - - pytest -v -ra --backend gpu --dev --forked ./tests - EOF - - name: Kill srun job systematically - if: always() - run: | - if [ -n "${SLURM_JOB_NAME}" ] ; then - scancel --user=${USER} --name="${SLURM_JOB_NAME}" - fi + # unit-tests: + # name: production-unit_tests-${{ matrix.GS_ENABLE_NDARRAY == '0' && 'field' || 'ndarray' }} + + # runs-on: [self-hosted, coreweave, genesis-world] + + # strategy: + # fail-fast: true + # max-parallel: 1 + # matrix: + # GS_ENABLE_NDARRAY: ["0", "1"] + + # env: + # GS_ENABLE_NDARRAY: ${{ matrix.GS_ENABLE_NDARRAY }} + + # steps: + # - name: Checkout code + # uses: actions/checkout@v4 + # - name: Run unit tests + # if: github.event_name == 'pull_request' + # run: | + # SLURM_JOB_NAME="$(uuidgen)_$(date +%Y%m%d_%H%M%S)" + # echo "SLURM_JOB_NAME=${SLURM_JOB_NAME}" >> $GITHUB_ENV + + # mkdir -p "${HOME}/.cache" "${HOME}/.venv" + + # # TODO: USD baking does not currently support Python 3.11 since + # # NVIDIA does not currently release `omniverse-kit==107.3` on PyPI. + # # See: https://github.com/Genesis-Embodied-AI/Genesis/pull/1300 + # srun \ + # --container-image="/mnt/data/images/genesis-v${GENESIS_IMAGE_VER}.sqsh" \ + # --container-mounts=\ + # "${HOME}/.venv":/root/.venv,\ + # "${HOME}/.cache":/root/.cache,\ + # "${{ github.workspace }}":/root/workspace \ + # --no-container-mount-home --container-workdir=/root/workspace \ + # --export=NVIDIA_DRIVER_CAPABILITIES=all,BASH_ENV=/root/.bashrc,HF_TOKEN,GS_ENABLE_NDARRAY=${GS_ENABLE_NDARRAY} \ + # --partition=hpc-mid --nodes=1 --gpus=8 --exclusive --time="${TIMEOUT_MINUTES}" \ + # --job-name=${SLURM_JOB_NAME} \ + # bash -e -s << 'EOF' + # if test -n "$(find /root/.venv -maxdepth 0 -empty)"; then + # python3 -m venv --system-site-packages /root/.venv + # source /root/.venv/bin/activate + # pip install --no-input --upgrade pip pkg-info wheel + # pip install --no-input --ignore-installed --upgrade blinker pyparsing setuptools + # fi + # source /root/.venv/bin/activate + + # pip install --no-input --extra-index-url https://pypi.nvidia.com/ omniverse-kit + # pip install --no-input ".[dev,render,usd]" + + # pytest -v -ra --backend gpu --dev --forked ./tests + # EOF + # - name: Kill srun job systematically + # if: always() + # run: | + # if [ -n "${SLURM_JOB_NAME}" ] ; then + # scancel --user=${USER} --name="${SLURM_JOB_NAME}" + # fi benchmarks: name: production-benchmarks-${{ matrix.GS_ENABLE_NDARRAY == '0' && 'field' || 'ndarray' }} - needs: unit-tests + # needs: unit-tests runs-on: [self-hosted, coreweave, genesis-world] strategy: From ef646f0524100e47bd4204239c98a807aee00179 Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 5 Jan 2026 13:47:59 -0500 Subject: [PATCH 03/15] mkdir logs --- .github/workflows/production.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/production.yml b/.github/workflows/production.yml index 7a193b0d78..119b180592 100644 --- a/.github/workflows/production.yml +++ b/.github/workflows/production.yml @@ -148,6 +148,10 @@ jobs: - name: Run benchmarks run: | + if [[ -d logs ]]; then { + rm -rf logs + } fi + mkdir logs srun ${SRUN_COMMON} bash -e -s <<'EOF' source /venv/bin/activate pytest --mem-monitoring-filepath mem_results_${SLURM_JOB_NAME}.txt --print -x -m "benchmarks" ./tests From 37cc553b89425f35a04b00b377864685bdb56a5e Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 5 Jan 2026 14:50:43 -0500 Subject: [PATCH 04/15] lgs -lh logs --- .github/workflows/production.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/production.yml b/.github/workflows/production.yml index 119b180592..b8a54661fc 100644 --- a/.github/workflows/production.yml +++ b/.github/workflows/production.yml @@ -157,6 +157,8 @@ jobs: pytest --mem-monitoring-filepath mem_results_${SLURM_JOB_NAME}.txt --print -x -m "benchmarks" ./tests cat speed_test*.txt > "/mnt/data/artifacts/speed_test_${SLURM_JOB_NAME}.txt" EOF + ls -lh logs/ + cat logs/* - name: Kill srun job systematically if: always() From 5281ac435bcf3083de99e14fcc79df70d232fd28 Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 5 Jan 2026 16:02:39 -0500 Subject: [PATCH 05/15] add tryfirst=True --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 7d529ebdd6..a722ea51bf 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -64,7 +64,7 @@ def pytest_make_parametrize_id(config, val, argname): return f"{val}" -@pytest.hookimpl +@pytest.hookimpl(tryfirst=True) def pytest_cmdline_main(config: pytest.Config) -> None: import random id = random.randint(0, 1000) From 18393d0d49d461e10af9405206f947308b9be43a Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 5 Jan 2026 16:11:56 -0500 Subject: [PATCH 06/15] setproctitle --- tests/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/conftest.py b/tests/conftest.py index a722ea51bf..50d677d56e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,6 +10,7 @@ from io import BytesIO from pathlib import Path +import setproctitle import psutil import pyglet import pytest From 935e0488fbdd2b53a3eef53218d228e98c1fa506 Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 5 Jan 2026 16:15:20 -0500 Subject: [PATCH 07/15] add setproctilteo pyproject.toml --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 6e3c821047..8ce0113fb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,7 @@ dev = [ "pytest-print", # - 16.0 is causing pytest-xdist to crash in case of failure or skipped tests "pytest-rerunfailures!=16.0", + "setproctitle", # allows renaming the test processes on the cluster "syrupy", "huggingface_hub[hf_xet]", "wandb", From d93c9147c812162fe5a30b0f2bb5168f34000b67 Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 5 Jan 2026 16:34:59 -0500 Subject: [PATCH 08/15] move earlier in funciotn --- tests/conftest.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 50d677d56e..23dbedf13a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -78,6 +78,12 @@ def pytest_cmdline_main(config: pytest.Config) -> None: except NameError as e: raise pytest.UsageError(f"Unknown marker in CLI expression: '{e.name}'") + mem_monitoring = config.getoption("--mem-monitoring-filepath") + with open(mem_monitoring, "a") as f: + f.write("foo\n") + with open(f"logs/pytest_cmdline_main_{id}.txt", "a") as f: + f.write(f"mem monitoring filepath {mem_monitoring}\n") + # Make sure that benchmarks are running on GPU and the number of workers if valid expr = Expression.compile(config.option.markexpr) is_benchmarks = expr.evaluate(MarkMatcher.from_markers((pytest.mark.benchmarks,))) @@ -92,12 +98,6 @@ def pytest_cmdline_main(config: pytest.Config) -> None: if not sys.platform.startswith("linux"): config.option.forked = False - mem_monitoring = config.getoption("--mem-monitoring-filepath") - with open(mem_monitoring, "a") as f: - f.write("foo\n") - with open(f"logs/pytest_cmdline_main_{id}.txt", "a") as f: - f.write(f"mem monitoring filepath {mem_monitoring}\n") - # Force disabling distributed framework if interactive viewer is enabled show_viewer = config.getoption("--vis") if show_viewer: From e1a2aecd544ca6c36764779a632754c1bfb6ea34 Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 5 Jan 2026 16:48:13 -0500 Subject: [PATCH 09/15] try starting process --- .github/workflows/production.yml | 5 ++ tests/conftest.py | 20 ++++-- tests/monitor_test_mem.py | 110 +++++++++++++++++++++++++++++++ 3 files changed, 130 insertions(+), 5 deletions(-) create mode 100644 tests/monitor_test_mem.py diff --git a/.github/workflows/production.yml b/.github/workflows/production.yml index b8a54661fc..e2826fd675 100644 --- a/.github/workflows/production.yml +++ b/.github/workflows/production.yml @@ -176,3 +176,8 @@ jobs: with: name: speed-test-${{ matrix.GS_ENABLE_NDARRAY }} path: "/mnt/data/artifacts/speed_test_${{ env.SLURM_JOB_NAME }}.txt" + - name: Upload benchmark mem stats as artifact + uses: actions/upload-artifact@v4 + with: + name: mem-test-${{ matrix.GS_ENABLE_NDARRAY }} + path: "/mnt/data/artifacts/mem_test_${{ env.SLURM_JOB_NAME }}.csv" \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 23dbedf13a..651579b5bf 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -78,11 +78,21 @@ def pytest_cmdline_main(config: pytest.Config) -> None: except NameError as e: raise pytest.UsageError(f"Unknown marker in CLI expression: '{e.name}'") - mem_monitoring = config.getoption("--mem-monitoring-filepath") - with open(mem_monitoring, "a") as f: - f.write("foo\n") - with open(f"logs/pytest_cmdline_main_{id}.txt", "a") as f: - f.write(f"mem monitoring filepath {mem_monitoring}\n") + # Only launch memory monitor from the main process, not from xdist workers + mem_filepath = config.getoption("--mem-monitoring-filepath") + if mem_filepath and not os.environ.get("PYTEST_XDIST_WORKER"): + supported, reason = is_mem_monitoring_supported() + if not supported: + raise pytest.UsageError(f"--mem-monitoring-filepath is not supported on this platform: {reason}") + subprocess.Popen( + [ + sys.executable, + "tests/monitor_test_mem.py", + "--die-with-parent", + "--out-csv-filepath", + mem_filepath, + ] + ) # Make sure that benchmarks are running on GPU and the number of workers if valid expr = Expression.compile(config.option.markexpr) diff --git a/tests/monitor_test_mem.py b/tests/monitor_test_mem.py new file mode 100644 index 0000000000..f5ddda07ab --- /dev/null +++ b/tests/monitor_test_mem.py @@ -0,0 +1,110 @@ +from collections import defaultdict +import csv +import subprocess +import time +import os +import argparse +import psutil + + +def grep(contents: list[str], target): + return [l for l in contents if target in l] + + +def get_cuda_usage() -> dict[int, int]: + output = subprocess.check_output(["nvidia-smi"]).decode("utf-8") + section = 0 + subsec = 0 + res = {} + for line in output.split("\n"): + if line.startswith("|============"): + section += 1 + subsec = 0 + continue + if line.startswith("+-------"): + subsec += 1 + continue + if section == 2 and subsec == 0: + if "No running processes" in line: + continue + split_line = line.split() + pid = int(split_line[4]) + mem = int(split_line[-2].split("MiB")[0]) + res[pid] = mem + return res + + +def get_test_name_by_pid() -> dict[int, str]: + test_by_psid = {} + for proc in psutil.process_iter(["pid", "cmdline"]): + try: + cmdline = proc.info["cmdline"] + if cmdline is None: + continue + # Join cmdline to get full command string + cmd_str = " ".join(cmdline) + if "pytest: tests" in cmd_str: + # Find the test name after "::" + if "::" in cmd_str: + test_name = cmd_str.partition("::")[2] + if test_name.strip() != "": + test_by_psid[proc.info["pid"]] = test_name + except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess): + # Process may have terminated or we don't have permission + pass + return test_by_psid + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("--out-csv-filepath", type=str, required=True) + parser.add_argument("--die-with-parent", action="store_true") + args = parser.parse_args() + + max_mem_by_test = defaultdict(int) + + f = open(args.out_csv_filepath, "w") + dict_writer = csv.DictWriter(f, fieldnames=["test", "max_mem_mb"]) + dict_writer.writeheader() + old_mem_by_test = {} + num_results_written = 0 + disp = False + while not args.die_with_parent or os.getppid() != 1: + mem_by_pid = get_cuda_usage() + test_by_psid = get_test_name_by_pid() + num_tests = len(test_by_psid) + _mem_by_test = {} + for psid, test in test_by_psid.items(): + if psid not in mem_by_pid: + continue + if test.strip() == "": + continue + _mem = mem_by_pid[psid] + _mem_by_test[test] = _mem + for test, _mem in _mem_by_test.items(): + max_mem_by_test[test] = max(_mem, max_mem_by_test[test]) + for _test, _mem in old_mem_by_test.items(): + if _test not in _mem_by_test: + dict_writer.writerow({"test": _test, "max_mem_mb": max_mem_by_test[_test]}) + f.flush() + num_results_written += 1 + spinny = "x" if disp else "+" + print( + num_tests, + "tests running, of which", + len(_mem_by_test), + "on gpu. Num results written: ", + num_results_written, + "[updating]", + " ", + end="\r", + flush=True, + ) + old_mem_by_test = _mem_by_test + disp = not disp + time.sleep(2.0) + print("Test monitor exiting") + + +if __name__ == "__main__": + main() From dbd26a7bc0c1ffc8e00376dd1ce8928167dfecef Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 5 Jan 2026 16:52:22 -0500 Subject: [PATCH 10/15] add is_mem_monitoring_supported --- tests/conftest.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 651579b5bf..d3b54ad633 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -57,6 +57,15 @@ IMG_NUM_ERR_THR = 0.001 +def is_mem_monitoring_supported(): + try: + assert sys.platform.startswith("linux") + subprocess.check_output(["nvidia-smi"], stderr=subprocess.STDOUT, timeout=2) + return True, None + except Exception as exc: # platform or nvidia-smi unavailable + return False, exc + + def pytest_make_parametrize_id(config, val, argname): if isinstance(val, Enum): return val.name From 5d546f34dcf4157440e225923c80e63c86c71056 Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 5 Jan 2026 17:39:11 -0500 Subject: [PATCH 11/15] set process name, and check if mem monitoring supported --- tests/conftest.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index d3b54ad633..fd59979256 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,6 +5,7 @@ import os import re import subprocess +from argparse import SUPPRESS import sys from enum import Enum from io import BytesIO @@ -358,7 +359,12 @@ def pytest_collection_modifyitems(config, items): items[:] = [item for bucket in sorted(buckets, key=len) for item in bucket] +@pytest.hookimpl(tryfirst=True) def pytest_runtest_setup(item): + # Include test name in process title + test_name = item.nodeid.replace(" ", "") + setproctitle.setproctitle(f"pytest: {test_name}") + # Match CUDA device with EGL device. # Note that this must be done here instead of 'pytest_cmdline_main', otherwise it will segfault when using # 'pytest-forked', because EGL instances are not allowed to cross thread boundaries. @@ -382,7 +388,13 @@ def pytest_addoption(parser): ) parser.addoption("--vis", action="store_true", default=False, help="Enable interactive viewer.") parser.addoption("--dev", action="store_true", default=False, help="Enable genesis debug mode.") - parser.addoption("--mem-monitoring-filepath", type=str, help="Run memory monitoring, and output results to this filepath.") + supported, _reason = is_mem_monitoring_supported() + help_text = ( + "Run memory monitoring, and store results to mem_monitoring_filepath. CUDA on linux ONLY." + if supported + else SUPPRESS + ) + parser.addoption("--mem-monitoring-filepath", type=str, help=help_text) @pytest.fixture(scope="session") From 444cd62b455ce00e712564f83f4c726615727365 Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 5 Jan 2026 17:54:05 -0500 Subject: [PATCH 12/15] path to /mnt/data/artifacts --- .github/workflows/production.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/production.yml b/.github/workflows/production.yml index e2826fd675..eb548e2722 100644 --- a/.github/workflows/production.yml +++ b/.github/workflows/production.yml @@ -154,7 +154,8 @@ jobs: mkdir logs srun ${SRUN_COMMON} bash -e -s <<'EOF' source /venv/bin/activate - pytest --mem-monitoring-filepath mem_results_${SLURM_JOB_NAME}.txt --print -x -m "benchmarks" ./tests + pytest --mem-monitoring-filepath "/mnt/data/artifacts/mem_test_${SLURM_JOB_NAME}.csv" \ + --print -x -m "benchmarks" ./tests cat speed_test*.txt > "/mnt/data/artifacts/speed_test_${SLURM_JOB_NAME}.txt" EOF ls -lh logs/ From 37e8e7b5cf5797d2e7e6d0ceeb108084564029a9 Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 5 Jan 2026 18:07:04 -0500 Subject: [PATCH 13/15] remove test code --- .github/workflows/production.yml | 8 -------- tests/conftest.py | 4 ---- 2 files changed, 12 deletions(-) diff --git a/.github/workflows/production.yml b/.github/workflows/production.yml index eb548e2722..43284caddf 100644 --- a/.github/workflows/production.yml +++ b/.github/workflows/production.yml @@ -148,18 +148,12 @@ jobs: - name: Run benchmarks run: | - if [[ -d logs ]]; then { - rm -rf logs - } fi - mkdir logs srun ${SRUN_COMMON} bash -e -s <<'EOF' source /venv/bin/activate pytest --mem-monitoring-filepath "/mnt/data/artifacts/mem_test_${SLURM_JOB_NAME}.csv" \ --print -x -m "benchmarks" ./tests cat speed_test*.txt > "/mnt/data/artifacts/speed_test_${SLURM_JOB_NAME}.txt" EOF - ls -lh logs/ - cat logs/* - name: Kill srun job systematically if: always() @@ -169,8 +163,6 @@ jobs: fi - name: Display benchmark stats run: | - ls -lh mem_results_*.txt - cat mem_results_*.txt cat "/mnt/data/artifacts/speed_test_${SLURM_JOB_NAME}.txt" - name: Upload benchmark stats as artifact uses: actions/upload-artifact@v4 diff --git a/tests/conftest.py b/tests/conftest.py index fd59979256..0c5c937765 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -77,10 +77,6 @@ def pytest_make_parametrize_id(config, val, argname): @pytest.hookimpl(tryfirst=True) def pytest_cmdline_main(config: pytest.Config) -> None: - import random - id = random.randint(0, 1000) - with open(f"logs/pytest_cmdline_main_{id}.txt", "w") as f: - f.write("pytest_cmdline_main\n") # Make sure that no unsupported markers have been specified in CLI declared_markers = set(name for spec in config.getini("markers") if (name := spec.split(":")[0]) != "forked") try: From 401f4c02766daa05f31a5f54bdb48a6e31d1d9c4 Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 5 Jan 2026 18:13:28 -0500 Subject: [PATCH 14/15] remove more test code --- tests/conftest.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 0c5c937765..77e8303cb6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -374,10 +374,6 @@ def pytest_runtest_setup(item): def pytest_addoption(parser): - import random - id = random.randint(0, 1000) - with open(f"logs/pytest_addoption_{id}.txt", "w") as f: - f.write("pytest_addoption\n") parser.addoption("--backend", action="store", default=None, help="Default simulation backend.") parser.addoption( "--logical", action="store_true", default=False, help="Consider logical cores in default number of workers." From 7c2c88243350953abee96bd14089f16e8b0b3ff3 Mon Sep 17 00:00:00 2001 From: Alexis Duburcq Date: Tue, 6 Jan 2026 09:37:08 +0100 Subject: [PATCH 15/15] Uncomment workflows. --- .github/workflows/alarm.yml | 970 +++++++++++++++---------------- .github/workflows/examples.yml | 138 ++--- .github/workflows/generic.yml | 418 ++++++------- .github/workflows/production.yml | 118 ++-- 4 files changed, 822 insertions(+), 822 deletions(-) diff --git a/.github/workflows/alarm.yml b/.github/workflows/alarm.yml index 7f159323d3..571a82f4b0 100644 --- a/.github/workflows/alarm.yml +++ b/.github/workflows/alarm.yml @@ -1,485 +1,485 @@ -# name: Benchmark Comparison & Alarm Regression - -# on: -# workflow_run: -# workflows: ["Production"] -# types: [completed] - -# permissions: -# contents: read -# actions: read -# pull-requests: write -# checks: write - -# jobs: -# comment-if-regressed: -# runs-on: ubuntu-latest -# if: > -# github.event.workflow_run.event == 'pull_request' && -# contains(fromJson('["success","neutral"]'), github.event.workflow_run.conclusion) - -# steps: -# - name: Setup Python -# uses: actions/setup-python@v5 -# with: -# python-version: '3.10' - -# - name: Install deps -# run: | -# python -m pip install --quiet --upgrade wandb frozendict - -# - name: Download artifacts from triggering run -# id: dl -# uses: actions/download-artifact@v4 -# with: -# pattern: speed-test-* -# run-id: ${{ github.event.workflow_run.id }} -# github-token: ${{ secrets.GITHUB_TOKEN }} -# path: ./artifacts - -# - name: Show downloaded files -# run: | -# echo "Downloaded into ${{ steps.dl.outputs.download-path }}" -# ls -la ${{ steps.dl.outputs.download-path }} || true -# (command -v tree >/dev/null && tree -a ${{ steps.dl.outputs.download-path }}) || true - -# - name: Check regressions + build outputs -# id: analyze -# env: -# # Note that secrets are not passed to workflows that are triggered by a pull request from a fork -# # --- W&B --- -# WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }} -# WANDB_ENTITY: genesis-ai-company -# WANDB_PROJECT: genesis-benchmarks -# WANDB_SILENT: "true" - -# # --- Parameters --- -# MAX_VALID_REVISIONS: 5 -# MAX_FETCH_REVISIONS: 40 -# RUNTIME_REGRESSION_TOLERANCE_PCT: 8 -# COMPILE_REGRESSION_TOLERANCE_PCT: 16 - -# # Input/Output paths -# ARTIFACTS_DIR: ${{ steps.dl.outputs.download-path }} -# CHECK_BODY_PATH: check_output.md -# CSV_RUNTIME_PATH: runtime_fps.csv -# CSV_COMPILE_PATH: compile_time.csv -# EXIT_CODE_REGRESSION: 42 -# EXIT_CODE_ALERT: 43 -# run: | -# { python - << 'PY'; EXIT_CODE=$?; } || true - -# import os, sys, json, re, math, statistics -# import wandb -# from frozendict import frozendict -# from pathlib import Path -# import csv - -# # ----- arguments ----- - -# MAX_VALID_REVISIONS = int(os.environ["MAX_VALID_REVISIONS"]) -# MAX_FETCH_REVISIONS = int(os.environ["MAX_FETCH_REVISIONS"]) - -# METRICS_TOL = { -# "runtime_fps": float(os.environ["RUNTIME_REGRESSION_TOLERANCE_PCT"]), -# "compile_time": float(os.environ["COMPILE_REGRESSION_TOLERANCE_PCT"]), -# } - -# artifacts_dir = Path(os.environ["ARTIFACTS_DIR"]).expanduser().resolve() -# check_body_path = Path(os.environ["CHECK_BODY_PATH"]).expanduser() - -# csv_files = { -# "runtime_fps": Path(os.environ["CSV_RUNTIME_PATH"]).expanduser().resolve(), -# "compile_time": Path(os.environ["CSV_COMPILE_PATH"]).expanduser().resolve(), -# } - -# # ---------- helpers ---------- - -# METRIC_KEYS = ("compile_time", "runtime_fps", "realtime_factor") - -# def parse_benchmark_id(bid: str) -> dict: -# kv = {} -# if bid: -# for token in bid.split("-"): -# token = token.strip() -# if token and "=" in token: -# k, v = token.split("=", 1) -# kv[k.strip()] = v.strip() -# return kv - -# def normalize_benchmark_id(bid: str) -> frozendict[str, str]: -# return frozendict(parse_benchmark_id(bid)) - -# def get_param_names(bids: tuple[frozendict]) -> tuple[str, ...]: -# """ -# Merge a list of tuples into a single tuple of keys that: -# - Preserves the relative order of keys within each tuple -# - Gives precedence to later tuples when conflicts arise -# """ -# merged = list(bids[-1]) -# merged_set = set(merged) -# for tup in bids[:-1]: -# for key in tup: -# if key not in merged_set: -# merged.append(key) -# merged_set.add(key) -# return tuple(merged) - -# def sort_key(d): -# key_list = [] -# for col in params_name: -# if col in d: -# val = d[col] -# key_list.append((0, val)) -# else: -# key_list.append((1, None)) -# return key_list - -# def artifacts_parse_csv_summary(current_txt_path): -# out = {} -# for line in current_txt_path.read_text().splitlines(): -# kv = dict(map(str.strip, p.split("=", 1)) for p in line.split("|") if "=" in p) -# record = {} -# for k in METRIC_KEYS: -# try: -# record[k] = float(kv.pop(k)) -# except (ValueError, TypeError, KeyError): -# pass -# nbid = frozendict(kv) -# out[nbid] = record -# return out - -# def fmt_num(v, is_int: bool): -# return f"{int(v):,}" if is_int else f"{v:.2f}" - -# # ----- load artifacts (current results) ----- - -# current_csv_paths = list(artifacts_dir.rglob("speed_test*.txt")) -# if not current_csv_paths: -# check_body_path.touch() -# sys.exit(0) - -# current_bm = {} -# for csv_path in current_csv_paths: -# current_bm |= artifacts_parse_csv_summary(csv_path) -# bids_set = frozenset(current_bm.keys()) -# assert bids_set - -# # ----- W&B baselines ----- - -# if not "WANDB_API_KEY" in os.environ: -# print("WANDB_API_KEY is not set") -# sys.exit(0) -# ENTITY = os.environ["WANDB_ENTITY"] -# PROJECT = os.environ["WANDB_PROJECT"] - -# api = wandb.Api() -# runs_iter = api.runs(f"{ENTITY}/{PROJECT}", order="-created_at") - -# revs = set() -# records_by_rev = {} -# for i, run in enumerate(runs_iter): -# # Abort if still not complete after checking enough runs. -# # This would happen if a new benchmark has been added, and not enough past data is available yet. -# if len(revs) == MAX_FETCH_REVISIONS: -# break - -# # Early return if enough complete records have been collected -# records_is_complete = [bids_set.issubset(record.keys()) for record in records_by_rev.values()] -# if sum(records_is_complete) == MAX_VALID_REVISIONS: -# break - -# # Load config and summary, with support of legacy runs -# config, summary = run.config, run.summary -# if isinstance(config, str): -# config = {k: v["value"] for k, v in json.loads(run.config).items() if not k.startswith("_")} -# if isinstance(summary._json_dict, str): -# summary = json.loads(summary._json_dict) - -# # Extract revision commit and branch -# try: -# rev, branch = config["revision"].split("@", 1) -# revs.add(rev) -# except ValueError: -# # Ignore this run if the revision has been corrupted for some unknown reason -# continue -# # Ignore runs associated with a commit that is not part of the official repository -# if not branch.startswith('Genesis-Embodied-AI/'): -# continue - -# # Skip runs did not finish for some reason -# if run.state != "finished": -# continue - -# # Do not store new records if the desired number of revision is already reached -# if len(records_by_rev) == MAX_VALID_REVISIONS and rev not in records_by_rev: -# continue - -# # Extract benchmark ID and normalize it to make sure it does not depends on key ordering. -# # Note that the rigid body benchmark suite is the only one being supported for now. -# sid, bid = config["benchmark_id"].split("-", 1) -# if sid != "rigid_body": -# continue - -# # Make sure that stats are valid -# try: -# is_valid = True -# for k in METRIC_KEYS: -# v = summary[k] -# if not isinstance(v, (float, int)) or math.isnan(v): -# is_valid = False -# break -# if not is_valid: -# continue -# except KeyError: -# continue - -# # Store all the records into a dict -# nbid = normalize_benchmark_id(bid) -# records_by_rev.setdefault(rev, {})[nbid] = { -# metric: summary[metric] for metric in METRIC_KEYS -# } - -# # ----- build TWO tables ----- - -# # Parse benchmark IDs into key-value dicts while preserving order -# params_name = get_param_names(tuple((tuple(kv.keys())) for kv in current_bm.keys())) - -# reg_found, alert_found = False, False -# tables = {} -# rows_for_csv = {"runtime_fps": [], "compile_time": []} -# info = {} -# for metric, alias, sign in (("runtime_fps", "FPS", 1), ("compile_time", "compile", -1)): -# rows_md = [] - -# header_cells = ( -# "status", -# *params_name, -# f"current {alias}", -# f"baseline {alias} [last (mean ± std)] (*1)", -# f"Δ {alias} (*2)" -# ) -# header = "| " + " | ".join(header_cells) + " |" -# align = "|:------:|" + "|".join([":---" for _ in params_name]) + "|---:|---:|---:|" - -# for bid in sorted(current_bm.keys(), key=sort_key): -# value_cur = current_bm[bid][metric] -# is_int = isinstance(value_cur, int) or value_cur.is_integer() -# value_repr = fmt_num(value_cur, is_int) - -# params_repr = [bid.get(k, "-") for k in params_name] -# info = { -# **dict(zip(params_name, params_repr)), -# "current": value_cur, -# "baseline_last": None, -# "baseline_min": None, -# "baseline_max": None, -# } - -# values_prev = [ -# record[bid][metric] -# for record in records_by_rev.values() -# if bid in record -# ] -# if values_prev: -# value_last = values_prev[0] -# value_ref = statistics.fmean(values_prev) -# delta = (value_cur - value_last) / value_last * 100.0 - -# info["baseline_last"] = int(value_last) if is_int else float(value_last) - -# stats_repr = f"{fmt_num(value_last, is_int)}" -# delta_repr = f"{delta:+.1f}%" -# if len(values_prev) == MAX_VALID_REVISIONS: -# info["baseline_mean"] = int(value_ref) if is_int else float(value_ref) -# info["baseline_min"] = int(min(values_prev)) if is_int else float(min(values_prev)) -# info["baseline_max"] = int(max(values_prev)) if is_int else float(max(values_prev)) - -# value_std = statistics.stdev(values_prev) -# stats_repr += f" ({fmt_num(value_ref, is_int)} ± {fmt_num(value_std, is_int)})" -# if sign * delta < - METRICS_TOL[metric]: -# info["status"] = "regression" - -# delta_repr = f"**{delta_repr}**" -# picto = "🔴" -# reg_found = True -# elif sign * delta > METRICS_TOL[metric]: -# info["status"] = "alert" - -# delta_repr = f"**{delta_repr}**" -# picto = "⚠️" -# alert_found = True -# else: -# info["status"] = "ok" - -# picto = "✅" -# else: -# info["status"] = "n/a" - -# picto = "ℹ️" -# else: -# picto, stats_repr, delta_repr = "ℹ️", "---", "---" - -# rows_md.append("| " + " | ".join((picto, *params_repr, value_repr, stats_repr, delta_repr)) + " |") -# rows_for_csv[metric].append(info) - -# tables[metric] = [header, align] + rows_md - -# # ----- baseline commit list (MD) ----- -# blist = [f"- Commit {i}: {sha}" for i, sha in enumerate(records_by_rev.keys(), 1)] -# baseline_block = ["**Baselines considered:** " + f"**{len(records_by_rev)}** commits"] + blist - -# # ----- CHECK body (always) ----- - -# thr_repr = ", ".join( -# f"{alias} ± {METRICS_TOL[metric]:.0f}%" -# for metric, alias in (("runtime_fps", "runtime"), ("compile_time", "compile")) -# ) - -# check_body = "\n".join( -# [ -# *baseline_block, -# "", -# f"Thresholds: {thr_repr}", -# "", -# "### Runtime FPS", -# *tables["runtime_fps"], -# "", -# "### Compile Time", -# *tables["compile_time"], -# "", -# f"- (*1) last: last commit on main, mean/std: stats over revs {MAX_VALID_REVISIONS} commits if available.", -# f"- (*2) Δ: relative difference between PR and last commit on main, i.e. (PR - main) / main * 100%.", -# ] -# ) - -# # ----- COMMENT body (only if regressions) ----- - -# if reg_found: -# comment_body = "\n".join([":warning: **Benchmark Regression Detected**", *check_body]) -# else: -# comment_body = "" - -# # CSV file -# for metric in ("runtime_fps", "compile_time"): -# with csv_files[metric].open("w", newline="", encoding="utf-8") as f: -# w = csv.DictWriter(f, fieldnames=info.keys()) -# w.writeheader() -# for rec in rows_for_csv[metric]: -# w.writerow(rec) - -# # write md results -# check_body_path.write_text(check_body + "\n", encoding="utf-8") - -# # Exit with error code -# if reg_found: -# exit_code = int(os.environ["EXIT_CODE_REGRESSION"]) -# elif alert_found: -# exit_code = int(os.environ["EXIT_CODE_ALERT"]) -# else: -# exit_code = 0 -# sys.exit(exit_code) -# PY - -# # Enable command trace to ease debugging -# set -o xtrace - -# # Expose outputs to later steps -# if [ -f "$CHECK_BODY_PATH" ]; then -# { -# echo 'CHECK_OUTPUT<<__EOF__' -# cat "$CHECK_BODY_PATH" -# echo '__EOF__' -# } >> "$GITHUB_ENV" -# else -# echo "CHECK_OUTPUT=" >> "$GITHUB_ENV" -# fi - -# # Export status -# echo "HAS_REGRESSIONS=$([ "$EXIT_CODE" = "$EXIT_CODE_REGRESSION" ] && echo 1 || echo 0)" >> "$GITHUB_ENV" -# echo "HAS_ALERTS=$([ "$EXIT_CODE" = "$EXIT_CODE_ALERT" ] && echo 1 || echo 0)" >> "$GITHUB_ENV" - -# - name: Upload benchmark comparisons in CSV -# id: upload -# uses: actions/upload-artifact@v4 -# with: -# name: benchmark-comparison-tables -# path: | -# runtime_fps.csv -# compile_time.csv -# if-no-files-found: warn - -# - name: Publish PR check -# id: publish_check -# uses: actions/github-script@v8 -# env: -# CHECK_NAME: Benchmark Comparison -# CHECK_OUTPUT: ${{ env.CHECK_OUTPUT }} -# HAS_REGRESSIONS: ${{ env.HAS_REGRESSIONS }} -# HAS_ALERTS: ${{ env.HAS_ALERTS }} -# ARTIFACT_URL: ${{ steps.upload.outputs.artifact-url }} -# with: -# script: | -# const artifactUrl = process.env.ARTIFACT_URL || ''; -# let body = process.env.CHECK_OUTPUT || ''; -# if (body && artifactUrl) { -# body += `\n\n**Artifact:** [Download raw data](${artifactUrl})`; -# } - -# let summary; -# let conclusion = 'success'; -# if ((process.env.HAS_REGRESSIONS || '0') === '1') { -# summary = '🔴 Regressions detected. See tables below.'; -# conclusion = 'failure'; -# } else if ((process.env.HAS_ALERTS || '0') === '1') { -# summary = '⚠️ Large deviation detected. See tables below.'; -# } else { -# summary = '✅ No regressions detected. See tables below.'; -# } - -# const check = await github.rest.checks.create({ -# owner: context.repo.owner, -# repo: context.repo.repo, -# head_sha: context.payload.workflow_run.head_sha, -# name: process.env.CHECK_NAME, -# status: 'completed', -# conclusion: conclusion, -# output: { -# title: process.env.CHECK_NAME, -# summary, -# text: body || undefined -# } -# }); -# core.setOutput("check-url", check.data.html_url); - -# - name: Add PR comment -# if: ${{ env.HAS_REGRESSIONS == '1' || env.HAS_ALERTS == '1' }} -# uses: actions/github-script@v8 -# env: -# HAS_REGRESSIONS: ${{ env.HAS_REGRESSIONS }} -# REPORT_URL: ${{ steps.publish_check.outputs.check-url }} -# with: -# script: | -# // Getting PR number when using 'workflow_run' is tricky. For reference, see: -# // * https://docs.github.com/en/webhooks/webhook-events-and-payloads#workflow_run -# // * https://stackoverflow.com/a/75420270/4820605 -# const { data } = await github.rest.repos.listPullRequestsAssociatedWithCommit({ -# owner: context.payload.workflow_run.head_repository.owner.login, -# repo: context.payload.workflow_run.head_repository.name, -# commit_sha: context.payload.workflow_run.head_sha, -# }); -# if (!data || !data.length) { -# core.info('No associated PR; skipping comment.'); -# return; -# } - -# const title = (process.env.HAS_REGRESSIONS || '0') === '1' -# ? '🔴 Benchmark Regression Detected' : '⚠️ Abnormal Benchmark Result Detected'; -# const comment = `**${title} ➡️ [Report](${process.env.REPORT_URL})**`; - -# await github.rest.issues.createComment({ -# owner: context.repo.owner, -# repo: context.repo.repo, -# issue_number: data[0].number, -# body: comment -# }); +name: Benchmark Comparison & Alarm Regression + +on: + workflow_run: + workflows: ["Production"] + types: [completed] + +permissions: + contents: read + actions: read + pull-requests: write + checks: write + +jobs: + comment-if-regressed: + runs-on: ubuntu-latest + if: > + github.event.workflow_run.event == 'pull_request' && + contains(fromJson('["success","neutral"]'), github.event.workflow_run.conclusion) + + steps: + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Install deps + run: | + python -m pip install --quiet --upgrade wandb frozendict + + - name: Download artifacts from triggering run + id: dl + uses: actions/download-artifact@v4 + with: + pattern: speed-test-* + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + path: ./artifacts + + - name: Show downloaded files + run: | + echo "Downloaded into ${{ steps.dl.outputs.download-path }}" + ls -la ${{ steps.dl.outputs.download-path }} || true + (command -v tree >/dev/null && tree -a ${{ steps.dl.outputs.download-path }}) || true + + - name: Check regressions + build outputs + id: analyze + env: + # Note that secrets are not passed to workflows that are triggered by a pull request from a fork + # --- W&B --- + WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }} + WANDB_ENTITY: genesis-ai-company + WANDB_PROJECT: genesis-benchmarks + WANDB_SILENT: "true" + + # --- Parameters --- + MAX_VALID_REVISIONS: 5 + MAX_FETCH_REVISIONS: 40 + RUNTIME_REGRESSION_TOLERANCE_PCT: 8 + COMPILE_REGRESSION_TOLERANCE_PCT: 16 + + # Input/Output paths + ARTIFACTS_DIR: ${{ steps.dl.outputs.download-path }} + CHECK_BODY_PATH: check_output.md + CSV_RUNTIME_PATH: runtime_fps.csv + CSV_COMPILE_PATH: compile_time.csv + EXIT_CODE_REGRESSION: 42 + EXIT_CODE_ALERT: 43 + run: | + { python - << 'PY'; EXIT_CODE=$?; } || true + + import os, sys, json, re, math, statistics + import wandb + from frozendict import frozendict + from pathlib import Path + import csv + + # ----- arguments ----- + + MAX_VALID_REVISIONS = int(os.environ["MAX_VALID_REVISIONS"]) + MAX_FETCH_REVISIONS = int(os.environ["MAX_FETCH_REVISIONS"]) + + METRICS_TOL = { + "runtime_fps": float(os.environ["RUNTIME_REGRESSION_TOLERANCE_PCT"]), + "compile_time": float(os.environ["COMPILE_REGRESSION_TOLERANCE_PCT"]), + } + + artifacts_dir = Path(os.environ["ARTIFACTS_DIR"]).expanduser().resolve() + check_body_path = Path(os.environ["CHECK_BODY_PATH"]).expanduser() + + csv_files = { + "runtime_fps": Path(os.environ["CSV_RUNTIME_PATH"]).expanduser().resolve(), + "compile_time": Path(os.environ["CSV_COMPILE_PATH"]).expanduser().resolve(), + } + + # ---------- helpers ---------- + + METRIC_KEYS = ("compile_time", "runtime_fps", "realtime_factor") + + def parse_benchmark_id(bid: str) -> dict: + kv = {} + if bid: + for token in bid.split("-"): + token = token.strip() + if token and "=" in token: + k, v = token.split("=", 1) + kv[k.strip()] = v.strip() + return kv + + def normalize_benchmark_id(bid: str) -> frozendict[str, str]: + return frozendict(parse_benchmark_id(bid)) + + def get_param_names(bids: tuple[frozendict]) -> tuple[str, ...]: + """ + Merge a list of tuples into a single tuple of keys that: + - Preserves the relative order of keys within each tuple + - Gives precedence to later tuples when conflicts arise + """ + merged = list(bids[-1]) + merged_set = set(merged) + for tup in bids[:-1]: + for key in tup: + if key not in merged_set: + merged.append(key) + merged_set.add(key) + return tuple(merged) + + def sort_key(d): + key_list = [] + for col in params_name: + if col in d: + val = d[col] + key_list.append((0, val)) + else: + key_list.append((1, None)) + return key_list + + def artifacts_parse_csv_summary(current_txt_path): + out = {} + for line in current_txt_path.read_text().splitlines(): + kv = dict(map(str.strip, p.split("=", 1)) for p in line.split("|") if "=" in p) + record = {} + for k in METRIC_KEYS: + try: + record[k] = float(kv.pop(k)) + except (ValueError, TypeError, KeyError): + pass + nbid = frozendict(kv) + out[nbid] = record + return out + + def fmt_num(v, is_int: bool): + return f"{int(v):,}" if is_int else f"{v:.2f}" + + # ----- load artifacts (current results) ----- + + current_csv_paths = list(artifacts_dir.rglob("speed_test*.txt")) + if not current_csv_paths: + check_body_path.touch() + sys.exit(0) + + current_bm = {} + for csv_path in current_csv_paths: + current_bm |= artifacts_parse_csv_summary(csv_path) + bids_set = frozenset(current_bm.keys()) + assert bids_set + + # ----- W&B baselines ----- + + if not "WANDB_API_KEY" in os.environ: + print("WANDB_API_KEY is not set") + sys.exit(0) + ENTITY = os.environ["WANDB_ENTITY"] + PROJECT = os.environ["WANDB_PROJECT"] + + api = wandb.Api() + runs_iter = api.runs(f"{ENTITY}/{PROJECT}", order="-created_at") + + revs = set() + records_by_rev = {} + for i, run in enumerate(runs_iter): + # Abort if still not complete after checking enough runs. + # This would happen if a new benchmark has been added, and not enough past data is available yet. + if len(revs) == MAX_FETCH_REVISIONS: + break + + # Early return if enough complete records have been collected + records_is_complete = [bids_set.issubset(record.keys()) for record in records_by_rev.values()] + if sum(records_is_complete) == MAX_VALID_REVISIONS: + break + + # Load config and summary, with support of legacy runs + config, summary = run.config, run.summary + if isinstance(config, str): + config = {k: v["value"] for k, v in json.loads(run.config).items() if not k.startswith("_")} + if isinstance(summary._json_dict, str): + summary = json.loads(summary._json_dict) + + # Extract revision commit and branch + try: + rev, branch = config["revision"].split("@", 1) + revs.add(rev) + except ValueError: + # Ignore this run if the revision has been corrupted for some unknown reason + continue + # Ignore runs associated with a commit that is not part of the official repository + if not branch.startswith('Genesis-Embodied-AI/'): + continue + + # Skip runs did not finish for some reason + if run.state != "finished": + continue + + # Do not store new records if the desired number of revision is already reached + if len(records_by_rev) == MAX_VALID_REVISIONS and rev not in records_by_rev: + continue + + # Extract benchmark ID and normalize it to make sure it does not depends on key ordering. + # Note that the rigid body benchmark suite is the only one being supported for now. + sid, bid = config["benchmark_id"].split("-", 1) + if sid != "rigid_body": + continue + + # Make sure that stats are valid + try: + is_valid = True + for k in METRIC_KEYS: + v = summary[k] + if not isinstance(v, (float, int)) or math.isnan(v): + is_valid = False + break + if not is_valid: + continue + except KeyError: + continue + + # Store all the records into a dict + nbid = normalize_benchmark_id(bid) + records_by_rev.setdefault(rev, {})[nbid] = { + metric: summary[metric] for metric in METRIC_KEYS + } + + # ----- build TWO tables ----- + + # Parse benchmark IDs into key-value dicts while preserving order + params_name = get_param_names(tuple((tuple(kv.keys())) for kv in current_bm.keys())) + + reg_found, alert_found = False, False + tables = {} + rows_for_csv = {"runtime_fps": [], "compile_time": []} + info = {} + for metric, alias, sign in (("runtime_fps", "FPS", 1), ("compile_time", "compile", -1)): + rows_md = [] + + header_cells = ( + "status", + *params_name, + f"current {alias}", + f"baseline {alias} [last (mean ± std)] (*1)", + f"Δ {alias} (*2)" + ) + header = "| " + " | ".join(header_cells) + " |" + align = "|:------:|" + "|".join([":---" for _ in params_name]) + "|---:|---:|---:|" + + for bid in sorted(current_bm.keys(), key=sort_key): + value_cur = current_bm[bid][metric] + is_int = isinstance(value_cur, int) or value_cur.is_integer() + value_repr = fmt_num(value_cur, is_int) + + params_repr = [bid.get(k, "-") for k in params_name] + info = { + **dict(zip(params_name, params_repr)), + "current": value_cur, + "baseline_last": None, + "baseline_min": None, + "baseline_max": None, + } + + values_prev = [ + record[bid][metric] + for record in records_by_rev.values() + if bid in record + ] + if values_prev: + value_last = values_prev[0] + value_ref = statistics.fmean(values_prev) + delta = (value_cur - value_last) / value_last * 100.0 + + info["baseline_last"] = int(value_last) if is_int else float(value_last) + + stats_repr = f"{fmt_num(value_last, is_int)}" + delta_repr = f"{delta:+.1f}%" + if len(values_prev) == MAX_VALID_REVISIONS: + info["baseline_mean"] = int(value_ref) if is_int else float(value_ref) + info["baseline_min"] = int(min(values_prev)) if is_int else float(min(values_prev)) + info["baseline_max"] = int(max(values_prev)) if is_int else float(max(values_prev)) + + value_std = statistics.stdev(values_prev) + stats_repr += f" ({fmt_num(value_ref, is_int)} ± {fmt_num(value_std, is_int)})" + if sign * delta < - METRICS_TOL[metric]: + info["status"] = "regression" + + delta_repr = f"**{delta_repr}**" + picto = "🔴" + reg_found = True + elif sign * delta > METRICS_TOL[metric]: + info["status"] = "alert" + + delta_repr = f"**{delta_repr}**" + picto = "⚠️" + alert_found = True + else: + info["status"] = "ok" + + picto = "✅" + else: + info["status"] = "n/a" + + picto = "ℹ️" + else: + picto, stats_repr, delta_repr = "ℹ️", "---", "---" + + rows_md.append("| " + " | ".join((picto, *params_repr, value_repr, stats_repr, delta_repr)) + " |") + rows_for_csv[metric].append(info) + + tables[metric] = [header, align] + rows_md + + # ----- baseline commit list (MD) ----- + blist = [f"- Commit {i}: {sha}" for i, sha in enumerate(records_by_rev.keys(), 1)] + baseline_block = ["**Baselines considered:** " + f"**{len(records_by_rev)}** commits"] + blist + + # ----- CHECK body (always) ----- + + thr_repr = ", ".join( + f"{alias} ± {METRICS_TOL[metric]:.0f}%" + for metric, alias in (("runtime_fps", "runtime"), ("compile_time", "compile")) + ) + + check_body = "\n".join( + [ + *baseline_block, + "", + f"Thresholds: {thr_repr}", + "", + "### Runtime FPS", + *tables["runtime_fps"], + "", + "### Compile Time", + *tables["compile_time"], + "", + f"- (*1) last: last commit on main, mean/std: stats over revs {MAX_VALID_REVISIONS} commits if available.", + f"- (*2) Δ: relative difference between PR and last commit on main, i.e. (PR - main) / main * 100%.", + ] + ) + + # ----- COMMENT body (only if regressions) ----- + + if reg_found: + comment_body = "\n".join([":warning: **Benchmark Regression Detected**", *check_body]) + else: + comment_body = "" + + # CSV file + for metric in ("runtime_fps", "compile_time"): + with csv_files[metric].open("w", newline="", encoding="utf-8") as f: + w = csv.DictWriter(f, fieldnames=info.keys()) + w.writeheader() + for rec in rows_for_csv[metric]: + w.writerow(rec) + + # write md results + check_body_path.write_text(check_body + "\n", encoding="utf-8") + + # Exit with error code + if reg_found: + exit_code = int(os.environ["EXIT_CODE_REGRESSION"]) + elif alert_found: + exit_code = int(os.environ["EXIT_CODE_ALERT"]) + else: + exit_code = 0 + sys.exit(exit_code) + PY + + # Enable command trace to ease debugging + set -o xtrace + + # Expose outputs to later steps + if [ -f "$CHECK_BODY_PATH" ]; then + { + echo 'CHECK_OUTPUT<<__EOF__' + cat "$CHECK_BODY_PATH" + echo '__EOF__' + } >> "$GITHUB_ENV" + else + echo "CHECK_OUTPUT=" >> "$GITHUB_ENV" + fi + + # Export status + echo "HAS_REGRESSIONS=$([ "$EXIT_CODE" = "$EXIT_CODE_REGRESSION" ] && echo 1 || echo 0)" >> "$GITHUB_ENV" + echo "HAS_ALERTS=$([ "$EXIT_CODE" = "$EXIT_CODE_ALERT" ] && echo 1 || echo 0)" >> "$GITHUB_ENV" + + - name: Upload benchmark comparisons in CSV + id: upload + uses: actions/upload-artifact@v4 + with: + name: benchmark-comparison-tables + path: | + runtime_fps.csv + compile_time.csv + if-no-files-found: warn + + - name: Publish PR check + id: publish_check + uses: actions/github-script@v8 + env: + CHECK_NAME: Benchmark Comparison + CHECK_OUTPUT: ${{ env.CHECK_OUTPUT }} + HAS_REGRESSIONS: ${{ env.HAS_REGRESSIONS }} + HAS_ALERTS: ${{ env.HAS_ALERTS }} + ARTIFACT_URL: ${{ steps.upload.outputs.artifact-url }} + with: + script: | + const artifactUrl = process.env.ARTIFACT_URL || ''; + let body = process.env.CHECK_OUTPUT || ''; + if (body && artifactUrl) { + body += `\n\n**Artifact:** [Download raw data](${artifactUrl})`; + } + + let summary; + let conclusion = 'success'; + if ((process.env.HAS_REGRESSIONS || '0') === '1') { + summary = '🔴 Regressions detected. See tables below.'; + conclusion = 'failure'; + } else if ((process.env.HAS_ALERTS || '0') === '1') { + summary = '⚠️ Large deviation detected. See tables below.'; + } else { + summary = '✅ No regressions detected. See tables below.'; + } + + const check = await github.rest.checks.create({ + owner: context.repo.owner, + repo: context.repo.repo, + head_sha: context.payload.workflow_run.head_sha, + name: process.env.CHECK_NAME, + status: 'completed', + conclusion: conclusion, + output: { + title: process.env.CHECK_NAME, + summary, + text: body || undefined + } + }); + core.setOutput("check-url", check.data.html_url); + + - name: Add PR comment + if: ${{ env.HAS_REGRESSIONS == '1' || env.HAS_ALERTS == '1' }} + uses: actions/github-script@v8 + env: + HAS_REGRESSIONS: ${{ env.HAS_REGRESSIONS }} + REPORT_URL: ${{ steps.publish_check.outputs.check-url }} + with: + script: | + // Getting PR number when using 'workflow_run' is tricky. For reference, see: + // * https://docs.github.com/en/webhooks/webhook-events-and-payloads#workflow_run + // * https://stackoverflow.com/a/75420270/4820605 + const { data } = await github.rest.repos.listPullRequestsAssociatedWithCommit({ + owner: context.payload.workflow_run.head_repository.owner.login, + repo: context.payload.workflow_run.head_repository.name, + commit_sha: context.payload.workflow_run.head_sha, + }); + if (!data || !data.length) { + core.info('No associated PR; skipping comment.'); + return; + } + + const title = (process.env.HAS_REGRESSIONS || '0') === '1' + ? '🔴 Benchmark Regression Detected' : '⚠️ Abnormal Benchmark Result Detected'; + const comment = `**${title} ➡️ [Report](${process.env.REPORT_URL})**`; + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: data[0].number, + body: comment + }); diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml index 23774505cc..14df548019 100644 --- a/.github/workflows/examples.yml +++ b/.github/workflows/examples.yml @@ -1,81 +1,81 @@ -# name: Examples (CPU) +name: Examples (CPU) -# on: -# pull_request: -# branches: -# - main +on: + pull_request: + branches: + - main -# concurrency: -# group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} -# cancel-in-progress: true +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} + cancel-in-progress: true -# jobs: -# run-examples: -# runs-on: ubuntu-24.04 -# name: ubuntu-24.04-3.12-examples +jobs: + run-examples: + runs-on: ubuntu-24.04 + name: ubuntu-24.04-3.12-examples -# env: -# HF_HUB_DOWNLOAD_TIMEOUT: 60 -# FORCE_COLOR: 1 -# PY_COLORS: 1 -# GS_CACHE_FILE_PATH: ".cache/genesis" -# TI_OFFLINE_CACHE: "1" -# TI_OFFLINE_CACHE_CLEANING_POLICY: "never" -# TI_OFFLINE_CACHE_FILE_PATH: ".cache/taichi" -# TI_ENABLE_CUDA: "0" -# TI_ENABLE_METAL: "0" -# TI_ENABLE_OPENGL: "0" -# TI_ENABLE_VULKAN: "0" -# TI_DEBUG: "0" + env: + HF_HUB_DOWNLOAD_TIMEOUT: 60 + FORCE_COLOR: 1 + PY_COLORS: 1 + GS_CACHE_FILE_PATH: ".cache/genesis" + TI_OFFLINE_CACHE: "1" + TI_OFFLINE_CACHE_CLEANING_POLICY: "never" + TI_OFFLINE_CACHE_FILE_PATH: ".cache/taichi" + TI_ENABLE_CUDA: "0" + TI_ENABLE_METAL: "0" + TI_ENABLE_OPENGL: "0" + TI_ENABLE_VULKAN: "0" + TI_DEBUG: "0" -# steps: -# - name: Checkout code -# uses: actions/checkout@v4 -# with: -# fetch-depth: 1 + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 1 -# - name: Set up Python -# uses: actions/setup-python@v5 -# with: -# python-version: "3.12" + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" -# - name: Install Mesa OpenGL driver for headless rendering -# run: | -# sudo apt-get update -# sudo apt install -y \ -# libglu1-mesa \ -# libegl-mesa0 \ -# libgl1-mesa-dev + - name: Install Mesa OpenGL driver for headless rendering + run: | + sudo apt-get update + sudo apt install -y \ + libglu1-mesa \ + libegl-mesa0 \ + libgl1-mesa-dev -# - name: Install Python deps -# run: | -# pip install --upgrade pip setuptools wheel -# pip install torch --index-url https://download.pytorch.org/whl/cpu -# pip install -e '.[dev]' pynput + - name: Install Python deps + run: | + pip install --upgrade pip setuptools wheel + pip install torch --index-url https://download.pytorch.org/whl/cpu + pip install -e '.[dev]' pynput -# - name: Get gstaichi version -# id: gstaichi_version -# shell: bash -# run: | -# GSTAICHI_VERSION=$(python -c "import importlib.metadata ; print(importlib.metadata.version('gstaichi'))") -# echo "GSTAICHI_VERSION=${GSTAICHI_VERSION}" -# echo "GSTAICHI_VERSION=${GSTAICHI_VERSION}" >> $GITHUB_OUTPUT + - name: Get gstaichi version + id: gstaichi_version + shell: bash + run: | + GSTAICHI_VERSION=$(python -c "import importlib.metadata ; print(importlib.metadata.version('gstaichi'))") + echo "GSTAICHI_VERSION=${GSTAICHI_VERSION}" + echo "GSTAICHI_VERSION=${GSTAICHI_VERSION}" >> $GITHUB_OUTPUT -# - name: Restore cache -# uses: actions/cache/restore@v4 -# with: -# path: .cache -# key: ubuntu-24.04-3.12-examples-${{ steps.gstaichi_version.outputs.GSTAICHI_VERSION }} -# restore-keys: | -# ubuntu-24.04-3.12-examples-${{ steps.gstaichi_version.outputs.GSTAICHI_VERSION }}- + - name: Restore cache + uses: actions/cache/restore@v4 + with: + path: .cache + key: ubuntu-24.04-3.12-examples-${{ steps.gstaichi_version.outputs.GSTAICHI_VERSION }} + restore-keys: | + ubuntu-24.04-3.12-examples-${{ steps.gstaichi_version.outputs.GSTAICHI_VERSION }}- -# - name: Run examples suite -# run: | -# pytest -v -m examples tests/test_examples.py + - name: Run examples suite + run: | + pytest -v -m examples tests/test_examples.py -# - name: Save cache -# if: always() -# uses: actions/cache/save@v4 -# with: -# path: .cache -# key: ubuntu-24.04-3.12-examples-${{ steps.gstaichi_version.outputs.GSTAICHI_VERSION }}-${{ github.run_id }}-${{ github.run_attempt }} + - name: Save cache + if: always() + uses: actions/cache/save@v4 + with: + path: .cache + key: ubuntu-24.04-3.12-examples-${{ steps.gstaichi_version.outputs.GSTAICHI_VERSION }}-${{ github.run_id }}-${{ github.run_attempt }} diff --git a/.github/workflows/generic.yml b/.github/workflows/generic.yml index cd2d71e523..19c8b4ddbe 100644 --- a/.github/workflows/generic.yml +++ b/.github/workflows/generic.yml @@ -1,209 +1,209 @@ -# name: Generic - -# on: -# pull_request: -# branches: -# - main -# release: -# branches: -# - main -# types: [published] - -# concurrency: -# group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} -# cancel-in-progress: ${{ github.event_name == 'pull_request' }} - -# jobs: -# generic-cpu: -# name: ${{ matrix.OS }}-${{ matrix.PYTHON_VERSION }}-${{ matrix.GS_BACKEND }}-${{ matrix.GS_ENABLE_NDARRAY == '0' && 'field' || 'ndarray' }} - -# strategy: -# fail-fast: false -# matrix: -# # See official Github documentation for details: https://shorturl.at/NJgsj -# OS: ["ubuntu-24.04", "macos-15"] -# PYTHON_VERSION: ["3.10", "3.11", "3.12", "3.13"] -# GS_BACKEND: ["cpu"] -# GS_ENABLE_NDARRAY: ["1"] -# include: -# # CPU backend - dynamic array (other OSes) -# - OS: "ubuntu-22.04" -# PYTHON_VERSION: "3.12" -# GS_BACKEND: "cpu" -# GS_ENABLE_NDARRAY: "1" -# - OS: "ubuntu-24.04-arm" -# PYTHON_VERSION: "3.12" -# GS_BACKEND: "cpu" -# GS_ENABLE_NDARRAY: "1" -# - OS: "windows-2025" -# PYTHON_VERSION: "3.12" -# GS_BACKEND: "cpu" -# GS_ENABLE_NDARRAY: "1" -# # CPU backend - field array -# - OS: "ubuntu-24.04" -# PYTHON_VERSION: "3.12" -# GS_BACKEND: "cpu" -# GS_ENABLE_NDARRAY: "0" -# - OS: "ubuntu-24.04-arm" -# PYTHON_VERSION: "3.12" -# GS_BACKEND: "cpu" -# GS_ENABLE_NDARRAY: "0" -# - OS: "windows-2025" -# PYTHON_VERSION: "3.12" -# GS_BACKEND: "cpu" -# GS_ENABLE_NDARRAY: "0" -# - OS: "macos-15" -# PYTHON_VERSION: "3.12" -# GS_BACKEND: "cpu" -# GS_ENABLE_NDARRAY: "0" -# # GPU backend - field array -# - OS: "macos-15" -# PYTHON_VERSION: "3.12" -# GS_BACKEND: "gpu" -# GS_ENABLE_NDARRAY: "0" - -# env: -# HF_HUB_DOWNLOAD_TIMEOUT: "60" -# FORCE_COLOR: "1" -# PY_COLORS: "1" -# GS_CACHE_FILE_PATH: ".cache/genesis" -# GS_ENABLE_NDARRAY: ${{ matrix.GS_ENABLE_NDARRAY }} -# GS_TORCH_FORCE_CPU_DEVICE: ${{ startsWith(matrix.OS, 'macos-') && '1' || '0' }} -# TI_OFFLINE_CACHE: "1" -# TI_OFFLINE_CACHE_CLEANING_POLICY: "never" -# TI_OFFLINE_CACHE_FILE_PATH: ".cache/taichi" -# TI_ENABLE_CUDA: ${{ matrix.GS_BACKEND == 'gpu' && '1' || '0' }} -# TI_ENABLE_METAL: ${{ matrix.GS_BACKEND == 'gpu' && '1' || '0' }} -# TI_ENABLE_OPENGL: "0" -# TI_ENABLE_VULKAN: "0" -# TI_DEBUG: "0" - -# runs-on: ${{ matrix.OS }} -# if: github.event_name != 'release' - -# steps: -# - name: Print system information (Windows) -# if: startsWith(matrix.OS, 'windows-') -# shell: pwsh -# run: | -# $cpu = Get-CimInstance -ClassName Win32_Processor -# $ram = Get-CimInstance -ClassName Win32_ComputerSystem -# [PSCustomObject]@{ -# CPU_Name = $cpu.Name -# Physical_Cores = ($cpu | Measure-Object -Property NumberOfCores -Sum).Sum -# Logical_Processors = ($cpu | Measure-Object -Property NumberOfLogicalProcessors -Sum).Sum -# Total_RAM_GB = [math]::Round($ram.TotalPhysicalMemory / 1GB, 2) -# } - -# - name: Checkout code -# uses: actions/checkout@v4 -# with: -# fetch-depth: 1 - -# - name: Set up Python -# uses: actions/setup-python@v5 -# with: -# python-version: ${{ matrix.PYTHON_VERSION }} - -# - name: Install system dependencies (Windows) -# if: startsWith(matrix.OS, 'windows-') -# shell: bash -# run: | -# curl -L -o mesa.7z https://github.com/pal1000/mesa-dist-win/releases/download/25.1.5/mesa3d-25.1.5-release-msvc.7z -# 7z x mesa.7z -omesa -# mv -v mesa/x64/* /C/Windows/System32/ - -# - name: Install Mesa 25 OpenGL driver (Linux) -# if: startsWith(matrix.OS, 'ubuntu-') -# run: | -# sudo add-apt-repository -y ppa:kisak/kisak-mesa -# sudo apt-get update -# sudo apt install -y \ -# libglu1-mesa \ -# libegl-mesa0 \ -# libgl1-mesa-dev -# - name: Install python dependencies -# run: | -# pip install --upgrade pip setuptools pkg-info wheel -# pip3 install torch --index-url https://download.pytorch.org/whl/cpu - -# - name: Black Format Check -# if: ${{ matrix.OS == 'ubuntu-24.04' && matrix.PYTHON_VERSION == '3.12' && matrix.GS_BACKEND == 'cpu' && matrix.GS_ENABLE_NDARRAY == '1' }} -# run: | -# pip install black -# black --line-length 120 --check . - -# - name: Install Genesis -# shell: bash -# run: | -# PYTHON_DEPS="dev" -# if [[ "${{ matrix.OS }}" != 'ubuntu-24.04-arm' ]] ; then -# PYTHON_DEPS="${PYTHON_DEPS},usd" -# fi -# pip install -e ".[${PYTHON_DEPS}]" - -# - name: Get artifact prefix name -# id: artifact_prefix -# shell: bash -# run: | -# OS_FAMILY=$(python -c "import platform; print(platform.system())") -# MACHINE_ARCH=$(python -c "import platform; print(platform.machine())") -# GSTAICHI_VERSION=$(python -c "import importlib.metadata ; print(importlib.metadata.version('gstaichi'))") -# echo "ARTIFACT_PREFIX=${OS_FAMILY}-${MACHINE_ARCH}-${GSTAICHI_VERSION}" >> $GITHUB_OUTPUT - -# - name: Restore Taichi Kernel Cache -# if: ${{ always() && steps.artifact_prefix.outputs.ARTIFACT_PREFIX != '' }} -# uses: actions/cache/restore@v4 -# with: -# path: .cache -# key: ${{ steps.artifact_prefix.outputs.ARTIFACT_PREFIX }} -# restore-keys: | -# ${{ steps.artifact_prefix.outputs.ARTIFACT_PREFIX }}- - -# - name: Run unit tests -# run: | -# pytest -v --logical --dev --backend ${{ matrix.GS_BACKEND }} -m required --forked ./tests - -# - name: Save Updated Taichi Kernel Cache -# if: >- -# ${{ always() && -# (matrix.OS == 'ubuntu-24.04' || matrix.OS == 'ubuntu-24.04-arm' || matrix.OS == 'macos-15' || matrix.OS == 'windows-2025') && -# matrix.PYTHON_VERSION == '3.12' && -# matrix.GS_BACKEND == 'cpu' && -# matrix.GS_ENABLE_NDARRAY == '1' && -# steps.artifact_prefix.outputs.ARTIFACT_PREFIX != '' }} -# uses: actions/cache/save@v4 -# with: -# path: .cache -# # Note that it is necessary to create a new archive systematically for now: -# # See: https://github.com/actions/cache/issues/1594 -# key: ${{ steps.artifact_prefix.outputs.ARTIFACT_PREFIX }}-${{ github.run_id }}-${{ github.run_attempt }} - -# publish-pypi: -# name: Publish on PyPI -# runs-on: ubuntu-24.04 -# permissions: -# id-token: write -# environment: -# name: advance - -# if: github.event_name == 'release' - -# steps: -# - name: Checkout code -# uses: actions/checkout@v4 -# with: -# fetch-depth: 1 - -# - name: Build wheels -# run: | -# pip wheel --no-deps . -w wheelhouse - -# - name: Publish the wheels on PyPI -# uses: pypa/gh-action-pypi-publish@v1.12.4 -# with: -# packages-dir: wheelhouse -# verify-metadata: true -# attestations: true -# print-hash: true -# skip-existing: true +name: Generic + +on: + pull_request: + branches: + - main + release: + branches: + - main + types: [published] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + +jobs: + generic-cpu: + name: ${{ matrix.OS }}-${{ matrix.PYTHON_VERSION }}-${{ matrix.GS_BACKEND }}-${{ matrix.GS_ENABLE_NDARRAY == '0' && 'field' || 'ndarray' }} + + strategy: + fail-fast: false + matrix: + # See official Github documentation for details: https://shorturl.at/NJgsj + OS: ["ubuntu-24.04", "macos-15"] + PYTHON_VERSION: ["3.10", "3.11", "3.12", "3.13"] + GS_BACKEND: ["cpu"] + GS_ENABLE_NDARRAY: ["1"] + include: + # CPU backend - dynamic array (other OSes) + - OS: "ubuntu-22.04" + PYTHON_VERSION: "3.12" + GS_BACKEND: "cpu" + GS_ENABLE_NDARRAY: "1" + - OS: "ubuntu-24.04-arm" + PYTHON_VERSION: "3.12" + GS_BACKEND: "cpu" + GS_ENABLE_NDARRAY: "1" + - OS: "windows-2025" + PYTHON_VERSION: "3.12" + GS_BACKEND: "cpu" + GS_ENABLE_NDARRAY: "1" + # CPU backend - field array + - OS: "ubuntu-24.04" + PYTHON_VERSION: "3.12" + GS_BACKEND: "cpu" + GS_ENABLE_NDARRAY: "0" + - OS: "ubuntu-24.04-arm" + PYTHON_VERSION: "3.12" + GS_BACKEND: "cpu" + GS_ENABLE_NDARRAY: "0" + - OS: "windows-2025" + PYTHON_VERSION: "3.12" + GS_BACKEND: "cpu" + GS_ENABLE_NDARRAY: "0" + - OS: "macos-15" + PYTHON_VERSION: "3.12" + GS_BACKEND: "cpu" + GS_ENABLE_NDARRAY: "0" + # GPU backend - field array + - OS: "macos-15" + PYTHON_VERSION: "3.12" + GS_BACKEND: "gpu" + GS_ENABLE_NDARRAY: "0" + + env: + HF_HUB_DOWNLOAD_TIMEOUT: "60" + FORCE_COLOR: "1" + PY_COLORS: "1" + GS_CACHE_FILE_PATH: ".cache/genesis" + GS_ENABLE_NDARRAY: ${{ matrix.GS_ENABLE_NDARRAY }} + GS_TORCH_FORCE_CPU_DEVICE: ${{ startsWith(matrix.OS, 'macos-') && '1' || '0' }} + TI_OFFLINE_CACHE: "1" + TI_OFFLINE_CACHE_CLEANING_POLICY: "never" + TI_OFFLINE_CACHE_FILE_PATH: ".cache/taichi" + TI_ENABLE_CUDA: ${{ matrix.GS_BACKEND == 'gpu' && '1' || '0' }} + TI_ENABLE_METAL: ${{ matrix.GS_BACKEND == 'gpu' && '1' || '0' }} + TI_ENABLE_OPENGL: "0" + TI_ENABLE_VULKAN: "0" + TI_DEBUG: "0" + + runs-on: ${{ matrix.OS }} + if: github.event_name != 'release' + + steps: + - name: Print system information (Windows) + if: startsWith(matrix.OS, 'windows-') + shell: pwsh + run: | + $cpu = Get-CimInstance -ClassName Win32_Processor + $ram = Get-CimInstance -ClassName Win32_ComputerSystem + [PSCustomObject]@{ + CPU_Name = $cpu.Name + Physical_Cores = ($cpu | Measure-Object -Property NumberOfCores -Sum).Sum + Logical_Processors = ($cpu | Measure-Object -Property NumberOfLogicalProcessors -Sum).Sum + Total_RAM_GB = [math]::Round($ram.TotalPhysicalMemory / 1GB, 2) + } + + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.PYTHON_VERSION }} + + - name: Install system dependencies (Windows) + if: startsWith(matrix.OS, 'windows-') + shell: bash + run: | + curl -L -o mesa.7z https://github.com/pal1000/mesa-dist-win/releases/download/25.1.5/mesa3d-25.1.5-release-msvc.7z + 7z x mesa.7z -omesa + mv -v mesa/x64/* /C/Windows/System32/ + + - name: Install Mesa 25 OpenGL driver (Linux) + if: startsWith(matrix.OS, 'ubuntu-') + run: | + sudo add-apt-repository -y ppa:kisak/kisak-mesa + sudo apt-get update + sudo apt install -y \ + libglu1-mesa \ + libegl-mesa0 \ + libgl1-mesa-dev + - name: Install python dependencies + run: | + pip install --upgrade pip setuptools pkg-info wheel + pip3 install torch --index-url https://download.pytorch.org/whl/cpu + + - name: Black Format Check + if: ${{ matrix.OS == 'ubuntu-24.04' && matrix.PYTHON_VERSION == '3.12' && matrix.GS_BACKEND == 'cpu' && matrix.GS_ENABLE_NDARRAY == '1' }} + run: | + pip install black + black --line-length 120 --check . + + - name: Install Genesis + shell: bash + run: | + PYTHON_DEPS="dev" + if [[ "${{ matrix.OS }}" != 'ubuntu-24.04-arm' ]] ; then + PYTHON_DEPS="${PYTHON_DEPS},usd" + fi + pip install -e ".[${PYTHON_DEPS}]" + + - name: Get artifact prefix name + id: artifact_prefix + shell: bash + run: | + OS_FAMILY=$(python -c "import platform; print(platform.system())") + MACHINE_ARCH=$(python -c "import platform; print(platform.machine())") + GSTAICHI_VERSION=$(python -c "import importlib.metadata ; print(importlib.metadata.version('gstaichi'))") + echo "ARTIFACT_PREFIX=${OS_FAMILY}-${MACHINE_ARCH}-${GSTAICHI_VERSION}" >> $GITHUB_OUTPUT + + - name: Restore Taichi Kernel Cache + if: ${{ always() && steps.artifact_prefix.outputs.ARTIFACT_PREFIX != '' }} + uses: actions/cache/restore@v4 + with: + path: .cache + key: ${{ steps.artifact_prefix.outputs.ARTIFACT_PREFIX }} + restore-keys: | + ${{ steps.artifact_prefix.outputs.ARTIFACT_PREFIX }}- + + - name: Run unit tests + run: | + pytest -v --logical --dev --backend ${{ matrix.GS_BACKEND }} -m required --forked ./tests + + - name: Save Updated Taichi Kernel Cache + if: >- + ${{ always() && + (matrix.OS == 'ubuntu-24.04' || matrix.OS == 'ubuntu-24.04-arm' || matrix.OS == 'macos-15' || matrix.OS == 'windows-2025') && + matrix.PYTHON_VERSION == '3.12' && + matrix.GS_BACKEND == 'cpu' && + matrix.GS_ENABLE_NDARRAY == '1' && + steps.artifact_prefix.outputs.ARTIFACT_PREFIX != '' }} + uses: actions/cache/save@v4 + with: + path: .cache + # Note that it is necessary to create a new archive systematically for now: + # See: https://github.com/actions/cache/issues/1594 + key: ${{ steps.artifact_prefix.outputs.ARTIFACT_PREFIX }}-${{ github.run_id }}-${{ github.run_attempt }} + + publish-pypi: + name: Publish on PyPI + runs-on: ubuntu-24.04 + permissions: + id-token: write + environment: + name: advance + + if: github.event_name == 'release' + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Build wheels + run: | + pip wheel --no-deps . -w wheelhouse + + - name: Publish the wheels on PyPI + uses: pypa/gh-action-pypi-publish@v1.12.4 + with: + packages-dir: wheelhouse + verify-metadata: true + attestations: true + print-hash: true + skip-existing: true diff --git a/.github/workflows/production.yml b/.github/workflows/production.yml index 43284caddf..0ea61ea349 100644 --- a/.github/workflows/production.yml +++ b/.github/workflows/production.yml @@ -28,69 +28,69 @@ env: OMNI_KIT_ALLOW_ROOT: "1" jobs: - # unit-tests: - # name: production-unit_tests-${{ matrix.GS_ENABLE_NDARRAY == '0' && 'field' || 'ndarray' }} - - # runs-on: [self-hosted, coreweave, genesis-world] - - # strategy: - # fail-fast: true - # max-parallel: 1 - # matrix: - # GS_ENABLE_NDARRAY: ["0", "1"] - - # env: - # GS_ENABLE_NDARRAY: ${{ matrix.GS_ENABLE_NDARRAY }} - - # steps: - # - name: Checkout code - # uses: actions/checkout@v4 - # - name: Run unit tests - # if: github.event_name == 'pull_request' - # run: | - # SLURM_JOB_NAME="$(uuidgen)_$(date +%Y%m%d_%H%M%S)" - # echo "SLURM_JOB_NAME=${SLURM_JOB_NAME}" >> $GITHUB_ENV - - # mkdir -p "${HOME}/.cache" "${HOME}/.venv" - - # # TODO: USD baking does not currently support Python 3.11 since - # # NVIDIA does not currently release `omniverse-kit==107.3` on PyPI. - # # See: https://github.com/Genesis-Embodied-AI/Genesis/pull/1300 - # srun \ - # --container-image="/mnt/data/images/genesis-v${GENESIS_IMAGE_VER}.sqsh" \ - # --container-mounts=\ - # "${HOME}/.venv":/root/.venv,\ - # "${HOME}/.cache":/root/.cache,\ - # "${{ github.workspace }}":/root/workspace \ - # --no-container-mount-home --container-workdir=/root/workspace \ - # --export=NVIDIA_DRIVER_CAPABILITIES=all,BASH_ENV=/root/.bashrc,HF_TOKEN,GS_ENABLE_NDARRAY=${GS_ENABLE_NDARRAY} \ - # --partition=hpc-mid --nodes=1 --gpus=8 --exclusive --time="${TIMEOUT_MINUTES}" \ - # --job-name=${SLURM_JOB_NAME} \ - # bash -e -s << 'EOF' - # if test -n "$(find /root/.venv -maxdepth 0 -empty)"; then - # python3 -m venv --system-site-packages /root/.venv - # source /root/.venv/bin/activate - # pip install --no-input --upgrade pip pkg-info wheel - # pip install --no-input --ignore-installed --upgrade blinker pyparsing setuptools - # fi - # source /root/.venv/bin/activate - - # pip install --no-input --extra-index-url https://pypi.nvidia.com/ omniverse-kit - # pip install --no-input ".[dev,render,usd]" - - # pytest -v -ra --backend gpu --dev --forked ./tests - # EOF - # - name: Kill srun job systematically - # if: always() - # run: | - # if [ -n "${SLURM_JOB_NAME}" ] ; then - # scancel --user=${USER} --name="${SLURM_JOB_NAME}" - # fi + unit-tests: + name: production-unit_tests-${{ matrix.GS_ENABLE_NDARRAY == '0' && 'field' || 'ndarray' }} + + runs-on: [self-hosted, coreweave, genesis-world] + + strategy: + fail-fast: true + max-parallel: 1 + matrix: + GS_ENABLE_NDARRAY: ["0", "1"] + + env: + GS_ENABLE_NDARRAY: ${{ matrix.GS_ENABLE_NDARRAY }} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Run unit tests + if: github.event_name == 'pull_request' + run: | + SLURM_JOB_NAME="$(uuidgen)_$(date +%Y%m%d_%H%M%S)" + echo "SLURM_JOB_NAME=${SLURM_JOB_NAME}" >> $GITHUB_ENV + + mkdir -p "${HOME}/.cache" "${HOME}/.venv" + + # TODO: USD baking does not currently support Python 3.11 since + # NVIDIA does not currently release `omniverse-kit==107.3` on PyPI. + # See: https://github.com/Genesis-Embodied-AI/Genesis/pull/1300 + srun \ + --container-image="/mnt/data/images/genesis-v${GENESIS_IMAGE_VER}.sqsh" \ + --container-mounts=\ + "${HOME}/.venv":/root/.venv,\ + "${HOME}/.cache":/root/.cache,\ + "${{ github.workspace }}":/root/workspace \ + --no-container-mount-home --container-workdir=/root/workspace \ + --export=NVIDIA_DRIVER_CAPABILITIES=all,BASH_ENV=/root/.bashrc,HF_TOKEN,GS_ENABLE_NDARRAY=${GS_ENABLE_NDARRAY} \ + --partition=hpc-mid --nodes=1 --gpus=8 --exclusive --time="${TIMEOUT_MINUTES}" \ + --job-name=${SLURM_JOB_NAME} \ + bash -e -s << 'EOF' + if test -n "$(find /root/.venv -maxdepth 0 -empty)"; then + python3 -m venv --system-site-packages /root/.venv + source /root/.venv/bin/activate + pip install --no-input --upgrade pip pkg-info wheel + pip install --no-input --ignore-installed --upgrade blinker pyparsing setuptools + fi + source /root/.venv/bin/activate + + pip install --no-input --extra-index-url https://pypi.nvidia.com/ omniverse-kit + pip install --no-input ".[dev,render,usd]" + + pytest -v -ra --backend gpu --dev --forked ./tests + EOF + - name: Kill srun job systematically + if: always() + run: | + if [ -n "${SLURM_JOB_NAME}" ] ; then + scancel --user=${USER} --name="${SLURM_JOB_NAME}" + fi benchmarks: name: production-benchmarks-${{ matrix.GS_ENABLE_NDARRAY == '0' && 'field' || 'ndarray' }} - # needs: unit-tests + needs: unit-tests runs-on: [self-hosted, coreweave, genesis-world] strategy: