From ccdd85c9bd016a0ebed3b9afc4cee248a88590ba Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Mon, 29 Dec 2025 13:41:59 +0100
Subject: [PATCH 01/17] Add benchmarks

---
 Makefile                                      |   3 +
 acceptance/acceptance_test.go                 |  20 ++-
 acceptance/bin/benchmark.py                   |  79 +++++++++++
 acceptance/bin/gen_config.py                  | 124 ++++++++++++++++++
 .../bundle/benchmarks/deploy/out.test.toml    |   5 +
 .../bundle/benchmarks/deploy/output.txt       |   0
 acceptance/bundle/benchmarks/deploy/script    |   5 +
 .../bundle/benchmarks/plan/out.test.toml      |   5 +
 acceptance/bundle/benchmarks/plan/output.txt  |   2 +
 acceptance/bundle/benchmarks/plan/script      |   5 +
 acceptance/bundle/benchmarks/test.toml        |   2 +
 .../bundle/benchmarks/validate/out.test.toml  |   5 +
 .../bundle/benchmarks/validate/output.txt     |  14 ++
 acceptance/bundle/benchmarks/validate/script  |   5 +
 14 files changed, 272 insertions(+), 2 deletions(-)
 create mode 100755 acceptance/bin/benchmark.py
 create mode 100755 acceptance/bin/gen_config.py
 create mode 100644 acceptance/bundle/benchmarks/deploy/out.test.toml
 create mode 100644 acceptance/bundle/benchmarks/deploy/output.txt
 create mode 100755 acceptance/bundle/benchmarks/deploy/script
 create mode 100644 acceptance/bundle/benchmarks/plan/out.test.toml
 create mode 100644 acceptance/bundle/benchmarks/plan/output.txt
 create mode 100755 acceptance/bundle/benchmarks/plan/script
 create mode 100644 acceptance/bundle/benchmarks/test.toml
 create mode 100644 acceptance/bundle/benchmarks/validate/out.test.toml
 create mode 100644 acceptance/bundle/benchmarks/validate/output.txt
 create mode 100755 acceptance/bundle/benchmarks/validate/script

diff --git a/Makefile b/Makefile
index 6d7978771c..9a85bec929 100644
--- a/Makefile
+++ b/Makefile
@@ -97,6 +97,9 @@ test-update-aws:
 
 test-update-all: test-update test-update-aws
 
+bench:
+	BENCH_N_JOBS=1000 BENCHMARK_MODE=1 go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
+
 slowest:
 	${GO_TOOL} gotestsum tool slowest --jsonfile test-output.json --threshold 1s --num 50
 
diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go
index de02e455b8..f38af28483 100644
--- a/acceptance/acceptance_test.go
+++ b/acceptance/acceptance_test.go
@@ -61,6 +61,9 @@ var InprocessMode bool
 // lines with this prefix are not recorded in output.txt but logged instead
 const TestLogPrefix = "TESTLOG: "
 
+// In benchmark mode we disable parallel runs of all tests that contain the word "benchmark" in their path
+var benchmarkMode = os.Getenv("BENCHMARK_MODE") != ""
+
 func init() {
 	flag.BoolVar(&InprocessMode, "inprocess", false, "Run CLI in the same process as test (for debugging)")
 	flag.BoolVar(&KeepTmp, "keeptmp", false, "Do not delete TMP directory after run")
@@ -328,7 +331,17 @@ func testAccept(t *testing.T, inprocessMode bool, singleTest string) int {
 				t.Skip(skipReason)
 			}
 
-			if !inprocessMode {
+			runParallel := true
+
+			if inprocessMode {
+				runParallel = false
+			}
+
+			if benchmarkMode && strings.Contains(dir, "benchmark") {
+				runParallel = false
+			}
+
+			if runParallel {
 				t.Parallel()
 			}
 
@@ -344,7 +357,7 @@ func testAccept(t *testing.T, inprocessMode bool, singleTest string) int {
 			for ind, envset := range expanded {
 				envname := strings.Join(envset, "/")
 				t.Run(envname, func(t *testing.T) {
-					if !inprocessMode {
+					if runParallel {
 						t.Parallel()
 					}
 					runTest(t, dir, ind, coverDir, repls.Clone(), config, envset, envFilters)
@@ -689,6 +702,9 @@ func runTest(t *testing.T,
 
 		skipRepls := false
 		if relPath == internal.MaterializedConfigFile {
+			if benchmarkMode {
+				continue
+			}
 			skipRepls = true
 		}
 		doComparison(t, repls, dir, tmpDir, relPath, &printedRepls, skipRepls)
diff --git a/acceptance/bin/benchmark.py b/acceptance/bin/benchmark.py
new file mode 100755
index 0000000000..213b7c186a
--- /dev/null
+++ b/acceptance/bin/benchmark.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+import argparse
+import subprocess
+import time
+import statistics
+import sys
+import os
+import resource
+
+
+def run_benchmark(command, warmup, runs):
+    times = []
+
+    if len(command) == 1 and (" " in command[0] or ">" in command[0]):
+        shell = True
+        command = command[0]
+    else:
+        shell = False
+
+    for i in range(runs):
+        rusage_before = resource.getrusage(resource.RUSAGE_CHILDREN)
+
+        with open("LOG.process", "a") as log:
+            start = time.perf_counter()
+            result = subprocess.run(command, shell=shell, stdout=log, stderr=log)
+            end = time.perf_counter()
+
+        if result.returncode != 0:
+            print(f"Error: command failed with exit code {result.returncode}", file=sys.stderr)
+            sys.exit(result.returncode)
+
+        rusage_after = resource.getrusage(resource.RUSAGE_CHILDREN)
+        utime = rusage_after.ru_utime - rusage_before.ru_utime
+        stime = rusage_after.ru_stime - rusage_before.ru_stime
+
+        run = f"Run #{i} (warm): " if i < warmup else f"Run #{i} (count):"
+        print(
+            f"TESTLOG: {run} wall={end - start:.3f} ru_utime={utime:.3f} ru_stime={stime:.3f} ru_maxrss={rusage_after.ru_maxrss}"
+        )
+
+        if i >= warmup:
+            times.append(end - start)
+
+    if not times:
+        print("No times recorded")
+        return
+
+    if len(times) > 1:
+        mean = statistics.mean(times)
+        stdev = statistics.stdev(times)
+        min_time = min(times)
+        max_time = max(times)
+
+        print(f"TESTLOG: Benchmark: {command}")
+        print(f"TESTLOG: Time (mean ± σ): {mean:.3f} s ± {stdev:.3f} s")
+        print(f"TESTLOG: Range (min … max): {min_time:.3f} s … {max_time:.3f} s {runs} runs", flush=True)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--warmup", type=int, default=1)
+    parser.add_argument("--runs", type=int)
+    parser.add_argument("command", nargs="+")
+    args = parser.parse_args()
+
+    if args.runs is None:
+        if os.environ.get("BENCHMARK_MODE"):
+            args.runs = 5
+        else:
+            args.runs = 1
+
+    if args.warmup >= args.runs:
+        args.warmup = min(1, args.runs - 1)
+
+    run_benchmark(args.command, args.warmup, args.runs)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/acceptance/bin/gen_config.py b/acceptance/bin/gen_config.py
new file mode 100755
index 0000000000..4e15953d8b
--- /dev/null
+++ b/acceptance/bin/gen_config.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+import argparse
+import json
+import copy
+
+JOB_TEMPLATE_BASE = {
+    "description": "This job contain multiple tasks that are required to produce the weekly shark sightings report.",
+    "email_notifications": {
+        "no_alert_for_skipped_runs": False,
+        "on_failure": ["user.name@databricks.com"],
+        "on_success": ["user.name@databricks.com"],
+    },
+    "job_clusters": [
+        {
+            "job_cluster_key": "auto_scaling_cluster",
+            "new_cluster": {
+                "autoscale": {"max_workers": 16, "min_workers": 2},
+                "node_type_id": "i3.xlarge",
+                "spark_conf": {"spark.speculation": "true"},
+                "spark_version": "13.3.x-scala2.12",
+            },
+        }
+    ],
+    "max_concurrent_runs": 10,
+    "name": "A multitask job",
+    "notification_settings": {"no_alert_for_canceled_runs": False, "no_alert_for_skipped_runs": False},
+    "parameters": [{"default": "users", "name": "table"}],
+    "tags": {"cost-center": "engineering", "team": "jobs"},
+    "tasks": [
+        {
+            "depends_on": [],
+            "description": "Extracts session data from events",
+            "job_cluster_key": "auto_scaling_cluster",
"libraries": [{"jar": "dbfs:/mnt/databricks/Sessionize.jar"}], + "max_retries": 3, + "min_retry_interval_millis": 2000, + "retry_on_timeout": False, + "spark_jar_task": { + "main_class_name": "com.databricks.Sessionize", + "parameters": ["--data", "dbfs:/path/to/data.json"], + }, + "task_key": "Sessionize", + "timeout_seconds": 86400, + }, + { + "depends_on": [], + "description": "Ingests order data", + "job_cluster_key": "auto_scaling_cluster", + "libraries": [{"jar": "dbfs:/mnt/databricks/OrderIngest.jar"}], + "max_retries": 3, + "min_retry_interval_millis": 2000, + "retry_on_timeout": False, + "spark_jar_task": { + "main_class_name": "com.databricks.OrdersIngest", + "parameters": ["--data", "dbfs:/path/to/order-data.json"], + }, + "task_key": "Orders_Ingest", + "timeout_seconds": 86400, + }, + { + "depends_on": [{"task_key": "Orders_Ingest"}, {"task_key": "Sessionize"}], + "description": "Matches orders with user sessions", + "max_retries": 3, + "min_retry_interval_millis": 2000, + "new_cluster": { + "autoscale": {"max_workers": 16, "min_workers": 2}, + "node_type_id": "i3.xlarge", + "spark_conf": {"spark.speculation": "true"}, + "spark_version": "13.3.x-scala2.12", + }, + "notebook_task": { + "base_parameters": {"age": "35", "name": "John Doe"}, + "notebook_path": "/Users/user.name@databricks.com/Match", + }, + "retry_on_timeout": False, + "run_if": "ALL_SUCCESS", + "task_key": "Match", + "timeout_seconds": 86400, + }, + ], + "timeout_seconds": 86400, +} + + +def gen_config(n): + jobs = {} + for i in range(n): + job = copy.deepcopy(JOB_TEMPLATE_BASE) + job["name"] = f"job_{i}" + + # Odd jobs use continuous, even jobs use schedule + if i % 2 == 1: + job["continuous"] = {"pause_status": "UNPAUSED"} + else: + job["schedule"] = { + "pause_status": "UNPAUSED", + "quartz_cron_expression": "20 30 * * * ?", + "timezone_id": "Europe/London", + } + + jobs[f"job_{i}"] = job + + config = {"bundle": {"name": "test-bundle"}, "resources": {"jobs": jobs}} + + return config + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--jobs", type=int, required=True, help="Number of jobs to generate") + args = parser.parse_args() + + config = gen_config(args.jobs) + + import yaml + + try: + print(yaml.dump(config, default_flow_style=False, sort_keys=False)) + except ImportError: + print(json.dumps(config, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/benchmarks/deploy/out.test.toml b/acceptance/bundle/benchmarks/deploy/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/bundle/benchmarks/deploy/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/benchmarks/deploy/output.txt b/acceptance/bundle/benchmarks/deploy/output.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/acceptance/bundle/benchmarks/deploy/script b/acceptance/bundle/benchmarks/deploy/script new file mode 100755 index 0000000000..eed8651c9b --- /dev/null +++ b/acceptance/bundle/benchmarks/deploy/script @@ -0,0 +1,5 @@ +BENCH_N_JOBS="${BENCH_N_JOBS:-10}" +echo "TESTLOG: Generating config with $BENCH_N_JOBS jobs" +gen_config.py --jobs $BENCH_N_JOBS > databricks.yml +wc -l databricks.yml >> LOG +benchmark.py $CLI bundle deploy diff --git a/acceptance/bundle/benchmarks/plan/out.test.toml b/acceptance/bundle/benchmarks/plan/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ 
+++ b/acceptance/bundle/benchmarks/plan/out.test.toml
@@ -0,0 +1,5 @@
+Local = true
+Cloud = false
+
+[EnvMatrix]
+  DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"]
diff --git a/acceptance/bundle/benchmarks/plan/output.txt b/acceptance/bundle/benchmarks/plan/output.txt
new file mode 100644
index 0000000000..89b1365657
--- /dev/null
+++ b/acceptance/bundle/benchmarks/plan/output.txt
@@ -0,0 +1,2 @@
+
+>>> benchmark.py $CLI bundle plan > /dev/null
diff --git a/acceptance/bundle/benchmarks/plan/script b/acceptance/bundle/benchmarks/plan/script
new file mode 100755
index 0000000000..b219382e6f
--- /dev/null
+++ b/acceptance/bundle/benchmarks/plan/script
@@ -0,0 +1,5 @@
+BENCH_N_JOBS="${BENCH_N_JOBS:-10}"
+echo "TESTLOG: Generating config with $BENCH_N_JOBS jobs"
+gen_config.py --jobs $BENCH_N_JOBS > databricks.yml
+wc -l databricks.yml >> LOG
+trace benchmark.py '$CLI bundle plan > /dev/null'
diff --git a/acceptance/bundle/benchmarks/test.toml b/acceptance/bundle/benchmarks/test.toml
new file mode 100644
index 0000000000..fbcf2aad73
--- /dev/null
+++ b/acceptance/bundle/benchmarks/test.toml
@@ -0,0 +1,2 @@
+Timeout = '4h'
+Ignore = ["databricks.yml"]
diff --git a/acceptance/bundle/benchmarks/validate/out.test.toml b/acceptance/bundle/benchmarks/validate/out.test.toml
new file mode 100644
index 0000000000..d560f1de04
--- /dev/null
+++ b/acceptance/bundle/benchmarks/validate/out.test.toml
@@ -0,0 +1,5 @@
+Local = true
+Cloud = false
+
+[EnvMatrix]
+  DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"]
diff --git a/acceptance/bundle/benchmarks/validate/output.txt b/acceptance/bundle/benchmarks/validate/output.txt
new file mode 100644
index 0000000000..4b7537e96c
--- /dev/null
+++ b/acceptance/bundle/benchmarks/validate/output.txt
@@ -0,0 +1,14 @@
+
+>>> head -n 10 databricks.yml
+bundle:
+  name: test-bundle
+resources:
+  jobs:
+    job_0:
+      description: This job contain multiple tasks that are required to produce the
+        weekly shark sightings report.
+      email_notifications:
+        no_alert_for_skipped_runs: false
+        on_failure:
+
+>>> benchmark.py [CLI] bundle validate
diff --git a/acceptance/bundle/benchmarks/validate/script b/acceptance/bundle/benchmarks/validate/script
new file mode 100755
index 0000000000..1d5b4ddda3
--- /dev/null
+++ b/acceptance/bundle/benchmarks/validate/script
@@ -0,0 +1,5 @@
+BENCH_N_JOBS="${BENCH_N_JOBS:-10}"
+echo "TESTLOG: Generating config with $BENCH_N_JOBS jobs"
+gen_config.py --jobs $BENCH_N_JOBS > databricks.yml
+trace head -n 10 databricks.yml
+trace benchmark.py $CLI bundle validate

From 4f5b52e102073f2078ca36b61666bfc5c3695bd8 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Tue, 30 Dec 2025 13:44:24 +0100
Subject: [PATCH 02/17] update

---
 acceptance/bundle/benchmarks/deploy/script   | 6 ++----
 acceptance/bundle/benchmarks/plan/script     | 6 ++----
 acceptance/bundle/benchmarks/test.toml       | 1 +
 acceptance/bundle/benchmarks/validate/script | 5 ++---
 4 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/acceptance/bundle/benchmarks/deploy/script b/acceptance/bundle/benchmarks/deploy/script
index eed8651c9b..88cd42bced 100755
--- a/acceptance/bundle/benchmarks/deploy/script
+++ b/acceptance/bundle/benchmarks/deploy/script
@@ -1,5 +1,3 @@
-BENCH_N_JOBS="${BENCH_N_JOBS:-10}"
-echo "TESTLOG: Generating config with $BENCH_N_JOBS jobs"
-gen_config.py --jobs $BENCH_N_JOBS > databricks.yml
-wc -l databricks.yml >> LOG
+gen_config.py ${BENCHMARK_PARAMS:-} > databricks.yml
+wc -l databricks.yml >> LOG.wc
 benchmark.py $CLI bundle deploy
diff --git a/acceptance/bundle/benchmarks/plan/script b/acceptance/bundle/benchmarks/plan/script
index b219382e6f..e3d6828f19 100755
--- a/acceptance/bundle/benchmarks/plan/script
+++ b/acceptance/bundle/benchmarks/plan/script
@@ -1,5 +1,3 @@
-BENCH_N_JOBS="${BENCH_N_JOBS:-10}"
-echo "TESTLOG: Generating config with $BENCH_N_JOBS jobs"
-gen_config.py --jobs $BENCH_N_JOBS > databricks.yml
-wc -l databricks.yml >> LOG
+gen_config.py ${BENCHMARK_PARAMS:-} > databricks.yml
+wc -l databricks.yml > LOG.wc
 trace benchmark.py '$CLI bundle plan > /dev/null'
diff --git a/acceptance/bundle/benchmarks/test.toml b/acceptance/bundle/benchmarks/test.toml
index fbcf2aad73..6575d7cf93 100644
--- a/acceptance/bundle/benchmarks/test.toml
+++ b/acceptance/bundle/benchmarks/test.toml
@@ -1,2 +1,3 @@
 Timeout = '4h'
 Ignore = ["databricks.yml"]
+BundleConfig.default_name = ""
diff --git a/acceptance/bundle/benchmarks/validate/script b/acceptance/bundle/benchmarks/validate/script
index 1d5b4ddda3..07a8f30603 100755
--- a/acceptance/bundle/benchmarks/validate/script
+++ b/acceptance/bundle/benchmarks/validate/script
@@ -1,5 +1,4 @@
-BENCH_N_JOBS="${BENCH_N_JOBS:-10}"
-echo "TESTLOG: Generating config with $BENCH_N_JOBS jobs"
-gen_config.py --jobs $BENCH_N_JOBS > databricks.yml
+gen_config.py ${BENCHMARK_PARAMS:-} > databricks.yml
+wc -l databricks.yml > LOG.wc
 trace head -n 10 databricks.yml
 trace benchmark.py $CLI bundle validate

From cfb78ede393e8a5a71dce8c9b4876fba2407d67d Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Tue, 30 Dec 2025 13:44:41 +0100
Subject: [PATCH 03/17] update

---
 Makefile                      | 15 ++++++-
 acceptance/acceptance_test.go |  2 +-
 acceptance/bin/benchmark.py   | 76 ++++++++++++++++++++++-----------
 acceptance/bin/gen_config.py  |  2 +-
 4 files changed, 67 insertions(+), 28 deletions(-)

diff --git a/Makefile b/Makefile
index 9a85bec929..deaa0ced97 100644
--- a/Makefile
+++ b/Makefile
@@ -97,8 +97,19 @@ test-update-aws:
 
 test-update-all: test-update test-update-aws
 
-bench:
-	BENCH_N_JOBS=1000 BENCHMARK_MODE=1 go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
+bench100:
+	BENCHMARK_PARAMS="--jobs 100" go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
+
+bench1k:
+	BENCHMARK_PARAMS="--jobs 1000" go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
+
+bench100_summary:
+	make bench100 | tee out.bench100.txt
+	./tools/bench_compare.py out.bench100.txt
+
+bench1k_summary:
+	make bench1k | tee out.bench1k.txt
+	./tools/bench_compare.py out.bench1k.txt
 
 slowest:
 	${GO_TOOL} gotestsum tool slowest --jsonfile test-output.json --threshold 1s --num 50
diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go
index f38af28483..c4528b5b01 100644
--- a/acceptance/acceptance_test.go
+++ b/acceptance/acceptance_test.go
@@ -62,7 +62,7 @@ var InprocessMode bool
 const TestLogPrefix = "TESTLOG: "
 
 // In benchmark mode we disable parallel runs of all tests that contain the word "benchmark" in their path
-var benchmarkMode = os.Getenv("BENCHMARK_MODE") != ""
+var benchmarkMode = os.Getenv("BENCHMARK_PARAMS") != ""
 
 func init() {
 	flag.BoolVar(&InprocessMode, "inprocess", false, "Run CLI in the same process as test (for debugging)")
diff --git a/acceptance/bin/benchmark.py b/acceptance/bin/benchmark.py
index 213b7c186a..576bafc811 100755
--- a/acceptance/bin/benchmark.py
+++ b/acceptance/bin/benchmark.py
@@ -6,40 +6,31 @@ import statistics
 import sys
 import os
 import resource
+import json
 
 
 def run_benchmark(command, warmup, runs):
     times = []
 
-    if len(command) == 1 and (" " in command[0] or ">" in command[0]):
-        shell = True
-        command = command[0]
-    else:
-        shell = False
-
     for i in range(runs):
-        rusage_before = resource.getrusage(resource.RUSAGE_CHILDREN)
+        cp = subprocess.run([sys.executable, sys.argv[0], "--once"] + command, stdout=subprocess.PIPE)
+        if cp.returncode != 0:
+            sys.exit(cp.returncode)
 
-        with open("LOG.process", "a") as log:
-            start = time.perf_counter()
-            result = subprocess.run(command, shell=shell, stdout=log, stderr=log)
-            end = time.perf_counter()
+        try:
+            result = json.loads(cp.stdout)
+        except Exception:
+            print(f"Failed to parse: {cp.stdout!r}")
+            raise
 
-        if result.returncode != 0:
-            print(f"Error: command failed with exit code {result.returncode}", file=sys.stderr)
-            sys.exit(result.returncode)
+        run = f"Run #{i} (warm): " if i < warmup else f"Run #{i} (count):"
 
-        rusage_after = resource.getrusage(resource.RUSAGE_CHILDREN)
-        utime = rusage_after.ru_utime - rusage_before.ru_utime
-        stime = rusage_after.ru_stime - rusage_before.ru_stime
+        result_formatted = " ".join(f"{key}={value}" for (key, value) in result.items())
 
-        run = f"Run #{i} (warm): " if i < warmup else f"Run #{i} (count):"
-        print(
-            f"TESTLOG: {run} wall={end - start:.3f} ru_utime={utime:.3f} ru_stime={stime:.3f} ru_maxrss={rusage_after.ru_maxrss}"
-        )
+        print(f"TESTLOG: {run} {result_formatted}")
 
         if i >= warmup:
-            times.append(end - start)
+            times.append(result["wall"])
 
     if not times:
         print("No times recorded")
@@ -53,18 +44,55 @@ def run_benchmark(command, warmup, runs):
 
         print(f"TESTLOG: Benchmark: {command}")
         print(f"TESTLOG: Time (mean ± σ): {mean:.3f} s ± {stdev:.3f} s")
-        print(f"TESTLOG: Range (min … max): {min_time:.3f} s … {max_time:.3f} s {runs} runs", flush=True)
+        print(f"TESTLOG: Range (min … max): {min_time:.3f} s … {max_time:.3f} s {len(times)} runs", flush=True)
+
+
+def run_once(command):
+    if len(command) == 1 and (" " in command[0] or ">" in command[0]):
+        shell = True
+        command = command[0]
+    else:
+        shell = False
+
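+    # getrusage(RUSAGE_CHILDREN) aggregates usage of all awaited child processes; the before/after delta isolates this run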
+    rusage_before = resource.getrusage(resource.RUSAGE_CHILDREN)
+
+    with open("LOG.process", "a") as log:
+        start = time.perf_counter()
+        result = subprocess.run(command, shell=shell, stdout=log, stderr=log)
+        end = time.perf_counter()
+
+    if result.returncode != 0:
+        print(f"Error: command failed with exit code {result.returncode}", file=sys.stderr)
+        sys.exit(result.returncode)
+
+    rusage_after = resource.getrusage(resource.RUSAGE_CHILDREN)
+
+    return {
+        "wall": end - start,
+        "ru_utime": rusage_after.ru_utime - rusage_before.ru_utime,
+        "ru_stime": rusage_after.ru_stime - rusage_before.ru_stime,
+        # maxrss returns largest process, so subtracting is not correct since rusage_before will be reporting different process
+        "ru_maxrss": rusage_after.ru_maxrss,
+    }
 
 
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--warmup", type=int, default=1)
     parser.add_argument("--runs", type=int)
+    parser.add_argument("--once", action="store_true")
     parser.add_argument("command", nargs="+")
     args = parser.parse_args()
 
+    if args.once:
+        assert not args.runs
+        result = run_once(args.command)
+        print(json.dumps(result))
+        return
+
     if args.runs is None:
-        if os.environ.get("BENCHMARK_MODE"):
+        if os.environ.get("BENCHMARK_PARAMS"):
             args.runs = 5
         else:
             args.runs = 1
diff --git a/acceptance/bin/gen_config.py b/acceptance/bin/gen_config.py
index 4e15953d8b..d2f6f6b79c 100755
--- a/acceptance/bin/gen_config.py
+++ b/acceptance/bin/gen_config.py
@@ -107,7 +107,7 @@ def gen_config(n):
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument("--jobs", type=int, required=True, help="Number of jobs to generate")
+    parser.add_argument("--jobs", type=int, default=10, help="Number of jobs to generate")
     args = parser.parse_args()
 
     config = gen_config(args.jobs)

From 9a2b6ee03a26df15224278bbb10fa98ae6b3d757 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 11:39:25 +0100
Subject: [PATCH 04/17] clean up

---
 Makefile                      |  45 ++++++++++-----
 acceptance/acceptance_test.go |   3 -
 tools/bench_parse.py          | 106 ++++++++++++++++++++++++++++++++++
 3 files changed, 137 insertions(+), 17 deletions(-)
 create mode 100755 tools/bench_parse.py

diff --git a/Makefile b/Makefile
index deaa0ced97..031ad94c3b 100644
--- a/Makefile
+++ b/Makefile
@@ -97,20 +97,6 @@ test-update-aws:
 
 test-update-all: test-update test-update-aws
 
-bench100:
-	BENCHMARK_PARAMS="--jobs 100" go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
-
-bench1k:
-	BENCHMARK_PARAMS="--jobs 1000" go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
-
-bench100_summary:
-	make bench100 | tee out.bench100.txt
-	./tools/bench_compare.py out.bench100.txt
-
-bench1k_summary:
-	make bench1k | tee out.bench1k.txt
-	./tools/bench_compare.py out.bench1k.txt
-
 slowest:
 	${GO_TOOL} gotestsum tool slowest --jsonfile test-output.json --threshold 1s --num 50
 
@@ -198,3 +184,34 @@ test-exp-ssh:
 
 test-pipelines:
 	make test TEST_PACKAGES="./cmd/pipelines/..." ACCEPTANCE_TEST_FILTER="TestAccept/pipelines"
+
+
+# Benchmarks:
+
+bench1k:
+	BENCHMARK_PARAMS="--jobs 1000" go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
+
+bench100:
+	BENCHMARK_PARAMS="--jobs 100" go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
+
+# small benchmark to quickly test benchmark-related code
+bench10:
+	BENCHMARK_PARAMS="--jobs 10" go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
+
+bench1k.log:
+	make bench1k | tee $@
+
+bench100.log:
+	make bench100 | tee $@
+
+bench10.log:
+	make bench10 | tee $@
+
+bench1k_summary: bench1k.log
+	./tools/bench_parse.py $<
+
+bench100_summary: bench100.log
+	./tools/bench_parse.py $<
+
+bench10_summary: bench10.log
+	./tools/bench_parse.py $<
diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go
index c4528b5b01..438b559c5b 100644
--- a/acceptance/acceptance_test.go
+++ b/acceptance/acceptance_test.go
@@ -702,9 +702,6 @@ func runTest(t *testing.T,
 
 		skipRepls := false
 		if relPath == internal.MaterializedConfigFile {
-			if benchmarkMode {
-				continue
-			}
 			skipRepls = true
 		}
 		doComparison(t, repls, dir, tmpDir, relPath, &printedRepls, skipRepls)
diff --git a/tools/bench_parse.py b/tools/bench_parse.py
new file mode 100755
index 0000000000..3d7165fe17
--- /dev/null
+++ b/tools/bench_parse.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python3
+"""
+Parses output of benchmark runs (e.g. "make bench100") and prints a summary table.
+"""
+
+import sys
+import re
+import statistics
+from collections import defaultdict
+
+
+def parse_key_values(text):
+    """Parse key=value pairs from a string.
+
+    >>> parse_key_values("wall=10.316 ru_utime=19.207 ru_stime=0.505 ru_maxrss=573079552")
+    {'wall': 10.316, 'ru_utime': 19.207, 'ru_stime': 0.505, 'ru_maxrss': 573079552.0}
+    """
+    result = {}
+    for kv_pair in text.split():
+        if "=" in kv_pair:
+            key, value = kv_pair.split("=", 1)
+            try:
+                result[key] = float(value)
+            except ValueError:
+                result[key] = value
+    return result
+
+
+def parse_bench_output(file_path):
+    """Parse benchmark output and extract test results."""
+    results = defaultdict(lambda: defaultdict(list))
+
+    current_test = None
+
+    with open(file_path) as f:
+        for line in f:
+            # Match test name
+            test_match = re.match(r"=== RUN\s+(.+)", line)
+            if test_match:
+                current_test = test_match.group(1)
+                current_test = current_test.removeprefix("TestAccept/bundle/benchmarks/")
+                continue
+
+            # Match benchmark run data (only count runs, skip warm)
+            if "TESTLOG: Run #" in line and "(count)" in line:
+                if current_test:
+                    # Extract everything after the run label
+                    parts = line.split("(count):")
+                    if len(parts) == 2:
+                        kv_data = parse_key_values(parts[1].strip())
+                        for key, value in kv_data.items():
+                            results[current_test][key].append(value)
+
+    return results
+
+
+def calculate_means(results):
+    """Calculate mean values for each metric."""
+    means = {}
+    for test_name, metrics in results.items():
+        means[test_name] = {metric: statistics.mean(values) if values else 0 for metric, values in metrics.items()}
+    return means
+
+
+def print_results(results):
+    """Output table for single file."""
+    means = calculate_means(results)
+
+    all_metrics = {}
+    for metrics in means.values():
+        for key in metrics:
+            all_metrics.setdefault(key, None)
+    all_metrics = list(all_metrics.keys())
+
+    # Calculate column widths
+    testname_width = max(len("testname"), max((len(name) for name in means.keys()), default=0))
+    metric_width = 12
+
+    # Print header
f"{'testname':<{testname_width}}" + for metric in all_metrics: + header += f" {metric:>{metric_width}}" + print(header) + print("-" * len(header)) + + # Print rows + for test_name in sorted(means.keys()): + m = means[test_name] + row = f"{test_name:<{testname_width}}" + for metric in all_metrics: + value = m.get(metric, 0) + if isinstance(value, float) and value > 1000000: + row += f" {value:>{metric_width}.0f}" + else: + row += f" {value:>{metric_width}.3f}" + print(row) + + +def main(): + for filename in sys.argv[1:]: + results = parse_bench_output(filename) + print_results(results) + + +if __name__ == "__main__": + main() From 146344cf7bdf5666f228570978efd582df38eaf2 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 2 Jan 2026 13:35:42 +0100 Subject: [PATCH 05/17] add a comment --- acceptance/bundle/benchmarks/deploy/script | 3 +++ 1 file changed, 3 insertions(+) diff --git a/acceptance/bundle/benchmarks/deploy/script b/acceptance/bundle/benchmarks/deploy/script index 88cd42bced..5de5a7d955 100755 --- a/acceptance/bundle/benchmarks/deploy/script +++ b/acceptance/bundle/benchmarks/deploy/script @@ -1,3 +1,6 @@ gen_config.py ${BENCHMARK_PARAMS:-} > databricks.yml wc -l databricks.yml >> LOG.wc +# Note, since testserver persists state for the duration of the test, .databricks is kept and benchmark.py skips first run as a warmup, this measures time +# it takes for no-changes deploy. +# Note, terraform is set up by the test runner, so this time does not include TF download time. benchmark.py $CLI bundle deploy From 056b6c12cd5e669ca09b482d05f214280cf427a7 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 2 Jan 2026 13:40:48 +0100 Subject: [PATCH 06/17] lint fix --- acceptance/acceptance_test.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index 438b559c5b..0885d0c6e4 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -331,11 +331,7 @@ func testAccept(t *testing.T, inprocessMode bool, singleTest string) int { t.Skip(skipReason) } - runParallel := true - - if inprocessMode { - runParallel = false - } + runParallel := !inprocessMode if benchmarkMode && strings.Contains(dir, "benchmark") { runParallel = false From bcd6c7c3ebaab897c0c1bbb6d5dd06ac92e0600d Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 2 Jan 2026 13:55:54 +0100 Subject: [PATCH 07/17] update --- acceptance/bin/gen_config.py | 41 ++- acceptance/selftest/benchmark/out.test.toml | 5 + acceptance/selftest/benchmark/output.txt | 0 acceptance/selftest/benchmark/script | 1 + acceptance/selftest/gen_config/out.test.toml | 5 + acceptance/selftest/gen_config/output.txt | 266 +++++++++++++++++++ acceptance/selftest/gen_config/script | 1 + 7 files changed, 312 insertions(+), 7 deletions(-) create mode 100644 acceptance/selftest/benchmark/out.test.toml create mode 100644 acceptance/selftest/benchmark/output.txt create mode 100644 acceptance/selftest/benchmark/script create mode 100644 acceptance/selftest/gen_config/out.test.toml create mode 100644 acceptance/selftest/gen_config/output.txt create mode 100644 acceptance/selftest/gen_config/script diff --git a/acceptance/bin/gen_config.py b/acceptance/bin/gen_config.py index d2f6f6b79c..ae7badae5a 100755 --- a/acceptance/bin/gen_config.py +++ b/acceptance/bin/gen_config.py @@ -105,19 +105,46 @@ def gen_config(n): return config +def yaml_dump(obj, indent=0, list_item=False): + lines = [] + indent_str = " " * indent + + if isinstance(obj, dict): + first = True + 
+        for key, value in obj.items():
+            if list_item and first:
+                prefix = indent_str + "- "
+                first = False
+            elif list_item:
+                prefix = indent_str + "  "
+            else:
+                prefix = indent_str
+            nested_indent = indent + 2 if list_item else indent + 1
+            if isinstance(value, (dict, list)) and value:
+                lines.append(f"{prefix}{key}:")
+                lines.append(yaml_dump(value, nested_indent))
+            else:
+                lines.append(f"{prefix}{key}: {json.dumps(value)}")
+    elif isinstance(obj, list):
+        for item in obj:
+            if isinstance(item, (dict, list)):
+                lines.append(yaml_dump(item, indent, list_item=True))
+            else:
+                lines.append(f"{indent_str}- {json.dumps(item)}")
+    else:
+        prefix = f"{indent_str}- " if list_item else indent_str
+        return f"{prefix}{json.dumps(obj)}"
+
+    return "\n".join(line for line in lines if line)
+
+
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--jobs", type=int, default=10, help="Number of jobs to generate")
     args = parser.parse_args()
 
     config = gen_config(args.jobs)
-
-    try:
-        import yaml
-
-        print(yaml.dump(config, default_flow_style=False, sort_keys=False))
-    except ImportError:
-        print(json.dumps(config, indent=2))
+    print(yaml_dump(config))
 
 
 if __name__ == "__main__":
diff --git a/acceptance/selftest/benchmark/out.test.toml b/acceptance/selftest/benchmark/out.test.toml
new file mode 100644
index 0000000000..d560f1de04
--- /dev/null
+++ b/acceptance/selftest/benchmark/out.test.toml
@@ -0,0 +1,5 @@
+Local = true
+Cloud = false
+
+[EnvMatrix]
+  DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"]
diff --git a/acceptance/selftest/benchmark/output.txt b/acceptance/selftest/benchmark/output.txt
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/acceptance/selftest/benchmark/script b/acceptance/selftest/benchmark/script
new file mode 100644
index 0000000000..be2387c22b
--- /dev/null
+++ b/acceptance/selftest/benchmark/script
@@ -0,0 +1 @@
+benchmark.py --runs 3 'true'
diff --git a/acceptance/selftest/gen_config/out.test.toml b/acceptance/selftest/gen_config/out.test.toml
new file mode 100644
index 0000000000..d560f1de04
--- /dev/null
+++ b/acceptance/selftest/gen_config/out.test.toml
@@ -0,0 +1,5 @@
+Local = true
+Cloud = false
+
+[EnvMatrix]
+  DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"]
diff --git a/acceptance/selftest/gen_config/output.txt b/acceptance/selftest/gen_config/output.txt
new file mode 100644
index 0000000000..15041262c6
--- /dev/null
+++ b/acceptance/selftest/gen_config/output.txt
@@ -0,0 +1,266 @@
+bundle:
+  name: "test-bundle"
+resources:
+  jobs:
+    job_0:
+      description: "This job contain multiple tasks that are required to produce the weekly shark sightings report."
+      email_notifications:
+        no_alert_for_skipped_runs: false
+        on_failure:
+          - "user.name@databricks.com"
+        on_success:
+          - "user.name@databricks.com"
+      job_clusters:
+        - job_cluster_key: "auto_scaling_cluster"
+          new_cluster:
+            autoscale:
+              max_workers: 16
+              min_workers: 2
+            node_type_id: "[NODE_TYPE_ID]"
+            spark_conf:
+              spark.speculation: "true"
+            spark_version: "13.3.x-scala2.12"
+      max_concurrent_runs: 10
+      name: "job_0"
+      notification_settings:
+        no_alert_for_canceled_runs: false
+        no_alert_for_skipped_runs: false
+      parameters:
+        - default: "users"
+          name: "table"
+      tags:
+        cost-center: "engineering"
+        team: "jobs"
+      tasks:
+        - depends_on: []
+          description: "Extracts session data from events"
+          job_cluster_key: "auto_scaling_cluster"
+          libraries:
+            - jar: "dbfs:/mnt/databricks/Sessionize.jar"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          retry_on_timeout: false
+          spark_jar_task:
+            main_class_name: "com.databricks.Sessionize"
+            parameters:
+              - "--data"
+              - "dbfs:/path/to/data.json"
+          task_key: "Sessionize"
+          timeout_seconds: 86400
+        - depends_on: []
+          description: "Ingests order data"
+          job_cluster_key: "auto_scaling_cluster"
+          libraries:
+            - jar: "dbfs:/mnt/databricks/OrderIngest.jar"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          retry_on_timeout: false
+          spark_jar_task:
+            main_class_name: "com.databricks.OrdersIngest"
+            parameters:
+              - "--data"
+              - "dbfs:/path/to/order-data.json"
+          task_key: "Orders_Ingest"
+          timeout_seconds: 86400
+        - depends_on:
+            - task_key: "Orders_Ingest"
+            - task_key: "Sessionize"
+          description: "Matches orders with user sessions"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          new_cluster:
+            autoscale:
+              max_workers: 16
+              min_workers: 2
+            node_type_id: "[NODE_TYPE_ID]"
+            spark_conf:
+              spark.speculation: "true"
+            spark_version: "13.3.x-scala2.12"
+          notebook_task:
+            base_parameters:
+              age: "35"
+              name: "John Doe"
+            notebook_path: "/Users/user.name@databricks.com/Match"
+          retry_on_timeout: false
+          run_if: "ALL_SUCCESS"
+          task_key: "Match"
+          timeout_seconds: 86400
+      timeout_seconds: 86400
+      schedule:
+        pause_status: "UNPAUSED"
+        quartz_cron_expression: "20 30 * * * ?"
+        timezone_id: "Europe/London"
+    job_1:
+      description: "This job contain multiple tasks that are required to produce the weekly shark sightings report."
+      email_notifications:
+        no_alert_for_skipped_runs: false
+        on_failure:
+          - "user.name@databricks.com"
+        on_success:
+          - "user.name@databricks.com"
+      job_clusters:
+        - job_cluster_key: "auto_scaling_cluster"
+          new_cluster:
+            autoscale:
+              max_workers: 16
+              min_workers: 2
+            node_type_id: "[NODE_TYPE_ID]"
+            spark_conf:
+              spark.speculation: "true"
+            spark_version: "13.3.x-scala2.12"
+      max_concurrent_runs: 10
+      name: "job_1"
+      notification_settings:
+        no_alert_for_canceled_runs: false
+        no_alert_for_skipped_runs: false
+      parameters:
+        - default: "users"
+          name: "table"
+      tags:
+        cost-center: "engineering"
+        team: "jobs"
+      tasks:
+        - depends_on: []
+          description: "Extracts session data from events"
+          job_cluster_key: "auto_scaling_cluster"
+          libraries:
+            - jar: "dbfs:/mnt/databricks/Sessionize.jar"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          retry_on_timeout: false
+          spark_jar_task:
+            main_class_name: "com.databricks.Sessionize"
+            parameters:
+              - "--data"
+              - "dbfs:/path/to/data.json"
+          task_key: "Sessionize"
+          timeout_seconds: 86400
+        - depends_on: []
+          description: "Ingests order data"
+          job_cluster_key: "auto_scaling_cluster"
+          libraries:
+            - jar: "dbfs:/mnt/databricks/OrderIngest.jar"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          retry_on_timeout: false
+          spark_jar_task:
+            main_class_name: "com.databricks.OrdersIngest"
+            parameters:
+              - "--data"
+              - "dbfs:/path/to/order-data.json"
+          task_key: "Orders_Ingest"
+          timeout_seconds: 86400
+        - depends_on:
+            - task_key: "Orders_Ingest"
+            - task_key: "Sessionize"
+          description: "Matches orders with user sessions"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          new_cluster:
+            autoscale:
+              max_workers: 16
+              min_workers: 2
+            node_type_id: "[NODE_TYPE_ID]"
+            spark_conf:
+              spark.speculation: "true"
+            spark_version: "13.3.x-scala2.12"
+          notebook_task:
+            base_parameters:
+              age: "35"
+              name: "John Doe"
+            notebook_path: "/Users/user.name@databricks.com/Match"
+          retry_on_timeout: false
+          run_if: "ALL_SUCCESS"
+          task_key: "Match"
+          timeout_seconds: 86400
+      timeout_seconds: 86400
+      continuous:
+        pause_status: "UNPAUSED"
+    job_2:
+      description: "This job contain multiple tasks that are required to produce the weekly shark sightings report."
+      email_notifications:
+        no_alert_for_skipped_runs: false
+        on_failure:
+          - "user.name@databricks.com"
+        on_success:
+          - "user.name@databricks.com"
+      job_clusters:
+        - job_cluster_key: "auto_scaling_cluster"
+          new_cluster:
+            autoscale:
+              max_workers: 16
+              min_workers: 2
+            node_type_id: "[NODE_TYPE_ID]"
+            spark_conf:
+              spark.speculation: "true"
+            spark_version: "13.3.x-scala2.12"
+      max_concurrent_runs: 10
+      name: "job_2"
+      notification_settings:
+        no_alert_for_canceled_runs: false
+        no_alert_for_skipped_runs: false
+      parameters:
+        - default: "users"
+          name: "table"
+      tags:
+        cost-center: "engineering"
+        team: "jobs"
+      tasks:
+        - depends_on: []
+          description: "Extracts session data from events"
+          job_cluster_key: "auto_scaling_cluster"
+          libraries:
+            - jar: "dbfs:/mnt/databricks/Sessionize.jar"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          retry_on_timeout: false
+          spark_jar_task:
+            main_class_name: "com.databricks.Sessionize"
+            parameters:
+              - "--data"
+              - "dbfs:/path/to/data.json"
+          task_key: "Sessionize"
+          timeout_seconds: 86400
+        - depends_on: []
+          description: "Ingests order data"
+          job_cluster_key: "auto_scaling_cluster"
+          libraries:
+            - jar: "dbfs:/mnt/databricks/OrderIngest.jar"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          retry_on_timeout: false
+          spark_jar_task:
+            main_class_name: "com.databricks.OrdersIngest"
+            parameters:
+              - "--data"
+              - "dbfs:/path/to/order-data.json"
+          task_key: "Orders_Ingest"
+          timeout_seconds: 86400
+        - depends_on:
+            - task_key: "Orders_Ingest"
+            - task_key: "Sessionize"
+          description: "Matches orders with user sessions"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          new_cluster:
+            autoscale:
+              max_workers: 16
+              min_workers: 2
+            node_type_id: "[NODE_TYPE_ID]"
+            spark_conf:
+              spark.speculation: "true"
+            spark_version: "13.3.x-scala2.12"
+          notebook_task:
+            base_parameters:
+              age: "35"
+              name: "John Doe"
+            notebook_path: "/Users/user.name@databricks.com/Match"
+          retry_on_timeout: false
+          run_if: "ALL_SUCCESS"
+          task_key: "Match"
+          timeout_seconds: 86400
+      timeout_seconds: 86400
+      schedule:
+        pause_status: "UNPAUSED"
+        quartz_cron_expression: "20 30 * * * ?"
+        timezone_id: "Europe/London"
diff --git a/acceptance/selftest/gen_config/script b/acceptance/selftest/gen_config/script
new file mode 100644
index 0000000000..c2f3177005
--- /dev/null
+++ b/acceptance/selftest/gen_config/script
@@ -0,0 +1 @@
+gen_config.py --jobs 3

From f4851700fcbe503349a0b65928095c4d9fac00f7 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 13:57:21 +0100
Subject: [PATCH 08/17] replace dump with print

---
 acceptance/bin/gen_config.py | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/acceptance/bin/gen_config.py b/acceptance/bin/gen_config.py
index ae7badae5a..84a7262271 100755
--- a/acceptance/bin/gen_config.py
+++ b/acceptance/bin/gen_config.py
@@ -105,8 +105,7 @@ def gen_config(n):
     return config
 
 
-def yaml_dump(obj, indent=0, list_item=False):
-    lines = []
+def print_yaml(obj, indent=0, list_item=False):
     indent_str = "  " * indent
 
     if isinstance(obj, dict):
@@ -121,21 +120,19 @@ def yaml_dump(obj, indent=0, list_item=False):
             prefix = indent_str
             nested_indent = indent + 2 if list_item else indent + 1
             if isinstance(value, (dict, list)) and value:
-                lines.append(f"{prefix}{key}:")
-                lines.append(yaml_dump(value, nested_indent))
+                print(f"{prefix}{key}:")
+                print_yaml(value, nested_indent)
             else:
-                lines.append(f"{prefix}{key}: {json.dumps(value)}")
+                print(f"{prefix}{key}: {json.dumps(value)}")
     elif isinstance(obj, list):
         for item in obj:
             if isinstance(item, (dict, list)):
-                lines.append(yaml_dump(item, indent, list_item=True))
+                print_yaml(item, indent, list_item=True)
             else:
-                lines.append(f"{indent_str}- {json.dumps(item)}")
+                print(f"{indent_str}- {json.dumps(item)}")
     else:
         prefix = f"{indent_str}- " if list_item else indent_str
-        return f"{prefix}{json.dumps(obj)}"
-
-    return "\n".join(line for line in lines if line)
+        print(f"{prefix}{json.dumps(obj)}")
 
 
 def main():
@@ -143,8 +140,7 @@ def main():
     parser.add_argument("--jobs", type=int, default=10, help="Number of jobs to generate")
     args = parser.parse_args()
 
-    config = gen_config(args.jobs)
-    print(yaml_dump(config))
+    print_yaml(gen_config(args.jobs))
 
 
 if __name__ == "__main__":

From 252cf1a9902afa0fcfeb3732842f14338402af20 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 13:59:12 +0100
Subject: [PATCH 09/17] update README

---
 acceptance/README.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/acceptance/README.md b/acceptance/README.md
index c8c17e310c..8e33d273a7 100644
--- a/acceptance/README.md
+++ b/acceptance/README.md
@@ -22,6 +22,25 @@ Any file starting with "LOG" will be logged to test log (visible with go test -v
 
 See [selftest](./selftest) for more examples.
 
+## Benchmarks
+
+Benchmarks are regular acceptance tests that log measurements in a certain format. The output can be fed to `tools/bench_parse.py` to print a summary table.
+
+The test runner recognizes a benchmark by the word "benchmark" anywhere in its path. For these tests, parallel execution is disabled if and only if the BENCHMARK\_PARAMS variable is set.
+
+The benchmarks make use of two scripts:
+
+- `gen_config.py --jobs N` to generate a config with N jobs
+- `benchmark.py` to run a command a few times and log the time measurements.
+
+The default number of runs in benchmark.py depends on the BENCHMARK\_PARAMS variable: if it is set, the default is 5; otherwise it is 1.
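+
+For example, to run the small benchmark and print the summary table (a typical invocation; timings vary per machine):
+
+```sh
+make bench10_summary
+```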
+
 ## Running acceptance tests on Windows
 
 To run the acceptance tests from a terminal on Windows (eg. Git Bash from VS Code),

From 1ec802f63a78e60b8b8ce5b7643a2405f3a4de05 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 14:02:07 +0100
Subject: [PATCH 10/17] update

---
 acceptance/bundle/benchmarks/validate/output.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/acceptance/bundle/benchmarks/validate/output.txt b/acceptance/bundle/benchmarks/validate/output.txt
index 4b7537e96c..1ef997149b 100644
--- a/acceptance/bundle/benchmarks/validate/output.txt
+++ b/acceptance/bundle/benchmarks/validate/output.txt
@@ -1,14 +1,14 @@
 
 >>> head -n 10 databricks.yml
 bundle:
-  name: test-bundle
+  name: "test-bundle"
 resources:
   jobs:
     job_0:
-      description: This job contain multiple tasks that are required to produce the
-        weekly shark sightings report.
+      description: "This job contain multiple tasks that are required to produce the weekly shark sightings report."
       email_notifications:
         no_alert_for_skipped_runs: false
         on_failure:
+          - "user.name@databricks.com"
 
 >>> benchmark.py [CLI] bundle validate

From 61f673310aa75aa612f0a8442a7acdd5a4f61766 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 14:04:58 +0100
Subject: [PATCH 11/17] clean up

---
 acceptance/bundle/benchmarks/test.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/acceptance/bundle/benchmarks/test.toml b/acceptance/bundle/benchmarks/test.toml
index 6575d7cf93..fbcf2aad73 100644
--- a/acceptance/bundle/benchmarks/test.toml
+++ b/acceptance/bundle/benchmarks/test.toml
@@ -1,3 +1,2 @@
 Timeout = '4h'
 Ignore = ["databricks.yml"]
-BundleConfig.default_name = ""

From 415cf3c733056b32a97473f52f924baaeeea3aed Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 14:09:52 +0100
Subject: [PATCH 12/17] clean up

---
 tools/bench_parse.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tools/bench_parse.py b/tools/bench_parse.py
index 3d7165fe17..534fbe36af 100755
--- a/tools/bench_parse.py
+++ b/tools/bench_parse.py
@@ -34,14 +34,12 @@ def parse_bench_output(file_path):
 
     with open(file_path) as f:
         for line in f:
-            # Match test name
            test_match = re.match(r"=== RUN\s+(.+)", line)
             if test_match:
                 current_test = test_match.group(1)
                 current_test = current_test.removeprefix("TestAccept/bundle/benchmarks/")
                 continue
 
-            # Match benchmark run data (only count runs, skip warm)
             if "TESTLOG: Run #" in line and "(count)" in line:
                 if current_test:
                     # Extract everything after the run label
@@ -72,18 +70,15 @@ def print_results(results):
             all_metrics.setdefault(key, None)
     all_metrics = list(all_metrics.keys())
 
-    # Calculate column widths
     testname_width = max(len("testname"), max((len(name) for name in means.keys()), default=0))
     metric_width = 12
 
-    # Print header
     header = f"{'testname':<{testname_width}}"
     for metric in all_metrics:
         header += f" {metric:>{metric_width}}"
     print(header)
     print("-" * len(header))
 
-    # Print rows
     for test_name in sorted(means.keys()):

From 348c810956e3e073dbacb48813b7f5031be56d23 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 14:11:10 +0100
Subject: [PATCH 13/17] add a comment

---
 acceptance/bin/benchmark.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/acceptance/bin/benchmark.py b/acceptance/bin/benchmark.py
index 576bafc811..b6c263ae01 100755
--- a/acceptance/bin/benchmark.py
+++ b/acceptance/bin/benchmark.py
@@ -13,6 +13,7 @@ def run_benchmark(command, warmup, runs):
     times = []
 
     for i in range(runs):
+        # double fork to reset max statistics like ru_maxrss
         cp = subprocess.run([sys.executable, sys.argv[0], "--once"] + command, stdout=subprocess.PIPE)
         if cp.returncode != 0:
             sys.exit(cp.returncode)

From a59fa0dc60456d7a63fbdc22cef792ecccfb09b1 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 14:21:05 +0100
Subject: [PATCH 14/17] fix windows - resource is not available

---
 acceptance/bin/benchmark.py | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/acceptance/bin/benchmark.py b/acceptance/bin/benchmark.py
index b6c263ae01..5d34fc89df 100755
--- a/acceptance/bin/benchmark.py
+++ b/acceptance/bin/benchmark.py
@@ -5,9 +5,14 @@ import statistics
 import sys
 import os
-import resource
 import json
 
+try:
+    import resource
+except ImportError:
+    # n/a on windows
+    resource = None
+
 
 def run_benchmark(command, warmup, runs):
     times = []
@@ -55,7 +60,8 @@ def run_once(command):
         shell = False
 
     # getrusage(RUSAGE_CHILDREN) aggregates usage of all awaited child processes; the before/after delta isolates this run
-    rusage_before = resource.getrusage(resource.RUSAGE_CHILDREN)
+    if resource:
+        rusage_before = resource.getrusage(resource.RUSAGE_CHILDREN)
 
     with open("LOG.process", "a") as log:
         start = time.perf_counter()
@@ -66,15 +72,19 @@ def run_once(command):
         print(f"Error: command failed with exit code {result.returncode}", file=sys.stderr)
         sys.exit(result.returncode)
 
-    rusage_after = resource.getrusage(resource.RUSAGE_CHILDREN)
+    result = {"wall": end - start}
+
+    if resource:
+        rusage_after = resource.getrusage(resource.RUSAGE_CHILDREN)
+
+        result.update({
+            "ru_utime": rusage_after.ru_utime - rusage_before.ru_utime,
+            "ru_stime": rusage_after.ru_stime - rusage_before.ru_stime,
+            # maxrss returns largest process, so subtracting is not correct since rusage_before will be reporting different process
+            "ru_maxrss": rusage_after.ru_maxrss,
+        })
 
-    return {
-        "wall": end - start,
-        "ru_utime": rusage_after.ru_utime - rusage_before.ru_utime,
-        "ru_stime": rusage_after.ru_stime - rusage_before.ru_stime,
-        # maxrss returns largest process, so subtracting is not correct since rusage_before will be reporting different process
-        "ru_maxrss": rusage_after.ru_maxrss,
-    }
+    return result

From 2682b4de27c2d727584f4404ef66c7598ec4da5a Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 16:08:32 +0100
Subject: [PATCH 15/17] formatting

---
 acceptance/bin/benchmark.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/acceptance/bin/benchmark.py b/acceptance/bin/benchmark.py
index 5d34fc89df..bad87fd8b2 100755
--- a/acceptance/bin/benchmark.py
+++ b/acceptance/bin/benchmark.py
@@ -77,12 +77,14 @@ def run_once(command):
     if resource:
         rusage_after = resource.getrusage(resource.RUSAGE_CHILDREN)
 
-        result.update({
-            "ru_utime": rusage_after.ru_utime - rusage_before.ru_utime,
-            "ru_stime": rusage_after.ru_stime - rusage_before.ru_stime,
-            # maxrss returns largest process, so subtracting is not correct since rusage_before will be reporting different process
-            "ru_maxrss": rusage_after.ru_maxrss,
-        })
+        result.update(
+            {
+                "ru_utime": rusage_after.ru_utime - rusage_before.ru_utime,
+                "ru_stime": rusage_after.ru_stime - rusage_before.ru_stime,
+                # maxrss returns largest process, so subtracting is not correct since rusage_before will be reporting different process
+                "ru_maxrss": rusage_after.ru_maxrss,
+            }
+        )

From 0a604d66a2a4590aeb70f0fdc9cfd19ff6042a6d Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Mon, 5 Jan 2026 09:58:01 +0100
Subject: [PATCH 16/17] disable on Windows

---
 acceptance/bundle/benchmarks/test.toml | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/acceptance/bundle/benchmarks/test.toml b/acceptance/bundle/benchmarks/test.toml
index fbcf2aad73..9f94c55600 100644
--- a/acceptance/bundle/benchmarks/test.toml
+++ b/acceptance/bundle/benchmarks/test.toml
@@ -1,2 +1,5 @@
 Timeout = '4h'
 Ignore = ["databricks.yml"]
+
+# Disabled because it fails on CI. We don't need this to work on Windows.
+GOOS.windows = false

From 290713ef512124e62bf781406d98f02f3b471060 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Mon, 5 Jan 2026 10:11:27 +0100
Subject: [PATCH 17/17] update out file

---
 acceptance/bundle/benchmarks/deploy/out.test.toml   | 3 +++
 acceptance/bundle/benchmarks/plan/out.test.toml     | 3 +++
 acceptance/bundle/benchmarks/validate/out.test.toml | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/acceptance/bundle/benchmarks/deploy/out.test.toml b/acceptance/bundle/benchmarks/deploy/out.test.toml
index d560f1de04..40bb0d1047 100644
--- a/acceptance/bundle/benchmarks/deploy/out.test.toml
+++ b/acceptance/bundle/benchmarks/deploy/out.test.toml
@@ -1,5 +1,8 @@
 Local = true
 Cloud = false
 
+[GOOS]
+  windows = false
+
 [EnvMatrix]
   DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"]
diff --git a/acceptance/bundle/benchmarks/plan/out.test.toml b/acceptance/bundle/benchmarks/plan/out.test.toml
index d560f1de04..40bb0d1047 100644
--- a/acceptance/bundle/benchmarks/plan/out.test.toml
+++ b/acceptance/bundle/benchmarks/plan/out.test.toml
@@ -1,5 +1,8 @@
 Local = true
 Cloud = false
 
+[GOOS]
+  windows = false
+
 [EnvMatrix]
   DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"]
diff --git a/acceptance/bundle/benchmarks/validate/out.test.toml b/acceptance/bundle/benchmarks/validate/out.test.toml
index d560f1de04..40bb0d1047 100644
--- a/acceptance/bundle/benchmarks/validate/out.test.toml
+++ b/acceptance/bundle/benchmarks/validate/out.test.toml
@@ -1,5 +1,8 @@
 Local = true
 Cloud = false
 
+[GOOS]
+  windows = false
+
 [EnvMatrix]
   DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"]