From ccdd85c9bd016a0ebed3b9afc4cee248a88590ba Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Mon, 29 Dec 2025 13:41:59 +0100
Subject: [PATCH 01/17] Add benchmarks

---
 Makefile                                      |   3 +
 acceptance/acceptance_test.go                 |  20 ++-
 acceptance/bin/benchmark.py                   |  79 +++++++++++
 acceptance/bin/gen_config.py                  | 124 ++++++++++++++++++
 .../bundle/benchmarks/deploy/out.test.toml    |   5 +
 .../bundle/benchmarks/deploy/output.txt       |   0
 acceptance/bundle/benchmarks/deploy/script    |   5 +
 .../bundle/benchmarks/plan/out.test.toml      |   5 +
 acceptance/bundle/benchmarks/plan/output.txt  |   2 +
 acceptance/bundle/benchmarks/plan/script      |   5 +
 acceptance/bundle/benchmarks/test.toml        |   2 +
 .../bundle/benchmarks/validate/out.test.toml  |   5 +
 .../bundle/benchmarks/validate/output.txt     |  14 ++
 acceptance/bundle/benchmarks/validate/script  |   5 +
 14 files changed, 272 insertions(+), 2 deletions(-)
 create mode 100755 acceptance/bin/benchmark.py
 create mode 100755 acceptance/bin/gen_config.py
 create mode 100644 acceptance/bundle/benchmarks/deploy/out.test.toml
 create mode 100644 acceptance/bundle/benchmarks/deploy/output.txt
 create mode 100755 acceptance/bundle/benchmarks/deploy/script
 create mode 100644 acceptance/bundle/benchmarks/plan/out.test.toml
 create mode 100644 acceptance/bundle/benchmarks/plan/output.txt
 create mode 100755 acceptance/bundle/benchmarks/plan/script
 create mode 100644 acceptance/bundle/benchmarks/test.toml
 create mode 100644 acceptance/bundle/benchmarks/validate/out.test.toml
 create mode 100644 acceptance/bundle/benchmarks/validate/output.txt
 create mode 100755 acceptance/bundle/benchmarks/validate/script

diff --git a/Makefile b/Makefile
index 6d7978771c..9a85bec929 100644
--- a/Makefile
+++ b/Makefile
@@ -97,6 +97,9 @@ test-update-aws:
 
 test-update-all: test-update test-update-aws
 
+bench:
+	BENCH_N_JOBS=1000 BENCHMARK_MODE=1 go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
+
 slowest:
 	${GO_TOOL} gotestsum tool slowest --jsonfile test-output.json --threshold 1s --num 50
 
diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go
index de02e455b8..f38af28483 100644
--- a/acceptance/acceptance_test.go
+++ b/acceptance/acceptance_test.go
@@ -61,6 +61,9 @@ var InprocessMode bool
 // lines with this prefix are not recorded in output.txt but logged instead
 const TestLogPrefix = "TESTLOG: "
 
+// In benchmark mode we disable parallel runs of all tests that contain the word "benchmark" in their path
+var benchmarkMode = os.Getenv("BENCHMARK_MODE") != ""
+
 func init() {
 	flag.BoolVar(&InprocessMode, "inprocess", false, "Run CLI in the same process as test (for debugging)")
 	flag.BoolVar(&KeepTmp, "keeptmp", false, "Do not delete TMP directory after run")
@@ -328,7 +331,17 @@ func testAccept(t *testing.T, inprocessMode bool, singleTest string) int {
 				t.Skip(skipReason)
 			}
 
-			if !inprocessMode {
+			runParallel := true
+
+			if inprocessMode {
+				runParallel = false
+			}
+
+			if benchmarkMode && strings.Contains(dir, "benchmark") {
+				runParallel = false
+			}
+
+			if runParallel {
 				t.Parallel()
 			}
 
@@ -344,7 +357,7 @@ func testAccept(t *testing.T, inprocessMode bool, singleTest string) int {
 			for ind, envset := range expanded {
 				envname := strings.Join(envset, "/")
 				t.Run(envname, func(t *testing.T) {
-					if !inprocessMode {
+					if runParallel {
 						t.Parallel()
 					}
 					runTest(t, dir, ind, coverDir, repls.Clone(), config, envset, envFilters)
@@ -689,6 +702,9 @@ func runTest(t *testing.T,
 
 		skipRepls := false
 		if relPath == internal.MaterializedConfigFile {
+			if benchmarkMode {
+				continue
+			}
 			skipRepls = true
 		}
 		doComparison(t, repls, dir, tmpDir, relPath, &printedRepls, skipRepls)
diff --git a/acceptance/bin/benchmark.py b/acceptance/bin/benchmark.py
new file mode 100755
index 0000000000..213b7c186a
--- /dev/null
+++ b/acceptance/bin/benchmark.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+import argparse
+import subprocess
+import time
+import statistics
+import sys
+import os
+import resource
+
+
+def run_benchmark(command, warmup, runs):
+    times = []
+
+    if len(command) == 1 and (" " in command[0] or ">" in command[0]):
+        shell = True
+        command = command[0]
+    else:
+        shell = False
+
+    for i in range(runs):
+        rusage_before = resource.getrusage(resource.RUSAGE_CHILDREN)
+
+        with open("LOG.process", "a") as log:
+            start = time.perf_counter()
+            result = subprocess.run(command, shell=shell, stdout=log, stderr=log)
+            end = time.perf_counter()
+
+        if result.returncode != 0:
+            print(f"Error: command failed with exit code {result.returncode}", file=sys.stderr)
+            sys.exit(result.returncode)
+
+        rusage_after = resource.getrusage(resource.RUSAGE_CHILDREN)
+        utime = rusage_after.ru_utime - rusage_before.ru_utime
+        stime = rusage_after.ru_stime - rusage_before.ru_stime
+
+        run = f"Run #{i} (warm): " if i < warmup else f"Run #{i} (count):"
+        print(
+            f"TESTLOG: {run} wall={end - start:.3f} ru_utime={utime:.3f} ru_stime={stime:.3f} ru_maxrss={rusage_after.ru_maxrss}"
+        )
+
+        if i >= warmup:
+            times.append(end - start)
+
+    if not times:
+        print("No times recorded")
+        return
+
+    if len(times) > 1:
+        mean = statistics.mean(times)
+        stdev = statistics.stdev(times)
+        min_time = min(times)
+        max_time = max(times)
+
+        print(f"TESTLOG: Benchmark: {command}")
+        print(f"TESTLOG: Time (mean ± σ): {mean:.3f} s ± {stdev:.3f} s")
+        print(f"TESTLOG: Range (min … max): {min_time:.3f} s … {max_time:.3f} s {runs} runs", flush=True)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--warmup", type=int, default=1)
+    parser.add_argument("--runs", type=int)
+    parser.add_argument("command", nargs="+")
+    args = parser.parse_args()
+
+    if args.runs is None:
+        if os.environ.get("BENCHMARK_MODE"):
+            args.runs = 5
+        else:
+            args.runs = 1
+
+    if args.warmup >= args.runs:
+        args.warmup = min(1, args.runs - 1)
+
+    run_benchmark(args.command, args.warmup, args.runs)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/acceptance/bin/gen_config.py b/acceptance/bin/gen_config.py
new file mode 100755
index 0000000000..4e15953d8b
--- /dev/null
+++ b/acceptance/bin/gen_config.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+import argparse
+import json
+import copy
+
+JOB_TEMPLATE_BASE = {
+    "description": "This job contain multiple tasks that are required to produce the weekly shark sightings report.",
+    "email_notifications": {
+        "no_alert_for_skipped_runs": False,
+        "on_failure": ["user.name@databricks.com"],
+        "on_success": ["user.name@databricks.com"],
+    },
+    "job_clusters": [
+        {
+            "job_cluster_key": "auto_scaling_cluster",
+            "new_cluster": {
+                "autoscale": {"max_workers": 16, "min_workers": 2},
+                "node_type_id": "i3.xlarge",
+                "spark_conf": {"spark.speculation": "true"},
+                "spark_version": "13.3.x-scala2.12",
+            },
+        }
+    ],
+    "max_concurrent_runs": 10,
+    "name": "A multitask job",
+    "notification_settings": {"no_alert_for_canceled_runs": False, "no_alert_for_skipped_runs": False},
+    "parameters": [{"default": "users", "name": "table"}],
+    "tags": {"cost-center": "engineering", "team": "jobs"},
+    "tasks": [
+        {
+            "depends_on": [],
+            "description": "Extracts session data from events",
+            "job_cluster_key": "auto_scaling_cluster",
"libraries": [{"jar": "dbfs:/mnt/databricks/Sessionize.jar"}], + "max_retries": 3, + "min_retry_interval_millis": 2000, + "retry_on_timeout": False, + "spark_jar_task": { + "main_class_name": "com.databricks.Sessionize", + "parameters": ["--data", "dbfs:/path/to/data.json"], + }, + "task_key": "Sessionize", + "timeout_seconds": 86400, + }, + { + "depends_on": [], + "description": "Ingests order data", + "job_cluster_key": "auto_scaling_cluster", + "libraries": [{"jar": "dbfs:/mnt/databricks/OrderIngest.jar"}], + "max_retries": 3, + "min_retry_interval_millis": 2000, + "retry_on_timeout": False, + "spark_jar_task": { + "main_class_name": "com.databricks.OrdersIngest", + "parameters": ["--data", "dbfs:/path/to/order-data.json"], + }, + "task_key": "Orders_Ingest", + "timeout_seconds": 86400, + }, + { + "depends_on": [{"task_key": "Orders_Ingest"}, {"task_key": "Sessionize"}], + "description": "Matches orders with user sessions", + "max_retries": 3, + "min_retry_interval_millis": 2000, + "new_cluster": { + "autoscale": {"max_workers": 16, "min_workers": 2}, + "node_type_id": "i3.xlarge", + "spark_conf": {"spark.speculation": "true"}, + "spark_version": "13.3.x-scala2.12", + }, + "notebook_task": { + "base_parameters": {"age": "35", "name": "John Doe"}, + "notebook_path": "/Users/user.name@databricks.com/Match", + }, + "retry_on_timeout": False, + "run_if": "ALL_SUCCESS", + "task_key": "Match", + "timeout_seconds": 86400, + }, + ], + "timeout_seconds": 86400, +} + + +def gen_config(n): + jobs = {} + for i in range(n): + job = copy.deepcopy(JOB_TEMPLATE_BASE) + job["name"] = f"job_{i}" + + # Odd jobs use continuous, even jobs use schedule + if i % 2 == 1: + job["continuous"] = {"pause_status": "UNPAUSED"} + else: + job["schedule"] = { + "pause_status": "UNPAUSED", + "quartz_cron_expression": "20 30 * * * ?", + "timezone_id": "Europe/London", + } + + jobs[f"job_{i}"] = job + + config = {"bundle": {"name": "test-bundle"}, "resources": {"jobs": jobs}} + + return config + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--jobs", type=int, required=True, help="Number of jobs to generate") + args = parser.parse_args() + + config = gen_config(args.jobs) + + import yaml + + try: + print(yaml.dump(config, default_flow_style=False, sort_keys=False)) + except ImportError: + print(json.dumps(config, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/benchmarks/deploy/out.test.toml b/acceptance/bundle/benchmarks/deploy/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/bundle/benchmarks/deploy/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/benchmarks/deploy/output.txt b/acceptance/bundle/benchmarks/deploy/output.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/acceptance/bundle/benchmarks/deploy/script b/acceptance/bundle/benchmarks/deploy/script new file mode 100755 index 0000000000..eed8651c9b --- /dev/null +++ b/acceptance/bundle/benchmarks/deploy/script @@ -0,0 +1,5 @@ +BENCH_N_JOBS="${BENCH_N_JOBS:-10}" +echo "TESTLOG: Generating config with $BENCH_N_JOBS jobs" +gen_config.py --jobs $BENCH_N_JOBS > databricks.yml +wc -l databricks.yml >> LOG +benchmark.py $CLI bundle deploy diff --git a/acceptance/bundle/benchmarks/plan/out.test.toml b/acceptance/bundle/benchmarks/plan/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ 
+++ b/acceptance/bundle/benchmarks/plan/out.test.toml
@@ -0,0 +1,5 @@
+Local = true
+Cloud = false
+
+[EnvMatrix]
+  DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"]
diff --git a/acceptance/bundle/benchmarks/plan/output.txt b/acceptance/bundle/benchmarks/plan/output.txt
new file mode 100644
index 0000000000..89b1365657
--- /dev/null
+++ b/acceptance/bundle/benchmarks/plan/output.txt
@@ -0,0 +1,2 @@
+
+>>> benchmark.py $CLI bundle plan > /dev/null
diff --git a/acceptance/bundle/benchmarks/plan/script b/acceptance/bundle/benchmarks/plan/script
new file mode 100755
index 0000000000..b219382e6f
--- /dev/null
+++ b/acceptance/bundle/benchmarks/plan/script
@@ -0,0 +1,5 @@
+BENCH_N_JOBS="${BENCH_N_JOBS:-10}"
+echo "TESTLOG: Generating config with $BENCH_N_JOBS jobs"
+gen_config.py --jobs $BENCH_N_JOBS > databricks.yml
+wc -l databricks.yml >> LOG
+trace benchmark.py '$CLI bundle plan > /dev/null'
diff --git a/acceptance/bundle/benchmarks/test.toml b/acceptance/bundle/benchmarks/test.toml
new file mode 100644
index 0000000000..fbcf2aad73
--- /dev/null
+++ b/acceptance/bundle/benchmarks/test.toml
@@ -0,0 +1,2 @@
+Timeout = '4h'
+Ignore = ["databricks.yml"]
diff --git a/acceptance/bundle/benchmarks/validate/out.test.toml b/acceptance/bundle/benchmarks/validate/out.test.toml
new file mode 100644
index 0000000000..d560f1de04
--- /dev/null
+++ b/acceptance/bundle/benchmarks/validate/out.test.toml
@@ -0,0 +1,5 @@
+Local = true
+Cloud = false
+
+[EnvMatrix]
+  DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"]
diff --git a/acceptance/bundle/benchmarks/validate/output.txt b/acceptance/bundle/benchmarks/validate/output.txt
new file mode 100644
index 0000000000..4b7537e96c
--- /dev/null
+++ b/acceptance/bundle/benchmarks/validate/output.txt
@@ -0,0 +1,14 @@
+
+>>> head -n 10 databricks.yml
+bundle:
+  name: test-bundle
+resources:
+  jobs:
+    job_0:
+      description: This job contain multiple tasks that are required to produce the
+        weekly shark sightings report.
+      email_notifications:
+        no_alert_for_skipped_runs: false
+        on_failure:
+
+>>> benchmark.py [CLI] bundle validate
diff --git a/acceptance/bundle/benchmarks/validate/script b/acceptance/bundle/benchmarks/validate/script
new file mode 100755
index 0000000000..1d5b4ddda3
--- /dev/null
+++ b/acceptance/bundle/benchmarks/validate/script
@@ -0,0 +1,5 @@
+BENCH_N_JOBS="${BENCH_N_JOBS:-10}"
+echo "TESTLOG: Generating config with $BENCH_N_JOBS jobs"
+gen_config.py --jobs $BENCH_N_JOBS > databricks.yml
+trace head -n 10 databricks.yml
+trace benchmark.py $CLI bundle validate

From 4f5b52e102073f2078ca36b61666bfc5c3695bd8 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Tue, 30 Dec 2025 13:44:24 +0100
Subject: [PATCH 02/17] update

---
 acceptance/bundle/benchmarks/deploy/script   | 6 ++----
 acceptance/bundle/benchmarks/plan/script     | 6 ++----
 acceptance/bundle/benchmarks/test.toml       | 1 +
 acceptance/bundle/benchmarks/validate/script | 5 ++---
 4 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/acceptance/bundle/benchmarks/deploy/script b/acceptance/bundle/benchmarks/deploy/script
index eed8651c9b..88cd42bced 100755
--- a/acceptance/bundle/benchmarks/deploy/script
+++ b/acceptance/bundle/benchmarks/deploy/script
@@ -1,5 +1,3 @@
-BENCH_N_JOBS="${BENCH_N_JOBS:-10}"
-echo "TESTLOG: Generating config with $BENCH_N_JOBS jobs"
-gen_config.py --jobs $BENCH_N_JOBS > databricks.yml
-wc -l databricks.yml >> LOG
+gen_config.py ${BENCHMARK_PARAMS:-} > databricks.yml
+wc -l databricks.yml >> LOG.wc
 benchmark.py $CLI bundle deploy
diff --git a/acceptance/bundle/benchmarks/plan/script b/acceptance/bundle/benchmarks/plan/script
index b219382e6f..e3d6828f19 100755
--- a/acceptance/bundle/benchmarks/plan/script
+++ b/acceptance/bundle/benchmarks/plan/script
@@ -1,5 +1,3 @@
-BENCH_N_JOBS="${BENCH_N_JOBS:-10}"
-echo "TESTLOG: Generating config with $BENCH_N_JOBS jobs"
-gen_config.py --jobs $BENCH_N_JOBS > databricks.yml
-wc -l databricks.yml >> LOG
+gen_config.py ${BENCHMARK_PARAMS:-} > databricks.yml
+wc -l databricks.yml > LOG.wc
 trace benchmark.py '$CLI bundle plan > /dev/null'
diff --git a/acceptance/bundle/benchmarks/test.toml b/acceptance/bundle/benchmarks/test.toml
index fbcf2aad73..6575d7cf93 100644
--- a/acceptance/bundle/benchmarks/test.toml
+++ b/acceptance/bundle/benchmarks/test.toml
@@ -1,2 +1,3 @@
 Timeout = '4h'
 Ignore = ["databricks.yml"]
+BundleConfig.default_name = ""
diff --git a/acceptance/bundle/benchmarks/validate/script b/acceptance/bundle/benchmarks/validate/script
index 1d5b4ddda3..07a8f30603 100755
--- a/acceptance/bundle/benchmarks/validate/script
+++ b/acceptance/bundle/benchmarks/validate/script
@@ -1,5 +1,4 @@
-BENCH_N_JOBS="${BENCH_N_JOBS:-10}"
-echo "TESTLOG: Generating config with $BENCH_N_JOBS jobs"
-gen_config.py --jobs $BENCH_N_JOBS > databricks.yml
+gen_config.py ${BENCHMARK_PARAMS:-} > databricks.yml
+wc -l databricks.yml > LOG.wc
 trace head -n 10 databricks.yml
 trace benchmark.py $CLI bundle validate

From cfb78ede393e8a5a71dce8c9b4876fba2407d67d Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Tue, 30 Dec 2025 13:44:41 +0100
Subject: [PATCH 03/17] update

---
 Makefile                      | 15 ++++++-
 acceptance/acceptance_test.go |  2 +-
 acceptance/bin/benchmark.py   | 76 ++++++++++++++++++++++-----------
 acceptance/bin/gen_config.py  |  2 +-
 4 files changed, 67 insertions(+), 28 deletions(-)

diff --git a/Makefile b/Makefile
index 9a85bec929..deaa0ced97 100644
--- a/Makefile
+++ b/Makefile
@@ -97,8 +97,19 @@ test-update-aws:
 
 test-update-all: test-update test-update-aws
 
-bench:
-	BENCH_N_JOBS=1000 BENCHMARK_MODE=1 go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
+bench100:
+	BENCHMARK_PARAMS="--jobs 100" go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
+
+bench1k:
+	BENCHMARK_PARAMS="--jobs 1000" go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
+
+bench100_summary:
+	make bench100 | tee out.bench100.txt
+	./tools/bench_compare.py out.bench100.txt
+
+bench1k_summary:
+	make bench1k | tee out.bench1k.txt
+	./tools/bench_compare.py out.bench1k.txt
 
 slowest:
 	${GO_TOOL} gotestsum tool slowest --jsonfile test-output.json --threshold 1s --num 50
diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go
index f38af28483..c4528b5b01 100644
--- a/acceptance/acceptance_test.go
+++ b/acceptance/acceptance_test.go
@@ -62,7 +62,7 @@ var InprocessMode bool
 const TestLogPrefix = "TESTLOG: "
 
 // In benchmark mode we disable parallel runs of all tests that contain the word "benchmark" in their path
-var benchmarkMode = os.Getenv("BENCHMARK_MODE") != ""
+var benchmarkMode = os.Getenv("BENCHMARK_PARAMS") != ""
 
 func init() {
 	flag.BoolVar(&InprocessMode, "inprocess", false, "Run CLI in the same process as test (for debugging)")
diff --git a/acceptance/bin/benchmark.py b/acceptance/bin/benchmark.py
index 213b7c186a..576bafc811 100755
--- a/acceptance/bin/benchmark.py
+++ b/acceptance/bin/benchmark.py
@@ -6,40 +6,31 @@ import statistics
 import sys
 import os
 import resource
+import json
 
 
 def run_benchmark(command, warmup, runs):
     times = []
 
-    if len(command) == 1 and (" " in command[0] or ">" in command[0]):
-        shell = True
-        command = command[0]
-    else:
-        shell = False
-
     for i in range(runs):
-        rusage_before = resource.getrusage(resource.RUSAGE_CHILDREN)
+        cp = subprocess.run([sys.executable, sys.argv[0], "--once"] + command, stdout=subprocess.PIPE)
+        if cp.returncode != 0:
+            sys.exit(cp.returncode)
 
-        with open("LOG.process", "a") as log:
-            start = time.perf_counter()
-            result = subprocess.run(command, shell=shell, stdout=log, stderr=log)
-            end = time.perf_counter()
+        try:
+            result = json.loads(cp.stdout)
+        except Exception:
+            print(f"Failed to parse: {cp.stdout!r}")
+            raise
 
-        if result.returncode != 0:
-            print(f"Error: command failed with exit code {result.returncode}", file=sys.stderr)
-            sys.exit(result.returncode)
+        run = f"Run #{i} (warm): " if i < warmup else f"Run #{i} (count):"
 
-        rusage_after = resource.getrusage(resource.RUSAGE_CHILDREN)
-        utime = rusage_after.ru_utime - rusage_before.ru_utime
-        stime = rusage_after.ru_stime - rusage_before.ru_stime
+        result_formatted = " ".join(f"{key}={value}" for (key, value) in result.items())
 
-        run = f"Run #{i} (warm): " if i < warmup else f"Run #{i} (count):"
-        print(
-            f"TESTLOG: {run} wall={end - start:.3f} ru_utime={utime:.3f} ru_stime={stime:.3f} ru_maxrss={rusage_after.ru_maxrss}"
-        )
+        print(f"TESTLOG: {run} {result_formatted}")
 
         if i >= warmup:
-            times.append(end - start)
+            times.append(result["wall"])
 
     if not times:
         print("No times recorded")
@@ -53,18 +44,55 @@ def run_benchmark(command, warmup, runs):
 
         print(f"TESTLOG: Benchmark: {command}")
         print(f"TESTLOG: Time (mean ± σ): {mean:.3f} s ± {stdev:.3f} s")
-        print(f"TESTLOG: Range (min … max): {min_time:.3f} s … {max_time:.3f} s {runs} runs", flush=True)
+        print(f"TESTLOG: Range (min … max): {min_time:.3f} s … {max_time:.3f} s {len(times)} runs", flush=True)
+
+
+def run_once(command):
+    if len(command) == 1 and (" " in command[0] or ">" in command[0]):
+        shell = True
+        command = command[0]
+    else:
+        shell = False
+
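+    # getrusage(RUSAGE_CHILDREN) aggregates usage of all awaited child processes; the before/after delta isolates this run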
+    rusage_before = resource.getrusage(resource.RUSAGE_CHILDREN)
+
+    with open("LOG.process", "a") as log:
+        start = time.perf_counter()
+        result = subprocess.run(command, shell=shell, stdout=log, stderr=log)
+        end = time.perf_counter()
+
+    if result.returncode != 0:
+        print(f"Error: command failed with exit code {result.returncode}", file=sys.stderr)
+        sys.exit(result.returncode)
+
+    rusage_after = resource.getrusage(resource.RUSAGE_CHILDREN)
+
+    return {
+        "wall": end - start,
+        "ru_utime": rusage_after.ru_utime - rusage_before.ru_utime,
+        "ru_stime": rusage_after.ru_stime - rusage_before.ru_stime,
+        # maxrss returns largest process, so subtracting is not correct since rusage_before will be reporting different process
+        "ru_maxrss": rusage_after.ru_maxrss,
+    }
 
 
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--warmup", type=int, default=1)
     parser.add_argument("--runs", type=int)
+    parser.add_argument("--once", action="store_true")
     parser.add_argument("command", nargs="+")
     args = parser.parse_args()
 
+    if args.once:
+        assert not args.runs
+        result = run_once(args.command)
+        print(json.dumps(result))
+        return
+
     if args.runs is None:
-        if os.environ.get("BENCHMARK_MODE"):
+        if os.environ.get("BENCHMARK_PARAMS"):
             args.runs = 5
         else:
             args.runs = 1
diff --git a/acceptance/bin/gen_config.py b/acceptance/bin/gen_config.py
index 4e15953d8b..d2f6f6b79c 100755
--- a/acceptance/bin/gen_config.py
+++ b/acceptance/bin/gen_config.py
@@ -107,7 +107,7 @@ def gen_config(n):
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument("--jobs", type=int, required=True, help="Number of jobs to generate")
+    parser.add_argument("--jobs", type=int, default=10, help="Number of jobs to generate")
     args = parser.parse_args()
 
     config = gen_config(args.jobs)

From 9a2b6ee03a26df15224278bbb10fa98ae6b3d757 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 11:39:25 +0100
Subject: [PATCH 04/17] clean up

---
 Makefile                      |  45 ++++++++++-----
 acceptance/acceptance_test.go |   3 -
 tools/bench_parse.py          | 106 ++++++++++++++++++++++++++++++++++
 3 files changed, 137 insertions(+), 17 deletions(-)
 create mode 100755 tools/bench_parse.py

diff --git a/Makefile b/Makefile
index deaa0ced97..031ad94c3b 100644
--- a/Makefile
+++ b/Makefile
@@ -97,20 +97,6 @@ test-update-aws:
 
 test-update-all: test-update test-update-aws
 
-bench100:
-	BENCHMARK_PARAMS="--jobs 100" go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
-
-bench1k:
-	BENCHMARK_PARAMS="--jobs 1000" go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
-
-bench100_summary:
-	make bench100 | tee out.bench100.txt
-	./tools/bench_compare.py out.bench100.txt
-
-bench1k_summary:
-	make bench1k | tee out.bench1k.txt
-	./tools/bench_compare.py out.bench1k.txt
-
 slowest:
 	${GO_TOOL} gotestsum tool slowest --jsonfile test-output.json --threshold 1s --num 50
 
@@ -198,3 +184,34 @@ test-exp-ssh:
 
 test-pipelines:
 	make test TEST_PACKAGES="./cmd/pipelines/..." ACCEPTANCE_TEST_FILTER="TestAccept/pipelines"
+
+
+# Benchmarks:
+
+bench1k:
+	BENCHMARK_PARAMS="--jobs 1000" go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
+
+bench100:
+	BENCHMARK_PARAMS="--jobs 100" go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
+
+# small benchmark to quickly test benchmark-related code
+bench10:
+	BENCHMARK_PARAMS="--jobs 10" go test ./acceptance -v -tail -run TestAccept/bundle/benchmarks -timeout=120m
+
+bench1k.log:
+	make bench1k | tee $@
+
+bench100.log:
+	make bench100 | tee $@
+
+bench10.log:
+	make bench10 | tee $@
+
+bench1k_summary: bench1k.log
+	./tools/bench_parse.py $<
+
+bench100_summary: bench100.log
+	./tools/bench_parse.py $<
+
+bench10_summary: bench10.log
+	./tools/bench_parse.py $<
diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go
index c4528b5b01..438b559c5b 100644
--- a/acceptance/acceptance_test.go
+++ b/acceptance/acceptance_test.go
@@ -702,9 +702,6 @@ func runTest(t *testing.T,
 
 		skipRepls := false
 		if relPath == internal.MaterializedConfigFile {
-			if benchmarkMode {
-				continue
-			}
 			skipRepls = true
 		}
 		doComparison(t, repls, dir, tmpDir, relPath, &printedRepls, skipRepls)
diff --git a/tools/bench_parse.py b/tools/bench_parse.py
new file mode 100755
index 0000000000..3d7165fe17
--- /dev/null
+++ b/tools/bench_parse.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python3
+"""
+Parses output of benchmark runs (e.g. "make bench100") and prints a summary table.
+"""
+
+import sys
+import re
+import statistics
+from collections import defaultdict
+
+
+def parse_key_values(text):
+    """Parse key=value pairs from a string.
+
+    >>> parse_key_values("wall=10.316 ru_utime=19.207 ru_stime=0.505 ru_maxrss=573079552")
+    {'wall': 10.316, 'ru_utime': 19.207, 'ru_stime': 0.505, 'ru_maxrss': 573079552.0}
+    """
+    result = {}
+    for kv_pair in text.split():
+        if "=" in kv_pair:
+            key, value = kv_pair.split("=", 1)
+            try:
+                result[key] = float(value)
+            except ValueError:
+                result[key] = value
+    return result
+
+
+def parse_bench_output(file_path):
+    """Parse benchmark output and extract test results."""
+    results = defaultdict(lambda: defaultdict(list))
+
+    current_test = None
+
+    with open(file_path) as f:
+        for line in f:
+            # Match test name
+            test_match = re.match(r"=== RUN\s+(.+)", line)
+            if test_match:
+                current_test = test_match.group(1)
+                current_test = current_test.removeprefix("TestAccept/bundle/benchmarks/")
+                continue
+
+            # Match benchmark run data (only count runs, skip warm)
+            if "TESTLOG: Run #" in line and "(count)" in line:
+                if current_test:
+                    # Extract everything after the run label
+                    parts = line.split("(count):")
+                    if len(parts) == 2:
+                        kv_data = parse_key_values(parts[1].strip())
+                        for key, value in kv_data.items():
+                            results[current_test][key].append(value)
+
+    return results
+
+
+def calculate_means(results):
+    """Calculate mean values for each metric."""
+    means = {}
+    for test_name, metrics in results.items():
+        means[test_name] = {metric: statistics.mean(values) if values else 0 for metric, values in metrics.items()}
+    return means
+
+
+def print_results(results):
+    """Output table for single file."""
+    means = calculate_means(results)
+
+    all_metrics = {}
+    for metrics in means.values():
+        for key in metrics:
+            all_metrics.setdefault(key, None)
+    all_metrics = list(all_metrics.keys())
+
+    # Calculate column widths
+    testname_width = max(len("testname"), max((len(name) for name in means.keys()), default=0))
+    metric_width = 12
+
+    # Print header
f"{'testname':<{testname_width}}" + for metric in all_metrics: + header += f" {metric:>{metric_width}}" + print(header) + print("-" * len(header)) + + # Print rows + for test_name in sorted(means.keys()): + m = means[test_name] + row = f"{test_name:<{testname_width}}" + for metric in all_metrics: + value = m.get(metric, 0) + if isinstance(value, float) and value > 1000000: + row += f" {value:>{metric_width}.0f}" + else: + row += f" {value:>{metric_width}.3f}" + print(row) + + +def main(): + for filename in sys.argv[1:]: + results = parse_bench_output(filename) + print_results(results) + + +if __name__ == "__main__": + main() From 146344cf7bdf5666f228570978efd582df38eaf2 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 2 Jan 2026 13:35:42 +0100 Subject: [PATCH 05/17] add a comment --- acceptance/bundle/benchmarks/deploy/script | 3 +++ 1 file changed, 3 insertions(+) diff --git a/acceptance/bundle/benchmarks/deploy/script b/acceptance/bundle/benchmarks/deploy/script index 88cd42bced..5de5a7d955 100755 --- a/acceptance/bundle/benchmarks/deploy/script +++ b/acceptance/bundle/benchmarks/deploy/script @@ -1,3 +1,6 @@ gen_config.py ${BENCHMARK_PARAMS:-} > databricks.yml wc -l databricks.yml >> LOG.wc +# Note, since testserver persists state for the duration of the test, .databricks is kept and benchmark.py skips first run as a warmup, this measures time +# it takes for no-changes deploy. +# Note, terraform is set up by the test runner, so this time does not include TF download time. benchmark.py $CLI bundle deploy From 056b6c12cd5e669ca09b482d05f214280cf427a7 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 2 Jan 2026 13:40:48 +0100 Subject: [PATCH 06/17] lint fix --- acceptance/acceptance_test.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index 438b559c5b..0885d0c6e4 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -331,11 +331,7 @@ func testAccept(t *testing.T, inprocessMode bool, singleTest string) int { t.Skip(skipReason) } - runParallel := true - - if inprocessMode { - runParallel = false - } + runParallel := !inprocessMode if benchmarkMode && strings.Contains(dir, "benchmark") { runParallel = false From bcd6c7c3ebaab897c0c1bbb6d5dd06ac92e0600d Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 2 Jan 2026 13:55:54 +0100 Subject: [PATCH 07/17] update --- acceptance/bin/gen_config.py | 41 ++- acceptance/selftest/benchmark/out.test.toml | 5 + acceptance/selftest/benchmark/output.txt | 0 acceptance/selftest/benchmark/script | 1 + acceptance/selftest/gen_config/out.test.toml | 5 + acceptance/selftest/gen_config/output.txt | 266 +++++++++++++++++++ acceptance/selftest/gen_config/script | 1 + 7 files changed, 312 insertions(+), 7 deletions(-) create mode 100644 acceptance/selftest/benchmark/out.test.toml create mode 100644 acceptance/selftest/benchmark/output.txt create mode 100644 acceptance/selftest/benchmark/script create mode 100644 acceptance/selftest/gen_config/out.test.toml create mode 100644 acceptance/selftest/gen_config/output.txt create mode 100644 acceptance/selftest/gen_config/script diff --git a/acceptance/bin/gen_config.py b/acceptance/bin/gen_config.py index d2f6f6b79c..ae7badae5a 100755 --- a/acceptance/bin/gen_config.py +++ b/acceptance/bin/gen_config.py @@ -105,19 +105,46 @@ def gen_config(n): return config +def yaml_dump(obj, indent=0, list_item=False): + lines = [] + indent_str = " " * indent + + if isinstance(obj, dict): + first = True + 
+        for key, value in obj.items():
+            if list_item and first:
+                prefix = indent_str + "- "
+                first = False
+            elif list_item:
+                prefix = indent_str + "  "
+            else:
+                prefix = indent_str
+            nested_indent = indent + 2 if list_item else indent + 1
+            if isinstance(value, (dict, list)) and value:
+                lines.append(f"{prefix}{key}:")
+                lines.append(yaml_dump(value, nested_indent))
+            else:
+                lines.append(f"{prefix}{key}: {json.dumps(value)}")
+    elif isinstance(obj, list):
+        for item in obj:
+            if isinstance(item, (dict, list)):
+                lines.append(yaml_dump(item, indent, list_item=True))
+            else:
+                lines.append(f"{indent_str}- {json.dumps(item)}")
+    else:
+        prefix = f"{indent_str}- " if list_item else indent_str
+        return f"{prefix}{json.dumps(obj)}"
+
+    return "\n".join(line for line in lines if line)
+
+
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--jobs", type=int, default=10, help="Number of jobs to generate")
     args = parser.parse_args()
 
     config = gen_config(args.jobs)
-
-    try:
-        import yaml
-
-        print(yaml.dump(config, default_flow_style=False, sort_keys=False))
-    except ImportError:
-        print(json.dumps(config, indent=2))
+    print(yaml_dump(config))
 
 
 if __name__ == "__main__":
diff --git a/acceptance/selftest/benchmark/out.test.toml b/acceptance/selftest/benchmark/out.test.toml
new file mode 100644
index 0000000000..d560f1de04
--- /dev/null
+++ b/acceptance/selftest/benchmark/out.test.toml
@@ -0,0 +1,5 @@
+Local = true
+Cloud = false
+
+[EnvMatrix]
+  DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"]
diff --git a/acceptance/selftest/benchmark/output.txt b/acceptance/selftest/benchmark/output.txt
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/acceptance/selftest/benchmark/script b/acceptance/selftest/benchmark/script
new file mode 100644
index 0000000000..be2387c22b
--- /dev/null
+++ b/acceptance/selftest/benchmark/script
@@ -0,0 +1 @@
+benchmark.py --runs 3 'true'
diff --git a/acceptance/selftest/gen_config/out.test.toml b/acceptance/selftest/gen_config/out.test.toml
new file mode 100644
index 0000000000..d560f1de04
--- /dev/null
+++ b/acceptance/selftest/gen_config/out.test.toml
@@ -0,0 +1,5 @@
+Local = true
+Cloud = false
+
+[EnvMatrix]
+  DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"]
diff --git a/acceptance/selftest/gen_config/output.txt b/acceptance/selftest/gen_config/output.txt
new file mode 100644
index 0000000000..15041262c6
--- /dev/null
+++ b/acceptance/selftest/gen_config/output.txt
@@ -0,0 +1,266 @@
+bundle:
+  name: "test-bundle"
+resources:
+  jobs:
+    job_0:
+      description: "This job contain multiple tasks that are required to produce the weekly shark sightings report."
+      email_notifications:
+        no_alert_for_skipped_runs: false
+        on_failure:
+          - "user.name@databricks.com"
+        on_success:
+          - "user.name@databricks.com"
+      job_clusters:
+        - job_cluster_key: "auto_scaling_cluster"
+          new_cluster:
+            autoscale:
+              max_workers: 16
+              min_workers: 2
+            node_type_id: "[NODE_TYPE_ID]"
+            spark_conf:
+              spark.speculation: "true"
+            spark_version: "13.3.x-scala2.12"
+      max_concurrent_runs: 10
+      name: "job_0"
+      notification_settings:
+        no_alert_for_canceled_runs: false
+        no_alert_for_skipped_runs: false
+      parameters:
+        - default: "users"
+          name: "table"
+      tags:
+        cost-center: "engineering"
+        team: "jobs"
+      tasks:
+        - depends_on: []
+          description: "Extracts session data from events"
+          job_cluster_key: "auto_scaling_cluster"
+          libraries:
+            - jar: "dbfs:/mnt/databricks/Sessionize.jar"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          retry_on_timeout: false
+          spark_jar_task:
+            main_class_name: "com.databricks.Sessionize"
+            parameters:
+              - "--data"
+              - "dbfs:/path/to/data.json"
+          task_key: "Sessionize"
+          timeout_seconds: 86400
+        - depends_on: []
+          description: "Ingests order data"
+          job_cluster_key: "auto_scaling_cluster"
+          libraries:
+            - jar: "dbfs:/mnt/databricks/OrderIngest.jar"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          retry_on_timeout: false
+          spark_jar_task:
+            main_class_name: "com.databricks.OrdersIngest"
+            parameters:
+              - "--data"
+              - "dbfs:/path/to/order-data.json"
+          task_key: "Orders_Ingest"
+          timeout_seconds: 86400
+        - depends_on:
+            - task_key: "Orders_Ingest"
+            - task_key: "Sessionize"
+          description: "Matches orders with user sessions"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          new_cluster:
+            autoscale:
+              max_workers: 16
+              min_workers: 2
+            node_type_id: "[NODE_TYPE_ID]"
+            spark_conf:
+              spark.speculation: "true"
+            spark_version: "13.3.x-scala2.12"
+          notebook_task:
+            base_parameters:
+              age: "35"
+              name: "John Doe"
+            notebook_path: "/Users/user.name@databricks.com/Match"
+          retry_on_timeout: false
+          run_if: "ALL_SUCCESS"
+          task_key: "Match"
+          timeout_seconds: 86400
+      timeout_seconds: 86400
+      schedule:
+        pause_status: "UNPAUSED"
+        quartz_cron_expression: "20 30 * * * ?"
+        timezone_id: "Europe/London"
+    job_1:
+      description: "This job contain multiple tasks that are required to produce the weekly shark sightings report."
+      email_notifications:
+        no_alert_for_skipped_runs: false
+        on_failure:
+          - "user.name@databricks.com"
+        on_success:
+          - "user.name@databricks.com"
+      job_clusters:
+        - job_cluster_key: "auto_scaling_cluster"
+          new_cluster:
+            autoscale:
+              max_workers: 16
+              min_workers: 2
+            node_type_id: "[NODE_TYPE_ID]"
+            spark_conf:
+              spark.speculation: "true"
+            spark_version: "13.3.x-scala2.12"
+      max_concurrent_runs: 10
+      name: "job_1"
+      notification_settings:
+        no_alert_for_canceled_runs: false
+        no_alert_for_skipped_runs: false
+      parameters:
+        - default: "users"
+          name: "table"
+      tags:
+        cost-center: "engineering"
+        team: "jobs"
+      tasks:
+        - depends_on: []
+          description: "Extracts session data from events"
+          job_cluster_key: "auto_scaling_cluster"
+          libraries:
+            - jar: "dbfs:/mnt/databricks/Sessionize.jar"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          retry_on_timeout: false
+          spark_jar_task:
+            main_class_name: "com.databricks.Sessionize"
+            parameters:
+              - "--data"
+              - "dbfs:/path/to/data.json"
+          task_key: "Sessionize"
+          timeout_seconds: 86400
+        - depends_on: []
+          description: "Ingests order data"
+          job_cluster_key: "auto_scaling_cluster"
+          libraries:
+            - jar: "dbfs:/mnt/databricks/OrderIngest.jar"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          retry_on_timeout: false
+          spark_jar_task:
+            main_class_name: "com.databricks.OrdersIngest"
+            parameters:
+              - "--data"
+              - "dbfs:/path/to/order-data.json"
+          task_key: "Orders_Ingest"
+          timeout_seconds: 86400
+        - depends_on:
+            - task_key: "Orders_Ingest"
+            - task_key: "Sessionize"
+          description: "Matches orders with user sessions"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          new_cluster:
+            autoscale:
+              max_workers: 16
+              min_workers: 2
+            node_type_id: "[NODE_TYPE_ID]"
+            spark_conf:
+              spark.speculation: "true"
+            spark_version: "13.3.x-scala2.12"
+          notebook_task:
+            base_parameters:
+              age: "35"
+              name: "John Doe"
+            notebook_path: "/Users/user.name@databricks.com/Match"
+          retry_on_timeout: false
+          run_if: "ALL_SUCCESS"
+          task_key: "Match"
+          timeout_seconds: 86400
+      timeout_seconds: 86400
+      continuous:
+        pause_status: "UNPAUSED"
+    job_2:
+      description: "This job contain multiple tasks that are required to produce the weekly shark sightings report."
+      email_notifications:
+        no_alert_for_skipped_runs: false
+        on_failure:
+          - "user.name@databricks.com"
+        on_success:
+          - "user.name@databricks.com"
+      job_clusters:
+        - job_cluster_key: "auto_scaling_cluster"
+          new_cluster:
+            autoscale:
+              max_workers: 16
+              min_workers: 2
+            node_type_id: "[NODE_TYPE_ID]"
+            spark_conf:
+              spark.speculation: "true"
+            spark_version: "13.3.x-scala2.12"
+      max_concurrent_runs: 10
+      name: "job_2"
+      notification_settings:
+        no_alert_for_canceled_runs: false
+        no_alert_for_skipped_runs: false
+      parameters:
+        - default: "users"
+          name: "table"
+      tags:
+        cost-center: "engineering"
+        team: "jobs"
+      tasks:
+        - depends_on: []
+          description: "Extracts session data from events"
+          job_cluster_key: "auto_scaling_cluster"
+          libraries:
+            - jar: "dbfs:/mnt/databricks/Sessionize.jar"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          retry_on_timeout: false
+          spark_jar_task:
+            main_class_name: "com.databricks.Sessionize"
+            parameters:
+              - "--data"
+              - "dbfs:/path/to/data.json"
+          task_key: "Sessionize"
+          timeout_seconds: 86400
+        - depends_on: []
+          description: "Ingests order data"
+          job_cluster_key: "auto_scaling_cluster"
+          libraries:
+            - jar: "dbfs:/mnt/databricks/OrderIngest.jar"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          retry_on_timeout: false
+          spark_jar_task:
+            main_class_name: "com.databricks.OrdersIngest"
+            parameters:
+              - "--data"
+              - "dbfs:/path/to/order-data.json"
+          task_key: "Orders_Ingest"
+          timeout_seconds: 86400
+        - depends_on:
+            - task_key: "Orders_Ingest"
+            - task_key: "Sessionize"
+          description: "Matches orders with user sessions"
+          max_retries: 3
+          min_retry_interval_millis: 2000
+          new_cluster:
+            autoscale:
+              max_workers: 16
+              min_workers: 2
+            node_type_id: "[NODE_TYPE_ID]"
+            spark_conf:
+              spark.speculation: "true"
+            spark_version: "13.3.x-scala2.12"
+          notebook_task:
+            base_parameters:
+              age: "35"
+              name: "John Doe"
+            notebook_path: "/Users/user.name@databricks.com/Match"
+          retry_on_timeout: false
+          run_if: "ALL_SUCCESS"
+          task_key: "Match"
+          timeout_seconds: 86400
+      timeout_seconds: 86400
+      schedule:
+        pause_status: "UNPAUSED"
+        quartz_cron_expression: "20 30 * * * ?"
+        timezone_id: "Europe/London"
diff --git a/acceptance/selftest/gen_config/script b/acceptance/selftest/gen_config/script
new file mode 100644
index 0000000000..c2f3177005
--- /dev/null
+++ b/acceptance/selftest/gen_config/script
@@ -0,0 +1 @@
+gen_config.py --jobs 3

From f4851700fcbe503349a0b65928095c4d9fac00f7 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 13:57:21 +0100
Subject: [PATCH 08/17] replace dump with print

---
 acceptance/bin/gen_config.py | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/acceptance/bin/gen_config.py b/acceptance/bin/gen_config.py
index ae7badae5a..84a7262271 100755
--- a/acceptance/bin/gen_config.py
+++ b/acceptance/bin/gen_config.py
@@ -105,8 +105,7 @@ def gen_config(n):
     return config
 
 
-def yaml_dump(obj, indent=0, list_item=False):
-    lines = []
+def print_yaml(obj, indent=0, list_item=False):
     indent_str = "  " * indent
 
     if isinstance(obj, dict):
@@ -121,21 +120,19 @@ def yaml_dump(obj, indent=0, list_item=False):
             prefix = indent_str
             nested_indent = indent + 2 if list_item else indent + 1
             if isinstance(value, (dict, list)) and value:
-                lines.append(f"{prefix}{key}:")
-                lines.append(yaml_dump(value, nested_indent))
+                print(f"{prefix}{key}:")
+                print_yaml(value, nested_indent)
             else:
-                lines.append(f"{prefix}{key}: {json.dumps(value)}")
+                print(f"{prefix}{key}: {json.dumps(value)}")
     elif isinstance(obj, list):
         for item in obj:
             if isinstance(item, (dict, list)):
-                lines.append(yaml_dump(item, indent, list_item=True))
+                print_yaml(item, indent, list_item=True)
             else:
-                lines.append(f"{indent_str}- {json.dumps(item)}")
+                print(f"{indent_str}- {json.dumps(item)}")
     else:
         prefix = f"{indent_str}- " if list_item else indent_str
-        return f"{prefix}{json.dumps(obj)}"
-
-    return "\n".join(line for line in lines if line)
+        print(f"{prefix}{json.dumps(obj)}")
 
 
 def main():
@@ -143,8 +140,7 @@ def main():
     parser.add_argument("--jobs", type=int, default=10, help="Number of jobs to generate")
     args = parser.parse_args()
 
-    config = gen_config(args.jobs)
-    print(yaml_dump(config))
+    print_yaml(gen_config(args.jobs))
 
 
 if __name__ == "__main__":

From 252cf1a9902afa0fcfeb3732842f14338402af20 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 13:59:12 +0100
Subject: [PATCH 09/17] update README

---
 acceptance/README.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/acceptance/README.md b/acceptance/README.md
index c8c17e310c..8e33d273a7 100644
--- a/acceptance/README.md
+++ b/acceptance/README.md
@@ -22,6 +22,25 @@ Any file starting with "LOG" will be logged to test log (visible with go test -v
 
 See [selftest](./selftest) for more examples.
 
+## Benchmarks
+
+Benchmarks are regular acceptance tests that log measurements in a certain format. The output can be fed to `tools/bench_parse.py` to print a summary table.
+
+The test runner recognizes a benchmark by the word "benchmark" anywhere in its path. For these tests, parallel execution is disabled if and only if the BENCHMARK\_PARAMS variable is set.
+
+The benchmarks make use of two scripts:
+
+- `gen_config.py --jobs N` to generate a config with N jobs
+- `benchmark.py` to run a command a few times and log the time measurements.
+
+The default number of runs in benchmark.py depends on the BENCHMARK\_PARAMS variable: if it is set, the default is 5; otherwise it is 1.
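+
+For example, to run the small benchmark and print the summary table (a typical invocation; timings vary per machine):
+
+```sh
+make bench10_summary
+```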
+
 ## Running acceptance tests on Windows
 
 To run the acceptance tests from a terminal on Windows (eg. Git Bash from VS Code),

From 1ec802f63a78e60b8b8ce5b7643a2405f3a4de05 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 14:02:07 +0100
Subject: [PATCH 10/17] update

---
 acceptance/bundle/benchmarks/validate/output.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/acceptance/bundle/benchmarks/validate/output.txt b/acceptance/bundle/benchmarks/validate/output.txt
index 4b7537e96c..1ef997149b 100644
--- a/acceptance/bundle/benchmarks/validate/output.txt
+++ b/acceptance/bundle/benchmarks/validate/output.txt
@@ -1,14 +1,14 @@
 
 >>> head -n 10 databricks.yml
 bundle:
-  name: test-bundle
+  name: "test-bundle"
 resources:
   jobs:
     job_0:
-      description: This job contain multiple tasks that are required to produce the
-        weekly shark sightings report.
+      description: "This job contain multiple tasks that are required to produce the weekly shark sightings report."
       email_notifications:
         no_alert_for_skipped_runs: false
         on_failure:
+          - "user.name@databricks.com"
 
 >>> benchmark.py [CLI] bundle validate

From 61f673310aa75aa612f0a8442a7acdd5a4f61766 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 14:04:58 +0100
Subject: [PATCH 11/17] clean up

---
 acceptance/bundle/benchmarks/test.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/acceptance/bundle/benchmarks/test.toml b/acceptance/bundle/benchmarks/test.toml
index 6575d7cf93..fbcf2aad73 100644
--- a/acceptance/bundle/benchmarks/test.toml
+++ b/acceptance/bundle/benchmarks/test.toml
@@ -1,3 +1,2 @@
 Timeout = '4h'
 Ignore = ["databricks.yml"]
-BundleConfig.default_name = ""

From 415cf3c733056b32a97473f52f924baaeeea3aed Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 14:09:52 +0100
Subject: [PATCH 12/17] clean up

---
 tools/bench_parse.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tools/bench_parse.py b/tools/bench_parse.py
index 3d7165fe17..534fbe36af 100755
--- a/tools/bench_parse.py
+++ b/tools/bench_parse.py
@@ -34,14 +34,12 @@ def parse_bench_output(file_path):
 
     with open(file_path) as f:
         for line in f:
-            # Match test name
            test_match = re.match(r"=== RUN\s+(.+)", line)
             if test_match:
                 current_test = test_match.group(1)
                 current_test = current_test.removeprefix("TestAccept/bundle/benchmarks/")
                 continue
 
-            # Match benchmark run data (only count runs, skip warm)
             if "TESTLOG: Run #" in line and "(count)" in line:
                 if current_test:
                     # Extract everything after the run label
@@ -72,18 +70,15 @@ def print_results(results):
             all_metrics.setdefault(key, None)
     all_metrics = list(all_metrics.keys())
 
-    # Calculate column widths
     testname_width = max(len("testname"), max((len(name) for name in means.keys()), default=0))
     metric_width = 12
 
-    # Print header
     header = f"{'testname':<{testname_width}}"
     for metric in all_metrics:
         header += f" {metric:>{metric_width}}"
     print(header)
     print("-" * len(header))
 
-    # Print rows
     for test_name in sorted(means.keys()):

From 348c810956e3e073dbacb48813b7f5031be56d23 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 14:11:10 +0100
Subject: [PATCH 13/17] add a comment

---
 acceptance/bin/benchmark.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/acceptance/bin/benchmark.py b/acceptance/bin/benchmark.py
index 576bafc811..b6c263ae01 100755
--- a/acceptance/bin/benchmark.py
+++ b/acceptance/bin/benchmark.py
@@ -13,6 +13,7 @@ def run_benchmark(command, warmup, runs):
     times = []
 
     for i in range(runs):
+        # double fork to reset max statistics like ru_maxrss
         cp = subprocess.run([sys.executable, sys.argv[0], "--once"] + command, stdout=subprocess.PIPE)
         if cp.returncode != 0:
             sys.exit(cp.returncode)

From a59fa0dc60456d7a63fbdc22cef792ecccfb09b1 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 14:21:05 +0100
Subject: [PATCH 14/17] fix windows - resource is not available

---
 acceptance/bin/benchmark.py | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/acceptance/bin/benchmark.py b/acceptance/bin/benchmark.py
index b6c263ae01..5d34fc89df 100755
--- a/acceptance/bin/benchmark.py
+++ b/acceptance/bin/benchmark.py
@@ -5,9 +5,14 @@ import statistics
 import sys
 import os
-import resource
 import json
 
+try:
+    import resource
+except ImportError:
+    # n/a on windows
+    resource = None
+
 
 def run_benchmark(command, warmup, runs):
     times = []
@@ -55,7 +60,8 @@ def run_once(command):
         shell = False
 
     # getrusage(RUSAGE_CHILDREN) aggregates usage of all awaited child processes; the before/after delta isolates this run
-    rusage_before = resource.getrusage(resource.RUSAGE_CHILDREN)
+    if resource:
+        rusage_before = resource.getrusage(resource.RUSAGE_CHILDREN)
 
     with open("LOG.process", "a") as log:
         start = time.perf_counter()
@@ -66,15 +72,19 @@ def run_once(command):
         print(f"Error: command failed with exit code {result.returncode}", file=sys.stderr)
         sys.exit(result.returncode)
 
-    rusage_after = resource.getrusage(resource.RUSAGE_CHILDREN)
+    result = {"wall": end - start}
+
+    if resource:
+        rusage_after = resource.getrusage(resource.RUSAGE_CHILDREN)
+
+        result.update({
+            "ru_utime": rusage_after.ru_utime - rusage_before.ru_utime,
+            "ru_stime": rusage_after.ru_stime - rusage_before.ru_stime,
+            # maxrss returns largest process, so subtracting is not correct since rusage_before will be reporting different process
+            "ru_maxrss": rusage_after.ru_maxrss,
+        })
 
-    return {
-        "wall": end - start,
-        "ru_utime": rusage_after.ru_utime - rusage_before.ru_utime,
-        "ru_stime": rusage_after.ru_stime - rusage_before.ru_stime,
-        # maxrss returns largest process, so subtracting is not correct since rusage_before will be reporting different process
-        "ru_maxrss": rusage_after.ru_maxrss,
-    }
+    return result

From 2682b4de27c2d727584f4404ef66c7598ec4da5a Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Fri, 2 Jan 2026 16:08:32 +0100
Subject: [PATCH 15/17] formatting

---
 acceptance/bin/benchmark.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/acceptance/bin/benchmark.py b/acceptance/bin/benchmark.py
index 5d34fc89df..bad87fd8b2 100755
--- a/acceptance/bin/benchmark.py
+++ b/acceptance/bin/benchmark.py
@@ -77,12 +77,14 @@ def run_once(command):
     if resource:
         rusage_after = resource.getrusage(resource.RUSAGE_CHILDREN)
 
-        result.update({
-            "ru_utime": rusage_after.ru_utime - rusage_before.ru_utime,
-            "ru_stime": rusage_after.ru_stime - rusage_before.ru_stime,
-            # maxrss returns largest process, so subtracting is not correct since rusage_before will be reporting different process
-            "ru_maxrss": rusage_after.ru_maxrss,
-        })
+        result.update(
+            {
+                "ru_utime": rusage_after.ru_utime - rusage_before.ru_utime,
+                "ru_stime": rusage_after.ru_stime - rusage_before.ru_stime,
+                # maxrss returns largest process, so subtracting is not correct since rusage_before will be reporting different process
+                "ru_maxrss": rusage_after.ru_maxrss,
+            }
+        )

From 0a604d66a2a4590aeb70f0fdc9cfd19ff6042a6d Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Mon, 5 Jan 2026 09:58:01 +0100
Subject: [PATCH 16/17] disable on Windows

---
 acceptance/bundle/benchmarks/test.toml | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/acceptance/bundle/benchmarks/test.toml b/acceptance/bundle/benchmarks/test.toml
index fbcf2aad73..9f94c55600 100644
--- a/acceptance/bundle/benchmarks/test.toml
+++ b/acceptance/bundle/benchmarks/test.toml
@@ -1,2 +1,5 @@
 Timeout = '4h'
 Ignore = ["databricks.yml"]
+
+# Disabled because it fails on CI. We don't need this to work on Windows.
+GOOS.windows = false

From 290713ef512124e62bf781406d98f02f3b471060 Mon Sep 17 00:00:00 2001
From: Denis Bilenko
Date: Mon, 5 Jan 2026 10:11:27 +0100
Subject: [PATCH 17/17] update out file

---
 acceptance/bundle/benchmarks/deploy/out.test.toml   | 3 +++
 acceptance/bundle/benchmarks/plan/out.test.toml     | 3 +++
 acceptance/bundle/benchmarks/validate/out.test.toml | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/acceptance/bundle/benchmarks/deploy/out.test.toml b/acceptance/bundle/benchmarks/deploy/out.test.toml
index d560f1de04..40bb0d1047 100644
--- a/acceptance/bundle/benchmarks/deploy/out.test.toml
+++ b/acceptance/bundle/benchmarks/deploy/out.test.toml
@@ -1,5 +1,8 @@
 Local = true
 Cloud = false
 
+[GOOS]
+  windows = false
+
 [EnvMatrix]
   DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"]
diff --git a/acceptance/bundle/benchmarks/plan/out.test.toml b/acceptance/bundle/benchmarks/plan/out.test.toml
index d560f1de04..40bb0d1047 100644
--- a/acceptance/bundle/benchmarks/plan/out.test.toml
+++ b/acceptance/bundle/benchmarks/plan/out.test.toml
@@ -1,5 +1,8 @@
 Local = true
 Cloud = false
 
+[GOOS]
+  windows = false
+
 [EnvMatrix]
   DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"]
diff --git a/acceptance/bundle/benchmarks/validate/out.test.toml b/acceptance/bundle/benchmarks/validate/out.test.toml
index d560f1de04..40bb0d1047 100644
--- a/acceptance/bundle/benchmarks/validate/out.test.toml
+++ b/acceptance/bundle/benchmarks/validate/out.test.toml
@@ -1,5 +1,8 @@
 Local = true
 Cloud = false
 
+[GOOS]
+  windows = false
+
 [EnvMatrix]
   DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"]