dev: Add benchmark for compilation profiles (#17754)

2010YOUY01 · web-flow · commit 691dd4727d43 · 2025-09-26T11:21:19.000Z
* Add benchmark for compilation profiles

* add apache header

* add apache header
diff --git a/Cargo.toml b/Cargo.toml
@@ -179,13 +179,56 @@ testcontainers-modules = { version = "0.12" }
 tokio = { version = "1.47", features = ["macros", "rt", "sync"] }
 url = "2.5.7"
 
+[workspace.lints.clippy]
+# Detects large stack-allocated futures that may cause stack overflow crashes (see threshold in clippy.toml)
+large_futures = "warn"
+used_underscore_binding = "warn"
+or_fun_call = "warn"
+unnecessary_lazy_evaluations = "warn"
+uninlined_format_args = "warn"
+inefficient_to_string = "warn"
+
+[workspace.lints.rust]
+unexpected_cfgs = { level = "warn", check-cfg = [
+    'cfg(datafusion_coop, values("tokio", "tokio_fallback", "per_stream"))',
+    "cfg(tarpaulin)",
+    "cfg(tarpaulin_include)",
+] }
+unused_qualifications = "deny"
+
+# --------------------
+# Compilation Profiles
+# --------------------
+# A Cargo profile is a preset for the compiler/linker knobs that trade off:
+# - Build time: how quickly code compiles and links
+# - Runtime performance: how fast the resulting binaries execute
+# - Binary size: how large the executables end up
+# - Debuggability: how much debug information is preserved for debugging and profiling
+#
+# Profiles available:
+# - dev: default debug build; fastest to compile, slowest to run, full debug info
+#     for everyday development.
+#     Run: cargo run
+# - release: optimized build; slowest to compile, fastest to run, smallest
+#     binaries for public releases.
+#     Run: cargo run --release
+# - release-nonlto: skips LTO, so it builds quicker while staying close to
+#     release performance. It is useful when developing performance optimizations.
+#     Run: cargo run --profile release-nonlto
+# - profiling: inherits release optimizations but retains debug info to support
+#     profiling tools and flamegraphs.
+#     Run: cargo run --profile profiling
+# - ci: derived from `dev` but disables incremental builds and strips debug
+#     info from dependencies to keep CI artifacts small and reproducible.
+#     Run: cargo run --profile ci
+#
+# To measure how these profiles affect timing and binary size, use the `compile_profile` benchmark.
+# See `benchmarks/README.md` for more details.
 [profile.release]
 codegen-units = 1
 lto = true
 strip = true      # Eliminate debug information to minimize binary size
 
-# the release profile takes a long time to build so we can use this profile during development to save time
-# cargo build --profile release-nonlto
 [profile.release-nonlto]
 codegen-units = 16
 debug-assertions = false
@@ -202,33 +245,16 @@ debug = false
 inherits = "dev"
 incremental = false
 
-# ci turns off debug info, etc. for dependencies to allow for smaller binaries making caching more effective
+# This rule applies to every package that is not a workspace member (i.e.
+# dependencies such as `arrow` and `tokio`). It disables debug info and related
+# features on those dependencies so their binaries stay smaller, improving cache reuse.
 [profile.ci.package."*"]
 debug = false
 debug-assertions = false
 strip = "debuginfo"
 incremental = false
 
-# release inherited profile keeping debug information and symbols
-# for mem/cpu profiling
 [profile.profiling]
 inherits = "release"
 debug = true
 strip = false
-
-[workspace.lints.clippy]
-# Detects large stack-allocated futures that may cause stack overflow crashes (see threshold in clippy.toml)
-large_futures = "warn"
-used_underscore_binding = "warn"
-or_fun_call = "warn"
-unnecessary_lazy_evaluations = "warn"
-uninlined_format_args = "warn"
-inefficient_to_string = "warn"
-
-[workspace.lints.rust]
-unexpected_cfgs = { level = "warn", check-cfg = [
-    'cfg(datafusion_coop, values("tokio", "tokio_fallback", "per_stream"))',
-    "cfg(tarpaulin)",
-    "cfg(tarpaulin_include)",
-] }
-unused_qualifications = "deny"
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -87,6 +87,39 @@ To run for specific query, for example Q21
 ./bench.sh run tpch10 21
 ```
 
+## Compile profile benchmark
+
+Generate the data required for the compile profile helper (TPC-H SF=1):
+
+```shell
+./bench.sh data compile_profile
+```
+
+Run the benchmark across all default Cargo profiles (`dev`, `release`, `ci`, `release-nonlto`):
+
+```shell
+./bench.sh run compile_profile
+```
+
+Limit the run to a single profile:
+
+```shell
+./bench.sh run compile_profile dev
+```
+
+Or specify a subset of profiles:
+
+```shell
+./bench.sh run compile_profile dev release
+```
+
+You can also invoke the helper directly from the repository root if you need to customise arguments further:
+
+```shell
+./benchmarks/compile_profile.py --profiles dev release --data /path/to/tpch_sf1
+```
+
+
 ## Benchmark with modified configurations
 
 ### Select join algorithm
diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh
@@ -125,6 +125,7 @@ imdb:                   Join Order Benchmark (JOB) using the IMDB dataset conver
 # Micro-Benchmarks (specific operators and features)
 cancellation:           How long cancelling a query takes
 nlj:                    Benchmark for simple nested loop joins, testing various join scenarios
+compile_profile:        Compile and execute TPC-H across selected Cargo profiles, reporting timing and binary size
 
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 Supported Configuration (Environment Variables)
@@ -304,6 +305,9 @@ main() {
                     # nlj uses range() function, no data generation needed
                     echo "NLJ benchmark does not require data generation"
                     ;;
+                compile_profile)
+                    data_tpch "1"
+                    ;;
                 *)
                     echo "Error: unknown benchmark '$BENCHMARK' for data generation"
                     usage
@@ -313,20 +317,32 @@ main() {
         run)
             # Parse positional parameters
             BENCHMARK=${ARG2:-"${BENCHMARK}"}
+            EXTRA_ARGS=("${POSITIONAL_ARGS[@]:2}")
+            PROFILE_ARGS=()
+            QUERY=""
+            QUERY_ARG=""
+            if [ "$BENCHMARK" = "compile_profile" ]; then
+                PROFILE_ARGS=("${EXTRA_ARGS[@]}")
+            else
+                QUERY=${EXTRA_ARGS[0]}
+                if [ -n "$QUERY" ]; then
+                    QUERY_ARG="--query ${QUERY}"
+                fi
+            fi
             BRANCH_NAME=$(cd "${DATAFUSION_DIR}" && git rev-parse --abbrev-ref HEAD)
             BRANCH_NAME=${BRANCH_NAME//\//_} # mind blowing syntax to replace / with _
             RESULTS_NAME=${RESULTS_NAME:-"${BRANCH_NAME}"}
             RESULTS_DIR=${RESULTS_DIR:-"$SCRIPT_DIR/results/$RESULTS_NAME"}
 
-            # Optional query filter to run specific query
-            QUERY=${ARG3}
-            QUERY_ARG=$([ -n "$QUERY" ] && echo "--query ${QUERY}" || echo "")
-
             echo "***************************"
             echo "DataFusion Benchmark Script"
             echo "COMMAND: ${COMMAND}"
             echo "BENCHMARK: ${BENCHMARK}"
-            echo "QUERY: ${QUERY:-All}"
+            if [ "$BENCHMARK" = "compile_profile" ]; then
+                echo "PROFILES: ${PROFILE_ARGS[*]:-All}"
+            else
+                echo "QUERY: ${QUERY:-All}"
+            fi
             echo "DATAFUSION_DIR: ${DATAFUSION_DIR}"
             echo "BRANCH_NAME: ${BRANCH_NAME}"
             echo "DATA_DIR: ${DATA_DIR}"
@@ -468,6 +484,9 @@ main() {
                 nlj)
                     run_nlj
                     ;;
+                compile_profile)
+                    run_compile_profile "${PROFILE_ARGS[@]}"
+                    ;;
                 *)
                     echo "Error: unknown benchmark '$BENCHMARK' for run"
                     usage
@@ -593,6 +612,20 @@ run_tpch_mem() {
     debug_run $CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" -m --format parquet -o "${RESULTS_FILE}" ${QUERY_ARG}
 }
 
+# Launches compile_profile.py on the TPC-H SF1 data; "$@" lists the Cargo
+# profiles to benchmark (none given => --profiles is omitted entirely).
+run_compile_profile() {
+    local helper_script="${SCRIPT_DIR}/compile_profile.py"
+    local tpch_dir="${DATA_DIR}/tpch_sf1"
+    local invocation=(python3 "${helper_script}" --data "${tpch_dir}")
+
+    echo "Running compile profile benchmark..."
+    if [ "$#" -gt 0 ]; then
+        invocation+=(--profiles "$@")
+    fi
+    debug_run "${invocation[@]}"
+}
+
 # Runs the cancellation benchmark
 run_cancellation() {
     RESULTS_FILE="${RESULTS_DIR}/cancellation.json"
diff --git a/benchmarks/compile_profile.py b/benchmarks/compile_profile.py