diff --git a/benchmarks/Dockerfile b/benchmarks/Dockerfile deleted file mode 100644 index 704c863d20..0000000000 --- a/benchmarks/Dockerfile +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -FROM apache/datafusion-comet:0.7.0-spark3.5.5-scala2.12-java11 - -RUN apt update \ - && apt install -y git python3 python3-pip \ - && apt clean - -RUN cd /opt \ - && git clone https://github.com/apache/datafusion-benchmarks.git diff --git a/benchmarks/README.md b/benchmarks/README.md index 7e2dfc9f2b..f1e8d39db6 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -17,88 +17,177 @@ specific language governing permissions and limitations under the License. --> -# Running Comet Benchmarks in Microk8s +# Comet Benchmark Suite + +Unified benchmark infrastructure for Apache DataFusion Comet. Supports +TPC-H/TPC-DS and shuffle benchmarks across multiple engines (Spark, Comet, +Gluten) with composable configuration and optional memory profiling. + +## Quick Start + +```bash +# Run TPC-H with Comet on a standalone cluster +python benchmarks/run.py \ + --engine comet --profile standalone-tpch --restart-cluster \ + -- tpc --benchmark tpch --data $TPCH_DATA --queries $TPCH_QUERIES \ + --output . --iterations 1 + +# Preview the spark-submit command without executing +python benchmarks/run.py \ + --engine comet --profile standalone-tpch --dry-run \ + -- tpc --benchmark tpch --data $TPCH_DATA --queries $TPCH_QUERIES \ + --output . --iterations 1 +``` + +## Directory Layout + +``` +benchmarks/ +├── run.py # Entry point — builds and runs spark-submit +├── conf/ +│ ├── engines/ # Per-engine configs (comet, spark, gluten, ...) +│ └── profiles/ # Per-environment configs (local, standalone, docker) +├── runner/ +│ ├── cli.py # Python CLI passed to spark-submit (subcommands: tpc, shuffle, micro) +│ ├── config.py # Config file loader and merger +│ ├── spark_session.py # SparkSession builder +│ └── profiling.py # Level 1 JVM metrics via Spark REST API +├── suites/ +│ ├── tpc.py # TPC-H / TPC-DS benchmark suite +│ ├── shuffle.py # Shuffle benchmark suite (hash, round-robin) +│ └── micro.py # Microbenchmark suite (string expressions, ...) +├── analysis/ +│ ├── compare.py # Generate comparison charts from result JSON +│ └── memory_report.py # Generate memory reports from profiling CSV +├── infra/ +│ ├── docker/ # Dockerfile, docker-compose, metrics collector +├── create-iceberg-tpch.py # Utility: convert TPC-H Parquet to Iceberg tables +└── drop-caches.sh # Utility: drop OS page caches before benchmarks +``` -This guide explains how to run benchmarks derived from TPC-H and TPC-DS in Apache DataFusion Comet deployed in a -local Microk8s cluster. +## How It Works -## Use Microk8s locally +`run.py` is the single entry point. 
It: -Install Micro8s following the instructions at https://microk8s.io/docs/getting-started and then perform these -additional steps, ensuring that any existing kube config is backed up first. +1. Reads a **profile** config (cluster shape, memory, master URL) +2. Reads an **engine** config (plugin JARs, shuffle manager, engine-specific settings) +3. Applies any `--conf key=value` CLI overrides (highest precedence) +4. Builds and executes the `spark-submit` command -```shell -mkdir -p ~/.kube -microk8s config > ~/.kube/config +The merge order is: **profile < engine < CLI overrides**, so engine configs +can override profile defaults (e.g., an engine can set `offHeap.enabled=false` +even though the profile enables it). -microk8s enable dns -microk8s enable registry +### Wrapper arguments (before `--`) -microk8s kubectl create serviceaccount spark -``` +| Flag | Description | +| ------------------- | ----------------------------------------------- | +| `--engine NAME` | Engine config from `conf/engines/NAME.conf` | +| `--profile NAME` | Profile config from `conf/profiles/NAME.conf` | +| `--conf key=value` | Extra Spark/runner config override (repeatable) | +| `--restart-cluster` | Stop/start Spark standalone master + worker | +| `--dry-run` | Print spark-submit command without executing | + +### Suite arguments (after `--`) + +Everything after `--` is passed to `runner/cli.py`. See per-suite docs: + +- [TPC-H / TPC-DS](suites/TPC.md) +- [Shuffle](suites/SHUFFLE.md) +- [Microbenchmarks](suites/MICRO.md) + +## Available Engines + +| Engine | Config file | Description | +| ---------------------- | ----------------------------------- | --------------------------------- | +| `spark` | `engines/spark.conf` | Vanilla Spark (no accelerator) | +| `comet` | `engines/comet.conf` | DataFusion Comet with native scan | +| `comet-iceberg` | `engines/comet-iceberg.conf` | Comet + native Iceberg scanning | +| `gluten` | `engines/gluten.conf` | Gluten (Velox backend) — Java 8 | +| `spark-shuffle` | `engines/spark-shuffle.conf` | Spark baseline for shuffle tests | +| `comet-jvm-shuffle` | `engines/comet-jvm-shuffle.conf` | Comet with JVM shuffle mode | +| `comet-native-shuffle` | `engines/comet-native-shuffle.conf` | Comet with native shuffle | + +## Available Profiles -## Build Comet Docker Image +| Profile | Config file | Description | +| ------------------ | -------------------------------- | ------------------------------ | +| `local` | `profiles/local.conf` | `local[*]` mode, no cluster | +| `standalone-tpch` | `profiles/standalone-tpch.conf` | 1 executor, 8 cores, S3A | +| `standalone-tpcds` | `profiles/standalone-tpcds.conf` | 2 executors, 16 cores, S3A | +| `docker` | `profiles/docker.conf` | For docker-compose deployments | -Run the following command from the root of this repository to build the Comet Docker image, or use a published -Docker image from https://github.com/orgs/apache/packages?repo_name=datafusion-comet +## Environment Variables -```shell -docker build -t apache/datafusion-comet -f kube/Dockerfile . 
+The config files use `${VAR}` references that are expanded from the +environment at load time: + +| Variable | Used by | Description | +| -------------- | -------------------- | --------------------------------- | +| `SPARK_HOME` | `run.py` | Path to Spark installation | +| `SPARK_MASTER` | standalone profiles | Spark master URL | +| `COMET_JAR` | comet engines | Path to Comet JAR | +| `GLUTEN_JAR` | gluten engine | Path to Gluten JAR | +| `ICEBERG_JAR` | comet-iceberg engine | Path to Iceberg Spark runtime JAR | + +## Profiling + +Add `--profile` (the flag, not the config) to any suite command to enable +Level 1 JVM metrics collection via the Spark REST API: + +```bash +python benchmarks/run.py --engine comet --profile standalone-tpch \ + -- tpc --benchmark tpch --data $TPCH_DATA --queries $TPCH_QUERIES \ + --output . --iterations 1 --profile --profile-interval 1.0 ``` -## Build Comet Benchmark Docker Image +This writes a `{name}-{benchmark}-metrics.csv` alongside the result JSON. + +For container-level memory profiling, use the constrained docker-compose +overlay — see [Docker infrastructure](infra/docker/). + +## Generating Charts -Build the benchmark Docker image and push to the Microk8s Docker registry. +```bash +# Compare two result JSON files +python -m benchmarks.analysis.compare \ + comet-tpch-*.json spark-tpch-*.json \ + --labels Comet Spark --benchmark tpch \ + --title "TPC-H SF100" --output-dir ./charts -```shell -docker build -t apache/datafusion-comet-tpcbench . -docker tag apache/datafusion-comet-tpcbench localhost:32000/apache/datafusion-comet-tpcbench:latest -docker push localhost:32000/apache/datafusion-comet-tpcbench:latest +# Generate memory reports +python -m benchmarks.analysis.memory_report \ + --spark-csv comet-tpch-metrics.csv \ + --container-csv container-metrics.csv \ + --output-dir ./charts ``` -## Run benchmarks - -```shell -export SPARK_MASTER=k8s://https://127.0.0.1:16443 -export COMET_DOCKER_IMAGE=localhost:32000/apache/datafusion-comet-tpcbench:latest -# Location of Comet JAR within the Docker image -export COMET_JAR=/opt/spark/jars/comet-spark-spark3.4_2.12-0.5.0-SNAPSHOT.jar - -$SPARK_HOME/bin/spark-submit \ - --master $SPARK_MASTER \ - --deploy-mode cluster \ - --name comet-tpcbench \ - --driver-memory 8G \ - --conf spark.driver.memory=8G \ - --conf spark.executor.instances=1 \ - --conf spark.executor.memory=32G \ - --conf spark.executor.cores=8 \ - --conf spark.cores.max=8 \ - --conf spark.task.cpus=1 \ - --conf spark.executor.memoryOverhead=3G \ - --jars local://$COMET_JAR \ - --conf spark.executor.extraClassPath=$COMET_JAR \ - --conf spark.driver.extraClassPath=$COMET_JAR \ - --conf spark.plugins=org.apache.spark.CometPlugin \ - --conf spark.sql.extensions=org.apache.comet.CometSparkSessionExtensions \ - --conf spark.comet.enabled=true \ - --conf spark.comet.exec.enabled=true \ - --conf spark.comet.exec.all.enabled=true \ - --conf spark.comet.cast.allowIncompatible=true \ - --conf spark.comet.exec.shuffle.enabled=true \ - --conf spark.comet.exec.shuffle.mode=auto \ - --conf spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager \ - --conf spark.kubernetes.namespace=default \ - --conf spark.kubernetes.driver.pod.name=tpcbench \ - --conf spark.kubernetes.container.image=$COMET_DOCKER_IMAGE \ - --conf spark.kubernetes.driver.volumes.hostPath.tpcdata.mount.path=/mnt/bigdata/tpcds/sf100/ \ - --conf spark.kubernetes.driver.volumes.hostPath.tpcdata.options.path=/mnt/bigdata/tpcds/sf100/ \ - --conf 
spark.kubernetes.executor.volumes.hostPath.tpcdata.mount.path=/mnt/bigdata/tpcds/sf100/ \ - --conf spark.kubernetes.executor.volumes.hostPath.tpcdata.options.path=/mnt/bigdata/tpcds/sf100/ \ - --conf spark.kubernetes.authenticate.caCertFile=/var/snap/microk8s/current/certs/ca.crt \ - local:///opt/datafusion-benchmarks/runners/datafusion-comet/tpcbench.py \ - --benchmark tpcds \ - --data /mnt/bigdata/tpcds/sf100/ \ - --queries /opt/datafusion-benchmarks/tpcds/queries-spark \ - --iterations 1 +## Running in Docker + +See [infra/docker/](infra/docker/) for docker-compose setup with optional +memory-constrained overlays and cgroup metrics collection. + +The Docker image includes both Java 8 and Java 17 runtimes. Java 17 is the +default (`JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64`), which is required +by Comet. Gluten requires Java 8, so override `JAVA_HOME` for all containers +when running Gluten benchmarks: + +```bash +# Start the cluster with Java 8 for Gluten +docker compose -f benchmarks/infra/docker/docker-compose.yml up -d + +# Run Gluten benchmark (override JAVA_HOME on all containers) +docker compose -f benchmarks/infra/docker/docker-compose.yml run --rm \ + -e JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 \ + -e GLUTEN_JAR=/jars/gluten.jar \ + bench bash -c 'python3 /opt/benchmarks/run.py \ + --engine gluten --profile docker \ + -- tpc --name gluten --benchmark tpch --data /data \ + --queries /queries --output /results --iterations 1' ``` + +> **Note:** The Spark worker must also run Java 8 for Gluten. Use a +> docker-compose override file to set `JAVA_HOME` on `spark-master` and +> `spark-worker` services before starting the cluster, or restart the +> cluster between engine switches. + diff --git a/benchmarks/analysis/__init__.py b/benchmarks/analysis/__init__.py new file mode 100644 index 0000000000..0ccbeeeafb --- /dev/null +++ b/benchmarks/analysis/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/benchmarks/analysis/compare.py b/benchmarks/analysis/compare.py new file mode 100644 index 0000000000..b9a24acc57 --- /dev/null +++ b/benchmarks/analysis/compare.py @@ -0,0 +1,269 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Compare benchmark results and generate charts. + +Reads the JSON output produced by ``suites/tpc.py`` (integer query keys +serialised as strings by ``json.dumps``). + +Usage:: + + python -m benchmarks.analysis.compare \\ + comet-tpch-*.json spark-tpch-*.json \\ + --labels comet spark --benchmark tpch --title "SF100" \\ + --output-dir ./charts +""" + +import argparse +import json +import os +import sys +from typing import Any, Dict, List, Sequence, Tuple + +import matplotlib.pyplot as plt +import numpy as np + + +QUERY_COUNTS = {"tpch": 22, "tpcds": 99} + + +def _query_range(benchmark: str) -> range: + n = QUERY_COUNTS.get(benchmark) + if n is None: + raise ValueError(f"Unknown benchmark: {benchmark}") + return range(1, n + 1) + + +def _median(timings: List[float]) -> float: + return float(np.median(np.array(timings))) + + +# --------------------------------------------------------------------------- +# Chart generators +# --------------------------------------------------------------------------- + +def generate_summary_chart( + results: Sequence[Dict[str, Any]], + labels: Sequence[str], + benchmark: str, + title: str, + output_dir: str = ".", +) -> str: + """Total wall-clock bar chart. Returns the output path.""" + num_queries = QUERY_COUNTS[benchmark] + timings = [0.0] * len(results) + for query in _query_range(benchmark): + for i, r in enumerate(results): + timings[i] += _median(r[str(query)]) + + fig, ax = plt.subplots(figsize=(10, 6)) + ax.set_title(title) + ax.set_ylabel( + f"Time in seconds to run all {num_queries} {benchmark} queries " + f"(lower is better)" + ) + times = [round(x, 0) for x in timings] + bars = ax.bar(labels, times, color="skyblue", width=0.8) + for bar in bars: + yval = bar.get_height() + ax.text( + bar.get_x() + bar.get_width() / 2.0, yval, f"{yval}", + va="bottom", ha="center", + ) + path = os.path.join(output_dir, f"{benchmark}_allqueries.png") + plt.savefig(path, format="png") + plt.close(fig) + return path + + +def generate_comparison_chart( + results: Sequence[Dict[str, Any]], + labels: Sequence[str], + benchmark: str, + title: str, + output_dir: str = ".", +) -> str: + """Per-query grouped bar chart. 
Returns the output path.""" + queries: List[str] = [] + benches: List[List[float]] = [[] for _ in results] + for query in _query_range(benchmark): + queries.append(f"q{query}") + for i, r in enumerate(results): + benches[i].append(_median(r[str(query)])) + + bar_width = 0.3 + index = np.arange(len(queries)) * 1.5 + fig_w = 15 if benchmark == "tpch" else 35 + fig, ax = plt.subplots(figsize=(fig_w, 6)) + + for i, label in enumerate(labels): + ax.bar(index + i * bar_width, benches[i], bar_width, label=label) + + ax.set_title(title) + ax.set_xlabel("Queries") + ax.set_ylabel("Query Time (seconds)") + ax.set_xticks(index + bar_width / 2) + ax.set_xticklabels(queries) + ax.legend() + + path = os.path.join(output_dir, f"{benchmark}_queries_compare.png") + plt.savefig(path, format="png") + plt.close(fig) + return path + + +def _speedup_data( + baseline: Dict, comparison: Dict, benchmark: str, absolute: bool, +) -> Tuple[List[str], List[float]]: + """Compute per-query speedup (relative % or absolute seconds).""" + rows: List[Tuple[str, float]] = [] + for query in _query_range(benchmark): + a = _median(baseline[str(query)]) + b = _median(comparison[str(query)]) + if absolute: + rows.append((f"q{query}", round(a - b, 1))) + else: + if a > b: + speedup = a / b - 1 + else: + speedup = -(1 / (a / b) - 1) + rows.append((f"q{query}", round(speedup * 100, 0))) + rows.sort(key=lambda x: -x[1]) + qs, vals = zip(*rows) + return list(qs), list(vals) + + +def generate_speedup_chart( + baseline: Dict[str, Any], + comparison: Dict[str, Any], + label1: str, + label2: str, + benchmark: str, + title: str, + absolute: bool = False, + output_dir: str = ".", +) -> str: + """Relative (%) or absolute (seconds) speedup chart. Returns path.""" + queries, speedups = _speedup_data(baseline, comparison, benchmark, absolute) + + fig_w = 10 if benchmark == "tpch" else 35 + fig_h = 6 if benchmark == "tpch" else 10 + fig, ax = plt.subplots(figsize=(fig_w, fig_h)) + bars = ax.bar(queries, speedups, color="skyblue") + + for bar, val in zip(bars, speedups): + yval = bar.get_height() + fmt = f"{val:.1f}" if absolute else f"{val:.0f}%" + va = "bottom" if yval >= 0 else "top" + y = min(800, yval + 5) if yval >= 0 else yval + ax.text( + bar.get_x() + bar.get_width() / 2.0, y, fmt, + va=va, ha="center", fontsize=8, color="blue", rotation=90, + ) + + kind = "seconds" if absolute else "percentage" + suffix = "abs" if absolute else "rel" + ylabel = "Speedup (in seconds)" if absolute else "Speedup Percentage (100% speedup = 2x faster)" + ax.set_title(f"{label2} speedup over {label1} ({title})") + ax.set_ylabel(ylabel) + ax.set_xlabel("Query") + ax.axhline(0, color="black", linewidth=0.8) + ax.yaxis.grid(True) + + if not absolute: + min_val = (min(speedups) // 100) * 100 + max_val = ((max(speedups) // 100) + 1) * 100 + 50 + if benchmark == "tpch": + ax.set_ylim(min_val, max_val) + else: + ax.set_ylim(-250, 300) + else: + ax.set_ylim(min(speedups) * 2 - 20, max(speedups) * 1.5) + + path = os.path.join(output_dir, f"{benchmark}_queries_speedup_{suffix}.png") + plt.savefig(path, format="png") + plt.close(fig) + return path + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +def compare( + files: Sequence[str], + labels: Sequence[str], + benchmark: str, + title: str, + output_dir: str = ".", +) -> List[str]: + """Run all applicable charts. 
Returns list of output file paths.""" + os.makedirs(output_dir, exist_ok=True) + results = [] + for filename in files: + with open(filename) as f: + results.append(json.load(f)) + + paths = [ + generate_summary_chart(results, labels, benchmark, title, output_dir), + generate_comparison_chart(results, labels, benchmark, title, output_dir), + ] + + if len(files) == 2: + paths.append( + generate_speedup_chart( + results[0], results[1], labels[0], labels[1], + benchmark, title, absolute=True, output_dir=output_dir, + ) + ) + paths.append( + generate_speedup_chart( + results[0], results[1], labels[0], labels[1], + benchmark, title, absolute=False, output_dir=output_dir, + ) + ) + + for p in paths: + print(f"Wrote {p}") + return paths + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +def main(argv=None): + parser = argparse.ArgumentParser( + description="Compare benchmark results and generate charts", + ) + parser.add_argument("filenames", nargs="+", help="JSON result files") + parser.add_argument("--labels", nargs="+", required=True, help="Labels for each file") + parser.add_argument("--benchmark", required=True, help="tpch or tpcds") + parser.add_argument("--title", required=True, help="Chart title") + parser.add_argument("--output-dir", default=".", help="Directory for chart PNGs") + args = parser.parse_args(argv) + + if len(args.filenames) != len(args.labels): + parser.error("Number of filenames must match number of labels") + + compare(args.filenames, args.labels, args.benchmark, args.title, args.output_dir) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/analysis/memory_report.py b/benchmarks/analysis/memory_report.py new file mode 100644 index 0000000000..e77ff10dc0 --- /dev/null +++ b/benchmarks/analysis/memory_report.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Parse profiling output and generate memory utilisation reports. + +Supports two data sources: + +1. **Spark REST API metrics** — CSV written by ``runner/profiling.py`` + (``SparkMetricsProfiler``). Columns include ``elapsed_secs``, + ``executor_id``, ``memoryUsed``, ``maxMemory``, and various peak metrics. + +2. **Container cgroup metrics** — CSV written by + ``infra/docker/collect-metrics.sh``. Columns: + ``timestamp_ms, memory_usage_bytes, memory_limit_bytes, rss_bytes, + cache_bytes, swap_bytes``. 
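+
+Example container CSV (header plus one illustrative sample row; the values
+are invented for illustration)::
+
+    timestamp_ms,memory_usage_bytes,memory_limit_bytes,rss_bytes,cache_bytes,swap_bytes
+    1700000000000,4294967296,6442450944,3221225472,805306368,0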
+ +Usage:: + + python -m benchmarks.analysis.memory_report \\ + --spark-csv results/comet-tpch-metrics.csv \\ + --container-csv results/container-metrics.csv \\ + --output-dir ./charts +""" + +import argparse +import csv +import os +import sys +from typing import Dict, List, Optional + +import matplotlib.pyplot as plt + + +# --------------------------------------------------------------------------- +# Spark REST API metrics +# --------------------------------------------------------------------------- + +def parse_spark_csv(path: str) -> Dict[str, List[Dict]]: + """Parse a SparkMetricsProfiler CSV into per-executor time series. + + Returns ``{executor_id: [{elapsed_secs, memoryUsed, maxMemory, ...}]}`` + """ + executors: Dict[str, List[Dict]] = {} + with open(path, newline="") as f: + reader = csv.DictReader(f) + for row in reader: + eid = row.get("executor_id", "unknown") + parsed = {} + for k, v in row.items(): + try: + parsed[k] = float(v) + except (ValueError, TypeError): + parsed[k] = v + executors.setdefault(eid, []).append(parsed) + return executors + + +def generate_spark_memory_chart( + spark_csv: str, + output_dir: str = ".", +) -> List[str]: + """Generate per-executor memory usage over time. Returns output paths.""" + executors = parse_spark_csv(spark_csv) + paths = [] + + for eid, samples in executors.items(): + elapsed = [s.get("elapsed_secs", 0) for s in samples] + used = [s.get("memoryUsed", 0) / (1024 ** 2) for s in samples] # MB + max_mem = [s.get("maxMemory", 0) / (1024 ** 2) for s in samples] + + fig, ax = plt.subplots(figsize=(12, 5)) + ax.plot(elapsed, used, label="memoryUsed", linewidth=1.5) + if any(m > 0 for m in max_mem): + ax.plot(elapsed, max_mem, label="maxMemory", linestyle="--", alpha=0.6) + ax.set_xlabel("Elapsed (seconds)") + ax.set_ylabel("Memory (MB)") + ax.set_title(f"Executor {eid} — JVM Memory Usage") + ax.legend() + ax.grid(True, alpha=0.3) + + fname = f"spark_memory_executor_{eid}.png" + path = os.path.join(output_dir, fname) + plt.savefig(path, format="png") + plt.close(fig) + paths.append(path) + + # Peak memory bar chart across executors + if executors: + fig, ax = plt.subplots(figsize=(max(6, len(executors) * 1.5), 5)) + eids = list(executors.keys()) + peaks = [] + for eid in eids: + peak = max( + (s.get("peak_JVMHeapMemory", 0) + s.get("peak_JVMOffHeapMemory", 0)) + for s in executors[eid] + ) / (1024 ** 2) + peaks.append(peak) + + bars = ax.bar(eids, peaks, color="coral") + for bar, val in zip(bars, peaks): + ax.text( + bar.get_x() + bar.get_width() / 2.0, val, + f"{val:.0f}", va="bottom", ha="center", fontsize=9, + ) + ax.set_xlabel("Executor") + ax.set_ylabel("Peak JVM Memory (MB)") + ax.set_title("Peak JVM Memory by Executor") + ax.grid(True, axis="y", alpha=0.3) + + path = os.path.join(output_dir, "spark_memory_peak.png") + plt.savefig(path, format="png") + plt.close(fig) + paths.append(path) + + for p in paths: + print(f"Wrote {p}") + return paths + + +# --------------------------------------------------------------------------- +# Container cgroup metrics +# --------------------------------------------------------------------------- + +def parse_container_csv(path: str) -> List[Dict[str, float]]: + """Parse a collect-metrics.sh CSV into a list of samples.""" + samples = [] + with open(path, newline="") as f: + reader = csv.DictReader(f) + for row in reader: + parsed = {} + for k, v in row.items(): + try: + parsed[k] = float(v) + except (ValueError, TypeError): + parsed[k] = v + samples.append(parsed) + return samples + + +def 
generate_container_memory_chart( + container_csv: str, + output_dir: str = ".", +) -> List[str]: + """Generate container memory usage over time. Returns output paths.""" + samples = parse_container_csv(container_csv) + if not samples: + print("No container metrics samples found") + return [] + + t0 = samples[0].get("timestamp_ms", 0) + elapsed = [(s.get("timestamp_ms", 0) - t0) / 1000.0 for s in samples] + usage_mb = [s.get("memory_usage_bytes", 0) / (1024 ** 2) for s in samples] + rss_mb = [s.get("rss_bytes", 0) / (1024 ** 2) for s in samples] + cache_mb = [s.get("cache_bytes", 0) / (1024 ** 2) for s in samples] + limit_mb = [s.get("memory_limit_bytes", 0) / (1024 ** 2) for s in samples] + + fig, ax = plt.subplots(figsize=(12, 5)) + ax.plot(elapsed, usage_mb, label="total usage", linewidth=1.5) + ax.plot(elapsed, rss_mb, label="RSS", linewidth=1.2) + ax.plot(elapsed, cache_mb, label="cache", linewidth=1.0, alpha=0.7) + if any(m > 0 for m in limit_mb): + ax.axhline( + limit_mb[0], color="red", linestyle="--", linewidth=1.0, + label=f"limit ({limit_mb[0]:.0f} MB)", + ) + ax.set_xlabel("Elapsed (seconds)") + ax.set_ylabel("Memory (MB)") + ax.set_title("Container Memory Usage (cgroup)") + ax.legend() + ax.grid(True, alpha=0.3) + + paths = [] + path = os.path.join(output_dir, "container_memory.png") + plt.savefig(path, format="png") + plt.close(fig) + paths.append(path) + + # Summary stats + peak_usage = max(usage_mb) + peak_rss = max(rss_mb) + limit = limit_mb[0] if limit_mb else 0 + print(f"Container memory summary:") + print(f" Peak usage: {peak_usage:.0f} MB") + print(f" Peak RSS: {peak_rss:.0f} MB") + if limit > 0: + print(f" Limit: {limit:.0f} MB") + print(f" Peak % used: {peak_usage / limit * 100:.1f}%") + + for p in paths: + print(f"Wrote {p}") + return paths + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +def main(argv=None): + parser = argparse.ArgumentParser( + description="Generate memory utilisation reports from profiling data", + ) + parser.add_argument( + "--spark-csv", help="Path to SparkMetricsProfiler CSV", + ) + parser.add_argument( + "--container-csv", help="Path to collect-metrics.sh CSV", + ) + parser.add_argument( + "--output-dir", default=".", help="Directory for chart PNGs", + ) + args = parser.parse_args(argv) + + if not args.spark_csv and not args.container_csv: + parser.error("At least one of --spark-csv or --container-csv is required") + + os.makedirs(args.output_dir, exist_ok=True) + + if args.spark_csv: + generate_spark_memory_chart(args.spark_csv, args.output_dir) + if args.container_csv: + generate_container_memory_chart(args.container_csv, args.output_dir) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/conf/engines/comet-iceberg.conf b/benchmarks/conf/engines/comet-iceberg.conf new file mode 100644 index 0000000000..bfdfd4ccf6 --- /dev/null +++ b/benchmarks/conf/engines/comet-iceberg.conf @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# DataFusion Comet with native Iceberg scanning. +# Catalog configs (spark.sql.catalog.*, spark.sql.defaultCatalog) should be +# passed via --conf CLI overrides since the catalog name is user-specific. +runner.name=comet-iceberg +runner.jars=${COMET_JAR},${ICEBERG_JAR} + +spark.driver.extraClassPath=${COMET_JAR}:${ICEBERG_JAR} +spark.executor.extraClassPath=${COMET_JAR}:${ICEBERG_JAR} +spark.plugins=org.apache.spark.CometPlugin +spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager +spark.comet.exec.replaceSortMergeJoin=true +spark.comet.expression.Cast.allowIncompatible=true +spark.comet.enabled=true +spark.comet.exec.enabled=true +spark.comet.scan.icebergNative.enabled=true +spark.comet.explainFallback.enabled=true diff --git a/benchmarks/conf/engines/comet-jvm-shuffle.conf b/benchmarks/conf/engines/comet-jvm-shuffle.conf new file mode 100644 index 0000000000..12b3d23a18 --- /dev/null +++ b/benchmarks/conf/engines/comet-jvm-shuffle.conf @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Comet with JVM shuffle mode — for shuffle benchmarks. +runner.name=comet-jvm-shuffle +runner.jars=${COMET_JAR} + +spark.driver.extraClassPath=${COMET_JAR} +spark.executor.extraClassPath=${COMET_JAR} +spark.memory.offHeap.enabled=true +spark.memory.offHeap.size=16g +spark.comet.enabled=true +spark.comet.operator.DataWritingCommandExec.allowIncompatible=true +spark.comet.parquet.write.enabled=true +spark.comet.logFallbackReasons.enabled=true +spark.comet.explainFallback.enabled=true +spark.comet.shuffle.mode=jvm +spark.comet.exec.shuffle.mode=jvm +spark.comet.exec.replaceSortMergeJoin=true +spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager +spark.sql.extensions=org.apache.comet.CometSparkSessionExtensions +spark.comet.cast.allowIncompatible=true diff --git a/benchmarks/conf/engines/comet-native-shuffle.conf b/benchmarks/conf/engines/comet-native-shuffle.conf new file mode 100644 index 0000000000..0df2eac0c6 --- /dev/null +++ b/benchmarks/conf/engines/comet-native-shuffle.conf @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Comet with native shuffle mode — for shuffle benchmarks. +runner.name=comet-native-shuffle +runner.jars=${COMET_JAR} + +spark.driver.extraClassPath=${COMET_JAR} +spark.executor.extraClassPath=${COMET_JAR} +spark.memory.offHeap.enabled=true +spark.memory.offHeap.size=16g +spark.comet.enabled=true +spark.comet.operator.DataWritingCommandExec.allowIncompatible=true +spark.comet.parquet.write.enabled=true +spark.comet.logFallbackReasons.enabled=true +spark.comet.explainFallback.enabled=true +spark.comet.exec.shuffle.mode=native +spark.comet.exec.replaceSortMergeJoin=true +spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager +spark.sql.extensions=org.apache.comet.CometSparkSessionExtensions +spark.comet.cast.allowIncompatible=true diff --git a/benchmarks/conf/engines/comet.conf b/benchmarks/conf/engines/comet.conf new file mode 100644 index 0000000000..257fd7dd56 --- /dev/null +++ b/benchmarks/conf/engines/comet.conf @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# DataFusion Comet accelerator. +runner.name=comet +runner.jars=${COMET_JAR} + +spark.driver.extraClassPath=${COMET_JAR} +spark.executor.extraClassPath=${COMET_JAR} +spark.plugins=org.apache.spark.CometPlugin +spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager +spark.comet.scan.impl=native_datafusion +spark.comet.exec.replaceSortMergeJoin=true +spark.comet.expression.Cast.allowIncompatible=true diff --git a/benchmarks/conf/engines/gluten.conf b/benchmarks/conf/engines/gluten.conf new file mode 100644 index 0000000000..91599c5bde --- /dev/null +++ b/benchmarks/conf/engines/gluten.conf @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Gluten accelerator. +runner.name=gluten +runner.jars=${GLUTEN_JAR} +runner.env.TZ=UTC + +spark.driver.extraClassPath=${GLUTEN_JAR} +spark.executor.extraClassPath=${GLUTEN_JAR} +spark.plugins=org.apache.gluten.GlutenPlugin +spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager +spark.gluten.sql.columnar.forceShuffledHashJoin=true +spark.sql.session.timeZone=UTC diff --git a/benchmarks/conf/engines/spark-shuffle.conf b/benchmarks/conf/engines/spark-shuffle.conf new file mode 100644 index 0000000000..2b087a129b --- /dev/null +++ b/benchmarks/conf/engines/spark-shuffle.conf @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Vanilla Spark baseline for shuffle benchmarks — Comet explicitly disabled. +runner.name=spark-shuffle + +spark.comet.enabled=false +spark.comet.exec.shuffle.enabled=false diff --git a/benchmarks/conf/engines/spark.conf b/benchmarks/conf/engines/spark.conf new file mode 100644 index 0000000000..e1831c4ae5 --- /dev/null +++ b/benchmarks/conf/engines/spark.conf @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Vanilla Spark — no accelerator plugin. +runner.name=spark diff --git a/benchmarks/conf/profiles/docker.conf b/benchmarks/conf/profiles/docker.conf new file mode 100644 index 0000000000..9b2bec6841 --- /dev/null +++ b/benchmarks/conf/profiles/docker.conf @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Profile for running inside docker-compose (see infra/docker/). +# Data is mounted at /data, queries at /queries, results at /results. +runner.master=${SPARK_MASTER} + +spark.driver.memory=8G +spark.executor.instances=1 +spark.executor.cores=8 +spark.cores.max=8 +spark.executor.memory=16g +spark.memory.offHeap.enabled=true +spark.memory.offHeap.size=16g +spark.eventLog.enabled=true diff --git a/benchmarks/conf/profiles/local.conf b/benchmarks/conf/profiles/local.conf new file mode 100644 index 0000000000..75bb8454b3 --- /dev/null +++ b/benchmarks/conf/profiles/local.conf @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Local mode — runs on local[*] with no master URL. +runner.master=local[*] + +spark.driver.memory=8G +spark.executor.memory=16g +spark.memory.offHeap.enabled=true +spark.memory.offHeap.size=16g diff --git a/benchmarks/conf/profiles/standalone-tpcds.conf b/benchmarks/conf/profiles/standalone-tpcds.conf new file mode 100644 index 0000000000..c892a7e77f --- /dev/null +++ b/benchmarks/conf/profiles/standalone-tpcds.conf @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Standalone cluster profile for TPC-DS: 2 executors, 16 cores total. 
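+#
+# Example invocation (assumes SPARK_MASTER is exported and that TPCDS_DATA
+# and TPCDS_QUERIES point at the data and query directories):
+#
+#   python benchmarks/run.py --engine comet --profile standalone-tpcds \
+#     -- tpc --benchmark tpcds --data $TPCDS_DATA --queries $TPCDS_QUERIES \
+#     --output . --iterations 1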
+runner.master=${SPARK_MASTER} + +spark.driver.memory=8G +spark.executor.instances=2 +spark.executor.cores=8 +spark.cores.max=16 +spark.executor.memory=16g +spark.memory.offHeap.enabled=true +spark.memory.offHeap.size=16g +spark.eventLog.enabled=true +spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem +spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.DefaultAWSCredentialsProviderChain diff --git a/benchmarks/conf/profiles/standalone-tpch.conf b/benchmarks/conf/profiles/standalone-tpch.conf new file mode 100644 index 0000000000..024a7364f3 --- /dev/null +++ b/benchmarks/conf/profiles/standalone-tpch.conf @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Standalone cluster profile for TPC-H: 1 executor, 8 cores. +runner.master=${SPARK_MASTER} + +spark.driver.memory=8G +spark.executor.instances=1 +spark.executor.cores=8 +spark.cores.max=8 +spark.executor.memory=16g +spark.memory.offHeap.enabled=true +spark.memory.offHeap.size=16g +spark.eventLog.enabled=true +spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem +spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.DefaultAWSCredentialsProviderChain diff --git a/dev/benchmarks/create-iceberg-tpch.py b/benchmarks/create-iceberg-tpch.py similarity index 100% rename from dev/benchmarks/create-iceberg-tpch.py rename to benchmarks/create-iceberg-tpch.py diff --git a/dev/benchmarks/drop-caches.sh b/benchmarks/drop-caches.sh similarity index 100% rename from dev/benchmarks/drop-caches.sh rename to benchmarks/drop-caches.sh diff --git a/benchmarks/pyspark/generate_data.py b/benchmarks/generate_shuffle_data.py old mode 100755 new mode 100644 similarity index 100% rename from benchmarks/pyspark/generate_data.py rename to benchmarks/generate_shuffle_data.py diff --git a/benchmarks/infra/docker/Dockerfile b/benchmarks/infra/docker/Dockerfile new file mode 100644 index 0000000000..861411819a --- /dev/null +++ b/benchmarks/infra/docker/Dockerfile @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Unified benchmark image for running TPC and shuffle benchmarks across +# engines (Spark, Comet, Gluten). +# +# Build: +# docker build -t comet-bench -f benchmarks/infra/docker/Dockerfile . +# +# The build context should be the repository root so that benchmarks/ is +# available. + +ARG SPARK_IMAGE=apache/spark:3.5.2-python3 +FROM ${SPARK_IMAGE} + +USER root + +RUN apt-get update \ + && apt-get install -y --no-install-recommends python3 python3-pip procps \ + openjdk-8-jre-headless openjdk-17-jre-headless \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Default to Java 17 (Comet). Override with JAVA_HOME for other engines. +ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64 + +# Copy the unified benchmark runner into the image. +COPY benchmarks/conf /opt/benchmarks/conf +COPY benchmarks/runner /opt/benchmarks/runner +COPY benchmarks/suites /opt/benchmarks/suites +COPY benchmarks/queries /opt/benchmarks/queries +COPY benchmarks/run.py /opt/benchmarks/run.py + +# Copy the metrics collector script. +COPY benchmarks/infra/docker/collect-metrics.sh /opt/benchmarks/collect-metrics.sh +RUN chmod +x /opt/benchmarks/collect-metrics.sh + +# Engine JARs are bind-mounted or copied in at runtime via --jars. +# Data and query paths are also bind-mounted. + +ENV PYTHONPATH="/opt:${PYTHONPATH}" + +WORKDIR /opt/benchmarks + +USER ${spark_uid} diff --git a/benchmarks/infra/docker/collect-metrics.sh b/benchmarks/infra/docker/collect-metrics.sh new file mode 100755 index 0000000000..fd9c1d848f --- /dev/null +++ b/benchmarks/infra/docker/collect-metrics.sh @@ -0,0 +1,103 @@ +#!/bin/sh +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Container-level memory metrics collector. +# +# Polls cgroup memory stats at a fixed interval and writes a CSV with +# columns: timestamp, memory_usage_bytes, memory_limit_bytes, rss_bytes, +# cache_bytes, swap_bytes. +# +# Works with both cgroup v1 and v2. +# +# Usage: +# collect-metrics.sh [INTERVAL_SECS] [OUTPUT_CSV] +# +# Defaults: interval=1, output=/results/container-metrics.csv + +set -e + +INTERVAL="${1:-1}" +OUTPUT="${2:-/results/container-metrics.csv}" + +# Detect cgroup version +if [ -f /sys/fs/cgroup/memory/memory.usage_in_bytes ]; then + CGROUP_VERSION=1 +elif [ -f /sys/fs/cgroup/memory.current ]; then + CGROUP_VERSION=2 +else + echo "Warning: cannot detect cgroup memory files; polling disabled" >&2 + # Still write a header so downstream tools don't break on a missing file. + echo "timestamp_ms,memory_usage_bytes,memory_limit_bytes,rss_bytes,cache_bytes,swap_bytes" > "$OUTPUT" + # Sleep forever so the container stays up (compose expects it to keep running). + exec sleep infinity +fi + +# ---- helpers ---- + +read_file() { + # Return the contents of a file, or "0" if it doesn't exist. 
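+    # A missing path is expected here: cgroup v1 and v2 expose different
+    # files, and swap accounting may be disabled on the host.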
+    if [ -f "$1" ]; then cat "$1"; else echo "0"; fi
+}
+
+read_stat() {
+    # Extract a named field from memory.stat (cgroup v1 format: "key value").
+    # A grep | awk pipeline exits with awk's status even when grep matches
+    # nothing, so default to 0 explicitly for absent keys (field names
+    # differ between cgroup v1 and v2).
+    val=$(grep "^$1 " "$2" 2>/dev/null | awk '{print $2}')
+    echo "${val:-0}"
+}
+
+poll_v1() {
+    local usage limit rss cache swap
+    usage=$(read_file /sys/fs/cgroup/memory/memory.usage_in_bytes)
+    limit=$(read_file /sys/fs/cgroup/memory/memory.limit_in_bytes)
+    local stat=/sys/fs/cgroup/memory/memory.stat
+    rss=$(read_stat total_rss "$stat")
+    cache=$(read_stat total_cache "$stat")
+    swap=$(read_file /sys/fs/cgroup/memory/memory.memsw.usage_in_bytes)
+    # swap file reports memory+swap; subtract memory to get swap only
+    if [ "$swap" != "0" ]; then
+        swap=$((swap - usage))
+        [ "$swap" -lt 0 ] && swap=0
+    fi
+    echo "$usage,$limit,$rss,$cache,$swap"
+}
+
+poll_v2() {
+    local usage limit rss cache swap
+    usage=$(read_file /sys/fs/cgroup/memory.current)
+    limit=$(read_file /sys/fs/cgroup/memory.max)
+    [ "$limit" = "max" ] && limit=0
+    local stat=/sys/fs/cgroup/memory.stat
+    rss=$(read_stat anon "$stat")
+    cache=$(read_stat file "$stat")
+    swap=$(read_file /sys/fs/cgroup/memory.swap.current)
+    echo "$usage,$limit,$rss,$cache,$swap"
+}
+
+# ---- main loop ----
+
+echo "timestamp_ms,memory_usage_bytes,memory_limit_bytes,rss_bytes,cache_bytes,swap_bytes" > "$OUTPUT"
+echo "Collecting container memory metrics every ${INTERVAL}s -> ${OUTPUT} (cgroup v${CGROUP_VERSION})" >&2
+
+while true; do
+    ts=$(date +%s%3N 2>/dev/null || python3 -c 'import time; print(int(time.time()*1000))')
+    if [ "$CGROUP_VERSION" = "1" ]; then
+        vals=$(poll_v1)
+    else
+        vals=$(poll_v2)
+    fi
+    echo "${ts},${vals}" >> "$OUTPUT"
+    sleep "$INTERVAL"
+done
diff --git a/benchmarks/infra/docker/docker-compose.constrained.yml b/benchmarks/infra/docker/docker-compose.constrained.yml
new file mode 100644
index 0000000000..eff730d0e3
--- /dev/null
+++ b/benchmarks/infra/docker/docker-compose.constrained.yml
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Constrained memory overlay. 
+# +# Apply on top of docker-compose.yml to enforce hard memory limits and +# enable the metrics-collector sidecar: +# +# docker compose -f docker-compose.yml -f docker-compose.constrained.yml up -d +# +# Environment variables: +# WORKER_MEM_LIMIT - Hard memory limit for the worker (default: 6g) +# BENCH_MEM_LIMIT - Hard memory limit for the bench runner (default: 10g) +# METRICS_INTERVAL - Collection interval in seconds (default: 1) + +services: + spark-worker: + mem_limit: ${WORKER_MEM_LIMIT:-6g} + memswap_limit: ${WORKER_MEM_LIMIT:-6g} # same as mem_limit → no swap + + bench: + mem_limit: ${BENCH_MEM_LIMIT:-10g} + memswap_limit: ${BENCH_MEM_LIMIT:-10g} + + metrics-collector: + image: ${BENCH_IMAGE:-comet-bench} + container_name: metrics-collector + pid: "service:spark-worker" # share PID namespace with worker + command: + - /opt/benchmarks/collect-metrics.sh + - "${METRICS_INTERVAL:-1}" + - /results/container-metrics.csv + volumes: + - ${RESULTS_DIR:-/tmp/bench-results}:/results + depends_on: + - spark-worker diff --git a/benchmarks/infra/docker/docker-compose.yml b/benchmarks/infra/docker/docker-compose.yml new file mode 100644 index 0000000000..36261e3ded --- /dev/null +++ b/benchmarks/infra/docker/docker-compose.yml @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Spark standalone cluster for benchmarks. 
+# +# Usage: +# docker compose -f benchmarks/infra/docker/docker-compose.yml up -d +# +# Override with constrained memory limits: +# docker compose -f benchmarks/infra/docker/docker-compose.yml \ +# -f benchmarks/infra/docker/docker-compose.constrained.yml up -d +# +# Environment variables (set in .env or export before running): +# BENCH_IMAGE - Docker image to use (default: comet-bench) +# DATA_DIR - Host path to TPC data (default: /tmp/tpc-data) +# QUERIES_DIR - Host path to query SQL files (default: /tmp/tpc-queries) +# RESULTS_DIR - Host path for results output (default: /tmp/bench-results) +# ENGINE_JARS_DIR - Host path containing engine JARs (default: /tmp/engine-jars) + +services: + spark-master: + image: ${BENCH_IMAGE:-comet-bench} + container_name: spark-master + hostname: spark-master + command: /opt/spark/sbin/start-master.sh --host spark-master + ports: + - "7077:7077" + - "8080:8080" + volumes: + - ${DATA_DIR:-/tmp/tpc-data}:/data:ro + - ${QUERIES_DIR:-/tmp/tpc-queries}:/queries:ro + - ${RESULTS_DIR:-/tmp/bench-results}:/results + - ${ENGINE_JARS_DIR:-/tmp/engine-jars}:/jars:ro + environment: + - SPARK_MASTER_HOST=spark-master + - SPARK_NO_DAEMONIZE=true + + spark-worker: + image: ${BENCH_IMAGE:-comet-bench} + container_name: spark-worker + hostname: spark-worker + depends_on: + - spark-master + command: /opt/spark/sbin/start-worker.sh spark://spark-master:7077 + ports: + - "8081:8081" + volumes: + - ${DATA_DIR:-/tmp/tpc-data}:/data:ro + - ${QUERIES_DIR:-/tmp/tpc-queries}:/queries:ro + - ${RESULTS_DIR:-/tmp/bench-results}:/results + - ${ENGINE_JARS_DIR:-/tmp/engine-jars}:/jars:ro + environment: + - SPARK_WORKER_CORES=${WORKER_CORES:-8} + - SPARK_WORKER_MEMORY=${WORKER_MEMORY:-16g} + - SPARK_NO_DAEMONIZE=true + + bench: + image: ${BENCH_IMAGE:-comet-bench} + container_name: bench-runner + depends_on: + - spark-master + - spark-worker + # Override 'command' to run a specific benchmark, e.g.: + # docker compose run bench python /opt/benchmarks/run.py \ + # --engine comet --profile docker -- tpc ... + command: ["echo", "Use 'docker compose run bench python /opt/benchmarks/run.py ...' to run benchmarks"] + volumes: + - ${DATA_DIR:-/tmp/tpc-data}:/data:ro + - ${QUERIES_DIR:-/tmp/tpc-queries}:/queries:ro + - ${RESULTS_DIR:-/tmp/bench-results}:/results + - ${ENGINE_JARS_DIR:-/tmp/engine-jars}:/jars:ro + environment: + - SPARK_HOME=/opt/spark + - SPARK_MASTER=spark://spark-master:7077 + - COMET_JAR=/jars/comet.jar + - PYTHONPATH=/opt diff --git a/benchmarks/pyspark/README.md b/benchmarks/pyspark/README.md deleted file mode 100644 index 3fc55123f0..0000000000 --- a/benchmarks/pyspark/README.md +++ /dev/null @@ -1,178 +0,0 @@ - - -# PySpark Benchmarks - -A suite of PySpark benchmarks for comparing performance between Spark, Comet JVM, and Comet Native implementations. 
- -## Available Benchmarks - -Run `python run_benchmark.py --list-benchmarks` to see all available benchmarks: - -- **shuffle-hash** - Shuffle all columns using hash partitioning on group_key -- **shuffle-roundrobin** - Shuffle all columns using round-robin partitioning - -## Prerequisites - -- Apache Spark cluster (standalone, YARN, or Kubernetes) -- PySpark installed -- Comet JAR built - -## Build Comet JAR - -```bash -cd /path/to/datafusion-comet -make release -``` - -## Step 1: Generate Test Data - -Generate test data with realistic 50-column schema (nested structs, arrays, maps): - -```bash -spark-submit \ - --master spark://master:7077 \ - --executor-memory 16g \ - generate_data.py \ - --output /tmp/shuffle-benchmark-data \ - --rows 10000000 \ - --partitions 200 -``` - -### Data Generation Options - -| Option | Default | Description | -| -------------------- | ---------- | ---------------------------- | -| `--output`, `-o` | (required) | Output path for parquet data | -| `--rows`, `-r` | 10000000 | Number of rows | -| `--partitions`, `-p` | 200 | Number of output partitions | - -## Step 2: Run Benchmarks - -### List Available Benchmarks - -```bash -python run_benchmark.py --list-benchmarks -``` - -### Run Individual Benchmarks - -You can run specific benchmarks by name: - -```bash -# Hash partitioning shuffle - Spark baseline -spark-submit --master spark://master:7077 \ - run_benchmark.py --data /tmp/shuffle-benchmark-data --mode spark --benchmark shuffle-hash - -# Round-robin shuffle - Spark baseline -spark-submit --master spark://master:7077 \ - run_benchmark.py --data /tmp/shuffle-benchmark-data --mode spark --benchmark shuffle-roundrobin - -# Hash partitioning - Comet JVM shuffle -spark-submit --master spark://master:7077 \ - --jars /path/to/comet.jar \ - --conf spark.comet.enabled=true \ - --conf spark.comet.exec.shuffle.enabled=true \ - --conf spark.comet.shuffle.mode=jvm \ - --conf spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager \ - run_benchmark.py --data /tmp/shuffle-benchmark-data --mode jvm --benchmark shuffle-hash - -# Round-robin - Comet Native shuffle -spark-submit --master spark://master:7077 \ - --jars /path/to/comet.jar \ - --conf spark.comet.enabled=true \ - --conf spark.comet.exec.shuffle.enabled=true \ - --conf spark.comet.exec.shuffle.mode=native \ - --conf spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager \ - run_benchmark.py --data /tmp/shuffle-benchmark-data --mode native --benchmark shuffle-roundrobin -``` - -### Run All Benchmarks - -Use the provided script to run all benchmarks across all modes: - -```bash -SPARK_MASTER=spark://master:7077 \ -EXECUTOR_MEMORY=16g \ -./run_all_benchmarks.sh /tmp/shuffle-benchmark-data -``` - -## Checking Results - -Open the Spark UI (default: http://localhost:4040) during each benchmark run to compare shuffle write sizes in the Stages tab. - -## Adding New Benchmarks - -The benchmark framework makes it easy to add new benchmarks: - -1. 
**Create a benchmark class** in `benchmarks/` directory (or add to existing file): - -```python -from benchmarks.base import Benchmark - -class MyBenchmark(Benchmark): - @classmethod - def name(cls) -> str: - return "my-benchmark" - - @classmethod - def description(cls) -> str: - return "Description of what this benchmark does" - - def run(self) -> Dict[str, Any]: - # Read data - df = self.spark.read.parquet(self.data_path) - - # Run your benchmark operation - def benchmark_operation(): - result = df.filter(...).groupBy(...).agg(...) - result.write.mode("overwrite").parquet("/tmp/output") - - # Time it - duration_ms = self._time_operation(benchmark_operation) - - return { - 'duration_ms': duration_ms, - # Add any other metrics you want to track - } -``` - -2. **Register the benchmark** in `benchmarks/__init__.py`: - -```python -from .my_module import MyBenchmark - -_BENCHMARK_REGISTRY = { - # ... existing benchmarks - MyBenchmark.name(): MyBenchmark, -} -``` - -3. **Run your new benchmark**: - -```bash -python run_benchmark.py --data /path/to/data --mode spark --benchmark my-benchmark -``` - -The base `Benchmark` class provides: - -- Automatic timing via `_time_operation()` -- Standard output formatting via `execute_timed()` -- Access to SparkSession, data path, and mode -- Spark configuration printing diff --git a/benchmarks/pyspark/benchmarks/__init__.py b/benchmarks/pyspark/benchmarks/__init__.py deleted file mode 100644 index 7d913a7d6d..0000000000 --- a/benchmarks/pyspark/benchmarks/__init__.py +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env python3 -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -Benchmark registry for PySpark benchmarks. - -This module provides a central registry for discovering and running benchmarks. -""" - -from typing import Dict, Type, List - -from .base import Benchmark -from .shuffle import ShuffleHashBenchmark, ShuffleRoundRobinBenchmark - - -# Registry of all available benchmarks -_BENCHMARK_REGISTRY: Dict[str, Type[Benchmark]] = { - ShuffleHashBenchmark.name(): ShuffleHashBenchmark, - ShuffleRoundRobinBenchmark.name(): ShuffleRoundRobinBenchmark, -} - - -def get_benchmark(name: str) -> Type[Benchmark]: - """ - Get a benchmark class by name. - - Args: - name: Benchmark name - - Returns: - Benchmark class - - Raises: - KeyError: If benchmark name is not found - """ - if name not in _BENCHMARK_REGISTRY: - available = ", ".join(sorted(_BENCHMARK_REGISTRY.keys())) - raise KeyError( - f"Unknown benchmark: {name}. Available benchmarks: {available}" - ) - return _BENCHMARK_REGISTRY[name] - - -def list_benchmarks() -> List[tuple[str, str]]: - """ - List all available benchmarks. 
- - Returns: - List of (name, description) tuples - """ - benchmarks = [] - for name in sorted(_BENCHMARK_REGISTRY.keys()): - benchmark_cls = _BENCHMARK_REGISTRY[name] - benchmarks.append((name, benchmark_cls.description())) - return benchmarks - - -__all__ = [ - 'Benchmark', - 'get_benchmark', - 'list_benchmarks', - 'ShuffleHashBenchmark', - 'ShuffleRoundRobinBenchmark', -] diff --git a/benchmarks/pyspark/benchmarks/base.py b/benchmarks/pyspark/benchmarks/base.py deleted file mode 100644 index 7e8e8db5a9..0000000000 --- a/benchmarks/pyspark/benchmarks/base.py +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env python3 -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -Base benchmark class providing common functionality for all benchmarks. -""" - -import time -from abc import ABC, abstractmethod -from typing import Dict, Any - -from pyspark.sql import SparkSession - - -class Benchmark(ABC): - """Base class for all PySpark benchmarks.""" - - def __init__(self, spark: SparkSession, data_path: str, mode: str): - """ - Initialize benchmark. - - Args: - spark: SparkSession instance - data_path: Path to input data - mode: Execution mode (spark, jvm, native) - """ - self.spark = spark - self.data_path = data_path - self.mode = mode - - @classmethod - @abstractmethod - def name(cls) -> str: - """Return the benchmark name (used for CLI).""" - pass - - @classmethod - @abstractmethod - def description(cls) -> str: - """Return a short description of the benchmark.""" - pass - - @abstractmethod - def run(self) -> Dict[str, Any]: - """ - Run the benchmark and return results. - - Returns: - Dictionary containing benchmark results (must include 'duration_ms') - """ - pass - - def execute_timed(self) -> Dict[str, Any]: - """ - Execute the benchmark with timing and standard output. 
- - Returns: - Dictionary containing benchmark results - """ - print(f"\n{'=' * 80}") - print(f"Benchmark: {self.name()}") - print(f"Mode: {self.mode.upper()}") - print(f"{'=' * 80}") - print(f"Data path: {self.data_path}") - - # Print relevant Spark configuration - self._print_spark_config() - - # Clear cache before running - self.spark.catalog.clearCache() - - # Run the benchmark - print(f"\nRunning benchmark...") - results = self.run() - - # Print results - print(f"\nDuration: {results['duration_ms']:,} ms") - if 'row_count' in results: - print(f"Rows processed: {results['row_count']:,}") - - # Print any additional metrics - for key, value in results.items(): - if key not in ['duration_ms', 'row_count']: - print(f"{key}: {value}") - - print(f"{'=' * 80}\n") - - return results - - def _print_spark_config(self): - """Print relevant Spark configuration.""" - conf = self.spark.sparkContext.getConf() - print(f"Shuffle manager: {conf.get('spark.shuffle.manager', 'default')}") - print(f"Comet enabled: {conf.get('spark.comet.enabled', 'false')}") - print(f"Comet shuffle enabled: {conf.get('spark.comet.exec.shuffle.enabled', 'false')}") - print(f"Comet shuffle mode: {conf.get('spark.comet.shuffle.mode', 'not set')}") - print(f"Spark UI: {self.spark.sparkContext.uiWebUrl}") - - def _time_operation(self, operation_fn): - """ - Time an operation and return duration in milliseconds. - - Args: - operation_fn: Function to time (takes no arguments) - - Returns: - Duration in milliseconds - """ - start_time = time.time() - operation_fn() - duration_ms = int((time.time() - start_time) * 1000) - return duration_ms diff --git a/benchmarks/pyspark/benchmarks/shuffle.py b/benchmarks/pyspark/benchmarks/shuffle.py deleted file mode 100644 index 0facd2340d..0000000000 --- a/benchmarks/pyspark/benchmarks/shuffle.py +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env python3 -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -Shuffle benchmarks for comparing shuffle file sizes and performance. - -These benchmarks test different partitioning strategies (hash, round-robin) -across Spark, Comet JVM, and Comet Native shuffle implementations. -""" - -from typing import Dict, Any -from pyspark.sql import DataFrame - -from .base import Benchmark - - -class ShuffleBenchmark(Benchmark): - """Base class for shuffle benchmarks with common repartitioning logic.""" - - def __init__(self, spark, data_path: str, mode: str, num_partitions: int = 200): - """ - Initialize shuffle benchmark. 
- - Args: - spark: SparkSession instance - data_path: Path to input parquet data - mode: Execution mode (spark, jvm, native) - num_partitions: Number of partitions to shuffle to - """ - super().__init__(spark, data_path, mode) - self.num_partitions = num_partitions - - def _read_and_count(self) -> tuple[DataFrame, int]: - """Read input data and count rows.""" - df = self.spark.read.parquet(self.data_path) - row_count = df.count() - return df, row_count - - def _repartition(self, df: DataFrame) -> DataFrame: - """ - Repartition dataframe using the strategy defined by subclass. - - Args: - df: Input dataframe - - Returns: - Repartitioned dataframe - """ - raise NotImplementedError("Subclasses must implement _repartition") - - def _write_output(self, df: DataFrame, output_path: str): - """Write repartitioned data to parquet.""" - df.write.mode("overwrite").parquet(output_path) - - def run(self) -> Dict[str, Any]: - """ - Run the shuffle benchmark. - - Returns: - Dictionary with duration_ms and row_count - """ - # Read input data - df, row_count = self._read_and_count() - print(f"Number of rows: {row_count:,}") - - # Define the benchmark operation - def benchmark_operation(): - # Repartition using the specific strategy - repartitioned = self._repartition(df) - - # Write to parquet to force materialization - output_path = f"/tmp/shuffle-benchmark-output-{self.mode}-{self.name()}" - self._write_output(repartitioned, output_path) - print(f"Wrote repartitioned data to: {output_path}") - - # Time the operation - duration_ms = self._time_operation(benchmark_operation) - - return { - 'duration_ms': duration_ms, - 'row_count': row_count, - 'num_partitions': self.num_partitions, - } - - -class ShuffleHashBenchmark(ShuffleBenchmark): - """Shuffle benchmark using hash partitioning on a key column.""" - - @classmethod - def name(cls) -> str: - return "shuffle-hash" - - @classmethod - def description(cls) -> str: - return "Shuffle all columns using hash partitioning on group_key" - - def _repartition(self, df: DataFrame) -> DataFrame: - """Repartition using hash partitioning on group_key.""" - return df.repartition(self.num_partitions, "group_key") - - -class ShuffleRoundRobinBenchmark(ShuffleBenchmark): - """Shuffle benchmark using round-robin partitioning.""" - - @classmethod - def name(cls) -> str: - return "shuffle-roundrobin" - - @classmethod - def description(cls) -> str: - return "Shuffle all columns using round-robin partitioning" - - def _repartition(self, df: DataFrame) -> DataFrame: - """Repartition using round-robin (no partition columns specified).""" - return df.repartition(self.num_partitions) diff --git a/benchmarks/pyspark/run_all_benchmarks.sh b/benchmarks/pyspark/run_all_benchmarks.sh deleted file mode 100755 index 81eb044884..0000000000 --- a/benchmarks/pyspark/run_all_benchmarks.sh +++ /dev/null @@ -1,120 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -# Run all shuffle benchmarks (Spark, Comet JVM, Comet Native) -# Check the Spark UI during each run to compare shuffle sizes - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -DATA_PATH="${1:-/tmp/shuffle-benchmark-data}" -COMET_JAR="${COMET_JAR:-$SCRIPT_DIR/../../spark/target/comet-spark-spark3.5_2.12-0.14.0-SNAPSHOT.jar}" -SPARK_MASTER="${SPARK_MASTER:-local[*]}" -EXECUTOR_MEMORY="${EXECUTOR_MEMORY:-16g}" -EVENT_LOG_DIR="${EVENT_LOG_DIR:-/tmp/spark-events}" - -# Create event log directory -mkdir -p "$EVENT_LOG_DIR" - -echo "========================================" -echo "Shuffle Size Comparison Benchmark" -echo "========================================" -echo "Data path: $DATA_PATH" -echo "Comet JAR: $COMET_JAR" -echo "Spark master: $SPARK_MASTER" -echo "Executor memory: $EXECUTOR_MEMORY" -echo "Event log dir: $EVENT_LOG_DIR" -echo "========================================" - -# Run Spark baseline (no Comet) -echo "" -echo ">>> Running SPARK shuffle benchmark..." -$SPARK_HOME/bin/spark-submit \ - --master "$SPARK_MASTER" \ - --executor-memory "$EXECUTOR_MEMORY" \ - --conf spark.eventLog.enabled=true \ - --conf spark.eventLog.dir="$EVENT_LOG_DIR" \ - --conf spark.comet.enabled=false \ - --conf spark.comet.exec.shuffle.enabled=false \ - "$SCRIPT_DIR/run_benchmark.py" \ - --data "$DATA_PATH" \ - --mode spark - -# Run Comet JVM shuffle -echo "" -echo ">>> Running COMET JVM shuffle benchmark..." -$SPARK_HOME/bin/spark-submit \ - --master "$SPARK_MASTER" \ - --executor-memory "$EXECUTOR_MEMORY" \ - --jars "$COMET_JAR" \ - --driver-class-path "$COMET_JAR" \ - --conf spark.executor.extraClassPath="$COMET_JAR" \ - --conf spark.eventLog.enabled=true \ - --conf spark.eventLog.dir="$EVENT_LOG_DIR" \ - --conf spark.memory.offHeap.enabled=true \ - --conf spark.memory.offHeap.size=16g \ - --conf spark.comet.enabled=true \ - --conf spark.comet.operator.DataWritingCommandExec.allowIncompatible=true \ - --conf spark.comet.parquet.write.enabled=true \ - --conf spark.comet.logFallbackReasons.enabled=true \ - --conf spark.comet.explainFallback.enabled=true \ - --conf spark.comet.shuffle.mode=jvm \ - --conf spark.comet.exec.shuffle.mode=jvm \ - --conf spark.comet.exec.replaceSortMergeJoin=true \ - --conf spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager \ - --conf spark.sql.extensions=org.apache.comet.CometSparkSessionExtensions \ - --conf spark.comet.cast.allowIncompatible=true \ - "$SCRIPT_DIR/run_benchmark.py" \ - --data "$DATA_PATH" \ - --mode jvm - -# Run Comet Native shuffle -echo "" -echo ">>> Running COMET NATIVE shuffle benchmark..." 
-$SPARK_HOME/bin/spark-submit \ - --master "$SPARK_MASTER" \ - --executor-memory "$EXECUTOR_MEMORY" \ - --jars "$COMET_JAR" \ - --driver-class-path "$COMET_JAR" \ - --conf spark.executor.extraClassPath="$COMET_JAR" \ - --conf spark.eventLog.enabled=true \ - --conf spark.eventLog.dir="$EVENT_LOG_DIR" \ - --conf spark.memory.offHeap.enabled=true \ - --conf spark.memory.offHeap.size=16g \ - --conf spark.comet.enabled=true \ - --conf spark.comet.operator.DataWritingCommandExec.allowIncompatible=true \ - --conf spark.comet.parquet.write.enabled=true \ - --conf spark.comet.logFallbackReasons.enabled=true \ - --conf spark.comet.explainFallback.enabled=true \ - --conf spark.comet.exec.shuffle.mode=native \ - --conf spark.comet.exec.replaceSortMergeJoin=true \ - --conf spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager \ - --conf spark.sql.extensions=org.apache.comet.CometSparkSessionExtensions \ - --conf spark.comet.cast.allowIncompatible=true \ - "$SCRIPT_DIR/run_benchmark.py" \ - --data "$DATA_PATH" \ - --mode native - -echo "" -echo "========================================" -echo "BENCHMARK COMPLETE" -echo "========================================" -echo "Event logs written to: $EVENT_LOG_DIR" -echo "" diff --git a/benchmarks/pyspark/run_benchmark.py b/benchmarks/pyspark/run_benchmark.py deleted file mode 100755 index 6713f0ff21..0000000000 --- a/benchmarks/pyspark/run_benchmark.py +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env python3 -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -Run PySpark benchmarks. - -Run benchmarks by name with appropriate spark-submit configs for different modes -(spark, jvm, native). Check the Spark UI to compare results between modes. 
-""" - -import argparse -import sys - -from pyspark.sql import SparkSession - -from benchmarks import get_benchmark, list_benchmarks - - -def main(): - parser = argparse.ArgumentParser( - description="Run PySpark benchmarks", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - # Run hash partitioning shuffle benchmark in Spark mode - python run_benchmark.py --data /path/to/data --mode spark --benchmark shuffle-hash - - # Run round-robin shuffle benchmark in Comet native mode - python run_benchmark.py --data /path/to/data --mode native --benchmark shuffle-roundrobin - - # List all available benchmarks - python run_benchmark.py --list-benchmarks - """ - ) - parser.add_argument( - "--data", "-d", - help="Path to input parquet data" - ) - parser.add_argument( - "--mode", "-m", - choices=["spark", "jvm", "native"], - help="Shuffle mode being tested" - ) - parser.add_argument( - "--benchmark", "-b", - default="shuffle-hash", - help="Benchmark to run (default: shuffle-hash)" - ) - parser.add_argument( - "--list-benchmarks", - action="store_true", - help="List all available benchmarks and exit" - ) - - args = parser.parse_args() - - # Handle --list-benchmarks - if args.list_benchmarks: - print("Available benchmarks:\n") - for name, description in list_benchmarks(): - print(f" {name:25s} - {description}") - return 0 - - # Validate required arguments - if not args.data: - parser.error("--data is required when running a benchmark") - if not args.mode: - parser.error("--mode is required when running a benchmark") - - # Get the benchmark class - try: - benchmark_cls = get_benchmark(args.benchmark) - except KeyError as e: - print(f"Error: {e}", file=sys.stderr) - print("\nUse --list-benchmarks to see available benchmarks", file=sys.stderr) - return 1 - - # Create Spark session - spark = SparkSession.builder \ - .appName(f"{benchmark_cls.name()}-{args.mode.upper()}") \ - .getOrCreate() - - try: - # Create and run the benchmark - benchmark = benchmark_cls(spark, args.data, args.mode) - results = benchmark.execute_timed() - - print("\nCheck Spark UI for shuffle sizes and detailed metrics") - return 0 - - finally: - spark.stop() - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/benchmarks/queries/tpcds/q1.sql b/benchmarks/queries/tpcds/q1.sql new file mode 100644 index 0000000000..00328875ab --- /dev/null +++ b/benchmarks/queries/tpcds/q1.sql @@ -0,0 +1,26 @@ +-- SQLBench-DS query 1 derived from TPC-DS query 1 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_RETURN_AMT_INC_TAX) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =1999 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'TN' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q10.sql b/benchmarks/queries/tpcds/q10.sql new file mode 100644 index 0000000000..3a47920e04 --- /dev/null +++ b/benchmarks/queries/tpcds/q10.sql @@ -0,0 +1,60 @@ +-- SQLBench-DS query 10 derived from TPC-DS query 10 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Clinton County','Platte County','Franklin County','Louisa County','Harmon County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 3 and 3+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 3 ANd 3+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 3 and 3+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q11.sql b/benchmarks/queries/tpcds/q11.sql new file mode 100644 index 0000000000..7ffd3094f9 --- /dev/null +++ b/benchmarks/queries/tpcds/q11.sql @@ -0,0 +1,82 @@ +-- SQLBench-DS query 11 derived from TPC-DS query 11 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
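+-- Finds customers whose web sales grew faster than their store sales from 1999 to 2000.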
+with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_email_address + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 1999 + and t_s_secyear.dyear = 1999+1 + and t_w_firstyear.dyear = 1999 + and t_w_secyear.dyear = 1999+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_email_address + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q12.sql b/benchmarks/queries/tpcds/q12.sql new file mode 100644 index 0000000000..eb267ca64b --- /dev/null +++ b/benchmarks/queries/tpcds/q12.sql @@ -0,0 +1,35 @@ +-- SQLBench-DS query 12 derived from TPC-DS query 12 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
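+-- Reports web sales revenue per item for the Jewelry, Books, and Women categories over a 30-day window, including each item's share of its class revenue.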
+select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Jewelry', 'Books', 'Women') + and ws_sold_date_sk = d_date_sk + and d_date between cast('2002-03-22' as date) + and (cast('2002-03-22' as date) + INTERVAL '30 DAYS') +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q13.sql b/benchmarks/queries/tpcds/q13.sql new file mode 100644 index 0000000000..31b1171b9e --- /dev/null +++ b/benchmarks/queries/tpcds/q13.sql @@ -0,0 +1,53 @@ +-- SQLBench-DS query 13 derived from TPC-DS query 13 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = '4 yr Degree' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'S' + and cd_education_status = 'Unknown' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = '2 yr Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('CO', 'MI', 'MN') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('NC', 'NY', 'TX') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('CA', 'NE', 'TN') + and ss_net_profit between 50 and 250 + )) +; + diff --git a/benchmarks/queries/tpcds/q14.sql b/benchmarks/queries/tpcds/q14.sql new file mode 100644 index 0000000000..119791f59d --- /dev/null +++ b/benchmarks/queries/tpcds/q14.sql @@ -0,0 +1,211 @@ +-- SQLBench-DS query 14 derived from TPC-DS query 14 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
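+-- Two statements: (1) November 2001 sales by channel for items sold through all three channels, keeping groups above the overall average; (2) store sales for those items in the same December week of 2000 vs 1999.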
+with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + LIMIT 100; +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = 
ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as +(select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2) x) + select this_year.channel ty_channel + ,this_year.i_brand_id ty_brand + ,this_year.i_class_id ty_class + ,this_year.i_category_id ty_category + ,this_year.sales ty_sales + ,this_year.number_sales ty_number_sales + ,last_year.channel ly_channel + ,last_year.i_brand_id ly_brand + ,last_year.i_class_id ly_class + ,last_year.i_category_id ly_category + ,last_year.sales ly_sales + ,last_year.number_sales ly_number_sales + from + (select 'store' channel, i_brand_id,i_class_id,i_category_id + ,sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1999 + 1 + and d_moy = 12 + and d_dom = 14) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) this_year, + (select 'store' channel, i_brand_id,i_class_id + ,i_category_id, sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1999 + and d_moy = 12 + and d_dom = 14) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) last_year + where this_year.i_brand_id= last_year.i_brand_id + and this_year.i_class_id = last_year.i_class_id + and this_year.i_category_id = last_year.i_category_id + order by this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q15.sql b/benchmarks/queries/tpcds/q15.sql new file mode 100644 index 0000000000..bb1812a07c --- /dev/null +++ b/benchmarks/queries/tpcds/q15.sql @@ -0,0 +1,21 @@ +-- SQLBench-DS query 15 derived from TPC-DS query 15 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
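+-- Reports Q2 2002 catalog sales totals by customer zip code for selected zips, selected states, or sales over 500.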
+select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2002 + group by ca_zip + order by ca_zip + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q16.sql b/benchmarks/queries/tpcds/q16.sql new file mode 100644 index 0000000000..2e0f9a9922 --- /dev/null +++ b/benchmarks/queries/tpcds/q16.sql @@ -0,0 +1,32 @@ +-- SQLBench-DS query 16 derived from TPC-DS query 16 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '1999-5-01' and + (cast('1999-5-01' as date) + INTERVAL '60 DAYS') +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'ID' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Williamson County','Williamson County','Williamson County','Williamson County', + 'Williamson County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q17.sql b/benchmarks/queries/tpcds/q17.sql new file mode 100644 index 0000000000..9f9e97d76e --- /dev/null +++ b/benchmarks/queries/tpcds/q17.sql @@ -0,0 +1,46 @@ +-- SQLBench-DS query 17 derived from TPC-DS query 17 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
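+-- Computes count/mean/stddev of quantities for items bought in store in 1999Q1, returned, and re-purchased through the catalog within 1999Q1-Q3, by item and store state.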
+select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as store_returns_quantitycount + ,avg(sr_return_quantity) as store_returns_quantityave + ,stddev_samp(sr_return_quantity) as store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '1999Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('1999Q1','1999Q2','1999Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('1999Q1','1999Q2','1999Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q18.sql b/benchmarks/queries/tpcds/q18.sql new file mode 100644 index 0000000000..50cc6c63f2 --- /dev/null +++ b/benchmarks/queries/tpcds/q18.sql @@ -0,0 +1,35 @@ +-- SQLBench-DS query 18 derived from TPC-DS query 18 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as decimal(12,2))) agg1, + avg( cast(cs_list_price as decimal(12,2))) agg2, + avg( cast(cs_coupon_amt as decimal(12,2))) agg3, + avg( cast(cs_sales_price as decimal(12,2))) agg4, + avg( cast(cs_net_profit as decimal(12,2))) agg5, + avg( cast(c_birth_year as decimal(12,2))) agg6, + avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'Primary' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (1,2,9,5,11,3) and + d_year = 1998 and + ca_state in ('MS','NE','IA' + ,'MI','GA','NY','CO') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q19.sql b/benchmarks/queries/tpcds/q19.sql new file mode 100644 index 0000000000..bf54b3b802 --- /dev/null +++ b/benchmarks/queries/tpcds/q19.sql @@ -0,0 +1,26 @@ +-- SQLBench-DS query 19 derived from TPC-DS query 19 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
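+-- Reports November 1999 store revenue by brand and manufacturer (manager 8), where the customer's zip code differs from the store's.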
+select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=8 + and d_moy=11 + and d_year=1999 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + LIMIT 100 ; + diff --git a/benchmarks/queries/tpcds/q2.sql b/benchmarks/queries/tpcds/q2.sql new file mode 100644 index 0000000000..838717836b --- /dev/null +++ b/benchmarks/queries/tpcds/q2.sql @@ -0,0 +1,61 @@ +-- SQLBench-DS query 2 derived from TPC-DS query 2 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales + union all + select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2000) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2000+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1; + diff --git a/benchmarks/queries/tpcds/q20.sql b/benchmarks/queries/tpcds/q20.sql new file mode 100644 index 0000000000..ea4747317d --- /dev/null +++ b/benchmarks/queries/tpcds/q20.sql @@ -0,0 +1,31 @@ +-- SQLBench-DS query 20 derived from TPC-DS query 20 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
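+-- Reports catalog sales revenue per item for the Children, Sports, and Music categories over a 30-day window, including each item's share of its class revenue.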
+select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Children', 'Sports', 'Music') + and cs_sold_date_sk = d_date_sk + and d_date between cast('2002-04-01' as date) + and (cast('2002-04-01' as date) + INTERVAL '30 DAYS') + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q21.sql b/benchmarks/queries/tpcds/q21.sql new file mode 100644 index 0000000000..d768fa1428 --- /dev/null +++ b/benchmarks/queries/tpcds/q21.sql @@ -0,0 +1,31 @@ +-- SQLBench-DS query 21 derived from TPC-DS query 21 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('2000-05-19' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('2000-05-19' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('2000-05-19' as date) - INTERVAL '30 DAYS') + and (cast ('2000-05-19' as date) + INTERVAL '30 DAYS') + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q22.sql b/benchmarks/queries/tpcds/q22.sql new file mode 100644 index 0000000000..c7e1c78181 --- /dev/null +++ b/benchmarks/queries/tpcds/q22.sql @@ -0,0 +1,21 @@ +-- SQLBench-DS query 22 derived from TPC-DS query 22 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and d_month_seq between 1201 and 1201 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q23.sql b/benchmarks/queries/tpcds/q23.sql new file mode 100644 index 0000000000..0dc7f73859 --- /dev/null +++ b/benchmarks/queries/tpcds/q23.sql @@ -0,0 +1,108 @@ +-- SQLBench-DS query 23 derived from TPC-DS query 23 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
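+-- Two statements: March 2000 catalog and web sales of frequently-sold store items to the top store customers, (1) as a grand total and (2) broken out by customer name.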
+with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from (select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 2000 + and d_moy = 3 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + union all + select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 2000 + and d_moy = 3 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) + LIMIT 100; +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000 + 1,2000 + 2,2000 + 3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * + from max_store_sales)) + select c_last_name,c_first_name,sales + from (select c_last_name,c_first_name,sum(cs_quantity*cs_list_price) sales + from catalog_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 3 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and cs_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name + union all + select c_last_name,c_first_name,sum(ws_quantity*ws_list_price) sales + from web_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 3 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and ws_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name) + order by c_last_name,c_first_name,sales + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q24.sql b/benchmarks/queries/tpcds/q24.sql new file mode 100644 index 0000000000..5d6d2f5053 --- /dev/null +++ 
b/benchmarks/queries/tpcds/q24.sql @@ -0,0 +1,108 @@ +-- SQLBench-DS query 24 derived from TPC-DS query 24 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_net_profit) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip +and s_market_id=10 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'orchid' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name +; +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_net_profit) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip + and s_market_id = 10 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'green' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name +; + diff --git a/benchmarks/queries/tpcds/q25.sql b/benchmarks/queries/tpcds/q25.sql new file mode 100644 index 0000000000..b0af0e61dd --- /dev/null +++ b/benchmarks/queries/tpcds/q25.sql @@ -0,0 +1,49 @@ +-- SQLBench-DS query 25 derived from TPC-DS query 25 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
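+-- Reports minimum store profit, return loss, and catalog profit for items bought in April 2002, returned, and re-purchased through the catalog between April and October 2002, by item and store.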
+select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,min(ss_net_profit) as store_sales_profit + ,min(sr_net_loss) as store_returns_loss + ,min(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2002 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2002 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2002 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q26.sql b/benchmarks/queries/tpcds/q26.sql new file mode 100644 index 0000000000..55ccc8b511 --- /dev/null +++ b/benchmarks/queries/tpcds/q26.sql @@ -0,0 +1,22 @@ +-- SQLBench-DS query 26 derived from TPC-DS query 26 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'M' and + cd_education_status = '4 yr Degree' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2000 + group by i_item_id + order by i_item_id + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q27.sql b/benchmarks/queries/tpcds/q27.sql new file mode 100644 index 0000000000..6d28e4e663 --- /dev/null +++ b/benchmarks/queries/tpcds/q27.sql @@ -0,0 +1,24 @@ +-- SQLBench-DS query 27 derived from TPC-DS query 27 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = 'Secondary' and + d_year = 2000 and + s_state in ('TN','TN', 'TN', 'TN', 'TN', 'TN') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q28.sql b/benchmarks/queries/tpcds/q28.sql new file mode 100644 index 0000000000..6efa7d7d77 --- /dev/null +++ b/benchmarks/queries/tpcds/q28.sql @@ -0,0 +1,54 @@ +-- SQLBench-DS query 28 derived from TPC-DS query 28 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
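+-- Average, count, and distinct count of ss_list_price over six ss_quantity buckets (0-5 through 26-30), each bucket with its own list-price/coupon-amount/wholesale-cost range filter.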
+select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 28 and 28+10 + or ss_coupon_amt between 12573 and 12573+1000 + or ss_wholesale_cost between 33 and 33+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 143 and 143+10 + or ss_coupon_amt between 5562 and 5562+1000 + or ss_wholesale_cost between 45 and 45+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 159 and 159+10 + or ss_coupon_amt between 2807 and 2807+1000 + or ss_wholesale_cost between 24 and 24+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 24 and 24+10 + or ss_coupon_amt between 3706 and 3706+1000 + or ss_wholesale_cost between 46 and 46+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 76 and 76+10 + or ss_coupon_amt between 2096 and 2096+1000 + or ss_wholesale_cost between 50 and 50+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 169 and 169+10 + or ss_coupon_amt between 10672 and 10672+1000 + or ss_wholesale_cost between 58 and 58+20)) B6 + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q29.sql b/benchmarks/queries/tpcds/q29.sql new file mode 100644 index 0000000000..8d463f3771 --- /dev/null +++ b/benchmarks/queries/tpcds/q29.sql @@ -0,0 +1,48 @@ +-- SQLBench-DS query 29 derived from TPC-DS query 29 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
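+-- Sample standard deviation of store-sales, store-returns, and catalog-sales quantities per item and store: sold in April 1999, returned April-July 1999, and re-purchased via catalog during 1999-2001.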
+select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,stddev_samp(ss_quantity) as store_sales_quantity + ,stddev_samp(sr_return_quantity) as store_returns_quantity + ,stddev_samp(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1999 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1999,1999+1,1999+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q3.sql b/benchmarks/queries/tpcds/q3.sql new file mode 100644 index 0000000000..d6a55cb8cf --- /dev/null +++ b/benchmarks/queries/tpcds/q3.sql @@ -0,0 +1,22 @@ +-- SQLBench-DS query 3 derived from TPC-DS query 3 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_net_profit) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 445 + and dt.d_moy=12 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q30.sql b/benchmarks/queries/tpcds/q30.sql new file mode 100644 index 0000000000..7004078a50 --- /dev/null +++ b/benchmarks/queries/tpcds/q30.sql @@ -0,0 +1,32 @@ +-- SQLBench-DS query 30 derived from TPC-DS query 30 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
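+-- Customers (current address in KS) whose total year-2000 web-return amount exceeds 1.2 times the average total for their return-address state.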
+with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2000 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date_sk,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'KS' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date_sk,ctr_total_return + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q31.sql b/benchmarks/queries/tpcds/q31.sql new file mode 100644 index 0000000000..89aba18998 --- /dev/null +++ b/benchmarks/queries/tpcds/q31.sql @@ -0,0 +1,53 @@ +-- SQLBench-DS query 31 derived from TPC-DS query 31 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select + ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 1999 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 1999 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 1999 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 1999 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 1999 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =1999 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by ss1.ca_county; + diff --git a/benchmarks/queries/tpcds/q32.sql b/benchmarks/queries/tpcds/q32.sql new file mode 100644 index 0000000000..419dcd0b05 --- /dev/null +++ b/benchmarks/queries/tpcds/q32.sql @@ -0,0 +1,29 @@ +-- SQLBench-DS query 32 derived from TPC-DS query 32 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
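+-- "Excess discount amount": total catalog discount for manufacturer 283 items over a 90-day window, counting only sales whose discount exceeds 1.3 times the item's average discount in the same window.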
+select sum(cs_ext_discount_amt) as `excess discount amount` +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 283 +and i_item_sk = cs_item_sk +and d_date between '1999-02-22' and + (cast('1999-02-22' as date) + INTERVAL '90 DAYS') +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '1999-02-22' and + (cast('1999-02-22' as date) + INTERVAL '90 DAYS') + and d_date_sk = cs_sold_date_sk + ) + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q33.sql b/benchmarks/queries/tpcds/q33.sql new file mode 100644 index 0000000000..1aabc472b7 --- /dev/null +++ b/benchmarks/queries/tpcds/q33.sql @@ -0,0 +1,76 @@ +-- SQLBench-DS query 33 derived from TPC-DS query 33 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 4 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 4 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Books')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 4 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q34.sql b/benchmarks/queries/tpcds/q34.sql new file mode 100644 index 0000000000..f61caa51a3 --- /dev/null +++ b/benchmarks/queries/tpcds/q34.sql @@ -0,0 +1,32 @@ +-- SQLBench-DS query 34 derived from TPC-DS query 34 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
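+-- Store tickets containing 15-20 items, limited to month-start/month-end days in 2000-2002 at Williamson County stores, for households with selected buy potential and a dependents-per-vehicle ratio above 1.2.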
+select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '501-1000' or + household_demographics.hd_buy_potential = 'Unknown') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_county in ('Williamson County','Williamson County','Williamson County','Williamson County', + 'Williamson County','Williamson County','Williamson County','Williamson County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number; + diff --git a/benchmarks/queries/tpcds/q35.sql b/benchmarks/queries/tpcds/q35.sql new file mode 100644 index 0000000000..ba0ccf3667 --- /dev/null +++ b/benchmarks/queries/tpcds/q35.sql @@ -0,0 +1,59 @@ +-- SQLBench-DS query 35 derived from TPC-DS query 35 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + count(*) cnt1, + max(cd_dep_count), + stddev_samp(cd_dep_count), + stddev_samp(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + max(cd_dep_employed_count), + stddev_samp(cd_dep_employed_count), + stddev_samp(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + max(cd_dep_college_count), + stddev_samp(cd_dep_college_count), + stddev_samp(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2000 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2000 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2000 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q36.sql b/benchmarks/queries/tpcds/q36.sql new file mode 100644 index 0000000000..889fff5d14 --- /dev/null +++ b/benchmarks/queries/tpcds/q36.sql @@ -0,0 +1,31 @@ +-- SQLBench-DS query 36 derived from TPC-DS query 36 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
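+-- Gross margin (net profit / extended sales price) over a ROLLUP of item category and class for 2001 TN stores, ranked within each grouping level.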
+select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 2001 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('TN','TN','TN','TN', + 'TN','TN','TN','TN') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q37.sql b/benchmarks/queries/tpcds/q37.sql new file mode 100644 index 0000000000..bdd12dc82e --- /dev/null +++ b/benchmarks/queries/tpcds/q37.sql @@ -0,0 +1,18 @@ +-- SQLBench-DS query 37 derived from TPC-DS query 37 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where i_current_price between 26 and 26 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2001-06-09' as date) and (cast('2001-06-09' as date) + INTERVAL '60 DAYS') + and i_manufact_id in (744,884,722,693) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q38.sql b/benchmarks/queries/tpcds/q38.sql new file mode 100644 index 0000000000..03e4e07635 --- /dev/null +++ b/benchmarks/queries/tpcds/q38.sql @@ -0,0 +1,24 @@ +-- SQLBench-DS query 38 derived from TPC-DS query 38 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1190 and 1190 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1190 and 1190 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1190 and 1190 + 11 +) hot_cust + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q39.sql b/benchmarks/queries/tpcds/q39.sql new file mode 100644 index 0000000000..f49c223eba --- /dev/null +++ b/benchmarks/queries/tpcds/q39.sql @@ -0,0 +1,55 @@ +-- SQLBench-DS query 39 derived from TPC-DS query 39 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
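+-- Two statements over the same CTE: monthly inventory coefficient of variation (stddev/mean) per warehouse and item for 2001, self-joined to pair January with February; the second statement adds the filter inv1.cov > 1.5.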
+with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =2001 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=1 + and inv2.d_moy=1+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +; +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =2001 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=1 + and inv2.d_moy=1+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,inv2.d_moy,inv2.mean, inv2.cov +; + diff --git a/benchmarks/queries/tpcds/q4.sql b/benchmarks/queries/tpcds/q4.sql new file mode 100644 index 0000000000..08643201a5 --- /dev/null +++ b/benchmarks/queries/tpcds/q4.sql @@ -0,0 +1,117 @@ +-- SQLBench-DS query 4 derived from TPC-DS query 4 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
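+-- Customers whose 2001-to-2002 growth in catalog-channel spending exceeds both their store and web growth, computed from a three-way UNION ALL year_total CTE self-joined six times.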
+with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_email_address + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_c_firstyear.dyear = 2001 + and t_c_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 
then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_email_address + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q40.sql b/benchmarks/queries/tpcds/q40.sql new file mode 100644 index 0000000000..7f54a9bbdf --- /dev/null +++ b/benchmarks/queries/tpcds/q40.sql @@ -0,0 +1,29 @@ +-- SQLBench-DS query 40 derived from TPC-DS query 40 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('2002-05-18' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('2002-05-18' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between (cast ('2002-05-18' as date) - INTERVAL '30 DAYS') + and (cast ('2002-05-18' as date) + INTERVAL '30 DAYS') + group by + w_state,i_item_id + order by w_state,i_item_id + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q41.sql b/benchmarks/queries/tpcds/q41.sql new file mode 100644 index 0000000000..d561cdba50 --- /dev/null +++ b/benchmarks/queries/tpcds/q41.sql @@ -0,0 +1,53 @@ +-- SQLBench-DS query 41 derived from TPC-DS query 41 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
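+-- Distinct product names within a fixed manufacturer ID range where the same manufacturer has at least one item matching one of eight category/color/units/size combinations.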
+select distinct(i_product_name) + from item i1 + where i_manufact_id between 668 and 668+40 + and (select count(*) as item_cnt + from item + where (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'cream' or i_color = 'ghost') and + (i_units = 'Ton' or i_units = 'Gross') and + (i_size = 'economy' or i_size = 'small') + ) or + (i_category = 'Women' and + (i_color = 'midnight' or i_color = 'burlywood') and + (i_units = 'Tsp' or i_units = 'Bundle') and + (i_size = 'medium' or i_size = 'extra large') + ) or + (i_category = 'Men' and + (i_color = 'lavender' or i_color = 'azure') and + (i_units = 'Each' or i_units = 'Lb') and + (i_size = 'large' or i_size = 'N/A') + ) or + (i_category = 'Men' and + (i_color = 'chocolate' or i_color = 'steel') and + (i_units = 'N/A' or i_units = 'Dozen') and + (i_size = 'economy' or i_size = 'small') + ))) or + (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'floral' or i_color = 'royal') and + (i_units = 'Unknown' or i_units = 'Tbl') and + (i_size = 'economy' or i_size = 'small') + ) or + (i_category = 'Women' and + (i_color = 'navy' or i_color = 'forest') and + (i_units = 'Bunch' or i_units = 'Dram') and + (i_size = 'medium' or i_size = 'extra large') + ) or + (i_category = 'Men' and + (i_color = 'cyan' or i_color = 'indian') and + (i_units = 'Carton' or i_units = 'Cup') and + (i_size = 'large' or i_size = 'N/A') + ) or + (i_category = 'Men' and + (i_color = 'coral' or i_color = 'pale') and + (i_units = 'Pallet' or i_units = 'Gram') and + (i_size = 'economy' or i_size = 'small') + )))) > 0 + order by i_product_name + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q42.sql b/benchmarks/queries/tpcds/q42.sql new file mode 100644 index 0000000000..ac91e7cc2b --- /dev/null +++ b/benchmarks/queries/tpcds/q42.sql @@ -0,0 +1,23 @@ +-- SQLBench-DS query 42 derived from TPC-DS query 42 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category + LIMIT 100 ; + diff --git a/benchmarks/queries/tpcds/q43.sql b/benchmarks/queries/tpcds/q43.sql new file mode 100644 index 0000000000..ca09e8e77d --- /dev/null +++ b/benchmarks/queries/tpcds/q43.sql @@ -0,0 +1,20 @@ +-- SQLBench-DS query 43 derived from TPC-DS query 43 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
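+-- Year-2000 store sales pivoted into one column per weekday (sun_sales through sat_sales) for stores at GMT offset -5.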
+select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -5 and + d_year = 2000 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q44.sql b/benchmarks/queries/tpcds/q44.sql new file mode 100644 index 0000000000..8c635cef49 --- /dev/null +++ b/benchmarks/queries/tpcds/q44.sql @@ -0,0 +1,36 @@ +-- SQLBench-DS query 44 derived from TPC-DS query 44 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 6 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 6 + and ss_hdemo_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 6 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 6 + and ss_hdemo_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q45.sql b/benchmarks/queries/tpcds/q45.sql new file mode 100644 index 0000000000..682cc9b54d --- /dev/null +++ b/benchmarks/queries/tpcds/q45.sql @@ -0,0 +1,21 @@ +-- SQLBench-DS query 45 derived from TPC-DS query 45 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
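+-- Q2-2000 web sales summed by customer ZIP and city, restricted to selected five-digit ZIP prefixes or to purchases of a fixed set of items.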
+select ca_zip, ca_city, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip, ca_city + order by ca_zip, ca_city + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q46.sql b/benchmarks/queries/tpcds/q46.sql new file mode 100644 index 0000000000..81ae1d5815 --- /dev/null +++ b/benchmarks/queries/tpcds/q46.sql @@ -0,0 +1,36 @@ +-- SQLBench-DS query 46 derived from TPC-DS query 46 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 3 or + household_demographics.hd_vehicle_count= 1) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_city in ('Midway','Fairview','Fairview','Midway','Fairview') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q47.sql b/benchmarks/queries/tpcds/q47.sql new file mode 100644 index 0000000000..f741fe44cd --- /dev/null +++ b/benchmarks/queries/tpcds/q47.sql @@ -0,0 +1,52 @@ +-- SQLBench-DS query 47 derived from TPC-DS query 47 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
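+-- Item/brand/store months in 2001 whose sales deviate more than 10% from that year's monthly average, with the previous and next months' sales retrieved through a rank-based self-join.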
+with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 2001 or + ( d_year = 2001-1 and d_moy =12) or + ( d_year = 2001+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand, v1.s_store_name, v1.s_company_name + ,v1.d_year + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2001 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, nsum + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q48.sql b/benchmarks/queries/tpcds/q48.sql new file mode 100644 index 0000000000..fb83279b13 --- /dev/null +++ b/benchmarks/queries/tpcds/q48.sql @@ -0,0 +1,68 @@ +-- SQLBench-DS query 48 derived from TPC-DS query 48 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'W' + and + cd_education_status = '2 yr Degree' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'S' + and + cd_education_status = 'Advanced Degree' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'D' + and + cd_education_status = 'Primary' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('IL', 'KY', 'OR') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('VA', 'FL', 'AL') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('OK', 'IA', 'TX') + and ss_net_profit between 50 and 25000 + ) + ) +; + diff --git a/benchmarks/queries/tpcds/q49.sql b/benchmarks/queries/tpcds/q49.sql new file mode 100644 index 0000000000..c97286528b --- /dev/null +++ b/benchmarks/queries/tpcds/q49.sql @@ -0,0 +1,130 @@ +-- SQLBench-DS query 49 derived from TPC-DS query 49 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. 
+-- This query was generated at scale factor 1. +select channel, item, return_ratio, return_rank, currency_rank from + (select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + ) + order by 1,4,5,2 + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q5.sql b/benchmarks/queries/tpcds/q5.sql new file mode 100644 index 0000000000..4f2721634c --- /dev/null +++ b/benchmarks/queries/tpcds/q5.sql @@ -0,0 +1,129 
@@ +-- SQLBench-DS query 5 derived from TPC-DS query 5 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('2001-08-04' as date) + and (cast('2001-08-04' as date) + INTERVAL '14 DAYS') + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + where date_sk = d_date_sk + and d_date between cast('2001-08-04' as date) + and (cast('2001-08-04' as date) + INTERVAL '14 DAYS') + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('2001-08-04' as date) + and (cast('2001-08-04' as date) + INTERVAL '14 DAYS') + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || s_store_id as id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || cp_catalog_page_id as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by 
channel + ,id + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q50.sql b/benchmarks/queries/tpcds/q50.sql new file mode 100644 index 0000000000..d3dd26a156 --- /dev/null +++ b/benchmarks/queries/tpcds/q50.sql @@ -0,0 +1,60 @@ +-- SQLBench-DS query 50 derived from TPC-DS query 50 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 2002 +and d2.d_moy = 8 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q51.sql b/benchmarks/queries/tpcds/q51.sql new file mode 100644 index 0000000000..5aeb3087b4 --- /dev/null +++ b/benchmarks/queries/tpcds/q51.sql @@ -0,0 +1,46 @@ +-- SQLBench-DS query 51 derived from TPC-DS query 51 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
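+-- Running cumulative web vs. store sales per item over a 12-month sequence (FULL OUTER JOIN of the two channels), returning item/date pairs where the web cumulative total exceeds the store total.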
+WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1215 and 1215+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1215 and 1215+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q52.sql b/benchmarks/queries/tpcds/q52.sql new file mode 100644 index 0000000000..b4d032baec --- /dev/null +++ b/benchmarks/queries/tpcds/q52.sql @@ -0,0 +1,23 @@ +-- SQLBench-DS query 52 derived from TPC-DS query 52 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=2000 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id + LIMIT 100 ; + diff --git a/benchmarks/queries/tpcds/q53.sql b/benchmarks/queries/tpcds/q53.sql new file mode 100644 index 0000000000..4c87797741 --- /dev/null +++ b/benchmarks/queries/tpcds/q53.sql @@ -0,0 +1,29 @@ +-- SQLBench-DS query 53 derived from TPC-DS query 53 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
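+-- Manufacturers whose quarterly store sales deviate more than 10% from their average quarterly sales across a 12-month sequence, for selected category/class/brand combinations.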
+select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1197,1197+1,1197+2,1197+3,1197+4,1197+5,1197+6,1197+7,1197+8,1197+9,1197+10,1197+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q54.sql b/benchmarks/queries/tpcds/q54.sql new file mode 100644 index 0000000000..4b382e1abe --- /dev/null +++ b/benchmarks/queries/tpcds/q54.sql @@ -0,0 +1,57 @@ +-- SQLBench-DS query 54 derived from TPC-DS query 54 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Men' + and i_class = 'shirts' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 4 + and d_year = 1998 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 1998 and d_moy = 4) + and (select distinct d_month_seq+3 + from date_dim where d_year = 1998 and d_moy = 4) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q55.sql b/benchmarks/queries/tpcds/q55.sql new file mode 100644 index 0000000000..5dabcab05f --- /dev/null +++ b/benchmarks/queries/tpcds/q55.sql @@ -0,0 +1,15 @@ +-- SQLBench-DS query 55 derived from TPC-DS query 55 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
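+-- Extended store-sales price by brand for manager 20 in December 1998.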
+select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=20 + and d_moy=12 + and d_year=1998 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id + LIMIT 100 ; + diff --git a/benchmarks/queries/tpcds/q56.sql b/benchmarks/queries/tpcds/q56.sql new file mode 100644 index 0000000000..d877d0b8b9 --- /dev/null +++ b/benchmarks/queries/tpcds/q56.sql @@ -0,0 +1,70 @@ +-- SQLBench-DS query 56 derived from TPC-DS query 56 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('powder','goldenrod','bisque')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('powder','goldenrod','bisque')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('powder','goldenrod','bisque')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales, + i_item_id + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q57.sql b/benchmarks/queries/tpcds/q57.sql new file mode 100644 index 0000000000..088ddc9eeb --- /dev/null +++ b/benchmarks/queries/tpcds/q57.sql @@ -0,0 +1,49 @@ +-- SQLBench-DS query 57 derived from TPC-DS query 57 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
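+-- Catalog-channel counterpart of q47: item/brand/call-center months in 2000 deviating more than 10% from the yearly monthly average, with previous/next month sales via a rank-based self-join.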
+with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 2000 or + ( d_year = 2000-1 and d_moy =12) or + ( d_year = 2000+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.cc_name + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 2000 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, psum + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q58.sql b/benchmarks/queries/tpcds/q58.sql new file mode 100644 index 0000000000..05801ea4b3 --- /dev/null +++ b/benchmarks/queries/tpcds/q58.sql @@ -0,0 +1,66 @@ +-- SQLBench-DS query 58 derived from TPC-DS query 58 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2000-02-12')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2000-02-12')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '2000-02-12')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 
* ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q59.sql b/benchmarks/queries/tpcds/q59.sql new file mode 100644 index 0000000000..e10c0dbf61 --- /dev/null +++ b/benchmarks/queries/tpcds/q59.sql @@ -0,0 +1,45 @@ +-- SQLBench-DS query 59 derived from TPC-DS query 59 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1206 and 1206 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1206+ 12 and 1206 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q6.sql b/benchmarks/queries/tpcds/q6.sql new file mode 100644 index 0000000000..098db850c8 --- /dev/null +++ b/benchmarks/queries/tpcds/q6.sql @@ -0,0 +1,27 @@ +-- SQLBench-DS query 6 derived from TPC-DS query 6 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 1998 + and d_moy = 3 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt, a.ca_state + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q60.sql b/benchmarks/queries/tpcds/q60.sql new file mode 100644 index 0000000000..1e088c1605 --- /dev/null +++ b/benchmarks/queries/tpcds/q60.sql @@ -0,0 +1,79 @@ +-- SQLBench-DS query 60 derived from TPC-DS query 60 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 10 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 10 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Shoes')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 10 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q61.sql b/benchmarks/queries/tpcds/q61.sql new file mode 100644 index 0000000000..6d6c2a5fcb --- /dev/null +++ b/benchmarks/queries/tpcds/q61.sql @@ -0,0 +1,45 @@ +-- SQLBench-DS query 61 derived from TPC-DS query 61 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -6 + and i_category = 'Sports' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -6 + and d_year = 2002 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -6 + and i_category = 'Sports' + and s_gmt_offset = -6 + and d_year = 2002 + and d_moy = 11) all_sales +order by promotions, total + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q62.sql b/benchmarks/queries/tpcds/q62.sql new file mode 100644 index 0000000000..d0138e057b --- /dev/null +++ b/benchmarks/queries/tpcds/q62.sql @@ -0,0 +1,36 @@ +-- SQLBench-DS query 62 derived from TPC-DS query 62 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and + (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 60) and + (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and + (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + web_sales + ,warehouse + ,ship_mode + ,web_site + ,date_dim +where + d_month_seq between 1217 and 1217 + 11 +and ws_ship_date_sk = d_date_sk +and ws_warehouse_sk = w_warehouse_sk +and ws_ship_mode_sk = sm_ship_mode_sk +and ws_web_site_sk = web_site_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,web_name + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q63.sql b/benchmarks/queries/tpcds/q63.sql new file mode 100644 index 0000000000..3d85a2e38b --- /dev/null +++ b/benchmarks/queries/tpcds/q63.sql @@ -0,0 +1,30 @@ +-- SQLBench-DS query 63 derived from TPC-DS query 63 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1181,1181+1,1181+2,1181+3,1181+4,1181+5,1181+6,1181+7,1181+8,1181+9,1181+10,1181+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','reference','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q64.sql b/benchmarks/queries/tpcds/q64.sql new file mode 100644 index 0000000000..0350cdc7d0 --- /dev/null +++ b/benchmarks/queries/tpcds/q64.sql @@ -0,0 +1,122 @@ +-- SQLBench-DS query 64 derived from TPC-DS query 64 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + ,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_street_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and + c_first_sales_date_sk = d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('light','cyan','burnished','green','almond','smoke') and + 
i_current_price between 22 and 22 + 10 and + i_current_price between 22 + 1 and 22 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + ,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_street_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 as s11 + ,cs1.s2 as s21 + ,cs1.s3 as s31 + ,cs2.s1 as s12 + ,cs2.s2 as s22 + ,cs2.s3 as s32 + ,cs2.syear + ,cs2.cnt +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 2001 and + cs2.syear = 2001 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cs2.cnt + ,cs1.s1 + ,cs2.s1; + diff --git a/benchmarks/queries/tpcds/q65.sql b/benchmarks/queries/tpcds/q65.sql new file mode 100644 index 0000000000..0c13a0debf --- /dev/null +++ b/benchmarks/queries/tpcds/q65.sql @@ -0,0 +1,30 @@ +-- SQLBench-DS query 65 derived from TPC-DS query 65 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1186 and 1186+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1186 and 1186+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q66.sql b/benchmarks/queries/tpcds/q66.sql new file mode 100644 index 0000000000..ba066a561d --- /dev/null +++ b/benchmarks/queries/tpcds/q66.sql @@ -0,0 +1,221 @@ +-- SQLBench-DS query 66 derived from TPC-DS query 66 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + ,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'FEDEX' || ',' || 'GERMA' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_ext_list_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_ext_list_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_ext_list_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_ext_list_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_ext_list_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_ext_list_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_ext_list_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_ext_list_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_ext_list_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_ext_list_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_ext_list_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_ext_list_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_profit * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_profit * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_profit * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_profit * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_profit * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_profit * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_profit * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_profit * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_profit * ws_quantity else 0 end) as 
sep_net + ,sum(case when d_moy = 10 + then ws_net_profit * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_profit * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_profit * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2001 + and t_time between 19072 and 19072+28800 + and sm_carrier in ('FEDEX','GERMA') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + union all + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'FEDEX' || ',' || 'GERMA' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_sales_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_sales_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_sales_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_sales_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then cs_sales_price* cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_sales_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_sales_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_sales_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_sales_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_sales_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_sales_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then cs_sales_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_paid * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_paid * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_paid * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_paid * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2001 + and t_time between 19072 AND 19072+28800 + and sm_carrier in ('FEDEX','GERMA') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + LIMIT 100; + diff --git 
a/benchmarks/queries/tpcds/q67.sql b/benchmarks/queries/tpcds/q67.sql new file mode 100644 index 0000000000..7d684e6745 --- /dev/null +++ b/benchmarks/queries/tpcds/q67.sql @@ -0,0 +1,45 @@ +-- SQLBench-DS query 67 derived from TPC-DS query 67 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1194 and 1194+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q68.sql b/benchmarks/queries/tpcds/q68.sql new file mode 100644 index 0000000000..242e0dbd93 --- /dev/null +++ b/benchmarks/queries/tpcds/q68.sql @@ -0,0 +1,43 @@ +-- SQLBench-DS query 68 derived from TPC-DS query 68 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 8 or + household_demographics.hd_vehicle_count= 3) + and date_dim.d_year in (2000,2000+1,2000+2) + and store.s_city in ('Midway','Fairview') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q69.sql b/benchmarks/queries/tpcds/q69.sql new file mode 100644 index 0000000000..4d4030cf59 --- /dev/null +++ b/benchmarks/queries/tpcds/q69.sql @@ -0,0 +1,48 @@ +-- SQLBench-DS query 69 derived from TPC-DS query 69 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('IN','VA','MS') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 2 and 2+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 2 and 2+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 2 and 2+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q7.sql b/benchmarks/queries/tpcds/q7.sql new file mode 100644 index 0000000000..bb58851616 --- /dev/null +++ b/benchmarks/queries/tpcds/q7.sql @@ -0,0 +1,22 @@ +-- SQLBench-DS query 7 derived from TPC-DS query 7 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'M' and + cd_marital_status = 'M' and + cd_education_status = '4 yr Degree' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2001 + group by i_item_id + order by i_item_id + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q70.sql b/benchmarks/queries/tpcds/q70.sql new file mode 100644 index 0000000000..a8b5f1c99f --- /dev/null +++ b/benchmarks/queries/tpcds/q70.sql @@ -0,0 +1,39 @@ +-- SQLBench-DS query 70 derived from TPC-DS query 70 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1180 and 1180+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1180 and 1180+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q71.sql b/benchmarks/queries/tpcds/q71.sql new file mode 100644 index 0000000000..90d00806b0 --- /dev/null +++ b/benchmarks/queries/tpcds/q71.sql @@ -0,0 +1,41 @@ +-- SQLBench-DS query 71 derived from TPC-DS query 71 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=11 + and d_year=2001 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=11 + and d_year=2001 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=11 + and d_year=2001 + ) tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id + ; + diff --git a/benchmarks/queries/tpcds/q72.sql b/benchmarks/queries/tpcds/q72.sql new file mode 100644 index 0000000000..0e31057a03 --- /dev/null +++ b/benchmarks/queries/tpcds/q72.sql @@ -0,0 +1,30 @@ +-- SQLBench-DS query 72 derived from TPC-DS query 72 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select i_item_desc
+ ,w_warehouse_name
+ ,d1.d_week_seq
+ ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo
+ ,sum(case when p_promo_sk is not null then 1 else 0 end) promo
+ ,count(*) total_cnt
+from catalog_sales
+join inventory on (cs_item_sk = inv_item_sk)
+join warehouse on (w_warehouse_sk=inv_warehouse_sk)
+join item on (i_item_sk = cs_item_sk)
+join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk)
+join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk)
+join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk)
+join date_dim d2 on (inv_date_sk = d2.d_date_sk)
+join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk)
+left outer join promotion on (cs_promo_sk=p_promo_sk)
+left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number)
+where d1.d_week_seq = d2.d_week_seq
+ and inv_quantity_on_hand < cs_quantity
+ and d3.d_date > d1.d_date + 5
+ and hd_buy_potential = '501-1000'
+ and d1.d_year = 1999
+ and cd_marital_status = 'S'
+group by i_item_desc,w_warehouse_name,d1.d_week_seq
+order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq
+ LIMIT 100;
+
diff --git a/benchmarks/queries/tpcds/q72_optimized.sql b/benchmarks/queries/tpcds/q72_optimized.sql new file mode 100644 index 0000000000..a98a70e8f0 --- /dev/null +++ b/benchmarks/queries/tpcds/q72_optimized.sql @@ -0,0 +1,32 @@
+-- SQLBench-DS query 72 derived from TPC-DS query 72 under the terms of the TPC Fair Use Policy.
+-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council.
+
+-- This is a modified version of q72 that changes the join order to be sensible (the original q72
+-- intentionally has a terrible join order for testing database vendors' join-reordering rules)
+
+select i_item_desc
+ ,w_warehouse_name
+ ,d1.d_week_seq
+ ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo
+ ,sum(case when p_promo_sk is not null then 1 else 0 end) promo
+ ,count(*) total_cnt
+from catalog_sales
+ join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk)
+ join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk)
+ join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk)
+ join item on (i_item_sk = cs_item_sk)
+ join inventory on (cs_item_sk = inv_item_sk)
+ join warehouse on (w_warehouse_sk=inv_warehouse_sk)
+ join date_dim d2 on (inv_date_sk = d2.d_date_sk)
+ join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk)
+ left outer join promotion on (cs_promo_sk=p_promo_sk)
+ left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number)
+where d1.d_week_seq = d2.d_week_seq
+ and inv_quantity_on_hand < cs_quantity
+ and d3.d_date > d1.d_date + 5
+ and hd_buy_potential = '501-1000'
+ and d1.d_year = 1999
+ and cd_marital_status = 'S'
+group by i_item_desc,w_warehouse_name,d1.d_week_seq
+order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq
+LIMIT 100;
diff --git a/benchmarks/queries/tpcds/q73.sql b/benchmarks/queries/tpcds/q73.sql new file mode 100644 index 0000000000..e7879d09ff --- /dev/null +++ b/benchmarks/queries/tpcds/q73.sql @@ -0,0 +1,29 @@
+-- SQLBench-DS query 73 derived from TPC-DS query 73 under the terms of the TPC Fair Use Policy.
+-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council.
+-- This query was generated at scale factor 1.
+select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '1001-5000' or + household_demographics.hd_buy_potential = '5001-10000') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_county in ('Williamson County','Williamson County','Williamson County','Williamson County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc, c_last_name asc; + diff --git a/benchmarks/queries/tpcds/q74.sql b/benchmarks/queries/tpcds/q74.sql new file mode 100644 index 0000000000..b9829d9d5e --- /dev/null +++ b/benchmarks/queries/tpcds/q74.sql @@ -0,0 +1,62 @@ +-- SQLBench-DS query 74 derived from TPC-DS query 74 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,stddev_samp(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,stddev_samp(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and t_s_secyear.year = 2001+1 + and t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 3,2,1 + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q75.sql b/benchmarks/queries/tpcds/q75.sql new file mode 100644 index 0000000000..cec9da56a5 --- /dev/null +++ b/benchmarks/queries/tpcds/q75.sql @@ -0,0 +1,71 @@ +-- SQLBench-DS query 75 derived 
from TPC-DS query 75 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Shoes' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Shoes' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Shoes') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2000 + AND prev_yr.d_year=2000-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff,sales_amt_diff + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q76.sql b/benchmarks/queries/tpcds/q76.sql new file mode 100644 index 0000000000..931a1334f6 --- /dev/null +++ b/benchmarks/queries/tpcds/q76.sql @@ -0,0 +1,25 @@ +-- SQLBench-DS query 76 derived from TPC-DS query 76 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_customer_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_customer_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_ship_hdemo_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_ship_hdemo_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_bill_customer_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_bill_customer_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, col_name, d_year, d_qoy, i_category + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q77.sql b/benchmarks/queries/tpcds/q77.sql new file mode 100644 index 0000000000..d04bc14bc9 --- /dev/null +++ b/benchmarks/queries/tpcds/q77.sql @@ -0,0 +1,109 @@ +-- SQLBench-DS query 77 derived from TPC-DS query 77 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('2001-08-11' as date) + and (cast('2001-08-11' as date) + INTERVAL '30 DAYS') + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-11' as date) + and (cast('2001-08-11' as date) + INTERVAL '30 DAYS') + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('2001-08-11' as date) + and (cast('2001-08-11' as date) + INTERVAL '30 DAYS') + group by cs_call_center_sk + ), + cr as + (select cr_call_center_sk, + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-11' as date) + and (cast('2001-08-11' as date) + INTERVAL '30 DAYS') + group by cr_call_center_sk + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('2001-08-11' as date) + and (cast('2001-08-11' as date) + INTERVAL '30 DAYS') + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('2001-08-11' as date) + and (cast('2001-08-11' as date) + INTERVAL '30 DAYS') + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + 
from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q78.sql b/benchmarks/queries/tpcds/q78.sql new file mode 100644 index 0000000000..927ef63561 --- /dev/null +++ b/benchmarks/queries/tpcds/q78.sql @@ -0,0 +1,59 @@ +-- SQLBench-DS query 78 derived from TPC-DS query 78 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with ws as + (select d_year AS ws_sold_year, ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null + group by d_year, ss_item_sk, ss_customer_sk + ) + select +ss_customer_sk, +round(ss_qty/(coalesce(ws_qty,0)+coalesce(cs_qty,0)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk) +where (coalesce(ws_qty,0)>0 or coalesce(cs_qty, 0)>0) and ss_sold_year=2001 +order by + ss_customer_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + ratio + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q79.sql b/benchmarks/queries/tpcds/q79.sql new file mode 100644 index 0000000000..568444b152 --- /dev/null +++ b/benchmarks/queries/tpcds/q79.sql @@ -0,0 +1,24 @@ +-- SQLBench-DS query 79 derived from TPC-DS query 79 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. 
+-- This query was generated at scale factor 1. +select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 0 or household_demographics.hd_vehicle_count > 4) + and date_dim.d_dow = 1 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q8.sql b/benchmarks/queries/tpcds/q8.sql new file mode 100644 index 0000000000..0a994b4d21 --- /dev/null +++ b/benchmarks/queries/tpcds/q8.sql @@ -0,0 +1,109 @@ +-- SQLBench-DS query 8 derived from TPC-DS query 8 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '19100','41548','51640','49699','88329','55986', + '85119','19510','61020','95452','26235', + '51102','16733','42819','27823','90192', + '31905','28865','62197','23750','81398', + '95288','45114','82060','12313','25218', + '64386','46400','77230','69271','43672', + '36521','34217','13017','27936','42766', + '59233','26060','27477','39981','93402', + '74270','13932','51731','71642','17710', + '85156','21679','70840','67191','39214', + '35273','27293','17128','15458','31615', + '60706','67657','54092','32775','14683', + '32206','62543','43053','11297','58216', + '49410','14710','24501','79057','77038', + '91286','32334','46298','18326','67213', + '65382','40315','56115','80162','55956', + '81583','73588','32513','62880','12201', + '11592','17014','83832','61796','57872', + '78829','69912','48524','22016','26905', + '48511','92168','63051','25748','89786', + '98827','86404','53029','37524','14039', + '50078','34487','70142','18697','40129', + '60642','42810','62667','57183','46414', + '58463','71211','46364','34851','54884', + '25382','25239','74126','21568','84204', + '13607','82518','32982','36953','86001', + '79278','21745','64444','35199','83181', + '73255','86177','98043','90392','13882', + '47084','17859','89526','42072','20233', + '52745','75000','22044','77013','24182', + '52554','56138','43440','86100','48791', + '21883','17096','15965','31196','74903', + '19810','35763','92020','55176','54433', + '68063','71919','44384','16612','32109', + '28207','14762','89933','10930','27616', + '56809','14244','22733','33177','29784', + '74968','37887','11299','34692','85843', + '83663','95421','19323','17406','69264', + '28341','50150','79121','73974','92917', + '21229','32254','97408','46011','37169', + '18146','27296','62927','68812','47734', + '86572','12620','80252','50173','27261', + '29534','23488','42184','23695','45868', + '12910','23429','29052','63228','30731', + '15747','25827','22332','62349','56661', + '44652','51862','57007','22773','40361', + '65238','19327','17282','44708','35484', + 
'34064','11148','92729','22995','18833', + '77528','48917','17256','93166','68576', + '71096','56499','35096','80551','82424', + '17700','32748','78969','46820','57725', + '46179','54677','98097','62869','83959', + '66728','19716','48326','27420','53458', + '69056','84216','36688','63957','41469', + '66843','18024','81950','21911','58387', + '58103','19813','34581','55347','17171', + '35914','75043','75088','80541','26802', + '28849','22356','57721','77084','46385', + '59255','29308','65885','70673','13306', + '68788','87335','40987','31654','67560', + '92309','78116','65961','45018','16548', + '67092','21818','33716','49449','86150', + '12156','27574','43201','50977','52839', + '33234','86611','71494','17823','57172', + '59869','34086','51052','11320','39717', + '79604','24672','70555','38378','91135', + '15567','21606','74994','77168','38607', + '27384','68328','88944','40203','37893', + '42726','83549','48739','55652','27543', + '23109','98908','28831','45011','47525', + '43870','79404','35780','42136','49317', + '14574','99586','21107','14302','83882', + '81272','92552','14916','87533','86518', + '17862','30741','96288','57886','30304', + '24201','79457','36728','49833','35182', + '20108','39858','10804','47042','20439', + '54708','59027','82499','75311','26548', + '53406','92060','41152','60446','33129', + '43979','16903','60319','35550','33887', + '25463','40343','20726','44429') + intersect + select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1)A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 1 and d_year = 2000 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q80.sql b/benchmarks/queries/tpcds/q80.sql new file mode 100644 index 0000000000..29b2f87464 --- /dev/null +++ b/benchmarks/queries/tpcds/q80.sql @@ -0,0 +1,97 @@ +-- SQLBench-DS query 80 derived from TPC-DS query 80 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('2002-08-04' as date) + and (cast('2002-08-04' as date) + INTERVAL '30 DAYS') + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('2002-08-04' as date) + and (cast('2002-08-04' as date) + INTERVAL '30 DAYS') + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('2002-08-04' as date) + and (cast('2002-08-04' as date) + INTERVAL '30 DAYS') + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , 'store' || store_id as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , 'catalog_page' || catalog_page_id as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , 'web_site' || web_site_id as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q81.sql b/benchmarks/queries/tpcds/q81.sql new file mode 100644 index 0000000000..8dd4c43067 --- /dev/null +++ b/benchmarks/queries/tpcds/q81.sql @@ -0,0 +1,32 @@ +-- SQLBench-DS query 81 derived from TPC-DS query 81 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'TX' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q82.sql b/benchmarks/queries/tpcds/q82.sql new file mode 100644 index 0000000000..faea7a2f67 --- /dev/null +++ b/benchmarks/queries/tpcds/q82.sql @@ -0,0 +1,18 @@ +-- SQLBench-DS query 82 derived from TPC-DS query 82 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 69 and 69+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('1998-06-06' as date) and (cast('1998-06-06' as date) + INTERVAL '60 DAYS') + and i_manufact_id in (105,513,180,137) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q83.sql b/benchmarks/queries/tpcds/q83.sql new file mode 100644 index 0000000000..b2512ed83e --- /dev/null +++ b/benchmarks/queries/tpcds/q83.sql @@ -0,0 +1,68 @@ +-- SQLBench-DS query 83 derived from TPC-DS query 83 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-04-29','2000-09-09','2000-11-02'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-04-29','2000-09-09','2000-11-02'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-04-29','2000-09-09','2000-11-02'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q84.sql b/benchmarks/queries/tpcds/q84.sql new file mode 100644 index 0000000000..a07249b463 --- /dev/null +++ b/benchmarks/queries/tpcds/q84.sql @@ -0,0 +1,22 @@ +-- SQLBench-DS query 84 derived from TPC-DS query 84 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select c_customer_id as customer_id + , coalesce(c_last_name,'') || ', ' || coalesce(c_first_name,'') as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'White Oak' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 45626 + and ib_upper_bound <= 45626 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q85.sql b/benchmarks/queries/tpcds/q85.sql new file mode 100644 index 0000000000..c529acfe9e --- /dev/null +++ b/benchmarks/queries/tpcds/q85.sql @@ -0,0 +1,85 @@ +-- SQLBench-DS query 85 derived from TPC-DS query 85 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 2001 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Unknown' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('SC', 'IN', 'VA') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('WA', 'KS', 'KY') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('SD', 'WI', 'NE') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q86.sql b/benchmarks/queries/tpcds/q86.sql new file mode 100644 index 0000000000..ed7f4f85d0 --- /dev/null +++ b/benchmarks/queries/tpcds/q86.sql @@ -0,0 +1,27 @@ +-- SQLBench-DS query 86 derived from TPC-DS query 86 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1205 and 1205+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q87.sql b/benchmarks/queries/tpcds/q87.sql new file mode 100644 index 0000000000..13e2d8e2e7 --- /dev/null +++ b/benchmarks/queries/tpcds/q87.sql @@ -0,0 +1,24 @@ +-- SQLBench-DS query 87 derived from TPC-DS query 87 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1189 and 1189+11) +) cool_cust +; + diff --git a/benchmarks/queries/tpcds/q88.sql b/benchmarks/queries/tpcds/q88.sql new file mode 100644 index 0000000000..8d47334a4e --- /dev/null +++ b/benchmarks/queries/tpcds/q88.sql @@ -0,0 +1,95 @@ +-- SQLBench-DS query 88 derived from TPC-DS query 88 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 1 and 
household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or + (household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2)) + and store.s_store_name = 'ese') s8 +; + diff --git a/benchmarks/queries/tpcds/q89.sql b/benchmarks/queries/tpcds/q89.sql new file mode 100644 index 0000000000..ac02b6fe33 --- /dev/null +++ b/benchmarks/queries/tpcds/q89.sql @@ -0,0 +1,29 @@ +-- SQLBench-DS query 89 derived from TPC-DS query 89 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2001) and + ((i_category in ('Children','Jewelry','Home') and + i_class in ('infants','birdal','flatware') + ) + or (i_category in ('Electronics','Music','Books') and + i_class in ('audio','classical','science') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q9.sql b/benchmarks/queries/tpcds/q9.sql new file mode 100644 index 0000000000..cf723ccf29 --- /dev/null +++ b/benchmarks/queries/tpcds/q9.sql @@ -0,0 +1,52 @@ +-- SQLBench-DS query 9 derived from TPC-DS query 9 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 31002 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 588 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 2456 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 21645 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 20553 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1 +; + diff --git a/benchmarks/queries/tpcds/q90.sql b/benchmarks/queries/tpcds/q90.sql new file mode 100644 index 0000000000..dedf5fd066 --- /dev/null +++ b/benchmarks/queries/tpcds/q90.sql @@ -0,0 +1,23 @@ +-- SQLBench-DS query 90 derived from TPC-DS query 90 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 9 and 9+1 + and household_demographics.hd_dep_count = 2 + and web_page.wp_char_count between 5000 and 5200) at, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 15 and 15+1 + and household_demographics.hd_dep_count = 2 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q91.sql b/benchmarks/queries/tpcds/q91.sql new file mode 100644 index 0000000000..894d41bb2b --- /dev/null +++ b/benchmarks/queries/tpcds/q91.sql @@ -0,0 +1,32 @@ +-- SQLBench-DS query 91 derived from TPC-DS query 91 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 2002 +and d_moy = 11 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like 'Unknown%' +and ca_gmt_offset = -6 +group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc; + diff --git a/benchmarks/queries/tpcds/q92.sql b/benchmarks/queries/tpcds/q92.sql new file mode 100644 index 0000000000..171a968627 --- /dev/null +++ b/benchmarks/queries/tpcds/q92.sql @@ -0,0 +1,31 @@ +-- SQLBench-DS query 92 derived from TPC-DS query 92 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + sum(ws_ext_discount_amt) as `Excess Discount Amount` +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 914 +and i_item_sk = ws_item_sk +and d_date between '2001-01-25' and + (cast('2001-01-25' as date) + INTERVAL '90 DAYS') +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '2001-01-25' and + (cast('2001-01-25' as date) + INTERVAL '90 DAYS') + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q93.sql b/benchmarks/queries/tpcds/q93.sql new file mode 100644 index 0000000000..31ec9e7d4e --- /dev/null +++ b/benchmarks/queries/tpcds/q93.sql @@ -0,0 +1,19 @@ +-- SQLBench-DS query 93 derived from TPC-DS query 93 under the terms of the TPC Fair Use Policy. 
+-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not get it on time') t + group by ss_customer_sk + order by sumsales, ss_customer_sk + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q94.sql b/benchmarks/queries/tpcds/q94.sql new file mode 100644 index 0000000000..cf04e14e0d --- /dev/null +++ b/benchmarks/queries/tpcds/q94.sql @@ -0,0 +1,30 @@ +-- SQLBench-DS query 94 derived from TPC-DS query 94 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-4-01' and + (cast('1999-4-01' as date) + INTERVAL '60 DAYS') +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'WI' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q95.sql b/benchmarks/queries/tpcds/q95.sql new file mode 100644 index 0000000000..2db3e50327 --- /dev/null +++ b/benchmarks/queries/tpcds/q95.sql @@ -0,0 +1,33 @@ +-- SQLBench-DS query 95 derived from TPC-DS query 95 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '2002-5-01' and + (cast('2002-5-01' as date) + INTERVAL '60 DAYS') +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'MA' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q96.sql b/benchmarks/queries/tpcds/q96.sql new file mode 100644 index 0000000000..63c6fdbf97 --- /dev/null +++ b/benchmarks/queries/tpcds/q96.sql @@ -0,0 +1,17 @@ +-- SQLBench-DS query 96 derived from TPC-DS query 96 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 5 + and store.s_store_name = 'ese' +order by count(*) + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q97.sql b/benchmarks/queries/tpcds/q97.sql new file mode 100644 index 0000000000..5741cc9c56 --- /dev/null +++ b/benchmarks/queries/tpcds/q97.sql @@ -0,0 +1,26 @@ +-- SQLBench-DS query 97 derived from TPC-DS query 97 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1211 and 1211 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1211 and 1211 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) + LIMIT 100; + diff --git a/benchmarks/queries/tpcds/q98.sql b/benchmarks/queries/tpcds/q98.sql new file mode 100644 index 0000000000..86bf08b2a1 --- /dev/null +++ b/benchmarks/queries/tpcds/q98.sql @@ -0,0 +1,34 @@ +-- SQLBench-DS query 98 derived from TPC-DS query 98 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. 
+select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Shoes', 'Music', 'Men') + and ss_sold_date_sk = d_date_sk + and d_date between cast('2000-01-05' as date) + and (cast('2000-01-05' as date) + INTERVAL '30 DAYS') +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio; + diff --git a/benchmarks/queries/tpcds/q99.sql b/benchmarks/queries/tpcds/q99.sql new file mode 100644 index 0000000000..8bd1f6406d --- /dev/null +++ b/benchmarks/queries/tpcds/q99.sql @@ -0,0 +1,36 @@ +-- SQLBench-DS query 99 derived from TPC-DS query 99 under the terms of the TPC Fair Use Policy. +-- TPC-DS queries are Copyright 2021 Transaction Processing Performance Council. +-- This query was generated at scale factor 1. +select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1188 and 1188 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + LIMIT 100; + diff --git a/benchmarks/queries/tpch/q1.sql b/benchmarks/queries/tpch/q1.sql new file mode 100644 index 0000000000..0dc4c3e598 --- /dev/null +++ b/benchmarks/queries/tpch/q1.sql @@ -0,0 +1,23 @@ +-- SQLBench-H query 1 derived from TPC-H query 1 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + l_returnflag, + l_linestatus, + sum(l_quantity) as sum_qty, + sum(l_extendedprice) as sum_base_price, + sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + avg(l_quantity) as avg_qty, + avg(l_extendedprice) as avg_price, + avg(l_discount) as avg_disc, + count(*) as count_order +from + lineitem +where + l_shipdate <= date '1998-12-01' - interval '68 days' +group by + l_returnflag, + l_linestatus +order by + l_returnflag, + l_linestatus; diff --git a/benchmarks/queries/tpch/q10.sql b/benchmarks/queries/tpch/q10.sql new file mode 100644 index 0000000000..576338f044 --- /dev/null +++ b/benchmarks/queries/tpch/q10.sql @@ -0,0 +1,33 @@ +-- SQLBench-H query 10 derived from TPC-H query 10 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+select + c_custkey, + c_name, + sum(l_extendedprice * (1 - l_discount)) as revenue, + c_acctbal, + n_name, + c_address, + c_phone, + c_comment +from + customer, + orders, + lineitem, + nation +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate >= date '1993-07-01' + and o_orderdate < date '1993-07-01' + interval '3' month + and l_returnflag = 'R' + and c_nationkey = n_nationkey +group by + c_custkey, + c_name, + c_acctbal, + c_phone, + n_name, + c_address, + c_comment +order by + revenue desc limit 20; diff --git a/benchmarks/queries/tpch/q11.sql b/benchmarks/queries/tpch/q11.sql new file mode 100644 index 0000000000..f4ead457b7 --- /dev/null +++ b/benchmarks/queries/tpch/q11.sql @@ -0,0 +1,29 @@ +-- SQLBench-H query 11 derived from TPC-H query 11 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + ps_partkey, + sum(ps_supplycost * ps_availqty) as value +from + partsupp, + supplier, + nation +where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'ALGERIA' +group by + ps_partkey having + sum(ps_supplycost * ps_availqty) > ( + select + sum(ps_supplycost * ps_availqty) * 0.0001000000 + from + partsupp, + supplier, + nation + where + ps_suppkey = s_suppkey + and s_nationkey = n_nationkey + and n_name = 'ALGERIA' + ) +order by + value desc; diff --git a/benchmarks/queries/tpch/q12.sql b/benchmarks/queries/tpch/q12.sql new file mode 100644 index 0000000000..4ab4ea6e3b --- /dev/null +++ b/benchmarks/queries/tpch/q12.sql @@ -0,0 +1,30 @@ +-- SQLBench-H query 12 derived from TPC-H query 12 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + l_shipmode, + sum(case + when o_orderpriority = '1-URGENT' + or o_orderpriority = '2-HIGH' + then 1 + else 0 + end) as high_line_count, + sum(case + when o_orderpriority <> '1-URGENT' + and o_orderpriority <> '2-HIGH' + then 1 + else 0 + end) as low_line_count +from + orders, + lineitem +where + o_orderkey = l_orderkey + and l_shipmode in ('FOB', 'SHIP') + and l_commitdate < l_receiptdate + and l_shipdate < l_commitdate + and l_receiptdate >= date '1995-01-01' + and l_receiptdate < date '1995-01-01' + interval '1' year +group by + l_shipmode +order by + l_shipmode; diff --git a/benchmarks/queries/tpch/q13.sql b/benchmarks/queries/tpch/q13.sql new file mode 100644 index 0000000000..301e35d193 --- /dev/null +++ b/benchmarks/queries/tpch/q13.sql @@ -0,0 +1,22 @@ +-- SQLBench-H query 13 derived from TPC-H query 13 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + c_count, + count(*) as custdist +from + ( + select + c_custkey, + count(o_orderkey) + from + customer left outer join orders on + c_custkey = o_custkey + and o_comment not like '%express%requests%' + group by + c_custkey + ) as c_orders (c_custkey, c_count) +group by + c_count +order by + custdist desc, + c_count desc; diff --git a/benchmarks/queries/tpch/q14.sql b/benchmarks/queries/tpch/q14.sql new file mode 100644 index 0000000000..6040ac734c --- /dev/null +++ b/benchmarks/queries/tpch/q14.sql @@ -0,0 +1,15 @@ +-- SQLBench-H query 14 derived from TPC-H query 14 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+select + 100.00 * sum(case + when p_type like 'PROMO%' + then l_extendedprice * (1 - l_discount) + else 0 + end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue +from + lineitem, + part +where + l_partkey = p_partkey + and l_shipdate >= date '1995-02-01' + and l_shipdate < date '1995-02-01' + interval '1' month; diff --git a/benchmarks/queries/tpch/q15.sql b/benchmarks/queries/tpch/q15.sql new file mode 100644 index 0000000000..0fe03a79c0 --- /dev/null +++ b/benchmarks/queries/tpch/q15.sql @@ -0,0 +1,33 @@ +-- SQLBench-H query 15 derived from TPC-H query 15 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +create view revenue0 (supplier_no, total_revenue) as + select + l_suppkey, + sum(l_extendedprice * (1 - l_discount)) + from + lineitem + where + l_shipdate >= date '1996-08-01' + and l_shipdate < date '1996-08-01' + interval '3' month + group by + l_suppkey; +select + s_suppkey, + s_name, + s_address, + s_phone, + total_revenue +from + supplier, + revenue0 +where + s_suppkey = supplier_no + and total_revenue = ( + select + max(total_revenue) + from + revenue0 + ) +order by + s_suppkey; +drop view revenue0; diff --git a/benchmarks/queries/tpch/q16.sql b/benchmarks/queries/tpch/q16.sql new file mode 100644 index 0000000000..7fdf36522a --- /dev/null +++ b/benchmarks/queries/tpch/q16.sql @@ -0,0 +1,32 @@ +-- SQLBench-H query 16 derived from TPC-H query 16 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + p_brand, + p_type, + p_size, + count(distinct ps_suppkey) as supplier_cnt +from + partsupp, + part +where + p_partkey = ps_partkey + and p_brand <> 'Brand#14' + and p_type not like 'SMALL PLATED%' + and p_size in (14, 6, 5, 31, 49, 15, 41, 47) + and ps_suppkey not in ( + select + s_suppkey + from + supplier + where + s_comment like '%Customer%Complaints%' + ) +group by + p_brand, + p_type, + p_size +order by + supplier_cnt desc, + p_brand, + p_type, + p_size; diff --git a/benchmarks/queries/tpch/q17.sql b/benchmarks/queries/tpch/q17.sql new file mode 100644 index 0000000000..ffa0f15c8a --- /dev/null +++ b/benchmarks/queries/tpch/q17.sql @@ -0,0 +1,19 @@ +-- SQLBench-H query 17 derived from TPC-H query 17 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + sum(l_extendedprice) / 7.0 as avg_yearly +from + lineitem, + part +where + p_partkey = l_partkey + and p_brand = 'Brand#42' + and p_container = 'LG BAG' + and l_quantity < ( + select + 0.2 * avg(l_quantity) + from + lineitem + where + l_partkey = p_partkey + ); diff --git a/benchmarks/queries/tpch/q18.sql b/benchmarks/queries/tpch/q18.sql new file mode 100644 index 0000000000..f4ab1945e7 --- /dev/null +++ b/benchmarks/queries/tpch/q18.sql @@ -0,0 +1,34 @@ +-- SQLBench-H query 18 derived from TPC-H query 18 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+select + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice, + sum(l_quantity) +from + customer, + orders, + lineitem +where + o_orderkey in ( + select + l_orderkey + from + lineitem + group by + l_orderkey having + sum(l_quantity) > 313 + ) + and c_custkey = o_custkey + and o_orderkey = l_orderkey +group by + c_name, + c_custkey, + o_orderkey, + o_orderdate, + o_totalprice +order by + o_totalprice desc, + o_orderdate limit 100; diff --git a/benchmarks/queries/tpch/q19.sql b/benchmarks/queries/tpch/q19.sql new file mode 100644 index 0000000000..ad5fb7d929 --- /dev/null +++ b/benchmarks/queries/tpch/q19.sql @@ -0,0 +1,37 @@ +-- SQLBench-H query 19 derived from TPC-H query 19 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + sum(l_extendedprice* (1 - l_discount)) as revenue +from + lineitem, + part +where + ( + p_partkey = l_partkey + and p_brand = 'Brand#21' + and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + and l_quantity >= 8 and l_quantity <= 8 + 10 + and p_size between 1 and 5 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#13' + and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + and l_quantity >= 20 and l_quantity <= 20 + 10 + and p_size between 1 and 10 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ) + or + ( + p_partkey = l_partkey + and p_brand = 'Brand#52' + and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + and l_quantity >= 30 and l_quantity <= 30 + 10 + and p_size between 1 and 15 + and l_shipmode in ('AIR', 'AIR REG') + and l_shipinstruct = 'DELIVER IN PERSON' + ); diff --git a/benchmarks/queries/tpch/q2.sql b/benchmarks/queries/tpch/q2.sql new file mode 100644 index 0000000000..2936532889 --- /dev/null +++ b/benchmarks/queries/tpch/q2.sql @@ -0,0 +1,45 @@ +-- SQLBench-H query 2 derived from TPC-H query 2 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + s_acctbal, + s_name, + n_name, + p_partkey, + p_mfgr, + s_address, + s_phone, + s_comment +from + part, + supplier, + partsupp, + nation, + region +where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and p_size = 48 + and p_type like '%TIN' + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + and ps_supplycost = ( + select + min(ps_supplycost) + from + partsupp, + supplier, + nation, + region + where + p_partkey = ps_partkey + and s_suppkey = ps_suppkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'ASIA' + ) +order by + s_acctbal desc, + n_name, + s_name, + p_partkey limit 100; diff --git a/benchmarks/queries/tpch/q20.sql b/benchmarks/queries/tpch/q20.sql new file mode 100644 index 0000000000..3136ca302c --- /dev/null +++ b/benchmarks/queries/tpch/q20.sql @@ -0,0 +1,39 @@ +-- SQLBench-H query 20 derived from TPC-H query 20 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+select + s_name, + s_address +from + supplier, + nation +where + s_suppkey in ( + select + ps_suppkey + from + partsupp + where + ps_partkey in ( + select + p_partkey + from + part + where + p_name like 'blanched%' + ) + and ps_availqty > ( + select + 0.5 * sum(l_quantity) + from + lineitem + where + l_partkey = ps_partkey + and l_suppkey = ps_suppkey + and l_shipdate >= date '1993-01-01' + and l_shipdate < date '1993-01-01' + interval '1' year + ) + ) + and s_nationkey = n_nationkey + and n_name = 'KENYA' +order by + s_name; diff --git a/benchmarks/queries/tpch/q21.sql b/benchmarks/queries/tpch/q21.sql new file mode 100644 index 0000000000..01704697c4 --- /dev/null +++ b/benchmarks/queries/tpch/q21.sql @@ -0,0 +1,41 @@ +-- SQLBench-H query 21 derived from TPC-H query 21 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + s_name, + count(*) as numwait +from + supplier, + lineitem l1, + orders, + nation +where + s_suppkey = l1.l_suppkey + and o_orderkey = l1.l_orderkey + and o_orderstatus = 'F' + and l1.l_receiptdate > l1.l_commitdate + and exists ( + select + * + from + lineitem l2 + where + l2.l_orderkey = l1.l_orderkey + and l2.l_suppkey <> l1.l_suppkey + ) + and not exists ( + select + * + from + lineitem l3 + where + l3.l_orderkey = l1.l_orderkey + and l3.l_suppkey <> l1.l_suppkey + and l3.l_receiptdate > l3.l_commitdate + ) + and s_nationkey = n_nationkey + and n_name = 'ARGENTINA' +group by + s_name +order by + numwait desc, + s_name limit 100; diff --git a/benchmarks/queries/tpch/q22.sql b/benchmarks/queries/tpch/q22.sql new file mode 100644 index 0000000000..8d528ef6da --- /dev/null +++ b/benchmarks/queries/tpch/q22.sql @@ -0,0 +1,39 @@ +-- SQLBench-H query 22 derived from TPC-H query 22 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + cntrycode, + count(*) as numcust, + sum(c_acctbal) as totacctbal +from + ( + select + substring(c_phone from 1 for 2) as cntrycode, + c_acctbal + from + customer + where + substring(c_phone from 1 for 2) in + ('24', '34', '16', '30', '33', '14', '13') + and c_acctbal > ( + select + avg(c_acctbal) + from + customer + where + c_acctbal > 0.00 + and substring(c_phone from 1 for 2) in + ('24', '34', '16', '30', '33', '14', '13') + ) + and not exists ( + select + * + from + orders + where + o_custkey = c_custkey + ) + ) as custsale +group by + cntrycode +order by + cntrycode; diff --git a/benchmarks/queries/tpch/q3.sql b/benchmarks/queries/tpch/q3.sql new file mode 100644 index 0000000000..b60be7ff69 --- /dev/null +++ b/benchmarks/queries/tpch/q3.sql @@ -0,0 +1,24 @@ +-- SQLBench-H query 3 derived from TPC-H query 3 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+select + l_orderkey, + sum(l_extendedprice * (1 - l_discount)) as revenue, + o_orderdate, + o_shippriority +from + customer, + orders, + lineitem +where + c_mktsegment = 'BUILDING' + and c_custkey = o_custkey + and l_orderkey = o_orderkey + and o_orderdate < date '1995-03-15' + and l_shipdate > date '1995-03-15' +group by + l_orderkey, + o_orderdate, + o_shippriority +order by + revenue desc, + o_orderdate limit 10; diff --git a/benchmarks/queries/tpch/q4.sql b/benchmarks/queries/tpch/q4.sql new file mode 100644 index 0000000000..05fae97af9 --- /dev/null +++ b/benchmarks/queries/tpch/q4.sql @@ -0,0 +1,23 @@ +-- SQLBench-H query 4 derived from TPC-H query 4 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + o_orderpriority, + count(*) as order_count +from + orders +where + o_orderdate >= date '1995-04-01' + and o_orderdate < date '1995-04-01' + interval '3' month + and exists ( + select + * + from + lineitem + where + l_orderkey = o_orderkey + and l_commitdate < l_receiptdate + ) +group by + o_orderpriority +order by + o_orderpriority; diff --git a/benchmarks/queries/tpch/q5.sql b/benchmarks/queries/tpch/q5.sql new file mode 100644 index 0000000000..4b97ef0e48 --- /dev/null +++ b/benchmarks/queries/tpch/q5.sql @@ -0,0 +1,26 @@ +-- SQLBench-H query 5 derived from TPC-H query 5 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + n_name, + sum(l_extendedprice * (1 - l_discount)) as revenue +from + customer, + orders, + lineitem, + supplier, + nation, + region +where + c_custkey = o_custkey + and l_orderkey = o_orderkey + and l_suppkey = s_suppkey + and c_nationkey = s_nationkey + and s_nationkey = n_nationkey + and n_regionkey = r_regionkey + and r_name = 'AFRICA' + and o_orderdate >= date '1994-01-01' + and o_orderdate < date '1994-01-01' + interval '1' year +group by + n_name +order by + revenue desc; diff --git a/benchmarks/queries/tpch/q6.sql b/benchmarks/queries/tpch/q6.sql new file mode 100644 index 0000000000..f5b4bae70e --- /dev/null +++ b/benchmarks/queries/tpch/q6.sql @@ -0,0 +1,11 @@ +-- SQLBench-H query 6 derived from TPC-H query 6 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + sum(l_extendedprice * l_discount) as revenue +from + lineitem +where + l_shipdate >= date '1994-01-01' + and l_shipdate < date '1994-01-01' + interval '1' year + and l_discount between 0.04 - 0.01 and 0.04 + 0.01 + and l_quantity < 24; diff --git a/benchmarks/queries/tpch/q7.sql b/benchmarks/queries/tpch/q7.sql new file mode 100644 index 0000000000..f3919be2db --- /dev/null +++ b/benchmarks/queries/tpch/q7.sql @@ -0,0 +1,41 @@ +-- SQLBench-H query 7 derived from TPC-H query 7 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. 
+select + supp_nation, + cust_nation, + l_year, + sum(volume) as revenue +from + ( + select + n1.n_name as supp_nation, + n2.n_name as cust_nation, + extract(year from l_shipdate) as l_year, + l_extendedprice * (1 - l_discount) as volume + from + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2 + where + s_suppkey = l_suppkey + and o_orderkey = l_orderkey + and c_custkey = o_custkey + and s_nationkey = n1.n_nationkey + and c_nationkey = n2.n_nationkey + and ( + (n1.n_name = 'GERMANY' and n2.n_name = 'IRAQ') + or (n1.n_name = 'IRAQ' and n2.n_name = 'GERMANY') + ) + and l_shipdate between date '1995-01-01' and date '1996-12-31' + ) as shipping +group by + supp_nation, + cust_nation, + l_year +order by + supp_nation, + cust_nation, + l_year; diff --git a/benchmarks/queries/tpch/q8.sql b/benchmarks/queries/tpch/q8.sql new file mode 100644 index 0000000000..7c53e145e4 --- /dev/null +++ b/benchmarks/queries/tpch/q8.sql @@ -0,0 +1,39 @@ +-- SQLBench-H query 8 derived from TPC-H query 8 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + o_year, + sum(case + when nation = 'IRAQ' then volume + else 0 + end) / sum(volume) as mkt_share +from + ( + select + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) as volume, + n2.n_name as nation + from + part, + supplier, + lineitem, + orders, + customer, + nation n1, + nation n2, + region + where + p_partkey = l_partkey + and s_suppkey = l_suppkey + and l_orderkey = o_orderkey + and o_custkey = c_custkey + and c_nationkey = n1.n_nationkey + and n1.n_regionkey = r_regionkey + and r_name = 'MIDDLE EAST' + and s_nationkey = n2.n_nationkey + and o_orderdate between date '1995-01-01' and date '1996-12-31' + and p_type = 'LARGE PLATED STEEL' + ) as all_nations +group by + o_year +order by + o_year; diff --git a/benchmarks/queries/tpch/q9.sql b/benchmarks/queries/tpch/q9.sql new file mode 100644 index 0000000000..2455695618 --- /dev/null +++ b/benchmarks/queries/tpch/q9.sql @@ -0,0 +1,34 @@ +-- SQLBench-H query 9 derived from TPC-H query 9 under the terms of the TPC Fair Use Policy. +-- TPC-H queries are Copyright 1993-2022 Transaction Processing Performance Council. +select + nation, + o_year, + sum(amount) as sum_profit +from + ( + select + n_name as nation, + extract(year from o_orderdate) as o_year, + l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount + from + part, + supplier, + lineitem, + partsupp, + orders, + nation + where + s_suppkey = l_suppkey + and ps_suppkey = l_suppkey + and ps_partkey = l_partkey + and p_partkey = l_partkey + and o_orderkey = l_orderkey + and s_nationkey = n_nationkey + and p_name like '%moccasin%' + ) as profit +group by + nation, + o_year +order by + nation, + o_year desc; diff --git a/benchmarks/run.py b/benchmarks/run.py new file mode 100755 index 0000000000..0632764edc --- /dev/null +++ b/benchmarks/run.py @@ -0,0 +1,225 @@ +#!/usr/bin/env python3 +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Unified benchmark runner wrapper. + +Reads .conf files, merges them with precedence (profile < engine < CLI), +then builds and executes the spark-submit command. + +Usage:: + + # = comet-tpch.sh + python benchmarks/run.py --engine comet --profile standalone-tpch \\ + --restart-cluster \\ + -- tpc --benchmark tpch --data $TPCH_DATA --queries $TPCH_QUERIES \\ + --output . --iterations 1 + + + + # = comet-tpch-iceberg.sh (dynamic catalog via --conf) + python benchmarks/run.py --engine comet-iceberg --profile standalone-tpch \\ + --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \\ + --conf spark.sql.catalog.local.type=hadoop \\ + --conf spark.sql.catalog.local.warehouse=$ICEBERG_WAREHOUSE \\ + --conf spark.sql.defaultCatalog=local \\ + --restart-cluster \\ + -- tpc --benchmark tpch --catalog local --database tpch \\ + --queries $TPCH_QUERIES --output . --iterations 1 + + # shuffle benchmark + python benchmarks/run.py --engine comet-jvm-shuffle --profile local \\ + -- shuffle --benchmark shuffle-hash --data /tmp/data --mode jvm \\ + --output . --iterations 3 +""" + +import argparse +import os +import subprocess +import sys + +# Allow importing from the repo root so ``from benchmarks.runner.config ...`` +# works when this script is run directly. +_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +_REPO_ROOT = os.path.dirname(_SCRIPT_DIR) +if _REPO_ROOT not in sys.path: + sys.path.insert(0, _REPO_ROOT) + +from benchmarks.runner.config import merge_configs, split_config + + +def _parse_args(): + """Parse wrapper-level arguments, splitting on ``--``.""" + parser = argparse.ArgumentParser( + description="Unified benchmark runner — builds and executes spark-submit", + usage=( + "%(prog)s --engine NAME [--profile NAME] " + "[--conf key=value ...] [--restart-cluster] " + "[--dry-run] -- SUITE_ARGS..." + ), + ) + parser.add_argument("--engine", required=True, help="Engine config name") + parser.add_argument("--profile", default=None, help="Profile config name") + parser.add_argument( + "--conf", action="append", default=[], + help="Extra key=value config override (repeatable)", + ) + parser.add_argument( + "--restart-cluster", action="store_true", + help="Stop and restart Spark standalone master + worker", + ) + parser.add_argument( + "--dry-run", action="store_true", + help="Print the spark-submit command without executing it", + ) + + # Split on "--": everything before goes to this parser, everything after + # is passed through to the benchmark suite CLI. 
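+    # For example (hypothetical invocation, continuing the comment above):
+    #   run.py --engine comet --profile local -- tpc --benchmark tpch
+    #   wrapper_args = ["--engine", "comet", "--profile", "local"]
+    #   suite_args   = ["tpc", "--benchmark", "tpch"]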
+ argv = sys.argv[1:] + if "--" in argv: + sep = argv.index("--") + wrapper_args = argv[:sep] + suite_args = argv[sep + 1:] + else: + wrapper_args = argv + suite_args = [] + + args = parser.parse_args(wrapper_args) + args.suite_args = suite_args + return args + + +def _resolve_conf_path(conf_dir, kind, name): + """Return the path to a .conf file, or exit with an error.""" + path = os.path.join(conf_dir, kind, f"{name}.conf") + if not os.path.isfile(path): + available = sorted( + f.removesuffix(".conf") + for f in os.listdir(os.path.join(conf_dir, kind)) + if f.endswith(".conf") + ) + print( + f"Error: {kind} config '{name}' not found at {path}\n" + f"Available: {', '.join(available)}", + file=sys.stderr, + ) + sys.exit(1) + return path + + +def _restart_cluster(): + """Stop and start Spark standalone master + worker.""" + spark_home = os.environ.get("SPARK_HOME") + if not spark_home: + print("Error: SPARK_HOME must be set for --restart-cluster", file=sys.stderr) + sys.exit(1) + spark_master = os.environ.get("SPARK_MASTER") + if not spark_master: + print("Error: SPARK_MASTER must be set for --restart-cluster", file=sys.stderr) + sys.exit(1) + + sbin = os.path.join(spark_home, "sbin") + print("Restarting Spark standalone cluster...") + subprocess.run([os.path.join(sbin, "stop-master.sh")], stderr=subprocess.DEVNULL, check=False) + subprocess.run([os.path.join(sbin, "stop-worker.sh")], stderr=subprocess.DEVNULL, check=False) + subprocess.check_call([os.path.join(sbin, "start-master.sh")]) + subprocess.check_call([os.path.join(sbin, "start-worker.sh"), spark_master]) + + +def main(): + args = _parse_args() + conf_dir = os.path.join(_SCRIPT_DIR, "conf") + + # Resolve config file paths + engine_path = _resolve_conf_path(conf_dir, "engines", args.engine) + profile_path = ( + _resolve_conf_path(conf_dir, "profiles", args.profile) + if args.profile else None + ) + + # Merge configs: profile < engine < CLI overrides + merged = merge_configs( + profile_path=profile_path, + engine_path=engine_path, + cli_overrides=args.conf, + ) + spark_conf, runner_conf = split_config(merged) + + # Export runner.env.* as environment variables + for key, value in runner_conf.items(): + if key.startswith("env."): + env_var = key[len("env."):] + os.environ[env_var] = value + print(f"Exported {env_var}={value}") + + # Restart cluster if requested + if args.restart_cluster: + _restart_cluster() + + # Build spark-submit command + spark_home = os.environ.get("SPARK_HOME", "") + if not spark_home: + print("Error: SPARK_HOME must be set", file=sys.stderr) + sys.exit(1) + + cmd = [os.path.join(spark_home, "bin", "spark-submit")] + + # Master + master = runner_conf.get("master") + if master: + cmd += ["--master", master] + + # JARs + jars = runner_conf.get("jars") + if jars: + cmd += ["--jars", jars] + cmd += ["--driver-class-path", jars.replace(",", ":")] + + # Spark configs + for key, value in spark_conf.items(): + cmd += ["--conf", f"{key}={value}"] + + # Python script (the CLI entry point) + cmd.append(os.path.join(_SCRIPT_DIR, "runner", "cli.py")) + + # Inject --name from runner.name if not already in suite args. + # Insert after the first positional arg (the subcommand) so that + # argparse routes it to the correct subparser. 
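+    # For example, with engine "comet" (illustrative values):
+    #   ["tpc", "--benchmark", "tpch"]
+    #     -> ["tpc", "--name", "comet", "--benchmark", "tpch"]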
+ runner_name = runner_conf.get("name", args.engine) + suite_args = list(args.suite_args) + if "--name" not in suite_args and suite_args: + suite_args.insert(1, "--name") + suite_args.insert(2, runner_name) + + cmd += suite_args + + # Print and execute + print() + print("spark-submit command:") + print(f" {' '.join(cmd)}") + print() + + if args.dry_run: + return + + os.execvp(cmd[0], cmd) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/runner/__init__.py b/benchmarks/runner/__init__.py new file mode 100644 index 0000000000..0ccbeeeafb --- /dev/null +++ b/benchmarks/runner/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/benchmarks/runner/cli.py b/benchmarks/runner/cli.py new file mode 100644 index 0000000000..5394cf9599 --- /dev/null +++ b/benchmarks/runner/cli.py @@ -0,0 +1,315 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Single CLI entry point for the unified benchmark runner. + +Designed to be the Python script passed to ``spark-submit``. Subcommands +correspond to benchmark suites (currently: ``tpc``, ``shuffle``, ``micro``). + +Usage (via spark-submit):: + + spark-submit ... benchmarks/runner/cli.py tpc --benchmark tpch --data /path ... + spark-submit ... benchmarks/runner/cli.py shuffle --benchmark shuffle-hash --data /path ... + spark-submit ... benchmarks/runner/cli.py micro --benchmark string-expressions --output ... +""" + +import argparse +import json +import os +import sys + +from benchmarks.runner.spark_session import create_session +from benchmarks.suites import tpc +from benchmarks.suites import shuffle +from benchmarks.suites import micro + + +# --------------------------------------------------------------------------- +# Profiling helpers +# --------------------------------------------------------------------------- + +def _maybe_start_profiler(spark, args): + """Start profiler if ``--profile`` was passed. 
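+
+    The profiler samples executor metrics from the Spark REST API in a
+    background daemon thread (see ``runner/profiling.py``), so starting it
+    does not block the benchmark queries themselves.
+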
Returns profiler or None.""" + if not getattr(args, "profile", False): + return None + from benchmarks.runner.profiling import SparkMetricsProfiler + + interval = getattr(args, "profile_interval", 2.0) + profiler = SparkMetricsProfiler(spark, interval_secs=interval) + profiler.start() + return profiler + + +def _maybe_stop_profiler(profiler, output_dir, name, benchmark): + """Stop profiler and write CSV if active.""" + if profiler is None: + return + profiler.stop() + csv_path = os.path.join(output_dir, f"{name}-{benchmark}-metrics.csv") + profiler.write_csv(csv_path) + + +def _add_profiling_args(parser): + """Add common profiling flags to a subparser.""" + parser.add_argument( + "--profile", action="store_true", + help="Enable Level 1 JVM metrics profiling via Spark REST API", + ) + parser.add_argument( + "--profile-interval", type=float, default=2.0, + help="Profiling poll interval in seconds (default: 2.0)", + ) + + +# --------------------------------------------------------------------------- +# TPC subcommand +# --------------------------------------------------------------------------- + +def _add_tpc_subparser(subparsers): + """Register the ``tpc`` subcommand with the same args as tpcbench.py.""" + p = subparsers.add_parser( + "tpc", + help="Run TPC-H or TPC-DS benchmarks", + description="TPC-H/TPC-DS benchmark runner for files or Iceberg tables", + ) + p.add_argument("--benchmark", required=True, help="tpch or tpcds") + + source = p.add_mutually_exclusive_group(required=True) + source.add_argument("--data", help="Path to data files") + source.add_argument("--catalog", help="Iceberg catalog name") + + p.add_argument( + "--format", default="parquet", + help="Input file format: parquet, csv, json (only with --data)", + ) + p.add_argument( + "--options", type=json.loads, default={}, + help='Spark reader options as JSON, e.g. 
\'{"header": "true"}\'', + ) + p.add_argument( + "--database", default="tpch", + help="Database containing TPC tables (only with --catalog)", + ) + p.add_argument("--queries", required=True, help="Path to query SQL files") + p.add_argument("--iterations", type=int, default=1, help="Number of iterations") + p.add_argument("--output", required=True, help="Directory for results JSON") + p.add_argument("--name", required=True, help="Prefix for result file") + p.add_argument("--query", type=int, help="Run a single query (1-based)") + p.add_argument("--write", help="Path to save query results as Parquet") + _add_profiling_args(p) + + +def _run_tpc(args): + """Execute the TPC suite.""" + spark = create_session( + app_name=f"{args.name} benchmark derived from {args.benchmark}", + spark_conf={}, # configs already set by spark-submit + ) + + profiler = _maybe_start_profiler(spark, args) + + using_iceberg = tpc.register_tables( + spark, + benchmark=args.benchmark, + data_path=args.data, + catalog=args.catalog, + database=args.database, + file_format=args.format, + reader_options=args.options, + ) + + timings = tpc.run_queries( + spark, + benchmark=args.benchmark, + query_path=args.queries, + iterations=args.iterations, + query_num=args.query, + write_path=args.write, + ) + + results = tpc.build_results( + spark, + benchmark=args.benchmark, + query_path=args.queries, + data_path=args.data, + catalog=args.catalog, + database=args.database, + using_iceberg=using_iceberg, + name=args.name, + timings=timings, + ) + + tpc.write_results(results, args.output, args.name, args.benchmark) + _maybe_stop_profiler(profiler, args.output, args.name, args.benchmark) + spark.stop() + + +# --------------------------------------------------------------------------- +# Shuffle subcommand +# --------------------------------------------------------------------------- + +def _add_shuffle_subparser(subparsers): + """Register the ``shuffle`` subcommand.""" + p = subparsers.add_parser( + "shuffle", + help="Run shuffle benchmarks (hash, round-robin)", + description=( + "Shuffle benchmark runner. Tests different partitioning strategies " + "across Spark, Comet JVM, and Comet Native shuffle implementations." 
+ ), + ) + p.add_argument( + "--benchmark", required=True, + choices=list(shuffle.BENCHMARKS), + help="Shuffle benchmark to run", + ) + p.add_argument("--data", required=True, help="Path to input parquet data") + p.add_argument( + "--mode", required=True, + choices=["spark", "jvm", "native"], + help="Shuffle mode being tested", + ) + p.add_argument( + "--partitions", type=int, default=200, + help="Number of shuffle partitions (default: 200)", + ) + p.add_argument("--iterations", type=int, default=1, help="Number of iterations") + p.add_argument("--output", required=True, help="Directory for results JSON") + p.add_argument("--name", required=True, help="Prefix for result file") + _add_profiling_args(p) + + +def _run_shuffle(args): + """Execute the shuffle suite.""" + spark = create_session( + app_name=f"{args.name}-{args.benchmark}-{args.mode.upper()}", + spark_conf={}, # configs already set by spark-submit + ) + + profiler = _maybe_start_profiler(spark, args) + + timings = shuffle.run_shuffle( + spark, + benchmark=args.benchmark, + data_path=args.data, + mode=args.mode, + num_partitions=args.partitions, + iterations=args.iterations, + ) + + results = shuffle.build_results( + spark, + benchmark=args.benchmark, + data_path=args.data, + mode=args.mode, + name=args.name, + timings=timings, + ) + + shuffle.write_results(results, args.output, args.name, args.benchmark) + _maybe_stop_profiler(profiler, args.output, args.name, args.benchmark) + spark.stop() + + +# --------------------------------------------------------------------------- +# Micro subcommand +# --------------------------------------------------------------------------- + +def _add_micro_subparser(subparsers): + """Register the ``micro`` subcommand.""" + p = subparsers.add_parser( + "micro", + help="Run expression-level microbenchmarks", + description=( + "Microbenchmark runner. Generates a small dataset and times " + "individual SQL expressions." 
+ ), + ) + p.add_argument( + "--benchmark", required=True, + choices=list(micro.BENCHMARKS), + help="Microbenchmark to run", + ) + p.add_argument( + "--rows", type=int, default=1024, + help="Number of rows for data generation (default: 1024)", + ) + p.add_argument("--iterations", type=int, default=3, help="Number of iterations") + p.add_argument("--expression", help="Run a single expression by name") + p.add_argument("--output", required=True, help="Directory for results JSON") + p.add_argument("--name", required=True, help="Prefix for result file") + _add_profiling_args(p) + + +def _run_micro(args): + """Execute the micro suite.""" + spark = create_session( + app_name=f"{args.name}-{args.benchmark}", + spark_conf={}, # configs already set by spark-submit + ) + + profiler = _maybe_start_profiler(spark, args) + + timings = micro.run_micro( + spark, + benchmark=args.benchmark, + num_rows=args.rows, + iterations=args.iterations, + expression=args.expression, + ) + + results = micro.build_results( + spark, + benchmark=args.benchmark, + name=args.name, + timings=timings, + ) + + micro.write_results(results, args.output, args.name, args.benchmark) + _maybe_stop_profiler(profiler, args.output, args.name, args.benchmark) + spark.stop() + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def main(argv=None): + parser = argparse.ArgumentParser( + prog="benchmark-runner", + description="Unified benchmark runner for Apache DataFusion Comet", + ) + subparsers = parser.add_subparsers(dest="suite", required=True) + _add_tpc_subparser(subparsers) + _add_shuffle_subparser(subparsers) + _add_micro_subparser(subparsers) + + args = parser.parse_args(argv) + + if args.suite == "tpc": + _run_tpc(args) + elif args.suite == "shuffle": + _run_shuffle(args) + elif args.suite == "micro": + _run_micro(args) + else: + parser.error(f"Unknown suite: {args.suite}") + + +if __name__ == "__main__": + main() diff --git a/benchmarks/runner/config.py b/benchmarks/runner/config.py new file mode 100644 index 0000000000..ff03d622d3 --- /dev/null +++ b/benchmarks/runner/config.py @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Config loader for the unified benchmark runner. + +Reads key=value .conf files, merges them with precedence +(profile < engine < CLI overrides), and splits into spark vs runner configs. + +The ``runner.*`` namespace controls the shell wrapper (JAR paths, env vars, +result name) without colliding with Spark config keys. 
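+
+Any key outside the ``runner.`` namespace is treated as a Spark config and is
+forwarded to spark-submit as a ``--conf key=value`` pair.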
Examples: + runner.jars=${COMET_JAR} + runner.env.TZ=UTC + runner.name=comet +""" + +import os +import re +from typing import Dict, List, Tuple + + +def load_conf_file(path: str) -> Dict[str, str]: + """Read a key=value .conf file. + + - Blank lines and lines starting with ``#`` are skipped. + - ``${VAR}`` references are expanded from the environment. + - Values may optionally be quoted with single or double quotes. + """ + conf: Dict[str, str] = {} + with open(path) as f: + for line in f: + line = line.strip() + if not line or line.startswith("#"): + continue + key, _, value = line.partition("=") + key = key.strip() + value = value.strip() + if not key or not _: + continue + # Strip optional quotes + if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"): + value = value[1:-1] + # Expand ${VAR} references from environment + value = re.sub( + r"\$\{(\w+)\}", + lambda m: os.environ.get(m.group(1), m.group(0)), + value, + ) + conf[key] = value + return conf + + +def merge_configs( + profile_path: str = None, + engine_path: str = None, + cli_overrides: List[str] = None, +) -> Dict[str, str]: + """Merge configs with precedence: profile < engine < CLI overrides.""" + merged: Dict[str, str] = {} + if profile_path: + merged.update(load_conf_file(profile_path)) + if engine_path: + merged.update(load_conf_file(engine_path)) + for override in cli_overrides or []: + key, _, value = override.partition("=") + key = key.strip() + value = value.strip() + if key and _: + # Expand ${VAR} in CLI overrides too + value = re.sub( + r"\$\{(\w+)\}", + lambda m: os.environ.get(m.group(1), m.group(0)), + value, + ) + merged[key] = value + return merged + + +def split_config(merged: Dict[str, str]) -> Tuple[Dict[str, str], Dict[str, str]]: + """Separate ``runner.*`` keys from ``spark.*`` (and other) keys. + + Returns (spark_conf, runner_conf) where runner_conf has the + ``runner.`` prefix stripped. + """ + spark_conf: Dict[str, str] = {} + runner_conf: Dict[str, str] = {} + for key, value in merged.items(): + if key.startswith("runner."): + runner_conf[key[len("runner."):]] = value + else: + spark_conf[key] = value + return spark_conf, runner_conf diff --git a/benchmarks/runner/profiling.py b/benchmarks/runner/profiling.py new file mode 100644 index 0000000000..98aa56d493 --- /dev/null +++ b/benchmarks/runner/profiling.py @@ -0,0 +1,179 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Level 1 profiling hooks: JVM metrics via the Spark REST API. + +Polls ``/api/v1/applications/{appId}/executors`` at a configurable interval +and records executor memory metrics as a time-series CSV alongside the +benchmark results. + +Usage:: + + profiler = SparkMetricsProfiler(spark, interval_secs=2) + profiler.start() + # ... run benchmark ... 
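+    # (any Spark action run here is captured, e.g. df.collect())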
+ profiler.stop() + profiler.write_csv("/path/to/output/metrics.csv") +""" + +import csv +import threading +import time +from typing import Any, Dict, List, Optional + +from pyspark.sql import SparkSession + +try: + from urllib.request import urlopen + import json as _json + + def _fetch_json(url: str) -> Any: + with urlopen(url, timeout=5) as resp: + return _json.loads(resp.read().decode()) +except ImportError: + _fetch_json = None # type: ignore[assignment] + + +# Metrics we extract per executor from the REST API response +_EXECUTOR_METRICS = [ + "memoryUsed", + "maxMemory", + "totalOnHeapStorageMemory", + "usedOnHeapStorageMemory", + "totalOffHeapStorageMemory", + "usedOffHeapStorageMemory", +] + +# Metrics nested under peakMemoryMetrics (if available) +_PEAK_MEMORY_METRICS = [ + "JVMHeapMemory", + "JVMOffHeapMemory", + "OnHeapExecutionMemory", + "OffHeapExecutionMemory", + "OnHeapStorageMemory", + "OffHeapStorageMemory", + "OnHeapUnifiedMemory", + "OffHeapUnifiedMemory", + "ProcessTreeJVMRSSMemory", +] + + +class SparkMetricsProfiler: + """Periodically polls executor metrics from the Spark REST API.""" + + def __init__( + self, + spark: SparkSession, + interval_secs: float = 2.0, + ): + self._spark = spark + self._interval = interval_secs + self._samples: List[Dict[str, Any]] = [] + self._stop_event = threading.Event() + self._thread: Optional[threading.Thread] = None + self._start_time: float = 0.0 + + @property + def samples(self) -> List[Dict[str, Any]]: + """Return collected samples (each is a flat dict).""" + return list(self._samples) + + def _ui_url(self) -> Optional[str]: + """Return the Spark UI base URL, or None if unavailable.""" + url = self._spark.sparkContext.uiWebUrl + if url: + return url.rstrip("/") + return None + + def _app_id(self) -> str: + return self._spark.sparkContext.applicationId + + def _poll_once(self) -> None: + """Fetch executor metrics and append a timestamped sample.""" + base = self._ui_url() + if base is None or _fetch_json is None: + return + + url = f"{base}/api/v1/applications/{self._app_id()}/executors" + try: + executors = _fetch_json(url) + except Exception: + return + + elapsed = time.time() - self._start_time + for exc in executors: + row: Dict[str, Any] = { + "elapsed_secs": round(elapsed, 2), + "executor_id": exc.get("id", ""), + "is_active": exc.get("isActive", True), + } + for key in _EXECUTOR_METRICS: + row[key] = exc.get(key, 0) + + peak = exc.get("peakMemoryMetrics", {}) + for key in _PEAK_MEMORY_METRICS: + row[f"peak_{key}"] = peak.get(key, 0) + + self._samples.append(row) + + def _run(self) -> None: + """Background polling loop.""" + while not self._stop_event.is_set(): + self._poll_once() + self._stop_event.wait(self._interval) + + def start(self) -> None: + """Start background polling thread.""" + if self._thread is not None: + return + self._start_time = time.time() + self._stop_event.clear() + self._thread = threading.Thread( + target=self._run, name="spark-metrics-profiler", daemon=True + ) + self._thread.start() + print( + f"Profiler started (interval={self._interval}s, " + f"ui={self._ui_url()})" + ) + + def stop(self) -> None: + """Stop the polling thread and collect a final sample.""" + if self._thread is None: + return + self._stop_event.set() + self._thread.join(timeout=self._interval + 2) + self._thread = None + # One last poll to capture final state + self._poll_once() + print(f"Profiler stopped ({len(self._samples)} samples collected)") + + def write_csv(self, path: str) -> str: + """Write collected samples to a CSV 
file. Returns the path.""" + if not self._samples: + print("Profiler: no samples to write") + return path + + fieldnames = list(self._samples[0].keys()) + with open(path, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + for row in self._samples: + writer.writerow(row) + print(f"Profiler: wrote {len(self._samples)} samples to {path}") + return path diff --git a/benchmarks/runner/spark_session.py b/benchmarks/runner/spark_session.py new file mode 100644 index 0000000000..f2e21a464a --- /dev/null +++ b/benchmarks/runner/spark_session.py @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Thin wrapper around SparkSession.builder.""" + +from typing import Dict + +from pyspark.sql import SparkSession + + +def create_session(app_name: str, spark_conf: Dict[str, str]) -> SparkSession: + """Create (or retrieve) a SparkSession with the given config. + + When launched via spark-submit the configs are already set; this just + picks up the existing session. + """ + builder = SparkSession.builder.appName(app_name) + for key, value in spark_conf.items(): + builder = builder.config(key, value) + return builder.getOrCreate() diff --git a/benchmarks/suites/MICRO.md b/benchmarks/suites/MICRO.md new file mode 100644 index 0000000000..41c5fa2bc0 --- /dev/null +++ b/benchmarks/suites/MICRO.md @@ -0,0 +1,108 @@ + + +# Microbenchmark Suite + +Runs expression-level microbenchmarks that generate a small in-memory dataset +and time individual SQL expressions. Currently supports the string expression +benchmark (ported from `CometStringExpressionBenchmark.scala`). + +## Arguments + +| Argument | Required | Default | Description | +| -------------------- | -------- | ------- | ---------------------------------------------- | +| `--benchmark` | yes | | `string-expressions` | +| `--rows` | no | `1024` | Number of rows for data generation | +| `--iterations` | no | `3` | Number of timed iterations per expression | +| `--expression` | no | | Run a single expression by name | +| `--output` | yes | | Directory for results JSON | +| `--name` | auto | | Result file prefix (auto-injected by `run.py`) | +| `--profile` | no | | Enable JVM metrics profiling | +| `--profile-interval` | no | `2.0` | Profiling poll interval in seconds | + +## Examples + +### String expressions with Comet + +```bash +python benchmarks/run.py --engine comet --profile local \ + -- micro --benchmark string-expressions --output . --iterations 3 +``` + +### String expressions with vanilla Spark (baseline) + +```bash +python benchmarks/run.py --engine spark --profile local \ + -- micro --benchmark string-expressions --output . 
--iterations 3 +``` + +### String expressions with Gluten + +```bash +python benchmarks/run.py --engine gluten --profile local \ + -- micro --benchmark string-expressions --output . --iterations 3 +``` + +### Run a single expression + +```bash +python benchmarks/run.py --engine comet --profile local \ + -- micro --benchmark string-expressions --output . --expression ascii +``` + +### Compare results across engines + +```bash +# Run each engine +for engine in comet spark gluten; do + python benchmarks/run.py --engine $engine --profile local \ + -- micro --benchmark string-expressions --output . --iterations 3 +done + +# Generate comparison chart +python -m benchmarks.analysis.compare \ + comet-string-expressions-*.json spark-string-expressions-*.json \ + --labels Comet Spark --benchmark string-expressions +``` + +## Output Format + +Results are written as JSON with the filename `{name}-{benchmark}-{timestamp_millis}.json`: + +```json +{ + "engine": "datafusion-comet", + "benchmark": "string-expressions", + "spark_conf": { ... }, + "ascii": [0.12, 0.10, 0.08], + "bit_length": [0.05, 0.04, 0.04], + "lower": [0.15, 0.11, 0.07], + ... +} +``` + +Expression names are top-level keys, each mapping to a list of elapsed seconds +per iteration. This format is directly compatible with `analysis/compare.py`. + +## Available Expressions (string-expressions) + +ascii, bit_length, btrim, chr, concat, concat_ws, contains, endswith, initcap, +instr, length, like, lower, lpad, ltrim, octet_length, regexp_replace, repeat, +replace, reverse, rlike, rpad, rtrim, space, startswith, substring, translate, +trim, upper. diff --git a/benchmarks/suites/SHUFFLE.md b/benchmarks/suites/SHUFFLE.md new file mode 100644 index 0000000000..222fa4cc82 --- /dev/null +++ b/benchmarks/suites/SHUFFLE.md @@ -0,0 +1,132 @@ + + +# Shuffle Benchmark Suite + +Compares shuffle file sizes and performance across Spark, Comet JVM, and +Comet Native shuffle implementations using hash or round-robin partitioning. + +## Arguments + +| Argument | Required | Default | Description | +| -------------------- | -------- | ------- | -------------------------------------- | +| `--benchmark` | yes | | `shuffle-hash` or `shuffle-roundrobin` | +| `--data` | yes | | Path to input Parquet data | +| `--mode` | yes | | `spark`, `jvm`, or `native` | +| `--partitions` | no | `200` | Number of shuffle partitions | +| `--iterations` | no | `1` | Number of iterations | +| `--output` | yes | | Directory for results JSON | +| `--name` | auto | | Result file prefix (auto-injected) | +| `--profile` | no | | Enable JVM metrics profiling | +| `--profile-interval` | no | `2.0` | Profiling poll interval in seconds | + +## Generating Test Data + +Generate a Parquet dataset with a wide schema (100 columns including deeply +nested structs, arrays, and maps): + +```bash +$SPARK_HOME/bin/spark-submit \ + --master $SPARK_MASTER \ + --executor-memory 16g \ + benchmarks/generate_shuffle_data.py \ + --output /tmp/shuffle-benchmark-data \ + --rows 10000000 \ + --partitions 200 +``` + +> **Note**: The data generation script is a standalone PySpark job. It can be +> run with any Spark installation — no engine JARs required. + +## Examples + +### Hash shuffle — Spark baseline + +```bash +python benchmarks/run.py --engine spark-shuffle --profile local \ + -- shuffle --benchmark shuffle-hash --data /tmp/shuffle-data \ + --mode spark --output . 
--iterations 3 +``` + +### Hash shuffle — Comet JVM + +```bash +python benchmarks/run.py --engine comet-jvm-shuffle --profile local \ + -- shuffle --benchmark shuffle-hash --data /tmp/shuffle-data \ + --mode jvm --output . --iterations 3 +``` + +### Hash shuffle — Comet Native + +```bash +python benchmarks/run.py --engine comet-native-shuffle --profile local \ + -- shuffle --benchmark shuffle-hash --data /tmp/shuffle-data \ + --mode native --output . --iterations 3 +``` + +### Round-robin shuffle + +```bash +python benchmarks/run.py --engine comet-native-shuffle --profile local \ + -- shuffle --benchmark shuffle-roundrobin --data /tmp/shuffle-data \ + --mode native --output . --iterations 3 +``` + +### Run all three modes back-to-back + +```bash +for engine_mode in "spark-shuffle spark" "comet-jvm-shuffle jvm" "comet-native-shuffle native"; do + set -- $engine_mode + python benchmarks/run.py --engine "$1" --profile local \ + -- shuffle --benchmark shuffle-hash --data /tmp/shuffle-data \ + --mode "$2" --output . --iterations 3 +done +``` + +### With profiling + +```bash +python benchmarks/run.py --engine comet-native-shuffle --profile local \ + -- shuffle --benchmark shuffle-hash --data /tmp/shuffle-data \ + --mode native --output . --iterations 3 --profile --profile-interval 1.0 +``` + +## Output Format + +Results are written as JSON with the filename `{name}-{benchmark}-{timestamp_millis}.json`: + +```json +{ + "engine": "datafusion-comet", + "benchmark": "shuffle-hash", + "mode": "native", + "data_path": "/tmp/shuffle-data", + "spark_conf": { ... }, + "shuffle-hash": [ + {"duration_ms": 12345, "row_count": 10000000, "num_partitions": 200}, + {"duration_ms": 11234, "row_count": 10000000, "num_partitions": 200}, + {"duration_ms": 10987, "row_count": 10000000, "num_partitions": 200} + ] +} +``` + +## Checking Results + +Open the Spark UI (default: http://localhost:4040) during each benchmark run +to compare shuffle write sizes in the Stages tab. diff --git a/benchmarks/suites/TPC.md b/benchmarks/suites/TPC.md new file mode 100644 index 0000000000..7e7ff299b7 --- /dev/null +++ b/benchmarks/suites/TPC.md @@ -0,0 +1,139 @@ + + +# TPC-H / TPC-DS Benchmark Suite + +Runs TPC-H (22 queries) or TPC-DS (99 queries) benchmarks against Parquet +files or Iceberg tables. + +## Arguments + +| Argument | Required | Default | Description | +| -------------------- | -------- | --------- | ------------------------------------------------------- | +| `--benchmark` | yes | | `tpch` or `tpcds` | +| `--data` | \* | | Path to Parquet data files | +| `--catalog` | \* | | Iceberg catalog name (mutually exclusive with `--data`) | +| `--database` | no | `tpch` | Database name (only with `--catalog`) | +| `--format` | no | `parquet` | File format: parquet, csv, json (only with `--data`) | +| `--options` | no | `{}` | Spark reader options as JSON string | +| `--queries` | yes | | Path to directory containing `q1.sql` ... `qN.sql` | +| `--iterations` | no | `1` | Number of times to run all queries | +| `--output` | yes | | Directory for results JSON | +| `--name` | auto | | Result file prefix (auto-injected from engine config) | +| `--query` | no | | Run a single query number (1-based) | +| `--write` | no | | Write query results as Parquet to this path | +| `--profile` | no | | Enable JVM metrics profiling | +| `--profile-interval` | no | `2.0` | Profiling poll interval in seconds | + +`*` Either `--data` or `--catalog` is required, but not both. 
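+
+For example, `--format` and `--options` together let the suite read
+non-Parquet sources; a hypothetical CSV run might look like:
+
+```bash
+python benchmarks/run.py --engine comet --profile local \
+    -- tpc --benchmark tpch --data $TPCH_CSV_DATA --format csv \
+    --options '{"header": "true"}' --queries $TPCH_QUERIES \
+    --output . --iterations 1
+```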
+ +## Examples + +### TPC-H with Comet (standalone cluster) + +```bash +export SPARK_HOME=/opt/spark +export SPARK_MASTER=spark://hostname:7077 +export COMET_JAR=/path/to/comet.jar +export TPCH_DATA=/mnt/bigdata/tpch/sf100 +export TPCH_QUERIES=/mnt/bigdata/tpch/queries + +python benchmarks/run.py \ + --engine comet --profile standalone-tpch --restart-cluster \ + -- tpc --benchmark tpch --data $TPCH_DATA --queries $TPCH_QUERIES \ + --output . --iterations 1 +``` + +### TPC-H with vanilla Spark (baseline) + +```bash +python benchmarks/run.py \ + --engine spark --profile standalone-tpch --restart-cluster \ + -- tpc --benchmark tpch --data $TPCH_DATA --queries $TPCH_QUERIES \ + --output . --iterations 1 +``` + +### TPC-H with Iceberg tables + +First, create Iceberg tables from Parquet data: + +```bash +export ICEBERG_JAR=/path/to/iceberg-spark-runtime-3.5_2.12-1.8.1.jar +export ICEBERG_WAREHOUSE=/mnt/bigdata/iceberg-warehouse + +$SPARK_HOME/bin/spark-submit \ + --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1 \ + --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \ + --conf spark.sql.catalog.local.type=hadoop \ + --conf spark.sql.catalog.local.warehouse=$ICEBERG_WAREHOUSE \ + benchmarks/create-iceberg-tpch.py \ + --parquet-path $TPCH_DATA --catalog local --database tpch +``` + +Then run the benchmark with Comet's native Iceberg scanning: + +```bash +python benchmarks/run.py \ + --engine comet-iceberg --profile standalone-tpch \ + --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \ + --conf spark.sql.catalog.local.type=hadoop \ + --conf spark.sql.catalog.local.warehouse=$ICEBERG_WAREHOUSE \ + --conf spark.sql.defaultCatalog=local \ + --restart-cluster \ + -- tpc --benchmark tpch --catalog local --database tpch \ + --queries $TPCH_QUERIES --output . --iterations 1 +``` + +### Run a single query + +```bash +python benchmarks/run.py --engine comet --profile local \ + -- tpc --benchmark tpch --data $TPCH_DATA --queries $TPCH_QUERIES \ + --output . --query 1 +``` + +## Output Format + +Results are written as JSON with the filename `{name}-{benchmark}-{timestamp_millis}.json`: + +```json +{ + "engine": "datafusion-comet", + "benchmark": "tpch", + "query_path": "/path/to/queries", + "spark_conf": { ... }, + "data_path": "/path/to/data", + "1": [12.34], + "2": [5.67], + ... +} +``` + +Query keys are integers (serialised as strings by `json.dumps`). Each value +is a list of elapsed seconds per iteration. This format is compatible with +`analysis/compare.py` for chart generation. + +## Comparing Results + +```bash +python -m benchmarks.analysis.compare \ + comet-tpch-*.json spark-tpch-*.json \ + --labels Comet Spark --benchmark tpch \ + --title "TPC-H SF100" --output-dir ./charts +``` diff --git a/benchmarks/suites/__init__.py b/benchmarks/suites/__init__.py new file mode 100644 index 0000000000..0ccbeeeafb --- /dev/null +++ b/benchmarks/suites/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/benchmarks/suites/micro.py b/benchmarks/suites/micro.py new file mode 100644 index 0000000000..58c7a22283 --- /dev/null +++ b/benchmarks/suites/micro.py @@ -0,0 +1,172 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Microbenchmark suite. + +Ports expression-level benchmarks (e.g. CometStringExpressionBenchmark) to the +unified runner. Each benchmark generates a small dataset, runs SQL expressions +in a tight loop, and records per-iteration wall-clock times. +""" + +import json +import os +import shutil +import tempfile +import time +from datetime import datetime +from typing import Dict, List, Optional + +from pyspark.sql import SparkSession + + +# --------------------------------------------------------------------------- +# String expression benchmark +# --------------------------------------------------------------------------- + +STRING_EXPRESSIONS: List[tuple] = [ + ("ascii", "select ascii(c1) from parquetV1Table"), + ("bit_length", "select bit_length(c1) from parquetV1Table"), + ("btrim", "select btrim(c1) from parquetV1Table"), + ("chr", "select chr(c1) from parquetV1Table"), + ("concat", "select concat(c1, c1) from parquetV1Table"), + ("concat_ws", "select concat_ws(' ', c1, c1) from parquetV1Table"), + ("contains", "select contains(c1, '123') from parquetV1Table"), + ("endswith", "select endswith(c1, '9') from parquetV1Table"), + ("initcap", "select initCap(c1) from parquetV1Table"), + ("instr", "select instr(c1, '123') from parquetV1Table"), + ("length", "select length(c1) from parquetV1Table"), + ("like", "select c1 like '%123%' from parquetV1Table"), + ("lower", "select lower(c1) from parquetV1Table"), + ("lpad", "select lpad(c1, 150, 'x') from parquetV1Table"), + ("ltrim", "select ltrim(c1) from parquetV1Table"), + ("octet_length", "select octet_length(c1) from parquetV1Table"), + ("regexp_replace", "select regexp_replace(c1, '[0-9]', 'X') from parquetV1Table"), + ("repeat", "select repeat(c1, 3) from parquetV1Table"), + ("replace", "select replace(c1, '123', 'ab') from parquetV1Table"), + ("reverse", "select reverse(c1) from parquetV1Table"), + ("rlike", "select c1 rlike '[0-9]+' from parquetV1Table"), + ("rpad", "select rpad(c1, 150, 'x') from parquetV1Table"), + ("rtrim", "select rtrim(c1) from parquetV1Table"), + ("space", "select space(2) from parquetV1Table"), + ("startswith", "select startswith(c1, '1') 
from parquetV1Table"), + ("substring", "select substring(c1, 1, 100) from parquetV1Table"), + ("translate", "select translate(c1, '123456', 'aBcDeF') from parquetV1Table"), + ("trim", "select trim(c1) from parquetV1Table"), + ("upper", "select upper(c1) from parquetV1Table"), +] + +BENCHMARKS = { + "string-expressions": "String expression microbenchmarks (29 expressions)", +} + + +def prepare_string_table( + spark: SparkSession, num_rows: int, temp_dir: str +) -> None: + """Generate a string column table and register it as ``parquetV1Table``.""" + path = os.path.join(temp_dir, "string_data") + spark.range(num_rows).selectExpr( + "REPEAT(CAST(id AS STRING), 10) AS c1" + ).write.mode("overwrite").option("compression", "snappy").parquet(path) + spark.read.parquet(path).createOrReplaceTempView("parquetV1Table") + print(f"Generated {num_rows} rows in {path}") + + +def run_micro( + spark: SparkSession, + benchmark: str, + num_rows: int = 1024, + iterations: int = 3, + expression: Optional[str] = None, +) -> Dict[str, List[float]]: + """Run a microbenchmark and return ``{expr_name: [elapsed_secs, ...]}``.""" + if benchmark != "string-expressions": + raise ValueError( + f"Unknown micro benchmark: {benchmark}. " + f"Available: {', '.join(BENCHMARKS)}" + ) + + temp_dir = tempfile.mkdtemp(prefix="comet-micro-") + try: + prepare_string_table(spark, num_rows, temp_dir) + + expressions = STRING_EXPRESSIONS + if expression is not None: + expressions = [(n, sql) for n, sql in expressions if n == expression] + if not expressions: + valid = [n for n, _ in STRING_EXPRESSIONS] + raise ValueError( + f"Unknown expression: {expression}. Valid: {', '.join(valid)}" + ) + + timings: Dict[str, List[float]] = {} + + for expr_name, sql in expressions: + print(f"\n{'=' * 60}") + print(f"Expression: {expr_name}") + print(f"{'=' * 60}") + + for iteration in range(iterations): + spark.sparkContext.setJobDescription( + f"{benchmark} {expr_name} iter{iteration + 1}" + ) + start = time.time() + spark.sql(sql).foreach(lambda _: None) + elapsed = time.time() - start + print(f" Iteration {iteration + 1}: {elapsed:.4f}s") + timings.setdefault(expr_name, []).append(elapsed) + + return timings + finally: + shutil.rmtree(temp_dir, ignore_errors=True) + + +def build_results( + spark: SparkSession, + benchmark: str, + name: str, + timings: Dict[str, List[float]], +) -> Dict: + """Assemble the result dict for micro benchmarks.""" + conf_dict = {k: v for k, v in spark.sparkContext.getConf().getAll()} + + results: Dict = { + "engine": "datafusion-comet", + "benchmark": benchmark, + "spark_conf": conf_dict, + } + for expr_name, elapsed_list in timings.items(): + results[expr_name] = elapsed_list + + return results + + +def write_results( + results: Dict, + output_dir: str, + name: str, + benchmark: str, +) -> str: + """Write JSON results file. Returns the path written.""" + result_str = json.dumps(results, indent=4) + current_time_millis = int(datetime.now().timestamp() * 1000) + results_path = f"{output_dir}/{name}-{benchmark}-{current_time_millis}.json" + print(f"\nWriting results to {results_path}") + with open(results_path, "w") as f: + f.write(result_str) + return results_path diff --git a/benchmarks/suites/shuffle.py b/benchmarks/suites/shuffle.py new file mode 100644 index 0000000000..adabecf2be --- /dev/null +++ b/benchmarks/suites/shuffle.py @@ -0,0 +1,156 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Shuffle benchmark suite. + +Tests different partitioning strategies (hash, round-robin) across Spark, +Comet JVM, and Comet Native shuffle implementations. +""" + +import json +import time +from datetime import datetime +from typing import Any, Dict, List, Optional + +from pyspark.sql import DataFrame, SparkSession + + +BENCHMARKS = { + "shuffle-hash": "Shuffle all columns using hash partitioning on group_key", + "shuffle-roundrobin": "Shuffle all columns using round-robin partitioning", +} + + +def _repartition( + df: DataFrame, benchmark: str, num_partitions: int +) -> DataFrame: + """Apply the partitioning strategy for the given benchmark.""" + if benchmark == "shuffle-hash": + return df.repartition(num_partitions, "group_key") + elif benchmark == "shuffle-roundrobin": + return df.repartition(num_partitions) + else: + raise ValueError( + f"Unknown shuffle benchmark: {benchmark}. " + f"Available: {', '.join(BENCHMARKS)}" + ) + + +def run_shuffle( + spark: SparkSession, + benchmark: str, + data_path: str, + mode: str, + num_partitions: int = 200, + iterations: int = 1, +) -> Dict[str, List[Dict[str, Any]]]: + """Run a shuffle benchmark and return per-iteration results. + + Returns ``{benchmark_name: [{duration_ms, row_count, num_partitions}, ...]}`` + so the structure parallels TPC output (query -> list of timings). + """ + if benchmark not in BENCHMARKS: + raise ValueError( + f"Unknown shuffle benchmark: {benchmark}. 
" + f"Available: {', '.join(BENCHMARKS)}" + ) + + results: List[Dict[str, Any]] = [] + + # Read input data once + df = spark.read.parquet(data_path) + row_count = df.count() + + for iteration in range(iterations): + print(f"\n{'=' * 60}") + print(f"Shuffle benchmark: {benchmark} | Mode: {mode.upper()}") + print(f"Iteration {iteration + 1} of {iterations}") + print(f"{'=' * 60}") + print(f"Data path: {data_path}") + print(f"Rows: {row_count:,} | Partitions: {num_partitions}") + + # Print relevant Spark configuration + conf = spark.sparkContext.getConf() + print(f"Shuffle manager: {conf.get('spark.shuffle.manager', 'default')}") + print(f"Comet enabled: {conf.get('spark.comet.enabled', 'false')}") + print( + f"Comet shuffle enabled: " + f"{conf.get('spark.comet.exec.shuffle.enabled', 'false')}" + ) + print( + f"Comet shuffle mode: " + f"{conf.get('spark.comet.exec.shuffle.mode', 'not set')}" + ) + + spark.catalog.clearCache() + spark.sparkContext.setJobDescription(f"{benchmark} iter{iteration + 1}") + + start_time = time.time() + + repartitioned = _repartition(df, benchmark, num_partitions) + output_path = f"/tmp/shuffle-benchmark-output-{mode}-{benchmark}" + repartitioned.write.mode("overwrite").parquet(output_path) + print(f"Wrote repartitioned data to: {output_path}") + + duration_ms = int((time.time() - start_time) * 1000) + print(f"Duration: {duration_ms:,} ms") + + results.append({ + "duration_ms": duration_ms, + "row_count": row_count, + "num_partitions": num_partitions, + }) + + return {benchmark: results} + + +def build_results( + spark: SparkSession, + benchmark: str, + data_path: str, + mode: str, + name: str, + timings: Dict[str, List[Dict[str, Any]]], +) -> Dict: + """Assemble the result dict for shuffle benchmarks.""" + conf_dict = {k: v for k, v in spark.sparkContext.getConf().getAll()} + + return { + "engine": "datafusion-comet", + "benchmark": benchmark, + "mode": mode, + "data_path": data_path, + "spark_conf": conf_dict, + **timings, + } + + +def write_results( + results: Dict, + output_dir: str, + name: str, + benchmark: str, +) -> str: + """Write JSON results file. Returns the path written.""" + result_str = json.dumps(results, indent=4) + current_time_millis = int(datetime.now().timestamp() * 1000) + results_path = f"{output_dir}/{name}-{benchmark}-{current_time_millis}.json" + print(f"\nWriting results to {results_path}") + with open(results_path, "w") as f: + f.write(result_str) + return results_path diff --git a/benchmarks/suites/tpc.py b/benchmarks/suites/tpc.py new file mode 100644 index 0000000000..19c020a135 --- /dev/null +++ b/benchmarks/suites/tpc.py @@ -0,0 +1,215 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +"""TPC-H / TPC-DS benchmark suite.""" + +import json +import time +from datetime import datetime +from typing import Dict, List, Optional + +from pyspark.sql import SparkSession + + +# Table definitions per benchmark +TPCH_TABLES = [ + "customer", "lineitem", "nation", "orders", + "part", "partsupp", "region", "supplier", +] + +TPCDS_TABLES = [ + "call_center", "catalog_page", "catalog_returns", "catalog_sales", + "customer", "customer_address", "customer_demographics", "date_dim", + "time_dim", "household_demographics", "income_band", "inventory", + "item", "promotion", "reason", "ship_mode", "store", "store_returns", + "store_sales", "warehouse", "web_page", "web_returns", "web_sales", + "web_site", +] + +BENCHMARK_META = { + "tpch": {"num_queries": 22, "tables": TPCH_TABLES}, + "tpcds": {"num_queries": 99, "tables": TPCDS_TABLES}, +} + + +def dedup_columns(df): + """Rename duplicate column aliases: a, a, b, b -> a, a_1, b, b_1.""" + counts: Dict[str, int] = {} + new_cols: List[str] = [] + for c in df.columns: + if c not in counts: + counts[c] = 0 + new_cols.append(c) + else: + counts[c] += 1 + new_cols.append(f"{c}_{counts[c]}") + return df.toDF(*new_cols) + + +def register_tables( + spark: SparkSession, + benchmark: str, + data_path: Optional[str], + catalog: Optional[str], + database: str, + file_format: str, + reader_options: Optional[Dict[str, str]], +) -> bool: + """Register TPC tables as temp views. + + Returns True when using Iceberg catalog, False for file-based tables. + """ + if benchmark not in BENCHMARK_META: + raise ValueError(f"Invalid benchmark: {benchmark}") + tables = BENCHMARK_META[benchmark]["tables"] + using_iceberg = catalog is not None + + for table in tables: + if using_iceberg: + source = f"{catalog}.{database}.{table}" + print(f"Registering table {table} from {source}") + df = spark.table(source) + else: + source = f"{data_path}/{table}.{file_format}" + print(f"Registering table {table} from {source}") + df = spark.read.format(file_format).options(**(reader_options or {})).load(source) + df.createOrReplaceTempView(table) + + return using_iceberg + + +def run_queries( + spark: SparkSession, + benchmark: str, + query_path: str, + iterations: int, + query_num: Optional[int] = None, + write_path: Optional[str] = None, +) -> Dict[int, List[float]]: + """Execute TPC queries and return {query_num: [elapsed_secs_per_iter]}.""" + meta = BENCHMARK_META[benchmark] + num_queries = meta["num_queries"] + results: Dict[int, List[float]] = {} + + for iteration in range(iterations): + print(f"\n{'=' * 60}") + print(f"Starting iteration {iteration + 1} of {iterations}") + print(f"{'=' * 60}") + iter_start_time = time.time() + + if query_num is not None: + if query_num < 1 or query_num > num_queries: + raise ValueError( + f"Query number {query_num} out of range. 
" + f"Valid: 1-{num_queries} for {benchmark}" + ) + queries_to_run = [query_num] + else: + queries_to_run = range(1, num_queries + 1) + + for query in queries_to_run: + spark.sparkContext.setJobDescription(f"{benchmark} q{query}") + path = f"{query_path}/q{query}.sql" + print(f"\nRunning query {query} from {path}") + + with open(path, "r") as f: + text = f.read() + queries_sql = text.split(";") + + start_time = time.time() + for sql in queries_sql: + sql = sql.strip().replace("create view", "create temp view") + if len(sql) > 0: + print(f"Executing: {sql[:100]}...") + df = spark.sql(sql) + df.explain("formatted") + + if write_path is not None: + if len(df.columns) > 0: + output_path = f"{write_path}/q{query}" + deduped = dedup_columns(df) + deduped.orderBy(*deduped.columns).coalesce(1).write.mode( + "overwrite" + ).parquet(output_path) + print(f"Results written to {output_path}") + else: + rows = df.collect() + print(f"Query {query} returned {len(rows)} rows") + + end_time = time.time() + elapsed = end_time - start_time + print(f"Query {query} took {elapsed:.2f} seconds") + + results.setdefault(query, []).append(elapsed) + + iter_end_time = time.time() + print( + f"\nIteration {iteration + 1} took " + f"{iter_end_time - iter_start_time:.2f} seconds" + ) + + return results + + +def build_results( + spark: SparkSession, + benchmark: str, + query_path: str, + data_path: Optional[str], + catalog: Optional[str], + database: str, + using_iceberg: bool, + name: str, + timings: Dict[int, List[float]], +) -> Dict: + """Assemble the result dict with the same schema as tpcbench.py.""" + conf_dict = {k: v for k, v in spark.sparkContext.getConf().getAll()} + + results: Dict = { + "engine": "datafusion-comet", + "benchmark": benchmark, + "query_path": query_path, + "spark_conf": conf_dict, + } + if using_iceberg: + results["catalog"] = catalog + results["database"] = database + else: + results["data_path"] = data_path + + # Integer query keys — json.dumps serialises them as strings, matching + # the format that generate-comparison.py expects (str(query)). + for query, elapsed_list in timings.items(): + results[query] = elapsed_list + + return results + + +def write_results( + results: Dict, + output_dir: str, + name: str, + benchmark: str, +) -> str: + """Write JSON results file. Returns the path written.""" + result_str = json.dumps(results, indent=4) + current_time_millis = int(datetime.now().timestamp() * 1000) + results_path = f"{output_dir}/{name}-{benchmark}-{current_time_millis}.json" + print(f"\nWriting results to {results_path}") + with open(results_path, "w") as f: + f.write(result_str) + return results_path diff --git a/dev/benchmarks/.gitignore b/dev/benchmarks/.gitignore deleted file mode 100644 index 477aaef0c3..0000000000 --- a/dev/benchmarks/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.json -*.png \ No newline at end of file diff --git a/dev/benchmarks/README.md b/dev/benchmarks/README.md deleted file mode 100644 index b3ea674199..0000000000 --- a/dev/benchmarks/README.md +++ /dev/null @@ -1,151 +0,0 @@ - - -# Comet Benchmarking Scripts - -This directory contains scripts used for generating benchmark results that are published in this repository and in -the Comet documentation. - -For full instructions on running these benchmarks on an EC2 instance, see the [Comet Benchmarking on EC2 Guide]. 
- -[Comet Benchmarking on EC2 Guide]: https://datafusion.apache.org/comet/contributor-guide/benchmarking_aws_ec2.html - -## Example usage - -Set Spark environment variables: - -```shell -export SPARK_HOME=/opt/spark-3.5.3-bin-hadoop3/ -export SPARK_MASTER=spark://yourhostname:7077 -``` - -Set path to queries and data: - -```shell -export TPCH_QUERIES=/mnt/bigdata/tpch/queries/ -export TPCH_DATA=/mnt/bigdata/tpch/sf100/ -``` - -Run Spark benchmark: - -```shell -export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64 -sudo ./drop-caches.sh -./spark-tpch.sh -``` - -Run Comet benchmark: - -```shell -export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64 -export COMET_JAR=/opt/comet/comet-spark-spark3.5_2.12-0.10.0.jar -sudo ./drop-caches.sh -./comet-tpch.sh -``` - -Run Gluten benchmark: - -```shell -export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 -export GLUTEN_JAR=/opt/gluten/gluten-velox-bundle-spark3.5_2.12-linux_amd64-1.4.0.jar -sudo ./drop-caches.sh -./gluten-tpch.sh -``` - -Generating charts: - -```shell -python3 generate-comparison.py --benchmark tpch --labels "Spark 3.5.3" "Comet 0.9.0" "Gluten 1.4.0" --title "TPC-H @ 100 GB (single executor, 8 cores, local Parquet files)" spark-tpch-1752338506381.json comet-tpch-1752337818039.json gluten-tpch-1752337474344.json -``` - -## Iceberg Benchmarking - -Comet includes native Iceberg support via iceberg-rust integration. This enables benchmarking TPC-H queries -against Iceberg tables with native scan acceleration. - -### Prerequisites - -Download the Iceberg Spark runtime JAR (required for running the benchmark): - -```shell -wget https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.5_2.12/1.8.1/iceberg-spark-runtime-3.5_2.12-1.8.1.jar -export ICEBERG_JAR=/path/to/iceberg-spark-runtime-3.5_2.12-1.8.1.jar -``` - -Note: Table creation uses `--packages` which auto-downloads the dependency. - -### Create Iceberg TPC-H tables - -Convert existing Parquet TPC-H data to Iceberg format: - -```shell -export ICEBERG_WAREHOUSE=/mnt/bigdata/iceberg-warehouse -export ICEBERG_CATALOG=${ICEBERG_CATALOG:-local} - -$SPARK_HOME/bin/spark-submit \ - --master $SPARK_MASTER \ - --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1 \ - --conf spark.driver.memory=8G \ - --conf spark.executor.instances=1 \ - --conf spark.executor.cores=8 \ - --conf spark.cores.max=8 \ - --conf spark.executor.memory=16g \ - --conf spark.sql.catalog.${ICEBERG_CATALOG}=org.apache.iceberg.spark.SparkCatalog \ - --conf spark.sql.catalog.${ICEBERG_CATALOG}.type=hadoop \ - --conf spark.sql.catalog.${ICEBERG_CATALOG}.warehouse=$ICEBERG_WAREHOUSE \ - create-iceberg-tpch.py \ - --parquet-path $TPCH_DATA \ - --catalog $ICEBERG_CATALOG \ - --database tpch -``` - -### Run Iceberg benchmark - -```shell -export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64 -export COMET_JAR=/opt/comet/comet-spark-spark3.5_2.12-0.10.0.jar -export ICEBERG_JAR=/path/to/iceberg-spark-runtime-3.5_2.12-1.8.1.jar -export ICEBERG_WAREHOUSE=/mnt/bigdata/iceberg-warehouse -export TPCH_QUERIES=/mnt/bigdata/tpch/queries/ -sudo ./drop-caches.sh -./comet-tpch-iceberg.sh -``` - -The benchmark uses `spark.comet.scan.icebergNative.enabled=true` to enable Comet's native iceberg-rust -integration. Verify native scanning is active by checking for `CometIcebergNativeScanExec` in the -physical plan output. 
- -### Iceberg-specific options - -| Environment Variable | Default | Description | -| -------------------- | ---------- | ----------------------------------- | -| `ICEBERG_CATALOG` | `local` | Iceberg catalog name | -| `ICEBERG_DATABASE` | `tpch` | Database containing TPC-H tables | -| `ICEBERG_WAREHOUSE` | (required) | Path to Iceberg warehouse directory | - -### Comparing Parquet vs Iceberg performance - -Run both benchmarks and compare: - -```shell -python3 generate-comparison.py --benchmark tpch \ - --labels "Comet (Parquet)" "Comet (Iceberg)" \ - --title "TPC-H @ 100 GB: Parquet vs Iceberg" \ - comet-tpch-*.json comet-iceberg-tpch-*.json -``` diff --git a/dev/benchmarks/blaze-tpcds.sh b/dev/benchmarks/blaze-tpcds.sh deleted file mode 100755 index 90a4a48468..0000000000 --- a/dev/benchmarks/blaze-tpcds.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -$SPARK_HOME/sbin/stop-master.sh -$SPARK_HOME/sbin/stop-worker.sh - -$SPARK_HOME/sbin/start-master.sh -$SPARK_HOME/sbin/start-worker.sh $SPARK_MASTER - -$SPARK_HOME/bin/spark-submit \ - --master $SPARK_MASTER \ - --jars $BLAZE_JAR \ - --driver-class-path $BLAZE_JAR \ - --conf spark.driver.memory=8G \ - --conf spark.executor.instances=2 \ - --conf spark.executor.cores=8 \ - --conf spark.cores.max=16 \ - --conf spark.executor.memory=16g \ - --conf spark.executor.memoryOverhead=16g \ - --conf spark.memory.offHeap.enabled=false \ - --conf spark.eventLog.enabled=true \ - --conf spark.driver.extraClassPath=$BLAZE_JAR \ - --conf spark.executor.extraClassPath=$BLAZE_JAR \ - --conf spark.sql.extensions=org.apache.spark.sql.blaze.BlazeSparkSessionExtension \ - --conf spark.shuffle.manager=org.apache.spark.sql.execution.blaze.shuffle.BlazeShuffleManager \ - --conf spark.blaze.enable=true \ - --conf spark.blaze.forceShuffledHashJoin=true \ - --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \ - --conf spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.DefaultAWSCredentialsProviderChain \ - tpcbench.py \ - --name blaze \ - --benchmark tpcds \ - --data $TPCDS_DATA \ - --queries $TPCDS_QUERIES \ - --output . \ - --iterations 1 diff --git a/dev/benchmarks/blaze-tpch.sh b/dev/benchmarks/blaze-tpch.sh deleted file mode 100755 index 2c6878737d..0000000000 --- a/dev/benchmarks/blaze-tpch.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -$SPARK_HOME/sbin/stop-master.sh -$SPARK_HOME/sbin/stop-worker.sh - -$SPARK_HOME/sbin/start-master.sh -$SPARK_HOME/sbin/start-worker.sh $SPARK_MASTER - -$SPARK_HOME/bin/spark-submit \ - --master $SPARK_MASTER \ - --jars $BLAZE_JAR \ - --driver-class-path $BLAZE_JAR \ - --conf spark.driver.memory=8G \ - --conf spark.executor.instances=1 \ - --conf spark.executor.cores=8 \ - --conf spark.cores.max=8 \ - --conf spark.executor.memory=16g \ - --conf spark.executor.memoryOverhead=16g \ - --conf spark.memory.offHeap.enabled=false \ - --conf spark.eventLog.enabled=true \ - --conf spark.driver.extraClassPath=$BLAZE_JAR \ - --conf spark.executor.extraClassPath=$BLAZE_JAR \ - --conf spark.sql.extensions=org.apache.spark.sql.blaze.BlazeSparkSessionExtension \ - --conf spark.shuffle.manager=org.apache.spark.sql.execution.blaze.shuffle.BlazeShuffleManager \ - --conf spark.blaze.enable=true \ - --conf spark.blaze.forceShuffledHashJoin=true \ - --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \ - --conf spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.DefaultAWSCredentialsProviderChain \ - tpcbench.py \ - --name blaze \ - --benchmark tpch \ - --data $TPCH_DATA \ - --queries $TPCH_QUERIES \ - --output . \ - --iterations 1 diff --git a/dev/benchmarks/comet-tpcds.sh b/dev/benchmarks/comet-tpcds.sh deleted file mode 100755 index b55b27188c..0000000000 --- a/dev/benchmarks/comet-tpcds.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -$SPARK_HOME/sbin/stop-master.sh -$SPARK_HOME/sbin/stop-worker.sh - -$SPARK_HOME/sbin/start-master.sh -$SPARK_HOME/sbin/start-worker.sh $SPARK_MASTER - -$SPARK_HOME/bin/spark-submit \ - --master $SPARK_MASTER \ - --jars $COMET_JAR \ - --driver-class-path $COMET_JAR \ - --conf spark.driver.memory=8G \ - --conf spark.executor.instances=2 \ - --conf spark.executor.cores=8 \ - --conf spark.cores.max=16 \ - --conf spark.executor.memory=16g \ - --conf spark.memory.offHeap.enabled=true \ - --conf spark.memory.offHeap.size=16g \ - --conf spark.eventLog.enabled=true \ - --conf spark.driver.extraClassPath=$COMET_JAR \ - --conf spark.executor.extraClassPath=$COMET_JAR \ - --conf spark.plugins=org.apache.spark.CometPlugin \ - --conf spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager \ - --conf spark.comet.scan.impl=native_datafusion \ - --conf spark.comet.expression.Cast.allowIncompatible=true \ - --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \ - --conf spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.DefaultAWSCredentialsProviderChain \ - tpcbench.py \ - --name comet \ - --benchmark tpcds \ - --data $TPCDS_DATA \ - --queries $TPCDS_QUERIES \ - --output . \ - --iterations 1 diff --git a/dev/benchmarks/comet-tpch-iceberg.sh b/dev/benchmarks/comet-tpch-iceberg.sh deleted file mode 100755 index 7907125c82..0000000000 --- a/dev/benchmarks/comet-tpch-iceberg.sh +++ /dev/null @@ -1,114 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -# TPC-H benchmark using Iceberg tables with Comet's native iceberg-rust integration. 
-#
-# Required environment variables:
-#   SPARK_HOME        - Path to Spark installation
-#   SPARK_MASTER      - Spark master URL (e.g., spark://localhost:7077)
-#   COMET_JAR         - Path to Comet JAR
-#   ICEBERG_JAR       - Path to Iceberg Spark runtime JAR
-#   ICEBERG_WAREHOUSE - Path to Iceberg warehouse directory
-#   TPCH_QUERIES      - Path to TPC-H query files
-#
-# Optional:
-#   ICEBERG_CATALOG   - Catalog name (default: local)
-#   ICEBERG_DATABASE  - Database name (default: tpch)
-#
-# Setup (run once to create Iceberg tables from Parquet):
-#   $SPARK_HOME/bin/spark-submit \
-#     --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1 \
-#     --conf spark.sql.catalog.local=org.apache.iceberg.spark.SparkCatalog \
-#     --conf spark.sql.catalog.local.type=hadoop \
-#     --conf spark.sql.catalog.local.warehouse=$ICEBERG_WAREHOUSE \
-#     create-iceberg-tpch.py \
-#     --parquet-path $TPCH_DATA \
-#     --catalog local \
-#     --database tpch
-
-set -e
-
-# Defaults
-ICEBERG_CATALOG=${ICEBERG_CATALOG:-local}
-ICEBERG_DATABASE=${ICEBERG_DATABASE:-tpch}
-
-# Validate required variables
-if [ -z "$SPARK_HOME" ]; then
-    echo "Error: SPARK_HOME is not set"
-    exit 1
-fi
-if [ -z "$SPARK_MASTER" ]; then
-    echo "Error: SPARK_MASTER is not set"
-    exit 1
-fi
-if [ -z "$COMET_JAR" ]; then
-    echo "Error: COMET_JAR is not set"
-    exit 1
-fi
-if [ -z "$ICEBERG_JAR" ]; then
-    echo "Error: ICEBERG_JAR is not set"
-    echo "Download from: https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.5_2.12/1.8.1/"
-    exit 1
-fi
-if [ -z "$ICEBERG_WAREHOUSE" ]; then
-    echo "Error: ICEBERG_WAREHOUSE is not set"
-    exit 1
-fi
-if [ -z "$TPCH_QUERIES" ]; then
-    echo "Error: TPCH_QUERIES is not set"
-    exit 1
-fi
-
-$SPARK_HOME/sbin/stop-master.sh 2>/dev/null || true
-$SPARK_HOME/sbin/stop-worker.sh 2>/dev/null || true
-
-$SPARK_HOME/sbin/start-master.sh
-$SPARK_HOME/sbin/start-worker.sh $SPARK_MASTER
-
-$SPARK_HOME/bin/spark-submit \
-    --master $SPARK_MASTER \
-    --jars $COMET_JAR,$ICEBERG_JAR \
-    --driver-class-path $COMET_JAR:$ICEBERG_JAR \
-    --conf spark.driver.memory=8G \
-    --conf spark.executor.instances=1 \
-    --conf spark.executor.cores=8 \
-    --conf spark.cores.max=8 \
-    --conf spark.executor.memory=16g \
-    --conf spark.memory.offHeap.enabled=true \
-    --conf spark.memory.offHeap.size=16g \
-    --conf spark.eventLog.enabled=true \
-    --conf spark.driver.extraClassPath=$COMET_JAR:$ICEBERG_JAR \
-    --conf spark.executor.extraClassPath=$COMET_JAR:$ICEBERG_JAR \
-    --conf spark.plugins=org.apache.spark.CometPlugin \
-    --conf spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager \
-    --conf spark.comet.exec.replaceSortMergeJoin=true \
-    --conf spark.comet.expression.Cast.allowIncompatible=true \
-    --conf spark.comet.enabled=true \
-    --conf spark.comet.exec.enabled=true \
-    --conf spark.comet.scan.icebergNative.enabled=true \
-    --conf spark.comet.explainFallback.enabled=true \
-    --conf spark.sql.catalog.${ICEBERG_CATALOG}=org.apache.iceberg.spark.SparkCatalog \
-    --conf spark.sql.catalog.${ICEBERG_CATALOG}.type=hadoop \
-    --conf spark.sql.catalog.${ICEBERG_CATALOG}.warehouse=$ICEBERG_WAREHOUSE \
-    --conf spark.sql.defaultCatalog=${ICEBERG_CATALOG} \
-    tpcbench.py \
-    --name comet-iceberg \
-    --benchmark tpch \
-    --catalog $ICEBERG_CATALOG \
-    --database $ICEBERG_DATABASE \
-    --queries $TPCH_QUERIES \
-    --output .
\ - --iterations 1 diff --git a/dev/benchmarks/comet-tpch.sh b/dev/benchmarks/comet-tpch.sh deleted file mode 100755 index a748a02319..0000000000 --- a/dev/benchmarks/comet-tpch.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -$SPARK_HOME/sbin/stop-master.sh -$SPARK_HOME/sbin/stop-worker.sh - -$SPARK_HOME/sbin/start-master.sh -$SPARK_HOME/sbin/start-worker.sh $SPARK_MASTER - -$SPARK_HOME/bin/spark-submit \ - --master $SPARK_MASTER \ - --jars $COMET_JAR \ - --driver-class-path $COMET_JAR \ - --conf spark.driver.memory=8G \ - --conf spark.executor.instances=1 \ - --conf spark.executor.cores=8 \ - --conf spark.cores.max=8 \ - --conf spark.executor.memory=16g \ - --conf spark.memory.offHeap.enabled=true \ - --conf spark.memory.offHeap.size=16g \ - --conf spark.eventLog.enabled=true \ - --conf spark.driver.extraClassPath=$COMET_JAR \ - --conf spark.executor.extraClassPath=$COMET_JAR \ - --conf spark.plugins=org.apache.spark.CometPlugin \ - --conf spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager \ - --conf spark.comet.scan.impl=native_datafusion \ - --conf spark.comet.exec.replaceSortMergeJoin=true \ - --conf spark.comet.expression.Cast.allowIncompatible=true \ - --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \ - --conf spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.DefaultAWSCredentialsProviderChain \ - tpcbench.py \ - --name comet \ - --benchmark tpch \ - --data $TPCH_DATA \ - --queries $TPCH_QUERIES \ - --output . \ - --iterations 1 \ - --format parquet diff --git a/dev/benchmarks/generate-comparison.py b/dev/benchmarks/generate-comparison.py deleted file mode 100644 index eb57cc1e45..0000000000 --- a/dev/benchmarks/generate-comparison.py +++ /dev/null @@ -1,229 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import argparse -import json -import matplotlib.pyplot as plt -import numpy as np - -def geomean(data): - return np.prod(data) ** (1 / len(data)) - -def generate_query_rel_speedup_chart(baseline, comparison, label1: str, label2: str, benchmark: str, title: str): - results = [] - for query in range(1, query_count(benchmark)+1): - if query == 999: - continue - a = np.median(np.array(baseline[str(query)])) - b = np.median(np.array(comparison[str(query)])) - if a > b: - speedup = a/b-1 - else: - speedup = -(1/(a/b)-1) - results.append(("q" + str(query), round(speedup*100, 0))) - - results = sorted(results, key=lambda x: -x[1]) - - queries, speedups = zip(*results) - - # Create figure and axis - if benchmark == "tpch": - fig, ax = plt.subplots(figsize=(10, 6)) - else: - fig, ax = plt.subplots(figsize=(35, 10)) - - # Create bar chart - bars = ax.bar(queries, speedups, color='skyblue') - - # Add text annotations - for bar, speedup in zip(bars, speedups): - yval = bar.get_height() - if yval >= 0: - ax.text(bar.get_x() + bar.get_width() / 2.0, min(800, yval+5), f'{yval:.0f}%', va='bottom', ha='center', fontsize=8, - color='blue', rotation=90) - else: - ax.text(bar.get_x() + bar.get_width() / 2.0, yval, f'{yval:.0f}%', va='top', ha='center', fontsize=8, - color='blue', rotation=90) - - # Add title and labels - ax.set_title(label2 + " speedup over " + label1 + " (" + title + ")") - ax.set_ylabel('Speedup Percentage (100% speedup = 2x faster)') - ax.set_xlabel('Query') - - # Customize the y-axis to handle both positive and negative values better - ax.axhline(0, color='black', linewidth=0.8) - min_value = (min(speedups) // 100) * 100 - max_value = ((max(speedups) // 100) + 1) * 100 + 50 - if benchmark == "tpch": - ax.set_ylim(min_value, max_value) - else: - # TODO improve this - ax.set_ylim(-250, 300) - - # Show grid for better readability - ax.yaxis.grid(True) - - # Save the plot as an image file - plt.savefig(f'{benchmark}_queries_speedup_rel.png', format='png') - -def generate_query_abs_speedup_chart(baseline, comparison, label1: str, label2: str, benchmark: str, title: str): - results = [] - for query in range(1, query_count(benchmark)+1): - if query == 999: - continue - a = np.median(np.array(baseline[str(query)])) - b = np.median(np.array(comparison[str(query)])) - speedup = a-b - results.append(("q" + str(query), round(speedup, 1))) - - results = sorted(results, key=lambda x: -x[1]) - - queries, speedups = zip(*results) - - # Create figure and axis - if benchmark == "tpch": - fig, ax = plt.subplots(figsize=(10, 6)) - else: - fig, ax = plt.subplots(figsize=(35, 10)) - - # Create bar chart - bars = ax.bar(queries, speedups, color='skyblue') - - # Add text annotations - for bar, speedup in zip(bars, speedups): - yval = bar.get_height() - if yval >= 0: - ax.text(bar.get_x() + bar.get_width() / 2.0, min(800, yval+5), f'{yval:.1f}', va='bottom', ha='center', fontsize=8, - color='blue', rotation=90) - else: - ax.text(bar.get_x() + bar.get_width() / 2.0, yval, f'{yval:.1f}', va='top', ha='center', fontsize=8, - color='blue', rotation=90) - - # Add title and labels - ax.set_title(label2 + " speedup over " + label1 + " (" + title + ")") - ax.set_ylabel('Speedup (in seconds)') - ax.set_xlabel('Query') - - # Customize the y-axis to handle both positive and negative values better - ax.axhline(0, color='black', linewidth=0.8) - min_value = min(speedups) * 2 - 20 - max_value = max(speedups) * 1.5 - ax.set_ylim(min_value, max_value) - - # Show grid for better readability - ax.yaxis.grid(True) - - # Save the 
plot as an image file
-    plt.savefig(f'{benchmark}_queries_speedup_abs.png', format='png')
-
-def generate_query_comparison_chart(results, labels, benchmark: str, title: str):
-    queries = []
-    benches = []
-    for _ in results:
-        benches.append([])
-    for query in range(1, query_count(benchmark)+1):
-        if query == 999:
-            continue
-        queries.append("q" + str(query))
-        for i in range(0, len(results)):
-            benches[i].append(np.median(np.array(results[i][str(query)])))
-
-    # Define the width of the bars
-    bar_width = 0.3
-
-    # Define the positions of the bars on the x-axis
-    index = np.arange(len(queries)) * 1.5
-
-    # Create a bar chart
-    if benchmark == "tpch":
-        fig, ax = plt.subplots(figsize=(15, 6))
-    else:
-        fig, ax = plt.subplots(figsize=(35, 6))
-
-    for i in range(0, len(results)):
-        bar = ax.bar(index + i * bar_width, benches[i], bar_width, label=labels[i])
-
-    # Add labels, title, and legend
-    ax.set_title(title)
-    ax.set_xlabel('Queries')
-    ax.set_ylabel('Query Time (seconds)')
-    ax.set_xticks(index + bar_width / 2)
-    ax.set_xticklabels(queries)
-    ax.legend()
-
-    # Save the plot as an image file
-    plt.savefig(f'{benchmark}_queries_compare.png', format='png')
-
-def generate_summary(results, labels, benchmark: str, title: str):
-    timings = []
-    for _ in results:
-        timings.append(0)
-
-    num_queries = query_count(benchmark)
-    for query in range(1, num_queries + 1):
-        if query == 999:
-            continue
-        for i in range(0, len(results)):
-            timings[i] += np.median(np.array(results[i][str(query)]))
-
-    # Create figure and axis
-    fig, ax = plt.subplots()
-    fig.set_size_inches(10, 6)
-
-    # Add title and labels
-    ax.set_title(title)
-    ax.set_ylabel(f'Time in seconds to run all {num_queries} {benchmark} queries (lower is better)')
-
-    times = [round(x, 0) for x in timings]
-
-    # Create bar chart
-    bars = ax.bar(labels, times, color='skyblue', width=0.8)
-
-    # Add text annotations
-    for bar in bars:
-        yval = bar.get_height()
-        ax.text(bar.get_x() + bar.get_width() / 2.0, yval, f'{yval}', va='bottom')  # va: vertical alignment
-
-    plt.savefig(f'{benchmark}_allqueries.png', format='png')
-
-def query_count(benchmark: str):
-    if benchmark == "tpch":
-        return 22
-    elif benchmark == "tpcds":
-        return 99
-    else:
-        raise ValueError("invalid benchmark name")
-
-def main(files, labels, benchmark: str, title: str):
-    results = []
-    for filename in files:
-        with open(filename) as f:
-            results.append(json.load(f))
-    generate_summary(results, labels, benchmark, title)
-    generate_query_comparison_chart(results, labels, benchmark, title)
-    if len(files) == 2:
-        generate_query_abs_speedup_chart(results[0], results[1], labels[0], labels[1], benchmark, title)
-        generate_query_rel_speedup_chart(results[0], results[1], labels[0], labels[1], benchmark, title)
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Generate comparison')
-    parser.add_argument('filenames', nargs='+', type=str, help='JSON result files')
-    parser.add_argument('--labels', nargs='+', type=str, help='Labels')
-    parser.add_argument('--benchmark', type=str, help='Benchmark name (tpch or tpcds)')
-    parser.add_argument('--title', type=str, help='Chart title')
-    args = parser.parse_args()
-    main(args.filenames, args.labels, args.benchmark, args.title)
diff --git a/dev/benchmarks/gluten-tpcds.sh b/dev/benchmarks/gluten-tpcds.sh
deleted file mode 100755
index 7c475c79c0..0000000000
--- a/dev/benchmarks/gluten-tpcds.sh
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more
contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -export TZ=UTC - -$SPARK_HOME/sbin/stop-master.sh -$SPARK_HOME/sbin/stop-worker.sh - -$SPARK_HOME/sbin/start-master.sh -$SPARK_HOME/sbin/start-worker.sh $SPARK_MASTER - -$SPARK_HOME/bin/spark-submit \ - --master $SPARK_MASTER \ - --conf spark.driver.memory=8G \ - --conf spark.executor.instances=2 \ - --conf spark.executor.memory=16G \ - --conf spark.executor.cores=8 \ - --conf spark.cores.max=16 \ - --conf spark.eventLog.enabled=true \ - --jars $GLUTEN_JAR \ - --conf spark.plugins=org.apache.gluten.GlutenPlugin \ - --conf spark.driver.extraClassPath=${GLUTEN_JAR} \ - --conf spark.executor.extraClassPath=${GLUTEN_JAR} \ - --conf spark.memory.offHeap.enabled=true \ - --conf spark.memory.offHeap.size=16g \ - --conf spark.gluten.sql.columnar.forceShuffledHashJoin=true \ - --conf spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager \ - --conf spark.sql.session.timeZone=UTC \ - --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \ - --conf spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.DefaultAWSCredentialsProviderChain \ - tpcbench.py \ - --name gluten \ - --benchmark tpcds \ - --data $TPCDS_DATA \ - --queries $TPCDS_QUERIES \ - --output . \ - --iterations 1 diff --git a/dev/benchmarks/gluten-tpch.sh b/dev/benchmarks/gluten-tpch.sh deleted file mode 100755 index 46c3ed7527..0000000000 --- a/dev/benchmarks/gluten-tpch.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -export TZ=UTC - -$SPARK_HOME/sbin/stop-master.sh -$SPARK_HOME/sbin/stop-worker.sh - -$SPARK_HOME/sbin/start-master.sh -$SPARK_HOME/sbin/start-worker.sh $SPARK_MASTER - -$SPARK_HOME/bin/spark-submit \ - --master $SPARK_MASTER \ - --conf spark.driver.memory=8G \ - --conf spark.executor.instances=1 \ - --conf spark.executor.memory=16G \ - --conf spark.executor.cores=8 \ - --conf spark.cores.max=8 \ - --conf spark.eventLog.enabled=true \ - --jars $GLUTEN_JAR \ - --conf spark.plugins=org.apache.gluten.GlutenPlugin \ - --conf spark.driver.extraClassPath=${GLUTEN_JAR} \ - --conf spark.executor.extraClassPath=${GLUTEN_JAR} \ - --conf spark.memory.offHeap.enabled=true \ - --conf spark.memory.offHeap.size=16g \ - --conf spark.gluten.sql.columnar.forceShuffledHashJoin=true \ - --conf spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager \ - --conf spark.sql.session.timeZone=UTC \ - --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \ - --conf spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.DefaultAWSCredentialsProviderChain \ - tpcbench.py \ - --name gluten \ - --benchmark tpch \ - --data $TPCH_DATA \ - --queries $TPCH_QUERIES \ - --output . \ - --iterations 1 diff --git a/dev/benchmarks/spark-tpcds.sh b/dev/benchmarks/spark-tpcds.sh deleted file mode 100755 index dad079ba23..0000000000 --- a/dev/benchmarks/spark-tpcds.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -$SPARK_HOME/sbin/stop-master.sh -$SPARK_HOME/sbin/stop-worker.sh - -$SPARK_HOME/sbin/start-master.sh -$SPARK_HOME/sbin/start-worker.sh $SPARK_MASTER - -$SPARK_HOME/bin/spark-submit \ - --master $SPARK_MASTER \ - --conf spark.driver.memory=8G \ - --conf spark.executor.instances=2 \ - --conf spark.executor.cores=8 \ - --conf spark.cores.max=16 \ - --conf spark.executor.memory=16g \ - --conf spark.memory.offHeap.enabled=true \ - --conf spark.memory.offHeap.size=16g \ - --conf spark.eventLog.enabled=true \ - --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \ - --conf spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.DefaultAWSCredentialsProviderChain \ - tpcbench.py \ - --name spark \ - --benchmark tpcds \ - --data $TPCDS_DATA \ - --queries $TPCDS_QUERIES \ - --output . \ - --iterations 1 diff --git a/dev/benchmarks/spark-tpch.sh b/dev/benchmarks/spark-tpch.sh deleted file mode 100755 index ae359f049f..0000000000 --- a/dev/benchmarks/spark-tpch.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -$SPARK_HOME/sbin/stop-master.sh -$SPARK_HOME/sbin/stop-worker.sh - -$SPARK_HOME/sbin/start-master.sh -$SPARK_HOME/sbin/start-worker.sh $SPARK_MASTER - -$SPARK_HOME/bin/spark-submit \ - --master $SPARK_MASTER \ - --conf spark.driver.memory=8G \ - --conf spark.executor.instances=1 \ - --conf spark.executor.cores=8 \ - --conf spark.cores.max=8 \ - --conf spark.executor.memory=16g \ - --conf spark.memory.offHeap.enabled=true \ - --conf spark.memory.offHeap.size=16g \ - --conf spark.eventLog.enabled=true \ - --conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \ - --conf spark.hadoop.fs.s3a.aws.credentials.provider=com.amazonaws.auth.DefaultAWSCredentialsProviderChain \ - tpcbench.py \ - --name spark \ - --benchmark tpch \ - --data $TPCH_DATA \ - --queries $TPCH_QUERIES \ - --output . \ - --iterations 1 \ - --format parquet diff --git a/dev/benchmarks/tpcbench.py b/dev/benchmarks/tpcbench.py deleted file mode 100644 index 400ccd175a..0000000000 --- a/dev/benchmarks/tpcbench.py +++ /dev/null @@ -1,257 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -TPC-H / TPC-DS benchmark runner. 
-
-Supports two data sources:
- - Files: use --data with --format (parquet, csv, json) and optional --options
- - Iceberg tables: use --catalog and --database to specify the catalog location
-"""
-
-import argparse
-from datetime import datetime
-import json
-from pyspark.sql import SparkSession
-import time
-from typing import Dict
-
-
-def dedup_columns(df):
-    """Rename duplicate column aliases: a, a, b, b -> a, a_1, b, b_1"""
-    counts = {}
-    new_cols = []
-    for c in df.columns:
-        if c not in counts:
-            counts[c] = 0
-            new_cols.append(c)
-        else:
-            counts[c] += 1
-            new_cols.append(f"{c}_{counts[c]}")
-    return df.toDF(*new_cols)
-
-
-def main(
-    benchmark: str,
-    data_path: str,
-    catalog: str,
-    database: str,
-    query_path: str,
-    iterations: int,
-    output: str,
-    name: str,
-    format: str,
-    query_num: int = None,
-    write_path: str = None,
-    options: Dict[str, str] = None
-):
-    if options is None:
-        options = {}
-
-    spark = SparkSession.builder \
-        .appName(f"{name} benchmark derived from {benchmark}") \
-        .getOrCreate()
-
-    # Define tables for each benchmark
-    if benchmark == "tpch":
-        num_queries = 22
-        table_names = [
-            "customer", "lineitem", "nation", "orders",
-            "part", "partsupp", "region", "supplier"
-        ]
-    elif benchmark == "tpcds":
-        num_queries = 99
-        table_names = [
-            "call_center", "catalog_page", "catalog_returns", "catalog_sales",
-            "customer", "customer_address", "customer_demographics", "date_dim",
-            "time_dim", "household_demographics", "income_band", "inventory",
-            "item", "promotion", "reason", "ship_mode", "store", "store_returns",
-            "store_sales", "warehouse", "web_page", "web_returns", "web_sales",
-            "web_site"
-        ]
-    else:
-        raise ValueError(f"Invalid benchmark: {benchmark}")
-
-    # Register tables from either files or Iceberg catalog
-    using_iceberg = catalog is not None
-    for table in table_names:
-        if using_iceberg:
-            source = f"{catalog}.{database}.{table}"
-            print(f"Registering table {table} from {source}")
-            df = spark.table(source)
-        else:
-            source = f"{data_path}/{table}.{format}"
-            print(f"Registering table {table} from {source}")
-            df = spark.read.format(format).options(**options).load(source)
-        df.createOrReplaceTempView(table)
-
-    conf_dict = {k: v for k, v in spark.sparkContext.getConf().getAll()}
-
-    # Record the engine actually under test (spark, comet, gluten, blaze, ...)
-    results = {
-        'engine': name,
-        'benchmark': benchmark,
-        'query_path': query_path,
-        'spark_conf': conf_dict,
-    }
-    if using_iceberg:
-        results['catalog'] = catalog
-        results['database'] = database
-    else:
-        results['data_path'] = data_path
-
-    for iteration in range(iterations):
-        print(f"\n{'='*60}")
-        print(f"Starting iteration {iteration + 1} of {iterations}")
-        print(f"{'='*60}")
-        iter_start_time = time.time()
-
-        # Determine which queries to run
-        if query_num is not None:
-            if query_num < 1 or query_num > num_queries:
-                raise ValueError(
-                    f"Query number {query_num} out of range. 
" - f"Valid: 1-{num_queries} for {benchmark}" - ) - queries_to_run = [query_num] - else: - queries_to_run = range(1, num_queries + 1) - - for query in queries_to_run: - spark.sparkContext.setJobDescription(f"{benchmark} q{query}") - - path = f"{query_path}/q{query}.sql" - print(f"\nRunning query {query} from {path}") - - with open(path, "r") as f: - text = f.read() - queries = text.split(";") - - start_time = time.time() - for sql in queries: - sql = sql.strip().replace("create view", "create temp view") - if len(sql) > 0: - print(f"Executing: {sql[:100]}...") - df = spark.sql(sql) - df.explain("formatted") - - if write_path is not None: - if len(df.columns) > 0: - output_path = f"{write_path}/q{query}" - deduped = dedup_columns(df) - deduped.orderBy(*deduped.columns).coalesce(1).write.mode("overwrite").parquet(output_path) - print(f"Results written to {output_path}") - else: - rows = df.collect() - print(f"Query {query} returned {len(rows)} rows") - - end_time = time.time() - elapsed = end_time - start_time - print(f"Query {query} took {elapsed:.2f} seconds") - - query_timings = results.setdefault(query, []) - query_timings.append(elapsed) - - iter_end_time = time.time() - print(f"\nIteration {iteration + 1} took {iter_end_time - iter_start_time:.2f} seconds") - - # Write results - result_str = json.dumps(results, indent=4) - current_time_millis = int(datetime.now().timestamp() * 1000) - results_path = f"{output}/{name}-{benchmark}-{current_time_millis}.json" - print(f"\nWriting results to {results_path}") - with open(results_path, "w") as f: - f.write(result_str) - - spark.stop() - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="TPC-H/TPC-DS benchmark runner for files or Iceberg tables" - ) - parser.add_argument( - "--benchmark", required=True, - help="Benchmark to run (tpch or tpcds)" - ) - - # Data source - mutually exclusive: either file path or Iceberg catalog - source_group = parser.add_mutually_exclusive_group(required=True) - source_group.add_argument( - "--data", - help="Path to data files" - ) - source_group.add_argument( - "--catalog", - help="Iceberg catalog name" - ) - - # Options for file-based reading - parser.add_argument( - "--format", default="parquet", - help="Input file format: parquet, csv, json (only used with --data)" - ) - parser.add_argument( - "--options", type=json.loads, default={}, - help='Spark reader options as JSON string, e.g., \'{"header": "true"}\' (only used with --data)' - ) - - # Options for Iceberg - parser.add_argument( - "--database", default="tpch", - help="Database containing TPC tables (only used with --catalog)" - ) - - parser.add_argument( - "--queries", required=True, - help="Path to query SQL files" - ) - parser.add_argument( - "--iterations", type=int, default=1, - help="Number of iterations" - ) - parser.add_argument( - "--output", required=True, - help="Path to write results JSON" - ) - parser.add_argument( - "--name", required=True, - help="Prefix for result file" - ) - parser.add_argument( - "--query", type=int, - help="Specific query number (1-based). If omitted, run all." 
- ) - parser.add_argument( - "--write", - help="Path to save query results as Parquet" - ) - args = parser.parse_args() - - main( - args.benchmark, - args.data, - args.catalog, - args.database, - args.queries, - args.iterations, - args.output, - args.name, - args.format, - args.query, - args.write, - args.options - ) diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 4ea10c1dff..ee80a51eeb 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -28,3 +28,5 @@ spark/src/test/resources/tpch-extended/q*.sql spark/src/test/resources/test-data/*.csv spark/src/test/resources/test-data/*.ndjson spark/inspections/CometTPC*results.txt +benchmarks/queries/tpch/q*.sql +benchmarks/queries/tpcds/q*.sql diff --git a/pom.xml b/pom.xml index 1b33fc4757..6ece20552d 100644 --- a/pom.xml +++ b/pom.xml @@ -1058,6 +1058,7 @@ under the License. dev/deploy-file **/test/resources/** **/benchmarks/*.txt + benchmarks/queries/**/*.sql **/inspections/*.txt tpcds-kit/** tpcds-sf-1/**