From 7f01f10f3086cf0d820c120745ce1b986c58925d Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 22 Dec 2025 14:27:03 -0700 Subject: [PATCH 1/3] fixg --- .../src/test/scala/org/apache/comet/CometCastSuite.scala | 1 + .../sql/benchmark/CometStringExpressionBenchmark.scala | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala index a7bd6febf8..6f58811f80 100644 --- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala @@ -33,6 +33,7 @@ import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{ArrayType, BooleanType, ByteType, DataType, DataTypes, DecimalType, IntegerType, LongType, ShortType, StringType, StructField, StructType} +import org.apache.comet.CometSparkSessionExtensions.isSpark40Plus import org.apache.comet.expressions.{CometCast, CometEvalMode} import org.apache.comet.rules.CometScanTypeChecker import org.apache.comet.serde.Compatible diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala index 41eabb8513..1ba7c2d70a 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala @@ -71,7 +71,16 @@ object CometStringExpressionBenchmark extends CometBenchmarkBase { StringExprConfig("chr", "select chr(c1) from parquetV1Table"), StringExprConfig("initCap", "select initCap(c1) from parquetV1Table"), StringExprConfig("trim", "select trim(c1) from parquetV1Table"), + StringExprConfig("btrim", "select btrim(c1) from parquetV1Table"), + StringExprConfig("ltrim", "select ltrim(c1) from parquetV1Table"), + StringExprConfig("rtrim", "select rtrim(c1) from parquetV1Table"), + StringExprConfig("lpad", "select lpad(c1, 120, 'x') from parquetV1Table"), + StringExprConfig("rpad", "select rpad(c1, 120, 'x') from parquetV1Table"), + StringExprConfig("concat", "select concat(c1, c1) from parquetV1Table"), StringExprConfig("concatws", "select concat_ws(' ', c1, c1) from parquetV1Table"), + StringExprConfig("contains", "select contains(c1, '123') from parquetV1Table"), + StringExprConfig("startsWith", "select startswith(c1, '123') from parquetV1Table"), + StringExprConfig("endsWith", "select endswith(c1, '123') from parquetV1Table"), StringExprConfig("length", "select length(c1) from parquetV1Table"), StringExprConfig("repeat", "select repeat(c1, 3) from parquetV1Table"), StringExprConfig("reverse", "select reverse(c1) from parquetV1Table"), From d97afe06fe2522680c453a267c5c9ff2b95cf051 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 22 Dec 2025 17:10:06 -0700 Subject: [PATCH 2/3] create input data once --- .../CometStringExpressionBenchmark.scala | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala index 1ba7c2d70a..0e08aea81d 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala @@ -44,22 +44,6 @@ case class StringExprConfig( // spotless:on object CometStringExpressionBenchmark extends CometBenchmarkBase { - /** - * Generic method to run a string expression benchmark with the given configuration. - */ - def runStringExprBenchmark(config: StringExprConfig, values: Int): Unit = { - withTempPath { dir => - withTempTable("parquetV1Table") { - prepareTable(dir, spark.sql(s"SELECT REPEAT(CAST(value AS STRING), 100) AS c1 FROM $tbl")) - - val extraConfigs = - Map(CometConf.COMET_CASE_CONVERSION_ENABLED.key -> "true") ++ config.extraCometConfigs - - runExpressionBenchmark(config.name, values, config.query, extraConfigs) - } - } - } - // Configuration for all string expression benchmarks private val stringExpressions = List( StringExprConfig("Substring", "select substring(c1, 1, 100) from parquetV1Table"), @@ -90,11 +74,24 @@ object CometStringExpressionBenchmark extends CometBenchmarkBase { StringExprConfig("translate", "select translate(c1, '123456', 'aBcDeF') from parquetV1Table")) override def runCometBenchmark(mainArgs: Array[String]): Unit = { - val values = 1024 * 1024; + val values = 1024 * 1024 + + runBenchmarkWithTable("String expressions", values) { v => + withTempPath { dir => + withTempTable("parquetV1Table") { + prepareTable( + dir, + spark.sql(s"SELECT REPEAT(CAST(value AS STRING), 10) AS c1 FROM $tbl")) + + val extraConfigs = Map(CometConf.COMET_CASE_CONVERSION_ENABLED.key -> "true") - stringExpressions.foreach { config => - runBenchmarkWithTable(config.name, values) { v => - runStringExprBenchmark(config, v) + stringExpressions.foreach { config => + val allConfigs = extraConfigs ++ config.extraCometConfigs + runBenchmark(config.name) { + runExpressionBenchmark(config.name, v, config.query, allConfigs) + } + } + } } } } From 3df25fe0bc8d81832cc90cec19a9ec72ef68489b Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 22 Dec 2025 17:25:56 -0700 Subject: [PATCH 3/3] smaller data --- .../spark/sql/benchmark/CometStringExpressionBenchmark.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala index 0e08aea81d..8f0082bf91 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala @@ -74,9 +74,7 @@ object CometStringExpressionBenchmark extends CometBenchmarkBase { StringExprConfig("translate", "select translate(c1, '123456', 'aBcDeF') from parquetV1Table")) override def runCometBenchmark(mainArgs: Array[String]): Unit = { - val values = 1024 * 1024 - - runBenchmarkWithTable("String expressions", values) { v => + runBenchmarkWithTable("String expressions", 1024) { v => withTempPath { dir => withTempTable("parquetV1Table") { prepareTable(