Skip to content

Commit f25579a

Browse files
committed
Fixes esql class cast bug in STATS at planning level (#137511)
1 parent 15168d0 commit f25579a

File tree

9 files changed

+815
-19
lines changed

9 files changed

+815
-19
lines changed

docs/changelog/137511.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 137511
2+
summary: Fixes esql class cast bug in STATS at planning level
3+
area: ES|QL
4+
type: bug
5+
issues: []

x-pack/plugin/esql/qa/testFixtures/src/main/resources/inlinestats.csv-spec

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1001,3 +1001,20 @@ FROM hosts METADATA _index
10011001
description:text | host:keyword | ip0:ip | _index:keyword | x:ip | ip1:long | host_group:text | card:keyword
10021002
alpha db server |alpha |127.0.0.1 |hosts |127.0.0.1|1 |DB servers |eth0
10031003
;
1004+
1005+
fixClassCastBugWithSeveralCounts
1006+
required_capability: inline_stats
1007+
required_capability: fix_stats_classcast_exception
1008+
1009+
FROM sample_data, sample_data_str
1010+
| EVAL one_ip = client_ip::ip
1011+
| INLINE STATS count1=count(client_ip::ip), count2=count(one_ip)
1012+
| KEEP count1, count2
1013+
| LIMIT 3
1014+
;
1015+
1016+
count1:long |count2:long
1017+
14 |14
1018+
14 |14
1019+
14 |14
1020+
;

x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3344,3 +3344,103 @@ VALUES(color):keyword | color:keyword
33443344
yellow | yellow
33453345
// end::mv-group-values-expand-result[]
33463346
;
3347+
<<<<<<< HEAD
3348+
=======
3349+
3350+
filterOrdinalValues
3351+
required_capability: fix_filter_ordinals
3352+
from employees
3353+
| MV_EXPAND job_positions
3354+
| WHERE job_positions != "Purchase Manager"
3355+
| STATS employees=count(*) BY job_positions
3356+
| SORT job_positions
3357+
| LIMIT 1000
3358+
;
3359+
3360+
employees:long | job_positions:keyword
3361+
18 | Accountant
3362+
13 | Architect
3363+
11 | Business Analyst
3364+
13 | Data Scientist
3365+
10 | Head Human Resources
3366+
15 | Internship
3367+
14 | Junior Developer
3368+
20 | Principal Support Engineer
3369+
13 | Python Developer
3370+
15 | Reporting Analyst
3371+
20 | Senior Python Developer
3372+
15 | Senior Team Lead
3373+
11 | Support Engineer
3374+
15 | Tech Lead
3375+
;
3376+
3377+
fixClassCastBugWithCountDistinct
3378+
required_capability: fix_stats_classcast_exception
3379+
3380+
from airports
3381+
| rename scalerank AS x
3382+
| stats a = count(x), b = count(x) + count(x), c = count_distinct(x)
3383+
;
3384+
3385+
a:long | b:long | c:long
3386+
891 | 1782 | 8
3387+
;
3388+
3389+
fixClassCastBugWithValuesFn
3390+
required_capability: fix_stats_classcast_exception
3391+
3392+
ROW x = [1,2,3]
3393+
| STATS a = MV_COUNT(VALUES(x)), b = VALUES(x), c = SUM(x)
3394+
;
3395+
3396+
a:integer | b:integer | c:long
3397+
3 | [1, 2, 3] | 6
3398+
;
3399+
3400+
fixClassCastBugWithSeveralCountDistincts
3401+
required_capability: fix_stats_classcast_exception
3402+
3403+
ROW x = 1
3404+
| STATS a = 2*COUNT_DISTINCT(x), b = COUNT_DISTINCT(x), c = MAX(x)
3405+
;
3406+
3407+
a:long | b:long | c:integer
3408+
2 | 1 | 1
3409+
;
3410+
3411+
fixClassCastBugWithMedianPlusCountDistinct
3412+
required_capability: fix_stats_classcast_exception
3413+
3414+
FROM sample_data_ts_long
3415+
| EVAL sym1 = 0, sym5 = 1
3416+
| STATS sym2 = median(sym5) + 0, sym3 = median(sym5), sym4 = count_distinct(sym1)
3417+
;
3418+
3419+
sym2:double |sym3:double | sym4:long
3420+
1.0 | 1.0 | 1
3421+
;
3422+
3423+
fixClassCastBugWithFoldableLiterals
3424+
required_capability: fix_stats_classcast_exception
3425+
3426+
from airports
3427+
| rename scalerank AS x
3428+
| stats a = count(x), b = count(x) + count(x), c = count_distinct(x, 10), d = count_distinct(x, 10 + 1 - 1)
3429+
;
3430+
3431+
a:long | b:long | c:long | d:long
3432+
891 | 1782 | 8 | 8
3433+
;
3434+
3435+
fixClassCastBugWithSurrogateExpressions
3436+
required_capability: fix_stats_classcast_exception
3437+
3438+
from airports
3439+
| rename scalerank AS x
3440+
| stats a = median(x), b = percentile(x, 50), c = count_distinct(x)
3441+
;
3442+
3443+
a:double | b:double | c:long
3444+
6.0 | 6.0 | 8
3445+
;
3446+
>>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511))

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1282,6 +1282,13 @@ public enum Cap {
12821282
*/
12831283
STATS_WITH_FILTERED_SURROGATE_FIXED,
12841284

1285+
/**
1286+
* Fix for ClassCastException in STATS
1287+
* https://github.com/elastic/elasticsearch/issues/133992
1288+
* https://github.com/elastic/elasticsearch/issues/136598
1289+
*/
1290+
FIX_STATS_CLASSCAST_EXCEPTION,
1291+
12851292
/**
12861293
* {@link org.elasticsearch.xpack.esql.optimizer.rules.logical.ReplaceAliasingEvalWithProject} did not fully account for shadowing.
12871294
* https://github.com/elastic/elasticsearch/issues/137019.

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.elasticsearch.xpack.esql.optimizer.rules.logical.CombineEvals;
1818
import org.elasticsearch.xpack.esql.optimizer.rules.logical.CombineProjections;
1919
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ConstantFolding;
20+
import org.elasticsearch.xpack.esql.optimizer.rules.logical.DeduplicateAggs;
2021
import org.elasticsearch.xpack.esql.optimizer.rules.logical.ExtractAggregateCommonFilter;
2122
import org.elasticsearch.xpack.esql.optimizer.rules.logical.FoldNull;
2223
import org.elasticsearch.xpack.esql.optimizer.rules.logical.LiteralsOnTheRight;
@@ -171,6 +172,14 @@ protected static Batch<LogicalPlan> operators(boolean local) {
171172
new SplitInWithFoldableValue(),
172173
new PropagateEvalFoldables(),
173174
new ConstantFolding(),
175+
/* Then deduplicate aggregations
176+
We need this after the constant folding
177+
because we could have expressions like
178+
count_distinct(_, 9 + 1)
179+
count_distinct(_, 10)
180+
which are semantically identical
181+
*/
182+
new DeduplicateAggs(),
174183
new PartiallyFoldCase(),
175184
// boolean
176185
new BooleanSimplification(),
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.optimizer.rules.logical;
9+
10+
/**
11+
* This rule handles duplicate aggregate functions to avoid duplicate compute
12+
* stats a = min(x), b = min(x), c = count(*), d = count() by g
13+
* becomes
14+
* stats a = min(x), c = count(*) by g | eval b = a, d = c | keep a, b, c, d, g
15+
*/
16+
public final class DeduplicateAggs extends ReplaceAggregateAggExpressionWithEval {
17+
18+
public DeduplicateAggs() {
19+
super(false);
20+
}
21+
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,17 @@
4343
* becomes
4444
* stats a = min(x), c = count(*) by g | eval b = a, d = c | keep a, b, c, d, g
4545
*/
46-
public final class ReplaceAggregateAggExpressionWithEval extends OptimizerRules.OptimizerRule<Aggregate> {
46+
public class ReplaceAggregateAggExpressionWithEval extends OptimizerRules.OptimizerRule<Aggregate> {
47+
private final boolean replaceNestedExpressions;
48+
49+
public ReplaceAggregateAggExpressionWithEval(boolean replaceNestedExpressions) {
50+
super(OptimizerRules.TransformDirection.UP);
51+
this.replaceNestedExpressions = replaceNestedExpressions;
52+
}
53+
4754
public ReplaceAggregateAggExpressionWithEval() {
4855
super(OptimizerRules.TransformDirection.UP);
56+
this.replaceNestedExpressions = true;
4957
}
5058

5159
@Override
@@ -88,7 +96,7 @@ protected LogicalPlan rule(Aggregate aggregate) {
8896
// common case - handle duplicates
8997
if (child instanceof AggregateFunction af) {
9098
// canonical representation, with resolved aliases
91-
AggregateFunction canonical = (AggregateFunction) af.canonical().transformUp(e -> aliases.resolve(e, e));
99+
AggregateFunction canonical = getCannonical(af, aliases);
92100

93101
Alias found = rootAggs.get(canonical);
94102
// aggregate is new
@@ -106,14 +114,15 @@ protected LogicalPlan rule(Aggregate aggregate) {
106114
}
107115
// nested expression over aggregate function or groups
108116
// replace them with reference and move the expression into a follow-up eval
109-
else {
117+
else if (replaceNestedExpressions) {
110118
changed.set(true);
111119
Expression aggExpression = child.transformUp(AggregateFunction.class, af -> {
112-
AggregateFunction canonical = (AggregateFunction) af.canonical();
120+
// canonical representation, with resolved aliases
121+
AggregateFunction canonical = getCannonical(af, aliases);
113122
Alias alias = rootAggs.get(canonical);
114123
if (alias == null) {
115-
// create synthetic alias ove the found agg function
116-
alias = new Alias(af.source(), syntheticName(canonical, child, counter[0]++), canonical, null, true);
124+
// create synthetic alias over the found agg function
125+
alias = new Alias(af.source(), syntheticName(canonical, child, counter[0]++), af.canonical(), null, true);
117126
// and remember it to remove duplicates
118127
rootAggs.put(canonical, alias);
119128
// add it to the list of aggregates and continue
@@ -132,6 +141,9 @@ protected LogicalPlan rule(Aggregate aggregate) {
132141
Alias alias = as.replaceChild(aggExpression);
133142
newEvals.add(alias);
134143
newProjections.add(alias.toAttribute());
144+
} else {
145+
newAggs.add(agg);
146+
newProjections.add(agg.toAttribute());
135147
}
136148
}
137149
// not an alias (e.g. grouping field)
@@ -155,6 +167,10 @@ protected LogicalPlan rule(Aggregate aggregate) {
155167
return plan;
156168
}
157169

170+
private static AggregateFunction getCannonical(AggregateFunction af, AttributeMap<Expression> aliases) {
171+
return (AggregateFunction) af.canonical().transformUp(e -> aliases.resolve(e, e));
172+
}
173+
158174
private static String syntheticName(Expression expression, Expression af, int counter) {
159175
return TemporaryNameUtils.temporaryName(expression, af, counter);
160176
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,13 @@
178178
import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.GTE;
179179
import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.LT;
180180
import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.LTE;
181+
<<<<<<< HEAD
182+
=======
183+
import static org.elasticsearch.xpack.esql.optimizer.rules.logical.DeduplicateAggsTests.aggFieldName;
184+
import static org.elasticsearch.xpack.esql.optimizer.rules.logical.DeduplicateAggsTests.aliased;
185+
import static org.elasticsearch.xpack.esql.optimizer.rules.logical.OptimizerRules.TransformDirection.DOWN;
186+
import static org.elasticsearch.xpack.esql.optimizer.rules.logical.OptimizerRules.TransformDirection.UP;
187+
>>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511))
181188
import static org.hamcrest.Matchers.allOf;
182189
import static org.hamcrest.Matchers.anyOf;
183190
import static org.hamcrest.Matchers.contains;
@@ -473,6 +480,7 @@ public void testAggsWithOverridingInputAndGrouping() throws Exception {
473480
}
474481

475482
/**
483+
<<<<<<< HEAD
476484
* Project[[s{r}#4 AS d, s{r}#4, last_name{f}#21, first_name{f}#18]]
477485
* \_Limit[1000[INTEGER]]
478486
* \_Aggregate[[last_name{f}#21, first_name{f}#18],[SUM(salary{f}#22) AS s, last_name{f}#21, first_name{f}#18]]
@@ -495,6 +503,9 @@ public void testCombineProjectionWithDuplicateAggregation() {
495503
}
496504

497505
/**
506+
=======
507+
* <pre>{@code
508+
>>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511))
498509
* Limit[1000[INTEGER]]
499510
* \_Aggregate[STANDARD,[],[SUM(salary{f}#12,true[BOOLEAN]) AS sum(salary), SUM(salary{f}#12,last_name{f}#11 == [44 6f 65][KEYW
500511
* ORD]) AS sum(salary) WheRe last_name == "Doe"]]
@@ -3809,6 +3820,7 @@ public void testPruneRenameOnAggBy() {
38093820
}
38103821

38113822
/**
3823+
<<<<<<< HEAD
38123824
* Expects
38133825
* Project[[c1{r}#2, c2{r}#4, cs{r}#6, cm{r}#8, cexp{r}#10]]
38143826
* \_Eval[[c1{r}#2 AS c2, c1{r}#2 AS cs, c1{r}#2 AS cm, c1{r}#2 AS cexp]]
@@ -3971,6 +3983,8 @@ public void testEliminateDuplicateRenamedGroupings() {
39713983
}
39723984

39733985
/**
3986+
=======
3987+
>>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511))
39743988
* Expected
39753989
* Limit[2[INTEGER]]
39763990
* \_Filter[a{r}#6 > 2[INTEGER]]
@@ -4061,19 +4075,6 @@ public void testLimitZeroUsesLocalRelation() {
40614075
assertThat(plan, instanceOf(LocalRelation.class));
40624076
}
40634077

4064-
private <T> T aliased(Expression exp, Class<T> clazz) {
4065-
var alias = as(exp, Alias.class);
4066-
return as(alias.child(), clazz);
4067-
}
4068-
4069-
private <T extends AggregateFunction> void aggFieldName(Expression exp, Class<T> aggType, String fieldName) {
4070-
var alias = as(exp, Alias.class);
4071-
var af = as(alias.child(), aggType);
4072-
var field = af.field();
4073-
var name = field.foldable() ? BytesRefs.toString(field.fold(FoldContext.small())) : Expressions.name(field);
4074-
assertThat(name, is(fieldName));
4075-
}
4076-
40774078
/**
40784079
* Expects
40794080
* Limit[1000[INTEGER]]
@@ -4831,6 +4832,7 @@ public void testStatsExpOverAggsWithScalars() {
48314832

48324833
/**
48334834
* Expects
4835+
<<<<<<< HEAD
48344836
* Project[[a{r}#5, b{r}#9, $$max(salary)_+_3>$COUNT$2{r}#46 AS d, $$count(salary)_->$MIN$3{r}#47 AS e, $$avg(salary)_+_m
48354837
* >$MAX$1{r}#45 AS g]]
48364838
* \_Eval[[$$$$avg(salary)_+_m>$AVG$0$SUM$0{r}#48 / $$max(salary)_+_3>$COUNT$2{r}#46 AS $$avg(salary)_+_m>$AVG$0, $$avg(
@@ -4902,6 +4904,9 @@ public void testStatsExpOverAggsWithScalarAndDuplicateAggs() {
49024904

49034905
/**
49044906
* Expects
4907+
=======
4908+
* <pre>{@code
4909+
>>>>>>> 0a81875703ed (Fixes esql class cast bug in STATS at planning level (#137511))
49054910
* Project[[a{r}#5, a{r}#5 AS b, w{r}#12]]
49064911
* \_Limit[1000[INTEGER]]
49074912
* \_Aggregate[[w{r}#12],[SUM($$salary_/_2_+_la>$SUM$0{r}#26) AS a, w{r}#12]]

0 commit comments

Comments
 (0)