Skip to content

Commit 21fe40a

Browse files
authored
ESQL: Add optimization to purge join on null merge key (#127583)
This adds a new logical optimization rule that purges a Join when its merge key(s) are null. Null detection is based on recognizing a tree pattern in which the join sits atop a project and/or eval (possibly a few nodes deep) that contains a reference to a `null`, where that reference matches the join key. The rule works at the coordinator planning level, but it is most useful locally, after `null`s have been inserted into the plan on detecting missing fields. The Join is substituted with a projection with the same attributes as the join, atop an eval with all of the join's right fields aliased to null. Closes #125577.
1 parent 52bc94e commit 21fe40a

File tree

11 files changed

+433
-61
lines changed

11 files changed

+433
-61
lines changed

docs/changelog/127583.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 127583
2+
summary: Add optimization to purge join on null merge key
3+
area: ES|QL
4+
type: enhancement
5+
issues:
6+
- 125577

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Alias.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ public String toString() {
138138

139139
@Override
140140
public String nodeString() {
141-
return child.nodeString() + " AS " + name();
141+
return child.nodeString() + " AS " + name() + "#" + id();
142142
}
143143

144144
/**

x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4561,3 +4561,22 @@ language_code_float:double | language_code_double:double | language_name:keyword
45614561
2.147483648E9 | 2.147483646E9 | max_int_minus_1
45624562
2.147483648E9 | 2.147483647E9 | max_int
45634563
;
4564+
4565+
nullifiedJoinKeyToPurgeTheJoin
4566+
required_capability: join_lookup_v12
4567+
4568+
FROM employees
4569+
| RENAME languages AS language_code
4570+
| SORT emp_no, language_code
4571+
| LIMIT 4
4572+
| EVAL language_code = TO_INTEGER(NULL)
4573+
| LOOKUP JOIN languages_lookup ON language_code
4574+
| KEEP emp_no, language_code, language_name
4575+
;
4576+
4577+
emp_no:integer | language_code:integer | language_name:keyword
4578+
10001 |null |null
4579+
10002 |null |null
4580+
10003 |null |null
4581+
10004 |null |null
4582+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
import org.elasticsearch.xpack.esql.optimizer.rules.logical.SubstituteSurrogateExpressions;
6666
import org.elasticsearch.xpack.esql.optimizer.rules.logical.SubstituteSurrogatePlans;
6767
import org.elasticsearch.xpack.esql.optimizer.rules.logical.TranslateTimeSeriesAggregate;
68+
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.PruneLeftJoinOnNullMatchingField;
6869
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
6970
import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor;
7071
import org.elasticsearch.xpack.esql.rule.RuleExecutor;
@@ -201,7 +202,8 @@ protected static Batch<LogicalPlan> operators() {
201202
new PushDownEnrich(),
202203
new PushDownAndCombineOrderBy(),
203204
new PruneRedundantOrderBy(),
204-
new PruneRedundantSortClauses()
205+
new PruneRedundantSortClauses(),
206+
new PruneLeftJoinOnNullMatchingField()
205207
);
206208
}
207209

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.optimizer.rules;
9+
10+
import org.elasticsearch.common.util.Maps;
11+
import org.elasticsearch.core.Tuple;
12+
import org.elasticsearch.xpack.esql.core.expression.Alias;
13+
import org.elasticsearch.xpack.esql.core.expression.Attribute;
14+
import org.elasticsearch.xpack.esql.core.expression.AttributeMap;
15+
import org.elasticsearch.xpack.esql.core.expression.Expression;
16+
import org.elasticsearch.xpack.esql.core.expression.Literal;
17+
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
18+
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
19+
import org.elasticsearch.xpack.esql.core.type.DataType;
20+
import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext;
21+
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
22+
23+
import java.util.ArrayList;
24+
import java.util.List;
25+
import java.util.Map;
26+
import java.util.function.Predicate;
27+
28+
public final class RuleUtils {
29+
30+
private RuleUtils() {}
31+
32+
/**
33+
* Returns a tuple of two lists:
34+
* 1. A list of aliases to null literals, one per data type among the {@code outputAttributes} that match {@code shouldBeReplaced}.
35+
* 2. A list of named expressions where attributes that match the predicate are replaced with their corresponding null alias.
36+
*
37+
* @param outputAttributes The original output attributes.
38+
* @param shouldBeReplaced A predicate to determine which attributes should be replaced with null aliases.
39+
*/
40+
public static Tuple<List<Alias>, List<NamedExpression>> aliasedNulls(
41+
List<Attribute> outputAttributes,
42+
Predicate<Attribute> shouldBeReplaced
43+
) {
44+
Map<DataType, Alias> nullLiterals = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size());
45+
List<NamedExpression> newProjections = new ArrayList<>(outputAttributes.size());
46+
for (Attribute attr : outputAttributes) {
47+
NamedExpression projection;
48+
if (shouldBeReplaced.test(attr)) {
49+
DataType dt = attr.dataType();
50+
Alias nullAlias = nullLiterals.get(dt);
51+
// save the first field as null (per datatype)
52+
if (nullAlias == null) {
53+
// Keep the same id so downstream query plans don't need updating
54+
// NOTE: THIS IS BRITTLE AND CAN LEAD TO BUGS.
55+
// In case some optimizer rule or so inserts a plan node that requires the field BEFORE the Eval that we're adding
56+
// on top of the EsRelation, this can trigger a field extraction in the physical optimizer phase, causing wrong
57+
// layouts due to a duplicate name id.
58+
// If someone reaches here AGAIN when debugging e.g. ClassCastExceptions or NPEs from wrong layouts, we should probably
59+
// give up on this approach and instead insert EvalExecs in InsertFieldExtraction.
60+
Alias alias = new Alias(attr.source(), attr.name(), Literal.of(attr, null), attr.id());
61+
nullLiterals.put(dt, alias);
62+
projection = alias.toAttribute();
63+
}
64+
// otherwise point to it since this avoids creating field copies
65+
else {
66+
projection = new Alias(attr.source(), attr.name(), nullAlias.toAttribute(), attr.id());
67+
}
68+
} else {
69+
projection = attr;
70+
}
71+
newProjections.add(projection);
72+
}
73+
74+
return new Tuple<>(new ArrayList<>(nullLiterals.values()), newProjections);
75+
}
76+
77+
/**
78+
* Collects references to foldables from the given logical plan, returning an {@link AttributeMap} that maps
79+
* foldable aliases to their corresponding literal values.
80+
*
81+
* @param plan The logical plan to analyze.
82+
* @param ctx The optimizer context providing fold context.
83+
* @return An {@link AttributeMap} containing foldable references and their literal values.
84+
*/
85+
public static AttributeMap<Expression> foldableReferences(LogicalPlan plan, LogicalOptimizerContext ctx) {
86+
AttributeMap.Builder<Expression> collectRefsBuilder = AttributeMap.builder();
87+
88+
// collect aliases bottom-up
89+
plan.forEachExpressionUp(Alias.class, a -> {
90+
var c = a.child();
91+
boolean shouldCollect = c.foldable();
92+
// try to resolve the expression based on the already collected foldables
93+
if (shouldCollect == false) {
94+
c = c.transformUp(ReferenceAttribute.class, r -> collectRefsBuilder.build().resolve(r, r));
95+
shouldCollect = c.foldable();
96+
}
97+
if (shouldCollect) {
98+
collectRefsBuilder.put(a.toAttribute(), Literal.of(ctx.foldCtx(), c));
99+
}
100+
});
101+
102+
return collectRefsBuilder.build();
103+
}
104+
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PropagateEvalFoldables.java

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,11 @@
77

88
package org.elasticsearch.xpack.esql.optimizer.rules.logical;
99

10-
import org.elasticsearch.xpack.esql.core.expression.Alias;
1110
import org.elasticsearch.xpack.esql.core.expression.AttributeMap;
1211
import org.elasticsearch.xpack.esql.core.expression.Expression;
13-
import org.elasticsearch.xpack.esql.core.expression.Literal;
1412
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
1513
import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext;
14+
import org.elasticsearch.xpack.esql.optimizer.rules.RuleUtils;
1615
import org.elasticsearch.xpack.esql.plan.logical.Eval;
1716
import org.elasticsearch.xpack.esql.plan.logical.Filter;
1817
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
@@ -26,24 +25,8 @@ public final class PropagateEvalFoldables extends ParameterizedRule<LogicalPlan,
2625

2726
@Override
2827
public LogicalPlan apply(LogicalPlan plan, LogicalOptimizerContext ctx) {
29-
AttributeMap.Builder<Expression> collectRefsBuilder = AttributeMap.builder();
30-
31-
java.util.function.Function<ReferenceAttribute, Expression> replaceReference = r -> collectRefsBuilder.build().resolve(r, r);
32-
33-
// collect aliases bottom-up
34-
plan.forEachExpressionUp(Alias.class, a -> {
35-
var c = a.child();
36-
boolean shouldCollect = c.foldable();
37-
// try to resolve the expression based on an existing foldables
38-
if (shouldCollect == false) {
39-
c = c.transformUp(ReferenceAttribute.class, replaceReference);
40-
shouldCollect = c.foldable();
41-
}
42-
if (shouldCollect) {
43-
collectRefsBuilder.put(a.toAttribute(), Literal.of(ctx.foldCtx(), c));
44-
}
45-
});
46-
if (collectRefsBuilder.isEmpty()) {
28+
AttributeMap<Expression> collectRefs = RuleUtils.foldableReferences(plan, ctx);
29+
if (collectRefs.isEmpty()) {
4730
return plan;
4831
}
4932

@@ -52,7 +35,7 @@ public LogicalPlan apply(LogicalPlan plan, LogicalOptimizerContext ctx) {
5235
// TODO: also allow aggregates once aggs on constants are supported.
5336
// C.f. https://github.com/elastic/elasticsearch/issues/100634
5437
if (p instanceof Filter || p instanceof Eval) {
55-
p = p.transformExpressionsOnly(ReferenceAttribute.class, replaceReference);
38+
p = p.transformExpressionsOnly(ReferenceAttribute.class, r -> collectRefs.resolve(r, r));
5639
}
5740
return p;
5841
});
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.optimizer.rules.logical.local;
9+
10+
import org.elasticsearch.xpack.esql.core.expression.AttributeMap;
11+
import org.elasticsearch.xpack.esql.core.expression.AttributeSet;
12+
import org.elasticsearch.xpack.esql.core.expression.Expression;
13+
import org.elasticsearch.xpack.esql.core.expression.Expressions;
14+
import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext;
15+
import org.elasticsearch.xpack.esql.optimizer.rules.RuleUtils;
16+
import org.elasticsearch.xpack.esql.optimizer.rules.logical.OptimizerRules;
17+
import org.elasticsearch.xpack.esql.plan.logical.Eval;
18+
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
19+
import org.elasticsearch.xpack.esql.plan.logical.Project;
20+
import org.elasticsearch.xpack.esql.plan.logical.join.Join;
21+
22+
import static org.elasticsearch.xpack.esql.core.expression.Expressions.isGuaranteedNull;
23+
import static org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes.LEFT;
24+
25+
/**
26+
* The rule checks if the join's performed on a field which is aliased to null (in type or value); if that's the case, it prunes the join,
27+
* replacing it with an Eval - returning aliases to null for all the fields added in by the right side of the Join - plus a Project on top
28+
* of it. The rule can apply on the coordinator already, but it's more likely to be effective on the data nodes, where null aliasing is
29+
* inserted due to locally missing fields. This rule relies on that behavior -- see {@link ReplaceFieldWithConstantOrNull}.
30+
*/
31+
public class PruneLeftJoinOnNullMatchingField extends OptimizerRules.ParameterizedOptimizerRule<Join, LogicalOptimizerContext> {
32+
33+
public PruneLeftJoinOnNullMatchingField() {
34+
super(OptimizerRules.TransformDirection.DOWN);
35+
}
36+
37+
@Override
38+
protected LogicalPlan rule(Join join, LogicalOptimizerContext ctx) {
39+
LogicalPlan plan = join;
40+
if (join.config().type() == LEFT) { // other types will have different replacement logic
41+
AttributeMap<Expression> attributeMap = RuleUtils.foldableReferences(join, ctx);
42+
43+
for (var attr : AttributeSet.of(join.config().matchFields())) {
44+
var resolved = attributeMap.resolve(attr);
45+
if (resolved != null && isGuaranteedNull(resolved)) {
46+
plan = replaceJoin(join);
47+
break;
48+
}
49+
}
50+
}
51+
return plan;
52+
}
53+
54+
private static LogicalPlan replaceJoin(Join join) {
55+
var joinRightOutput = join.rightOutputFields();
56+
// can be empty when the join key is null and the rest of the right side entries pruned (such as by an agg)
57+
if (joinRightOutput.isEmpty()) {
58+
return join.left();
59+
}
60+
var aliasedNulls = RuleUtils.aliasedNulls(joinRightOutput, a -> true);
61+
var eval = new Eval(join.source(), join.left(), aliasedNulls.v1());
62+
return new Project(join.source(), eval, join.computeOutput(join.left().output(), Expressions.asAttributes(aliasedNulls.v2())));
63+
}
64+
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceFieldWithConstantOrNull.java

Lines changed: 8 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,15 @@
77

88
package org.elasticsearch.xpack.esql.optimizer.rules.logical.local;
99

10-
import org.elasticsearch.common.util.Maps;
1110
import org.elasticsearch.index.IndexMode;
12-
import org.elasticsearch.xpack.esql.core.expression.Alias;
1311
import org.elasticsearch.xpack.esql.core.expression.Attribute;
1412
import org.elasticsearch.xpack.esql.core.expression.AttributeSet;
1513
import org.elasticsearch.xpack.esql.core.expression.Expression;
1614
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
1715
import org.elasticsearch.xpack.esql.core.expression.Literal;
18-
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
19-
import org.elasticsearch.xpack.esql.core.type.DataType;
2016
import org.elasticsearch.xpack.esql.core.type.PotentiallyUnmappedKeywordEsField;
2117
import org.elasticsearch.xpack.esql.optimizer.LocalLogicalOptimizerContext;
18+
import org.elasticsearch.xpack.esql.optimizer.rules.RuleUtils;
2219
import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
2320
import org.elasticsearch.xpack.esql.plan.logical.Eval;
2421
import org.elasticsearch.xpack.esql.plan.logical.Filter;
@@ -29,7 +26,6 @@
2926
import org.elasticsearch.xpack.esql.plan.logical.TopN;
3027
import org.elasticsearch.xpack.esql.rule.ParameterizedRule;
3128

32-
import java.util.ArrayList;
3329
import java.util.HashMap;
3430
import java.util.List;
3531
import java.util.Map;
@@ -92,42 +88,18 @@ private LogicalPlan replaceWithNullOrConstant(
9288
// \_Eval[field1 = null, field3 = null]
9389
// \_EsRelation[field1, field2, field3]
9490
List<Attribute> relationOutput = relation.output();
95-
Map<DataType, Alias> nullLiterals = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size());
96-
List<NamedExpression> newProjections = new ArrayList<>(relationOutput.size());
97-
for (int i = 0, size = relationOutput.size(); i < size; i++) {
98-
Attribute attr = relationOutput.get(i);
99-
NamedExpression projection;
100-
if (attr instanceof FieldAttribute f && shouldBeRetained.test(f) == false) {
101-
DataType dt = f.dataType();
102-
Alias nullAlias = nullLiterals.get(dt);
103-
// save the first field as null (per datatype)
104-
if (nullAlias == null) {
105-
// Keep the same id so downstream query plans don't need updating
106-
// NOTE: THIS IS BRITTLE AND CAN LEAD TO BUGS.
107-
// In case some optimizer rule or so inserts a plan node that requires the field BEFORE the Eval that we're adding
108-
// on top of the EsRelation, this can trigger a field extraction in the physical optimizer phase, causing wrong
109-
// layouts due to a duplicate name id.
110-
// If someone reaches here AGAIN when debugging e.g. ClassCastExceptions NPEs from wrong layouts, we should probably
111-
// give up on this approach and instead insert EvalExecs in InsertFieldExtraction.
112-
Alias alias = new Alias(f.source(), f.name(), Literal.of(f, null), f.id());
113-
nullLiterals.put(dt, alias);
114-
projection = alias.toAttribute();
115-
}
116-
// otherwise point to it since this avoids creating field copies
117-
else {
118-
projection = new Alias(f.source(), f.name(), nullAlias.toAttribute(), f.id());
119-
}
120-
} else {
121-
projection = attr;
122-
}
123-
newProjections.add(projection);
124-
}
91+
var aliasedNulls = RuleUtils.aliasedNulls(
92+
relationOutput,
93+
attr -> attr instanceof FieldAttribute f && shouldBeRetained.test(f) == false
94+
);
95+
var nullLiterals = aliasedNulls.v1();
96+
var newProjections = aliasedNulls.v2();
12597

12698
if (nullLiterals.size() == 0) {
12799
return plan;
128100
}
129101

130-
Eval eval = new Eval(plan.source(), relation, new ArrayList<>(nullLiterals.values()));
102+
Eval eval = new Eval(plan.source(), relation, nullLiterals);
131103
// This projection is redundant if there's another projection downstream (and no commands depend on the order until we hit it).
132104
return new Project(plan.source(), eval, newProjections);
133105
}

0 commit comments

Comments
 (0)