Skip to content

Commit 3739f35

Browse files
committed
Bug#33848486: Assertion failure in join_optimizer.cc
Inconsistent row estimates were seen in some queries that had a cyclic query graph, and where one of the join predicates could be pushed down to a REF access, but the join predicate could not be fully subsumed by the REF access (because the types of the columns in the predicate didn't match exactly). The inconsistencies were caused by ApplyDelayedPredicatesAfterJoin() applying the selectivity of the join predicate even though the selectivity had already been applied to the REF access below the join node. Fixed by making ApplyDelayedPredicatesAfterJoin() not apply the selectivity of already applied sargable join predicates. Change-Id: I5cf8b22303e2b6b5927adde9c062a34bec656b17
1 parent 68ad0e5 commit 3739f35

File tree

2 files changed

+69
-1
lines changed

2 files changed

+69
-1
lines changed

sql/join_optimizer/join_optimizer.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3170,8 +3170,8 @@ void CostingReceiver::ApplyDelayedPredicatesAfterJoin(
     } else {
       join_path->cost += cost.cost_if_not_materialized;
     }
-    join_path->num_output_rows *= pred.selectivity;
     if (!already_applied_as_sargable) {
+      join_path->num_output_rows *= pred.selectivity;
       filter_predicates.SetBit(pred_idx);
     }
   }

unittest/gunit/hypergraph_optimizer-t.cc

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
 #include "unittest/gunit/fake_table.h"
 #include "unittest/gunit/handler-t.h"
 #include "unittest/gunit/mock_field_datetime.h"
+#include "unittest/gunit/mock_field_long.h"
 #include "unittest/gunit/parsertest.h"
 #include "unittest/gunit/test_utils.h"

@@ -1986,6 +1987,73 @@ TEST_F(HypergraphOptimizerTest, DoNotApplyBothSargableJoinAndFilterJoin) {
   EXPECT_EQ("t2.x", ItemToString(inner_inner->ref().ref->items[0]));
 }
 
+// The selectivity of sargable join predicates could in some cases be
+// double-counted when the sargable join predicate was part of a cycle in the
+// join graph.
+TEST_F(HypergraphOptimizerTest, SargableJoinPredicateSelectivity) {
+  // The inconsistent row estimates were only seen if the sargable predicate
+  // t1.x=t2.x was not fully subsumed by a ref access on t1.x. Achieved by
+  // giving t2.x a different type (UNSIGNED) than t1.x (SIGNED).
+  Mock_field_long t2_x("x", /*is_nullable=*/false, /*is_unsigned=*/true);
+  Mock_field_long t2_y("y", /*is_nullable=*/false, /*is_unsigned=*/false);
+  Fake_TABLE *t2 = new (m_initializer.thd()->mem_root) Fake_TABLE(&t2_x, &t2_y);
+  m_fake_tables["t2"] = t2;
+  t2->set_created();
+
+  Query_block *query_block = ParseAndResolve(
+      "SELECT 1 FROM t1, t2, t3 "
+      "WHERE t1.x = t2.x AND t1.y = t3.x AND t2.y = t3.y",
+      /*nullable=*/false);
+
+  // Add an index on t1(x) to make t1.x=t2.x sargable.
+  Fake_TABLE *t1 = m_fake_tables["t1"];
+  const int t1_idx =
+      t1->create_index(t1->field[0], /*column2=*/nullptr, /*unique=*/false);
+  ulong rec_per_key_int[] = {1};
+  float rec_per_key[] = {1.0f};
+  t1->key_info[t1_idx].set_rec_per_key_array(rec_per_key_int, rec_per_key);
+
+  Fake_TABLE *t3 = m_fake_tables["t3"];
+  t1->file->stats.records = 1000;
+  t1->file->stats.data_file_length = 1e6;
+  t2->file->stats.records = 100;
+  t2->file->stats.data_file_length = 1e5;
+  t3->file->stats.records = 10;
+  t3->file->stats.data_file_length = 1e4;
+
+  string trace;
+  AccessPath *root = FindBestQueryPlan(m_thd, query_block, &trace);
+  SCOPED_TRACE(trace);  // Prints out the trace on failure.
+  // Prints out the query plan on failure.
+  SCOPED_TRACE(PrintQueryPlan(0, root, query_block->join,
+                              /*is_root_of_join=*/true));
+
+  // We don't really care about which exact plan is chosen, but the inconsistent
+  // row estimates were caused by REF access, so make sure our plan has one.
+  AccessPath *ref_path = nullptr;
+  WalkAccessPaths(root, query_block->join,
+                  WalkAccessPathPolicy::STOP_AT_MATERIALIZATION,
+                  [&ref_path](AccessPath *path, const JOIN *) {
+                    if (path->type == AccessPath::REF) {
+                      EXPECT_EQ(nullptr, ref_path);
+                      ref_path = path;
+                    }
+                    return false;
+                  });
+  ASSERT_NE(nullptr, ref_path);
+  EXPECT_STREQ("t1", ref_path->ref().table->alias);
+  EXPECT_EQ(string("t2.x"), ItemToString(ref_path->ref().ref->items[0]));
+
+  // We do care about the estimated cardinality of the result. It used to be
+  // much too low because the selectivity of the sargable predicate was applied
+  // twice.
+  EXPECT_FLOAT_EQ(
+      /* Rows from t1: */ rec_per_key[0] *
+          /* Rows from t2: */ t2->file->stats.records * COND_FILTER_EQUALITY *
+          /* Rows from t3: */ t3->file->stats.records * COND_FILTER_EQUALITY,
+      root->num_output_rows);
+}
+
 TEST_F(HypergraphOptimizerTest, AntiJoinGetsSameEstimateWithAndWithoutIndex) {
   double ref_output_rows = 0.0;
   for (bool has_index : {false, true}) {

0 commit comments

Comments
 (0)