Skip to content

Commit 0354e46

Browse files
author
Dag Wanvik
committed
WL#15540 Transform correlated scalar subqueries: allow column references as function arguments
WL#13520 "Transform correlated scalar subqueries" has a requirement (#9), which states that the equality expression operands must be simple column references, e.g. in the below transformation example from WL#13520, t2.a and t1.a — the operands of the WHERE clause equality predicate — are simple references. This WL lifts this restriction and allows the column references to be arguments of functions. SELECT * FROM t1 WHERE (SELECT a FROM t2 WHERE t2.a=t1.a) > 0; -> (with non-strict grouping and a built-in assert to signal any cardinality error): SELECT t1.a AS a FROM t1 JOIN (SELECT t2.a AS a, COUNT(0) AS cnt FROM t2 WHERE (t2.a > 0) GROUP BY t2.a) derived WHERE t1.a = derived.a AND reject_if(derived.cnt > 1) The outer reference, t1.a, can already be embedded as a function argument, but the non-outer (inner for short) column that we place in the GROUP BY above, t2.a, cannot. With this WL, we will also be able to transform queries like SELECT * FROM t1 WHERE (SELECT func(t2.a) FROM t2 WHERE func(t2.a) = t1.a ) > 0; -> SELECT t1.* FROM t1 JOIN ( SELECT func(t2.a) AS a, COUNT(0) AS cnt FROM t2 WHERE (t2.a > 0) GROUP BY func(t2.a)) derived WHERE derived.a = t1.a AND reject_if(derived.cnt > 1) This transformation is needed by Heatwave/RAPID, which doesn't support subqueries in the WHERE clause. Limitations: due to a phase problem in the resolver, functional dependencies are not always handled correctly: if the transformed subquery has explicit grouping, functional dependency analysis may be too pessimistic and give an error when the transform is used (but not without). See worklog for details. A work-around is to wrap the offending column in ANY_VALUE. Change-Id: I06fc0bbf8931f87ca964dc784329f4b2e24e048f
1 parent 46e9bcf commit 0354e46

11 files changed

+1274
-349
lines changed

mysql-test/r/subquery_scalar_to_derived_correlated.result

Lines changed: 305 additions & 5 deletions
Large diffs are not rendered by default.

mysql-test/t/subquery_scalar_to_derived_correlated.test

Lines changed: 203 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,29 @@ INSERT INTO t2 VALUES(1), (2);
1111
CREATE TABLE t0 AS SELECT *FROM t1;
1212
CREATE TABLE t3(a INT, b INT);
1313
INSERT INTO t3 VALUES(1, 3), (2, 3);
14+
CREATE VIEW v3 AS SELECT * FROM t3;
1415
ANALYZE TABLE t1, t2, t0, t3;
1516

1617
--echo #
1718
--echo # example supported query
1819
--echo #
1920

20-
# inner field present in SELECT list (t2.a)
21+
--echo # inner field present in SELECT list (t2.a)
2122
let $query =
2223
SELECT * FROM t1 WHERE(SELECT a FROM t2 WHERE t2.a = t1.a) > 0;
2324

2425
eval $query;
2526
eval EXPLAIN $query;
2627

27-
# inner field not present in SELECT list (t3.a)
28+
--echo # function(inner) present in select list
29+
let $query =
30+
SELECT * FROM t1 WHERE(SELECT -a FROM t2 WHERE -t2.a = -t1.a) < 0;
31+
32+
--sorted_result
33+
eval $query;
34+
eval EXPLAIN $query;
35+
36+
--echo # inner field not present in SELECT list (t3.a)
2837
let $query =
2938
SELECT * FROM t1 WHERE(SELECT b FROM t3 WHERE t3.a = t1.a) > 0;
3039

@@ -36,6 +45,88 @@ SELECT * FROM t1 WHERE(SELECT ABS(a) FROM t2 WHERE t2.a = t1.a) > 0;
3645

3746
eval $query;
3847
eval EXPLAIN $query;
48+
49+
--echo # function(inner) not present in SELECT list
50+
let $query =
51+
SELECT * FROM t1 WHERE(SELECT a FROM t2 WHERE -t2.a = -t1.a) > 0;
52+
53+
--sorted_result
54+
eval $query;
55+
eval EXPLAIN $query;
56+
57+
let $query =
58+
SELECT * FROM t1 WHERE(SELECT a FROM t2 WHERE -t2.a = -t1.a AND t2.a = t1.a) > 0;
59+
60+
eval $query;
61+
eval EXPLAIN $query;
62+
63+
let $query =
64+
SELECT * FROM t1 WHERE(SELECT -t2.a FROM t2 WHERE -t2.a = -t1.a AND t2.a = t1.a) > 0;
65+
66+
eval $query;
67+
eval EXPLAIN $query;
68+
69+
let $query =
70+
SELECT * FROM t1 WHERE(SELECT ABS(t2.a) FROM t2 WHERE -t2.a = -t1.a AND t2.a = t1.a) > 0;
71+
72+
eval $query;
73+
eval EXPLAIN $query;
74+
75+
--echo # Could have been transformed, since t1.a selected is equal to t2.a
76+
--echo # (functionally dependent here, but this analysis is not done at the
77+
--echo # moment.
78+
let $query =
79+
SELECT * FROM t1 WHERE(SELECT t1.a FROM t2 WHERE -t2.a = -t1.a AND t2.a = t1.a) > 0;
80+
81+
eval $query;
82+
eval EXPLAIN $query;
83+
84+
# multiple inner fields in a function
85+
let $query =
86+
SELECT * FROM t1 WHERE(SELECT t3.a FROM t3 WHERE t3.a + t3.b = t1.a ) > 0;
87+
88+
eval $query;
89+
eval EXPLAIN $query;
90+
91+
let $query =
92+
SELECT * FROM t1 WHERE(SELECT ABS(t3.a) FROM t3 WHERE t3.a + t3.b = t1.a ) > 0;
93+
94+
eval $query;
95+
eval EXPLAIN $query;
96+
97+
--echo # We don't group on abs(t3.a), since abs(t3.a) is functionally dependent
98+
--echo # on t3.a.
99+
let $query =
100+
SELECT * FROM t1 WHERE(SELECT ABS(t3.a) FROM t3 WHERE t3.a + t3.b = t1.a AND t3.a = t1.a) > 0;
101+
102+
eval $query;
103+
eval EXPLAIN $query;
104+
105+
--echo # Not transformed, non-deterministic function
106+
let $query =
107+
SELECT * FROM t1 WHERE(SELECT ABS(t3.a) + ROUND(RAND()*10) FROM t3 WHERE t3.a + t3.b = t1.a AND t3.a = t1.a) > 0;
108+
109+
eval $query;
110+
eval EXPLAIN $query;
111+
112+
let $query =
113+
SELECT * FROM t1 WHERE(SELECT v3.a FROM v3 WHERE v3.a = t1.a) > 0;
114+
115+
eval $query;
116+
eval EXPLAIN $query;
117+
118+
let $query =
119+
SELECT * FROM t1 WHERE(SELECT v3.a FROM v3 WHERE v3.a + v3.b = t1.a) > 0;
120+
121+
eval $query;
122+
eval EXPLAIN $query;
123+
124+
let $query =
125+
SELECT * FROM t1 WHERE(SELECT v3.a + v3.b FROM v3 WHERE v3.a + v3.b = t1.a) > 0;
126+
127+
eval $query;
128+
eval EXPLAIN $query;
129+
39130
--echo #
40131
--echo # example supported query: more than one correlated field
41132
--echo #
@@ -151,16 +242,24 @@ eval EXPLAIN $query;
151242
--echo # add GROUP BY
152243
--echo #
153244
INSERT INTO t2 VALUES (2);
154-
let $query=
245+
let $query_field=
155246
SELECT * FROM t1 WHERE (SELECT a FROM t2 WHERE t2.a = t1.a) > 0;
247+
let $query_func=
248+
SELECT * FROM t1 WHERE (SELECT a FROM t2 WHERE ABS(t2.a) = t1.a) > 0;
156249

157250
--error ER_SUBQUERY_NO_1_ROW
158-
eval $query;
159-
eval EXPLAIN $query;
251+
eval $query_field;
252+
eval EXPLAIN $query_field;
253+
--error ER_SUBQUERY_NO_1_ROW
254+
eval $query_func;
255+
eval EXPLAIN $query_func;
160256
SET optimizer_switch = 'subquery_to_derived=default';
161257
--error ER_SUBQUERY_NO_1_ROW
162-
eval $query;
163-
eval EXPLAIN $query;
258+
eval $query_field;
259+
eval EXPLAIN $query_field;
260+
--error ER_SUBQUERY_NO_1_ROW
261+
eval $query_func;
262+
eval EXPLAIN $query_func;
164263

165264
SET optimizer_switch = 'subquery_to_derived=on';
166265

@@ -181,6 +280,21 @@ SELECT * FROM t1 WHERE (SELECT COUNT(a) FROM t3 WHERE t3.a = t1.a GROUP BY b) >
181280
eval $query;
182281
eval EXPLAIN $query;
183282

283+
let $query=
284+
SELECT * FROM t1 WHERE (SELECT COUNT(a) FROM t3 WHERE ANY_VALUE(t3.a) = t1.a GROUP BY b) > 0;
285+
eval $query;
286+
eval EXPLAIN $query;
287+
288+
let $query=
289+
SELECT * FROM t1 WHERE (SELECT COUNT(a) FROM t3 WHERE ABS(t3.a) = t1.a GROUP BY b) > 0;
290+
--error ER_WRONG_FIELD_WITH_GROUP
291+
eval $query;
292+
293+
let $query=
294+
SELECT * FROM t1 WHERE (SELECT COUNT(a) FROM t3 WHERE ABS(ANY_VALUE(t3.a)) = t1.a GROUP BY b) > 0;
295+
eval $query;
296+
eval EXPLAIN $query;
297+
184298
--echo #
185299
--echo # Test case that used to yield wrong result before we corrected
186300
--echo # computations of slice positions to accommodate non-hidden fields
@@ -209,6 +323,7 @@ eval EXPLAIN $query;
209323

210324
DROP TABLE p, l;
211325

326+
DROP VIEW v3;
212327
DROP TABLE t0, t1, t2, t3;
213328

214329
--echo #
@@ -240,15 +355,23 @@ INSERT INTO t1 VALUES (1), (2), (3), (4);
240355
INSERT INTO t2 VALUES (1, 3, 3), (2, 3, 3);
241356
ANALYZE TABLE t1, t2;
242357

243-
let $query=
358+
let $query_field=
244359
SELECT * FROM t1 WHERE (SELECT a FROM t2 WHERE t2.b = t1.a and t2.c = t1.a GROUP BY b) > 0;
360+
let $query_func=
361+
SELECT * FROM t1 WHERE (SELECT ABS(a) FROM t2 WHERE ABS(t2.b) = t1.a and ABS(t2.c) = t1.a GROUP BY b) > 0;
245362
--error 1055
246-
eval EXPLAIN $query;
363+
eval EXPLAIN $query_field;
364+
--error 1055
365+
eval EXPLAIN $query_func;
366+
247367
SET sql_mode='';
248-
eval EXPLAIN $query;
249-
eval $query;
368+
eval EXPLAIN $query_field;
369+
eval $query_field;
370+
eval EXPLAIN $query_func;
371+
eval $query_func;
250372
DELETE FROM t2 WHERE a = 2;
251-
eval $query;
373+
eval $query_field;
374+
eval $query_func;
252375
SET sql_mode=default;
253376

254377
let $query=
@@ -258,10 +381,37 @@ eval EXPLAIN $query;
258381

259382
# bug found during review of Bug#35508108: used to get cardinality error
260383
let $query=
261-
SELECT * FROM t1 WHERE (SELECT b FROM t2 WHERE t2.b = t1.a and t2.c = t1.a GROUP BY b) > 0;
384+
SELECT * FROM t1 WHERE (SELECT b FROM t2 WHERE t2.b = t1.a AND t2.c = t1.a GROUP BY b) > 0;
262385
eval $query;
263386
eval EXPLAIN $query;
264387

388+
--echo # This query gets imprecise full group by checking: accepted without transform
389+
let $query=
390+
SELECT * FROM t1 WHERE (SELECT ABS(b) FROM t2 WHERE ABS(t2.b) = t1.a AND t2.c + 1 = t1.a + 1 GROUP BY b) > 0;
391+
--error ER_WRONG_FIELD_WITH_GROUP
392+
eval $query;
393+
394+
--echo # Ok without transform
395+
SET optimizer_switch = 'subquery_to_derived=off';
396+
eval $query;
397+
SET optimizer_switch = 'subquery_to_derived=on';
398+
399+
--echo # Remedied with an ANY_VALUE:
400+
let $query=
401+
SELECT * FROM t1 WHERE (SELECT ABS(b) FROM t2 WHERE ABS(t2.b) = t1.a AND ANY_VALUE(t2.c) + 1 = t1.a + 1 GROUP BY b) > 0;
402+
eval $query;
403+
eval EXPLAIN $query;
404+
405+
--echo # Wrong both with transform and without
406+
let $query=
407+
SELECT * FROM t1 WHERE (SELECT abs(a-b) FROM t2 WHERE ABS(a-b) = t1.a AND t2.c + 1 = t1.a + 1 GROUP BY a+b) > 0;
408+
--error ER_WRONG_FIELD_WITH_GROUP
409+
eval $query;
410+
SET optimizer_switch = 'subquery_to_derived=off';
411+
--error ER_WRONG_FIELD_WITH_GROUP
412+
eval $query;
413+
SET optimizer_switch = 'subquery_to_derived=on';
414+
265415
DROP TABLE t1, t2;
266416

267417
--echo #
@@ -275,7 +425,6 @@ INSERT INTO t3 VALUES (1, 3), (2, 3), (1, 4);
275425
ANALYZE TABLE t1, t3;
276426
let $query =
277427
SELECT * FROM t1 WHERE (SELECT COUNT(a) FROM t3 WHERE t3.a = t1.a GROUP BY b) > 0;
278-
279428
--error ER_SUBQUERY_NO_1_ROW
280429
eval $query;
281430
eval EXPLAIN $query;
@@ -841,6 +990,46 @@ eval EXPLAIN $query;
841990

842991
DROP TABLE t1, t2;
843992

993+
--echo Somewhat realistic example
994+
CREATE TABLE employees(employee_id INT,
995+
name VARCHAR(255),
996+
salary DECIMAL(8,2),
997+
department VARCHAR(255));
998+
INSERT INTO employees
999+
VALUES (1, 'Bob', 250000, 'English'),
1000+
(2, 'Charles', 250000, 'English'),
1001+
(3, 'Kari', 250000, 'English'),
1002+
(4, 'Per', 250000, 'ENGLISH'),
1003+
(6, 'Ole', 300000, 'English'),
1004+
(7, 'Heinrich', 250000, 'English'),
1005+
(8, 'James', 250000, 'Math'),
1006+
(9, 'Dag', 300000, 'MATHEMATICS'),
1007+
(10, 'Norvald', 250000, 'Math'),
1008+
(11, 'Edward', 250000, 'Math'),
1009+
(12, 'Rose', 250000, 'MATH'),
1010+
(13, 'Sally', 250000, 'MATH');
1011+
1012+
--echo # Sloppy department data doesn't give right answer for maths dept.
1013+
--echo # WL#13520:
1014+
SELECT employee_id, name
1015+
FROM employees oemp
1016+
WHERE salary > (
1017+
SELECT AVG(salary)
1018+
FROM employees
1019+
WHERE department = oemp.department);
1020+
--echo # In addition to default case insignificance, use only first four letters
1021+
--echo # to get desired answer
1022+
--echo # WL#15540
1023+
SELECT employee_id, name
1024+
FROM employees oemp
1025+
WHERE salary > (
1026+
SELECT AVG(salary)
1027+
FROM employees
1028+
WHERE SUBSTRING(department,1,4) = SUBSTRING(oemp.department,1,4));
1029+
1030+
DROP TABLE employees;
1031+
1032+
8441033
--echo #
8451034
--echo # Bug#36060557 WL#15540: Expression containing Non-deterministic function
8461035
--echo # UUID() is transformed

sql/aggregate_check.h

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -569,9 +569,26 @@ class Group_check : public Item_tree_walker {
569569
whole_tables_fd(0),
570570
recheck_nullable_keys(0),
571571
mat_tables(root),
572-
failed_ident(nullptr) {}
572+
failed_ident(nullptr) {
573+
if (select->m_no_of_added_exprs > 0) {
574+
// Temporarily shorten GROUP BY list for full group by checking of
575+
// non-field expressions stemming from transform of correlated scalar
576+
// subquery to join with derived table, cf.
577+
// Query_block::m_no_of_added_exprs
578+
select->group_list.split_after(
579+
select->group_list.elements - select->m_no_of_added_exprs,
580+
&m_added_by_transform);
581+
}
582+
}
573583

574-
~Group_check() { std::destroy_n(mat_tables.data(), mat_tables.size()); }
584+
~Group_check() {
585+
std::destroy_n(mat_tables.data(), mat_tables.size());
586+
if (select->m_no_of_added_exprs > 0) {
587+
// restore GROUP BY list
588+
select->group_list.push_back(&m_added_by_transform);
589+
m_added_by_transform.clear();
590+
}
591+
}
575592
Group_check(const Group_check &) = delete;
576593
Group_check &operator=(const Group_check &) = delete;
577594

@@ -641,6 +658,8 @@ class Group_check : public Item_tree_walker {
641658
Mem_root_array<Group_check *> mat_tables;
642659
/// Identifier which triggered an error
643660
Item_ident *failed_ident;
661+
/// GROUP BY list non single column expressions entries added by transform
662+
SQL_I_List<ORDER> m_added_by_transform;
644663

645664
bool is_fd_on_source(Item *item);
646665
bool is_child() const { return table != nullptr; }

sql/item.cc

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -987,8 +987,7 @@ bool Item_field::collect_item_field_or_ref_processor(uchar *arg) {
987987
if (already_collected->eq(this, true)) return false;
988988
}
989989
}
990-
info->m_items->push_back(this);
991-
return false;
990+
return info->m_items->push_back(this);
992991
}
993992

994993
bool Item_field::collect_item_field_or_view_ref_processor(uchar *arg) {
@@ -1010,8 +1009,7 @@ bool Item_field::collect_item_field_or_view_ref_processor(uchar *arg) {
10101009
}
10111010
}
10121011
m_protected_by_any_value = info->m_any_value_level > 0;
1013-
info->m_item_fields_or_view_refs->push_back(this);
1014-
return false;
1012+
return info->m_item_fields_or_view_refs->push_back(this);
10151013
}
10161014

10171015
bool Item_field::add_field_to_set_processor(uchar *arg) {
@@ -9020,7 +9018,10 @@ bool Item_view_ref::collect_item_field_or_view_ref_processor(uchar *arg) {
90209018
: ((depended_from == info->m_transformed_block) // 3
90219019
? this
90229020
: nullptr));
9023-
if (item != nullptr) info->m_item_fields_or_view_refs->push_back(item);
9021+
bool error = false;
9022+
if (item != nullptr)
9023+
error = info->m_item_fields_or_view_refs->push_back(item);
9024+
if (error) return true;
90249025
info->stop_at(this);
90259026
return false;
90269027
}

sql/item.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3190,6 +3190,16 @@ class Item : public Parse_tree_node {
31903190
m_default_value(default_value) {}
31913191
};
31923192

3193+
struct Item_func_call_replacement : Item_replacement {
3194+
Item_func *m_target; ///< The function call to be replaced
3195+
Item_field *m_item; ///< The replacement field
3196+
Item_func_call_replacement(Item_func *func_target, Item_field *item,
3197+
Query_block *select)
3198+
: Item_replacement(select, select),
3199+
m_target(func_target),
3200+
m_item(item) {}
3201+
};
3202+
31933203
struct Item_view_ref_replacement : Item_replacement {
31943204
Item *m_target; ///< The item identifying the view_ref to be replaced
31953205
Field *m_field; ///< The replacement field
@@ -3221,6 +3231,7 @@ class Item : public Parse_tree_node {
32213231
operation of the original transformed query block.
32223232
*/
32233233
virtual Item *replace_item_field(uchar *) { return this; }
3234+
virtual Item *replace_func_call(uchar *) { return this; }
32243235
virtual Item *replace_item_view_ref(uchar *) { return this; }
32253236
virtual Item *replace_aggregate(uchar *) { return this; }
32263237
virtual Item *replace_outer_ref(uchar *) { return this; }

0 commit comments

Comments
 (0)