Skip to content

Commit b08d97b

Browse files
committed
Bug#25418534: JSON_EXTRACT USING WILDCARDS TAKES FOREVER
Patch #1: When find_child_doms() evaluates an ellipsis path leg, it adds each matching child twice. First once for every immediate child in the top-level call to find_child_doms(), and then again in the recursive call to find_child_doms() on each of the children. This doesn't cause any wrong results, since duplicate elimination prevents them from being added to the result, but it is unnecessary work. This patch makes find_child_dom() stop calling add_if_missing() on the children before it recurses into them, so that they are only added once. Additionally: Make find_child_dom() a free, static function instead of a member of Json_dom. It is a helper function for Json_dom::seek(), and doesn't use any non-public members of Json_dom, so it doesn't have to be part of Json_dom's interface. Remove unnecessary checks for only_need_one in the ellipsis processing. Json_dom::seek() sets the flag to true only in the last path leg, and the JSON path parser rejects paths that end with an ellipsis, so this cannot happen. Added an assert instead. Microbenchmarks (64-bit, Intel Core i7-4770 3.4 GHz, GCC 6.3): BM_JsonDomSearchEllipsis 79880 ns/iter [+32.0%] BM_JsonDomSearchEllipsis_OnlyOne 75872 ns/iter [+35.3%] BM_JsonDomSearchKey 129 ns/iter [ -1.6%] BM_JsonBinarySearchEllipsis 320920 ns/iter [+11.8%] BM_JsonBinarySearchEllipsis_OnlyOne 315458 ns/iter [+12.6%] BM_JsonBinarySearchKey 86 ns/iter [ -2.3%] Change-Id: I865af789b90b820a6e180ad822f2fb68f411516b
1 parent fbc878c commit b08d97b

File tree

2 files changed

+65
-84
lines changed

2 files changed

+65
-84
lines changed

sql/json_dom.cc

Lines changed: 65 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ static bool add_if_missing(Json_dom *candidate,
228228

229229

230230
/**
231-
Check if a seek operation performed by Json_dom::find_child_doms()
231+
Check if a seek operation performed by find_child_doms()
232232
or Json_dom::seek() is done.
233233
234234
@return true if only one result is needed and a result has been found
@@ -240,13 +240,31 @@ static inline bool is_seek_done(const Result_vector *hits, bool only_need_one)
240240
}
241241

242242

243-
bool Json_dom::find_child_doms(const Json_path_leg *path_leg,
244-
bool auto_wrap,
245-
bool only_need_one,
246-
Json_dom_vector *duplicates,
247-
Json_dom_vector *result)
248-
{
249-
enum_json_type dom_type= json_type();
243+
/**
244+
Return the child Json_doms identified by the given path leg.
245+
The child doms are added to a vector.
246+
247+
See the header comment for Json_wrapper.seek() for a discussion
248+
of complexities involving path expressions with more than one
249+
ellipsis (**) token.
250+
251+
@param[in] dom the DOM to search
252+
@param[in] path_leg identifies the child
253+
@param[in] auto_wrap if true, match final scalar with [0] is need be
254+
@param[in] only_need_one true if we can stop after finding one match
255+
@param[in,out] duplicates helps to identify duplicate arrays and objects
256+
introduced by daisy-chained ** tokens
257+
@param[in,out] result the vector of qualifying children
258+
@return false on success, true on error
259+
*/
260+
static bool find_child_doms(Json_dom *dom,
261+
const Json_path_leg *path_leg,
262+
bool auto_wrap,
263+
bool only_need_one,
264+
Json_dom_vector *duplicates,
265+
Json_dom_vector *result)
266+
{
267+
enum_json_type dom_type= dom->json_type();
250268
enum_json_path_leg_type leg_type= path_leg->get_type();
251269

252270
if (is_seek_done(result, only_need_one))
@@ -256,17 +274,17 @@ bool Json_dom::find_child_doms(const Json_path_leg *path_leg,
256274
if (auto_wrap && dom_type != enum_json_type::J_ARRAY &&
257275
path_leg->is_autowrap())
258276
{
259-
return !seen_already(result, this) &&
260-
add_if_missing(this, duplicates, result);
277+
return !seen_already(result, dom) &&
278+
add_if_missing(dom, duplicates, result);
261279
}
262280

263281
switch (leg_type)
264282
{
265283
case jpl_array_cell:
266284
if (dom_type == enum_json_type::J_ARRAY)
267285
{
268-
auto array= down_cast<const Json_array *>(this);
269-
Json_array_index idx= path_leg->first_array_index(array->size());
286+
const auto array= down_cast<const Json_array *>(dom);
287+
const Json_array_index idx= path_leg->first_array_index(array->size());
270288
return idx.within_bounds() &&
271289
add_if_missing((*array)[idx.position()], duplicates, result);
272290
}
@@ -275,8 +293,8 @@ bool Json_dom::find_child_doms(const Json_path_leg *path_leg,
275293
case jpl_array_cell_wildcard:
276294
if (dom_type == enum_json_type::J_ARRAY)
277295
{
278-
const auto array= down_cast<const Json_array *>(this);
279-
auto range= path_leg->get_array_range(array->size());
296+
const auto array= down_cast<const Json_array *>(dom);
297+
const auto range= path_leg->get_array_range(array->size());
280298
for (size_t i= range.m_begin; i < range.m_end; ++i)
281299
{
282300
if (add_if_missing((*array)[i], duplicates, result))
@@ -288,56 +306,39 @@ bool Json_dom::find_child_doms(const Json_path_leg *path_leg,
288306
return false;
289307
case jpl_ellipsis:
290308
{
291-
if (add_if_missing(this, duplicates, result))
309+
/*
310+
Paths that end with an ellipsis are rejected by the JSON path
311+
parser, so there is no need to check if we can stop after the
312+
first match on this path leg.
313+
*/
314+
DBUG_ASSERT(!only_need_one);
315+
316+
// The ellipsis matches the value on which it is called ...
317+
if (add_if_missing(dom, duplicates, result))
292318
return true; /* purecov: inspected */
293319

320+
// ... and, recursively, all the values contained in it.
294321
if (dom_type == enum_json_type::J_ARRAY)
295322
{
296-
const Json_array * const array= down_cast<const Json_array *>(this);
297-
323+
const auto array= down_cast<const Json_array *>(dom);
298324
for (unsigned eidx= 0; eidx < array->size(); eidx++)
299325
{
300-
Json_dom * child= (*array)[eidx];
301-
if (add_if_missing(child, duplicates, result))
326+
Json_dom *child= (*array)[eidx];
327+
// Now recurse and add the child and values under it.
328+
if (find_child_doms(child, path_leg, auto_wrap, only_need_one,
329+
duplicates, result))
302330
return true; /* purecov: inspected */
303-
if (is_seek_done(result, only_need_one))
304-
return false; /* purecov: inspected */
305-
306-
enum_json_type child_type= child->json_type();
307-
if ((child_type == enum_json_type::J_ARRAY) ||
308-
(child_type == enum_json_type::J_OBJECT))
309-
{
310-
// now recurse and add all objects and arrays under the child
311-
if (child->find_child_doms(path_leg, auto_wrap, only_need_one,
312-
duplicates, result))
313-
return true; /* purecov: inspected */
314-
}
315331
} // end of loop through children
316332
}
317333
else if (dom_type == enum_json_type::J_OBJECT)
318334
{
319-
const Json_object *const object=
320-
down_cast<const Json_object *>(this);
321-
322-
for (Json_object::const_iterator iter= object->begin();
323-
iter != object->end(); ++iter)
335+
for (const auto &member : *down_cast<const Json_object *>(dom))
324336
{
325-
Json_dom *child= iter->second;
326-
enum_json_type child_type= child->json_type();
327-
328-
if (add_if_missing(child, duplicates, result))
337+
Json_dom *child= member.second;
338+
// Now recurse and add the child and values under it.
339+
if (find_child_doms(child, path_leg, auto_wrap, only_need_one,
340+
duplicates, result))
329341
return true; /* purecov: inspected */
330-
if (is_seek_done(result, only_need_one))
331-
return false; /* purecov: inspected */
332-
333-
if ((child_type == enum_json_type::J_ARRAY) ||
334-
(child_type == enum_json_type::J_OBJECT))
335-
{
336-
// now recurse and add all objects and arrays under the child
337-
if (child->find_child_doms(path_leg, auto_wrap, only_need_one,
338-
duplicates, result))
339-
return true; /* purecov: inspected */
340-
}
341342
} // end of loop through children
342343
}
343344

@@ -347,7 +348,7 @@ bool Json_dom::find_child_doms(const Json_path_leg *path_leg,
347348
{
348349
if (dom_type == enum_json_type::J_OBJECT)
349350
{
350-
const Json_object * object= down_cast<const Json_object *>(this);
351+
const auto object= down_cast<const Json_object *>(dom);
351352
Json_dom *child= object->get(path_leg->get_member_name());
352353

353354
if (child != NULL && add_if_missing(child, duplicates, result))
@@ -360,12 +361,9 @@ bool Json_dom::find_child_doms(const Json_path_leg *path_leg,
360361
{
361362
if (dom_type == enum_json_type::J_OBJECT)
362363
{
363-
const Json_object * object= down_cast<const Json_object *>(this);
364-
365-
for (Json_object::const_iterator iter= object->begin();
366-
iter != object->end(); ++iter)
364+
for (const auto &member : *down_cast<const Json_object *>(dom))
367365
{
368-
if (add_if_missing(iter->second, duplicates, result))
366+
if (add_if_missing(member.second, duplicates, result))
369367
return true; /* purecov: inspected */
370368
if (is_seek_done(result, only_need_one))
371369
return false;
@@ -2280,12 +2278,17 @@ bool Json_dom::seek(const Json_seekable_path &path,
22802278
duplicates.clear();
22812279
candidates.clear();
22822280

2283-
for (Json_dom_vector::iterator it= hits->begin(); it != hits->end(); ++it)
2281+
/*
2282+
On the last path leg, we can stop after the first match if only
2283+
one match is requested by the caller.
2284+
*/
2285+
const bool stop_after_first_match=
2286+
only_need_one && (path_idx == path_leg_count - 1);
2287+
2288+
for (Json_dom *hit : *hits)
22842289
{
2285-
if ((*it)->find_child_doms(path_leg, auto_wrap,
2286-
(only_need_one &&
2287-
(path_idx == (path_leg_count-1))),
2288-
&duplicates, &candidates))
2290+
if (find_child_doms(hit, path_leg, auto_wrap, stop_after_first_match,
2291+
&duplicates, &candidates))
22892292
return true; /* purecov: inspected */
22902293
}
22912294

sql/json_dom.h

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -303,28 +303,6 @@ class Json_dom
303303

304304
/** Parent pointer */
305305
Json_dom *m_parent;
306-
307-
/**
308-
Return the child Json_doms identified by the given path leg.
309-
The child doms are added to a vector.
310-
311-
See the header comment for Json_wrapper.seek() for a discussion
312-
of complexities involving path expressions with more than one
313-
ellipsis (**) token.
314-
315-
@param[in] path_leg identifies the child
316-
@param[in] auto_wrap if true, match final scalar with [0] is need be
317-
@param[in] only_need_one True if we can stop after finding one match
318-
@param[in,out] duplicates helps to identify duplicate arrays and objects
319-
introduced by daisy-chained ** tokens
320-
@param[in,out] result the vector of qualifying children
321-
@return false on success, true on error
322-
*/
323-
bool find_child_doms(const Json_path_leg *path_leg,
324-
bool auto_wrap,
325-
bool only_need_one,
326-
Json_dom_vector *duplicates,
327-
Json_dom_vector *result);
328306
};
329307

330308

0 commit comments

Comments
 (0)