Skip to content

Commit 8111a34

Browse files
committed
Bug#25418534: JSON_EXTRACT USING WILDCARDS TAKES FOREVER
Patch #6: Use depth-first search in Json_dom::seek(), like in Json_wrapper::seek(). This allows the search to stop earlier than the original breadth-first search when the only_need_one flag is given. It also avoids building up a vector of candidate results for each path leg.

Microbenchmarks (64-bit, Intel Core i7-4770 3.4 GHz, GCC 6.3):

BM_JsonDomSearchEllipsis               21666 ns/iter [    +0.5%]
BM_JsonDomSearchEllipsis_OnlyOne         135 ns/iter [+11371.1%]
BM_JsonDomSearchKey                      120 ns/iter [    +5.0%]
BM_JsonBinarySearchEllipsis            71022 ns/iter [    -0.1%]
BM_JsonBinarySearchEllipsis_OnlyOne       98 ns/iter [    -1.0%]
BM_JsonBinarySearchKey                    80 ns/iter [    -1.3%]

Change-Id: Ice7476f6e087a80d69474a38ab4fca918667b332
1 parent df61f96 commit 8111a34

File tree

1 file changed

+77
-121
lines changed

1 file changed

+77
-121
lines changed

sql/json_dom.cc

Lines changed: 77 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -256,15 +256,17 @@ static inline bool is_seek_done(const Result_vector *hits, bool only_need_one)
256256

257257

258258
/**
259-
Return the child Json_doms identified by the given path leg.
259+
Find the child Json_dom objects identified by the given path.
260260
The child doms are added to a vector.
261261
262-
See the header comment for Json_wrapper.seek() for a discussion
262+
See the header comment for #Json_wrapper::seek() for a discussion
263263
of complexities involving path expressions with more than one
264-
ellipsis (**) token.
264+
ellipsis (**) token, or a combination of ellipsis and auto-wrapping
265+
path legs.
265266
266267
@param[in] dom the DOM to search
267-
@param[in] path_leg identifies the child
268+
@param[in] path the path to search
269+
@param[in] current_leg the 0-based index of the first path leg to look at
268270
@param[in] auto_wrap if true, auto-wrap non-arrays when matching against
269271
array path legs
270272
@param[in] only_need_one true if we can stop after finding one match
@@ -276,14 +278,19 @@ static inline bool is_seek_done(const Result_vector *hits, bool only_need_one)
276278
@return false on success, true on error
277279
*/
278280
static bool find_child_doms(Json_dom *dom,
279-
const Json_path_leg *path_leg,
281+
const Json_seekable_path &path,
282+
size_t current_leg,
280283
bool auto_wrap,
281284
bool only_need_one,
282285
Json_dom_vector *duplicates,
283286
Json_dom_vector *result)
284287
{
285-
enum_json_type dom_type= dom->json_type();
286-
enum_json_path_leg_type leg_type= path_leg->get_type();
288+
if (current_leg == path.leg_count())
289+
return add_if_missing(dom, duplicates, result);
290+
291+
const enum_json_type dom_type= dom->json_type();
292+
const Json_path_leg *const path_leg= path.get_leg_at(current_leg);
293+
const enum_json_path_leg_type leg_type= path_leg->get_type();
287294

288295
switch (leg_type)
289296
{
@@ -293,11 +300,13 @@ static bool find_child_doms(Json_dom *dom,
293300
const auto array= down_cast<const Json_array *>(dom);
294301
const Json_array_index idx= path_leg->first_array_index(array->size());
295302
return idx.within_bounds() &&
296-
add_if_missing((*array)[idx.position()], duplicates, result);
303+
find_child_doms((*array)[idx.position()], path, current_leg + 1,
304+
auto_wrap, only_need_one, duplicates, result);
297305
}
298306
// Handle auto-wrapping of non-arrays.
299307
return auto_wrap && path_leg->is_autowrap() &&
300-
add_if_missing(dom, duplicates, result);
308+
find_child_doms(dom, path, current_leg + 1, auto_wrap, only_need_one,
309+
duplicates, result);
301310
case jpl_array_range:
302311
case jpl_array_cell_wildcard:
303312
if (dom_type == enum_json_type::J_ARRAY)
@@ -306,27 +315,23 @@ static bool find_child_doms(Json_dom *dom,
306315
const auto range= path_leg->get_array_range(array->size());
307316
for (size_t i= range.m_begin; i < range.m_end; ++i)
308317
{
309-
if (add_if_missing((*array)[i], duplicates, result))
318+
if (find_child_doms((*array)[i], path, current_leg + 1, auto_wrap,
319+
only_need_one, duplicates, result))
310320
return true; /* purecov: inspected */
311-
if (only_need_one)
321+
if (is_seek_done(result, only_need_one))
312322
return false;
313323
}
314324
return false;
315325
}
316326
// Handle auto-wrapping of non-arrays.
317327
return auto_wrap && path_leg->is_autowrap() &&
318-
add_if_missing(dom, duplicates, result);
328+
find_child_doms(dom, path, current_leg + 1, auto_wrap, only_need_one,
329+
duplicates, result);
319330
case jpl_ellipsis:
320331
{
321-
/*
322-
Paths that end with an ellipsis are rejected by the JSON path
323-
parser, so there is no need to check if we can stop after the
324-
first match on this path leg.
325-
*/
326-
DBUG_ASSERT(!only_need_one);
327-
328332
// The ellipsis matches the value on which it is called ...
329-
if (add_if_missing(dom, duplicates, result))
333+
if (find_child_doms(dom, path, current_leg + 1, auto_wrap, only_need_one,
334+
duplicates, result))
330335
return true; /* purecov: inspected */
331336

332337
// ... and, recursively, all the values contained in it.
@@ -335,21 +340,25 @@ static bool find_child_doms(Json_dom *dom,
335340
const auto array= down_cast<const Json_array *>(dom);
336341
for (unsigned eidx= 0; eidx < array->size(); eidx++)
337342
{
338-
Json_dom *child= (*array)[eidx];
343+
if (is_seek_done(result, only_need_one))
344+
return false;
345+
339346
// Now recurse and add the child and values under it.
340-
if (find_child_doms(child, path_leg, auto_wrap, only_need_one,
341-
duplicates, result))
347+
if (find_child_doms((*array)[eidx], path, current_leg, auto_wrap,
348+
only_need_one, duplicates, result))
342349
return true; /* purecov: inspected */
343350
} // end of loop through children
344351
}
345352
else if (dom_type == enum_json_type::J_OBJECT)
346353
{
347354
for (const auto &member : *down_cast<const Json_object *>(dom))
348355
{
349-
Json_dom *child= member.second;
356+
if (is_seek_done(result, only_need_one))
357+
return false;
358+
350359
// Now recurse and add the child and values under it.
351-
if (find_child_doms(child, path_leg, auto_wrap, only_need_one,
352-
duplicates, result))
360+
if (find_child_doms(member.second, path, current_leg, auto_wrap,
361+
only_need_one, duplicates, result))
353362
return true; /* purecov: inspected */
354363
} // end of loop through children
355364
}
@@ -362,7 +371,9 @@ static bool find_child_doms(Json_dom *dom,
362371
{
363372
const auto object= down_cast<const Json_object *>(dom);
364373
Json_dom *child= object->get(path_leg->get_member_name());
365-
return child != nullptr && result->push_back(child);
374+
return child != nullptr &&
375+
find_child_doms(child, path, current_leg + 1, auto_wrap,
376+
only_need_one, duplicates, result);
366377
}
367378

368379
return false;
@@ -373,9 +384,10 @@ static bool find_child_doms(Json_dom *dom,
373384
{
374385
for (const auto &member : *down_cast<const Json_object *>(dom))
375386
{
376-
if (result->push_back(member.second))
387+
if (find_child_doms(member.second, path, current_leg + 1, auto_wrap,
388+
only_need_one, duplicates, result))
377389
return true; /* purecov: inspected */
378-
if (only_need_one)
390+
if (is_seek_done(result, only_need_one))
379391
return false;
380392
}
381393
}
@@ -391,6 +403,37 @@ static bool find_child_doms(Json_dom *dom,
391403
}
392404

393405

406+
/**
407+
Does a search on this path, using Json_dom::seek() or
408+
Json_wrapper::seek(), need duplicate elimination?
409+
410+
Duplicate elimination is needed if the path contains multiple
411+
ellipses, or if it contains an auto-wrapping array path leg after an
412+
ellipsis. See #Json_wrapper::seek() for more details.
413+
414+
@param path the path to search for
415+
@param auto_wrap true if array auto-wrapping is used
416+
417+
@retval true if duplicate elimination is needed
418+
@retval false if the path won't produce duplicates
419+
*/
420+
static bool path_gives_duplicates(const Json_seekable_path &path,
421+
bool auto_wrap)
422+
{
423+
const size_t legs= path.leg_count();
424+
bool has_ellipsis= false;
425+
for (size_t i= 0; i < legs; ++i)
426+
{
427+
const Json_path_leg *leg= path.get_leg_at(i);
428+
if (has_ellipsis && (leg->get_type() == jpl_ellipsis ||
429+
(auto_wrap && leg->is_autowrap())))
430+
return true;
431+
has_ellipsis|= (leg->get_type() == jpl_ellipsis);
432+
}
433+
return false;
434+
}
435+
436+
394437
Json_object::Json_object()
395438
: Json_dom(),
396439
m_map(Json_object_map::key_compare(),
@@ -2275,58 +2318,12 @@ bool Json_dom::seek(const Json_seekable_path &path,
22752318
Json_dom_vector *hits,
22762319
bool auto_wrap, bool only_need_one)
22772320
{
2278-
Json_dom_vector candidates(key_memory_JSON);
22792321
Json_dom_vector duplicates(key_memory_JSON);
2322+
Json_dom_vector *dup_vector=
2323+
path_gives_duplicates(path, auto_wrap) ? &duplicates : nullptr;
22802324

2281-
if (hits->push_back(this))
2282-
return true; /* purecov: inspected */
2283-
2284-
bool seen_ellipsis= false;
2285-
2286-
size_t path_leg_count= path.leg_count();
2287-
for (size_t path_idx= 0; path_idx < path_leg_count; path_idx++)
2288-
{
2289-
const Json_path_leg *path_leg= path.get_leg_at(path_idx);
2290-
2291-
/*
2292-
When we have multiple ellipses in the path, or an ellipsis
2293-
followed by an auto-wrapping array path leg, we need to
2294-
eliminate duplicates from the result. It's not needed for the
2295-
first ellipsis. See explanation in add_if_missing() and
2296-
Json_wrapper::seek().
2297-
*/
2298-
Json_dom_vector *dup_vector= nullptr;
2299-
if (seen_ellipsis && (path_leg->get_type() == jpl_ellipsis ||
2300-
(auto_wrap && path_leg->is_autowrap())))
2301-
{
2302-
dup_vector= &duplicates;
2303-
dup_vector->clear();
2304-
}
2305-
seen_ellipsis|= path_leg->get_type() == jpl_ellipsis;
2306-
2307-
/*
2308-
On the last path leg, we can stop after the first match if only
2309-
one match is requested by the caller.
2310-
*/
2311-
const bool stop_after_first_match=
2312-
only_need_one && (path_idx == path_leg_count - 1);
2313-
2314-
for (Json_dom *hit : *hits)
2315-
{
2316-
if (find_child_doms(hit, path_leg, auto_wrap, stop_after_first_match,
2317-
dup_vector, &candidates))
2318-
return true; /* purecov: inspected */
2319-
2320-
if (is_seek_done(&candidates, stop_after_first_match))
2321-
break;
2322-
}
2323-
2324-
// swap the two lists so that they can be re-used
2325-
hits->swap(candidates);
2326-
candidates.clear();
2327-
}
2328-
2329-
return false;
2325+
return find_child_doms(this, path, 0, auto_wrap, only_need_one,
2326+
dup_vector, hits);
23302327
}
23312328

23322329

@@ -2506,47 +2503,6 @@ static bool seek_no_dup_elimination(const Json_wrapper &wrapper,
25062503
}
25072504

25082505

2509-
/**
2510-
Should Json_wrapper::seek() delegate to Json_dom::seek() for this
2511-
search?
2512-
2513-
@param wrapper the wrapper being searched
2514-
@param path the path to search for
2515-
@param auto_wrap true if array auto-wrapping is used
2516-
2517-
@retval true if Json_dom::seek() should be used
2518-
@retval false if the search should use the Json_wrapper interface
2519-
*/
2520-
static bool seek_as_dom(const Json_wrapper *wrapper,
2521-
const Json_seekable_path &path,
2522-
bool auto_wrap)
2523-
{
2524-
// If the wrapper contains a DOM, search the DOM directly.
2525-
if (wrapper->is_dom())
2526-
return true;
2527-
2528-
/*
2529-
If the path requires duplicate elimination, Json_wrapper::seek()
2530-
should convert the value to a DOM and seek using Json_dom::seek(),
2531-
which handles duplicate elimination. Duplicate elimination is
2532-
required if the path contains multiple ellipses, or if it contains
2533-
an auto-wrapping array path leg after an ellipsis.
2534-
*/
2535-
const size_t legs= path.leg_count();
2536-
bool has_ellipsis= false;
2537-
for (size_t i= 0; i < legs; ++i)
2538-
{
2539-
const Json_path_leg *leg= path.get_leg_at(i);
2540-
if (has_ellipsis && (leg->get_type() == jpl_ellipsis ||
2541-
(auto_wrap && leg->is_autowrap())))
2542-
return true;
2543-
has_ellipsis|= (leg->get_type() == jpl_ellipsis);
2544-
}
2545-
2546-
return false;
2547-
}
2548-
2549-
25502506
bool Json_wrapper::seek(const Json_seekable_path &path,
25512507
Json_wrapper_vector *hits,
25522508
bool auto_wrap, bool only_need_one)
@@ -2561,7 +2517,7 @@ bool Json_wrapper::seek(const Json_seekable_path &path,
25612517
duplicate elimination, convert to DOM since duplicate detection is
25622518
difficult on binary values.
25632519
*/
2564-
if (seek_as_dom(this, path, auto_wrap))
2520+
if (is_dom() || path_gives_duplicates(path, auto_wrap))
25652521
{
25662522
Json_dom *dom= to_dom(current_thd);
25672523
if (dom == nullptr)

0 commit comments

Comments
 (0)