Skip to content

Commit df61f96

Browse files
committed
Bug#25418534: JSON_EXTRACT USING WILDCARDS TAKES FOREVER
Patch #5:

Json_wrapper::seek() currently always converts the wrapper to a DOM if the path contains an ellipsis. This is because paths with ellipses may require duplicate elimination, which is harder to do using the JSON binary interface.

It is not the case that all paths with ellipses need duplicate elimination. It is only needed if the path contains more than one ellipsis, or if it contains an ellipsis followed by an auto-wrapping array path leg.

This patch makes Json_wrapper::seek() only convert the wrapper to a DOM if the path requires duplicate elimination. It also makes Json_wrapper::seek() forward to Json_dom::seek() in the case where the Json_wrapper is wrapping a DOM, to avoid the overhead of the Json_wrapper interface when navigating through the DOM.

Microbenchmarks (64-bit, Intel Core i7-4770 3.4 GHz, GCC 6.3):

BM_JsonDomSearchEllipsis               21764 ns/iter [     +3.9%]
BM_JsonDomSearchEllipsis_OnlyOne       15486 ns/iter [     +4.4%]
BM_JsonDomSearchKey                      126 ns/iter [     +2.4%]
BM_JsonBinarySearchEllipsis            70979 ns/iter [   +225.2%]
BM_JsonBinarySearchEllipsis_OnlyOne       97 ns/iter [+229941.2%]
BM_JsonBinarySearchKey                    79 ns/iter [     +8.9%]

Change-Id: I0f024cf8a2da46d8d7e33560c4599f619efe09bd
1 parent 06b2dcc commit df61f96

File tree

2 files changed

+177
-116
lines changed

2 files changed

+177
-116
lines changed

sql/json_dom.cc

Lines changed: 156 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -2330,33 +2330,49 @@ bool Json_dom::seek(const Json_seekable_path &path,
23302330
}
23312331

23322332

2333-
bool Json_wrapper::seek_no_ellipsis(const Json_seekable_path &path,
2333+
/**
2334+
Finds all of the JSON sub-documents which match the path expression.
2335+
Puts the matches on an evolving vector of results. This is a
2336+
fast-track method for paths which don't need duplicate elimination
2337+
due to multiple ellipses or the combination of ellipses and
2338+
auto-wrapping. Those paths can take advantage of the efficient
2339+
positioning logic of json_binary::Value.
2340+
2341+
@param[in] wrapper the wrapper to search
2342+
@param[in] path the (possibly wildcarded) address of the sub-documents
2343+
@param[out] hits the result of the search
2344+
@param[in] current_leg the 0-based index of the first path leg to look at.
2345+
Should be the same as the depth at which the document in this
2346+
wrapper is located. Usually called on the root document with the
2347+
value 0, and then increased by one in recursive calls within the
2348+
function itself.
2349+
@param[in] last_leg the 0-based index of the leg just behind the last leg to
2350+
look at. If equal to the length of the path, the entire path is
2351+
used. If shorter than the length of the path, the search stops
2352+
at one of the ancestors of the value pointed to by the full
2353+
path.
2354+
@param[in] auto_wrap true if a non-array should be wrapped as a
2355+
single-element array before it is matched against an array path leg
2356+
@param[in] only_need_one true if we can stop after finding one match
2357+
2358+
@returns false if there was no error, otherwise true on error
2359+
*/
2360+
static bool seek_no_dup_elimination(const Json_wrapper &wrapper,
2361+
const Json_seekable_path &path,
23342362
Json_wrapper_vector *hits,
23352363
size_t current_leg,
23362364
size_t last_leg,
23372365
bool auto_wrap,
2338-
bool only_need_one) const
2366+
bool only_need_one)
23392367
{
2368+
// DOMs are searched using Json_dom::seek() instead.
2369+
DBUG_ASSERT(!wrapper.is_dom());
2370+
23402371
if (current_leg >= last_leg)
2341-
{
2342-
if (m_is_dom)
2343-
{
2344-
Json_wrapper clone(m_dom_value->clone());
2345-
return clone.empty() || hits->push_back(std::move(clone));
2346-
}
2347-
return hits->push_back(*this);
2348-
}
2372+
return hits->push_back(wrapper);
23492373

23502374
const Json_path_leg *path_leg= path.get_leg_at(current_leg);
2351-
const enum_json_type jtype= type();
2352-
2353-
// Handle auto-wrapping of non-arrays.
2354-
if (auto_wrap && jtype != enum_json_type::J_ARRAY && path_leg->is_autowrap())
2355-
{
2356-
// recursion
2357-
return seek_no_ellipsis(path, hits, current_leg + 1, last_leg,
2358-
auto_wrap, only_need_one);
2359-
}
2375+
const enum_json_type jtype= wrapper.type();
23602376

23612377
switch(path_leg->get_type())
23622378
{
@@ -2366,12 +2382,13 @@ bool Json_wrapper::seek_no_ellipsis(const Json_seekable_path &path,
23662382
{
23672383
case enum_json_type::J_OBJECT:
23682384
{
2369-
Json_wrapper member= lookup(path_leg->get_member_name());
2385+
Json_wrapper member= wrapper.lookup(path_leg->get_member_name());
23702386

23712387
if (member.type() != enum_json_type::J_ERROR)
23722388
{
23732389
// recursion
2374-
if (member.seek_no_ellipsis(path, hits, current_leg + 1, last_leg,
2390+
if (seek_no_dup_elimination(member, path, hits,
2391+
current_leg + 1, last_leg,
23752392
auto_wrap, only_need_one))
23762393
return true; /* purecov: inspected */
23772394
}
@@ -2391,19 +2408,16 @@ bool Json_wrapper::seek_no_ellipsis(const Json_seekable_path &path,
23912408
{
23922409
case enum_json_type::J_OBJECT:
23932410
{
2394-
for (Json_wrapper_object_iterator iter= object_iterator();
2411+
for (Json_wrapper_object_iterator iter= wrapper.object_iterator();
23952412
!iter.empty(); iter.next())
23962413
{
23972414
if (is_seek_done(hits, only_need_one))
23982415
return false;
23992416

24002417
// recursion
2401-
if (iter.elt().second.seek_no_ellipsis(path,
2402-
hits,
2403-
current_leg + 1,
2404-
last_leg,
2405-
auto_wrap,
2406-
only_need_one))
2418+
if (seek_no_dup_elimination(iter.elt().second, path, hits,
2419+
current_leg + 1, last_leg,
2420+
auto_wrap, only_need_one))
24072421
return true; /* purecov: inspected */
24082422
}
24092423
return false;
@@ -2419,95 +2433,155 @@ bool Json_wrapper::seek_no_ellipsis(const Json_seekable_path &path,
24192433
case jpl_array_cell:
24202434
if (jtype == enum_json_type::J_ARRAY)
24212435
{
2422-
Json_array_index idx= path_leg->first_array_index(length());
2436+
const Json_array_index idx= path_leg->first_array_index(wrapper.length());
24232437
return idx.within_bounds() &&
2424-
(*this)[idx.position()].seek_no_ellipsis(path, hits, current_leg + 1,
2425-
last_leg, auto_wrap,
2426-
only_need_one);
2438+
seek_no_dup_elimination(wrapper[idx.position()], path, hits,
2439+
current_leg + 1, last_leg,
2440+
auto_wrap, only_need_one);
24272441
}
2428-
return false;
2442+
return auto_wrap && path_leg->is_autowrap() &&
2443+
seek_no_dup_elimination(wrapper, path, hits, current_leg + 1, last_leg,
2444+
auto_wrap, only_need_one);
24292445

24302446
case jpl_array_range:
24312447
case jpl_array_cell_wildcard:
24322448
if (jtype == enum_json_type::J_ARRAY)
24332449
{
2434-
auto range= path_leg->get_array_range(length());
2450+
const auto range= path_leg->get_array_range(wrapper.length());
24352451
for (size_t idx= range.m_begin; idx < range.m_end; idx++)
24362452
{
24372453
if (is_seek_done(hits, only_need_one))
24382454
return false;
24392455

24402456
// recursion
2441-
Json_wrapper cell= (*this)[idx];
2442-
if (cell.seek_no_ellipsis(path, hits, current_leg + 1, last_leg,
2443-
auto_wrap, only_need_one))
2457+
if (seek_no_dup_elimination(wrapper[idx], path, hits, current_leg + 1,
2458+
last_leg, auto_wrap, only_need_one))
24442459
return true; /* purecov: inspected */
24452460
}
2461+
return false;
24462462
}
2447-
return false;
2463+
return auto_wrap && path_leg->is_autowrap() &&
2464+
seek_no_dup_elimination(wrapper, path, hits, current_leg + 1, last_leg,
2465+
auto_wrap, only_need_one);
24482466

2449-
default:
2450-
// should never be called on a path which contains an ellipsis
2451-
DBUG_ASSERT(false); /* purecov: inspected */
2452-
return true; /* purecov: inspected */
2467+
case jpl_ellipsis:
2468+
// recursion
2469+
if (seek_no_dup_elimination(wrapper, path, hits, current_leg + 1, last_leg,
2470+
auto_wrap, only_need_one))
2471+
return true; /* purecov: inspected */
2472+
if (jtype == enum_json_type::J_ARRAY)
2473+
{
2474+
const size_t length= wrapper.length();
2475+
for (size_t idx= 0; idx < length; ++idx)
2476+
{
2477+
if (is_seek_done(hits, only_need_one))
2478+
return false;
2479+
2480+
// recursion
2481+
if (seek_no_dup_elimination(wrapper[idx], path, hits,
2482+
current_leg, last_leg,
2483+
auto_wrap, only_need_one))
2484+
return true; /* purecov: inspected */
2485+
}
2486+
}
2487+
else if (jtype == enum_json_type::J_OBJECT)
2488+
{
2489+
for (Json_wrapper_object_iterator iter= wrapper.object_iterator();
2490+
!iter.empty(); iter.next())
2491+
{
2492+
if (is_seek_done(hits, only_need_one))
2493+
return false;
2494+
2495+
// recursion
2496+
if (seek_no_dup_elimination(iter.elt().second, path, hits, current_leg,
2497+
last_leg, auto_wrap, only_need_one))
2498+
return true; /* purecov: inspected */
2499+
}
2500+
}
2501+
return false;
24532502
} // end outer switch on leg type
2503+
2504+
DBUG_ASSERT(false); /* purecov: deadcode */
2505+
return true; /* purecov: deadcode */
24542506
}
24552507

24562508

2457-
namespace
2458-
{
2509+
/**
2510+
Should Json_wrapper::seek() delegate to Json_dom::seek() for this
2511+
search?
2512+
2513+
@param wrapper the wrapper being searched
2514+
@param path the path to search for
2515+
@param auto_wrap true if array auto-wrapping is used
24592516
2460-
/// Does the path contain an ellipsis token?
2461-
bool contains_ellipsis(const Json_seekable_path &path)
2517+
@retval true if Json_dom::seek() should be used
2518+
@retval false if the search should use the Json_wrapper interface
2519+
*/
2520+
static bool seek_as_dom(const Json_wrapper *wrapper,
2521+
const Json_seekable_path &path,
2522+
bool auto_wrap)
24622523
{
2463-
const size_t size= path.leg_count();
2464-
for (size_t i= 0; i < size; i++)
2465-
if (path.get_leg_at(i)->get_type() == jpl_ellipsis)
2524+
// If the wrapper contains a DOM, search the DOM directly.
2525+
if (wrapper->is_dom())
2526+
return true;
2527+
2528+
/*
2529+
If the path requires duplicate elimination, Json_wrapper::seek()
2530+
should convert the value to a DOM and seek using Json_dom::seek(),
2531+
which handles duplicate elimination. Duplicate elimination is
2532+
required if the path contains multiple ellipses, or if it contains
2533+
an auto-wrapping array path leg after an ellipsis.
2534+
*/
2535+
const size_t legs= path.leg_count();
2536+
bool has_ellipsis= false;
2537+
for (size_t i= 0; i < legs; ++i)
2538+
{
2539+
const Json_path_leg *leg= path.get_leg_at(i);
2540+
if (has_ellipsis && (leg->get_type() == jpl_ellipsis ||
2541+
(auto_wrap && leg->is_autowrap())))
24662542
return true;
2543+
has_ellipsis|= (leg->get_type() == jpl_ellipsis);
2544+
}
2545+
24672546
return false;
24682547
}
24692548

2470-
} // namespace
2471-
24722549

24732550
bool Json_wrapper::seek(const Json_seekable_path &path,
24742551
Json_wrapper_vector *hits,
24752552
bool auto_wrap, bool only_need_one)
24762553
{
2477-
if (empty())
2478-
{
2479-
/* purecov: begin inspected */
2480-
DBUG_ASSERT(false);
2481-
return false;
2482-
/* purecov: end */
2483-
}
2484-
2485-
// use fast-track code if the path doesn't have any ellipses
2486-
if (!contains_ellipsis(path))
2487-
{
2488-
return seek_no_ellipsis(path, hits, 0, path.leg_count(),
2489-
auto_wrap, only_need_one);
2490-
}
2554+
DBUG_ASSERT(!empty());
24912555

24922556
/*
2493-
FIXME.
2557+
If the wrapper wraps a DOM, let's call Json_dom::seek() directly,
2558+
to avoid the overhead of going through the Json_wrapper interface.
24942559
2495-
Materialize the dom if the path contains ellipses. Duplicate
2496-
detection is difficult on binary values.
2497-
*/
2498-
to_dom(current_thd);
2499-
2500-
Json_dom_vector dhits(key_memory_JSON);
2501-
if (m_dom_value->seek(path, &dhits, auto_wrap, only_need_one))
2502-
return true; /* purecov: inspected */
2503-
for (const Json_dom *dom : dhits)
2560+
If ellipsis and auto-wrapping are used in a way that requires
2561+
duplicate elimination, convert to DOM since duplicate detection is
2562+
difficult on binary values.
2563+
*/
2564+
if (seek_as_dom(this, path, auto_wrap))
25042565
{
2505-
Json_wrapper clone(dom->clone());
2506-
if (clone.empty() || hits->push_back(std::move(clone)))
2566+
Json_dom *dom= to_dom(current_thd);
2567+
if (dom == nullptr)
2568+
return true; /* purecov: inspected */
2569+
2570+
Json_dom_vector dom_hits(key_memory_JSON);
2571+
if (dom->seek(path, &dom_hits, auto_wrap, only_need_one))
25072572
return true; /* purecov: inspected */
2573+
2574+
for (const Json_dom *hit : dom_hits)
2575+
{
2576+
if (hits->emplace_back(hit->clone()) || hits->back().empty())
2577+
return true; /* purecov: inspected */
2578+
}
2579+
2580+
return false;
25082581
}
25092582

2510-
return false;
2583+
return seek_no_dup_elimination(*this, path, hits, 0, path.leg_count(),
2584+
auto_wrap, only_need_one);
25112585
}
25122586

25132587

@@ -3901,7 +3975,8 @@ bool Json_wrapper::attempt_binary_update(const Field_json *field,
39013975

39023976
// Find the parent of the value we want to modify.
39033977
Json_wrapper_vector hits(key_memory_JSON);
3904-
if (seek_no_ellipsis(path, &hits, 0, path.leg_count() - 1, false, true))
3978+
if (seek_no_dup_elimination(*this, path, &hits, 0, path.leg_count() - 1,
3979+
false, true))
39053980
return true; /* purecov: inspected */
39063981

39073982
if (hits.empty())
@@ -4046,7 +4121,8 @@ bool Json_wrapper::binary_remove(const Field_json *field,
40464121
*found_path= false;
40474122

40484123
Json_wrapper_vector hits(key_memory_JSON);
4049-
if (seek_no_ellipsis(path, &hits, 0, path.leg_count() - 1, false, true))
4124+
if (seek_no_dup_elimination(*this, path, &hits, 0, path.leg_count() - 1,
4125+
false, true))
40504126
return true; /* purecov: inspected */
40514127

40524128
DBUG_ASSERT(hits.size() <= 1);

0 commit comments

Comments
 (0)