@@ -368,7 +368,6 @@ bool filesort(THD *thd, Filesort *filesort, bool sort_positions,
368
368
IO_CACHE chunk_file; // For saving Merge_chunk structs.
369
369
IO_CACHE *outfile; // Contains the final, sorted result.
370
370
Sort_param param;
371
- bool multi_byte_charset;
372
371
Bounded_queue<uchar *, uchar *, Sort_param, Mem_compare_queue_key>
373
372
pq ((Malloc_allocator<uchar*>
374
373
(key_memory_Filesort_info_record_pointers)));
@@ -414,8 +413,7 @@ bool filesort(THD *thd, Filesort *filesort, bool sort_positions,
414
413
415
414
param.init_for_filesort (filesort,
416
415
make_array (filesort->sortorder , s_length),
417
- sortlength (thd, filesort->sortorder , s_length,
418
- &multi_byte_charset),
416
+ sortlength (thd, filesort->sortorder , s_length),
419
417
table,
420
418
thd->variables .max_length_for_sort_data ,
421
419
max_rows, sort_positions);
@@ -430,8 +428,7 @@ bool filesort(THD *thd, Filesort *filesort, bool sort_positions,
430
428
// If number of rows is not known, use as much of sort buffer as possible.
431
429
num_rows_estimate= table->file ->estimate_rows_upper_bound ();
432
430
433
- if (multi_byte_charset &&
434
- !(param.tmp_buffer = (char *)
431
+ if (!(param.tmp_buffer = (char *)
435
432
my_malloc (key_memory_Sort_param_tmp_buffer,
436
433
param.max_compare_length (), MYF (MY_WME))))
437
434
goto err;
@@ -1508,7 +1505,6 @@ uint Sort_param::make_sortkey(uchar *to, const uchar *ref_pos)
1508
1505
}
1509
1506
1510
1507
const CHARSET_INFO *cs=item->collation .collation ;
1511
- char fill_char= ((cs->state & MY_CS_BINSORT) ? (char ) 0 : ' ' );
1512
1508
1513
1509
/* All item->str() calls use some extra bytes for the end null. */
1514
1510
String tmp ((char *) to,sort_field->length +4 ,cs);
@@ -1539,46 +1535,29 @@ uint Sort_param::make_sortkey(uchar *to, const uchar *ref_pos)
1539
1535
break ;
1540
1536
}
1541
1537
uint length= static_cast <uint>(res->length ());
1542
- if (sort_field->need_strnxfrm )
1538
+ const char *from= res->ptr ();
1539
+ if (pointer_cast<const uchar *>(from) == to)
1543
1540
{
1544
- char *from=(char *) res->ptr ();
1545
- size_t tmp_length MY_ATTRIBUTE ((unused));
1546
- if ((uchar*) from == to)
1547
- {
1548
- DBUG_ASSERT (sort_field->length >= length);
1549
- set_if_smaller (length,sort_field->length );
1550
- memcpy (tmp_buffer, from, length);
1551
- from= tmp_buffer;
1552
- }
1553
- tmp_length=
1554
- cs->coll ->strnxfrm (cs, to, sort_field->length ,
1555
- item->max_char_length (),
1556
- (uchar*) from, length,
1557
- MY_STRXFRM_PAD_TO_MAXLEN);
1558
- DBUG_ASSERT (tmp_length == sort_field->length );
1541
+ DBUG_ASSERT (sort_field->length >= length);
1542
+ set_if_smaller (length,sort_field->length );
1543
+ memcpy (tmp_buffer, from, length);
1544
+ from= tmp_buffer;
1559
1545
}
1560
- else
1546
+ uint sort_field_length= sort_field->length ;
1547
+ if (sort_field->suffix_length )
1561
1548
{
1562
- size_t diff;
1563
- uint sort_field_length= sort_field->length -
1564
- sort_field->suffix_length ;
1565
- if (sort_field_length < length)
1566
- {
1567
- diff= 0 ;
1568
- length= sort_field_length;
1569
- }
1570
- else
1571
- diff= sort_field_length - length;
1572
- if (sort_field->suffix_length )
1573
- {
1574
- /* Store length last in result_string */
1575
- store_length (to + sort_field_length, length,
1576
- sort_field->suffix_length );
1577
- }
1578
-
1579
- my_strnxfrm (cs, to,length,(const uchar*)res->ptr (),length);
1580
- cs->cset ->fill (cs, (char *)to+length,diff,fill_char);
1549
+ /* Store length last in result_string */
1550
+ sort_field_length-= sort_field->suffix_length ;
1551
+ store_length (to + sort_field_length, length, sort_field->suffix_length );
1581
1552
}
1553
+
1554
+ size_t tmp_length MY_ATTRIBUTE ((unused));
1555
+ tmp_length=
1556
+ cs->coll ->strnxfrm (cs, to, sort_field_length,
1557
+ item->max_char_length (),
1558
+ pointer_cast<const uchar*>(from), length,
1559
+ MY_STRXFRM_PAD_TO_MAXLEN);
1560
+ DBUG_ASSERT (tmp_length == sort_field_length);
1582
1561
break ;
1583
1562
}
1584
1563
case INT_RESULT:
@@ -2394,32 +2373,25 @@ static uint suffix_length(ulong string_length)
2394
2373
@param thd Thread handler
2395
2374
@param sortorder Order of items to sort
2396
2375
@param s_length Number of items to sort
2397
- @param[out] multi_byte_charset Set to 1 if we are using multi-byte charset
2398
- (In which case we have to use strnxfrm())
2399
2376
2400
2377
@note
2401
2378
sortorder->length is updated for each sort item.
2402
- @n
2403
- sortorder->need_strnxfrm is set 1 if we have to use strnxfrm
2404
2379
2405
2380
@return
2406
2381
Total length of sort buffer in bytes
2407
2382
*/
2408
2383
2409
2384
uint
2410
- sortlength (THD *thd, st_sort_field *sortorder, uint s_length,
2411
- bool *multi_byte_charset)
2385
+ sortlength (THD *thd, st_sort_field *sortorder, uint s_length)
2412
2386
{
2413
2387
uint total_length= 0 ;
2414
- *multi_byte_charset= false ;
2415
2388
2416
2389
// Heed the contract that strnxfrm() needs an even number of bytes.
2417
2390
const uint max_sort_length_even=
2418
2391
(thd->variables .max_sort_length + 1 ) & ~1 ;
2419
2392
2420
2393
for (; s_length-- ; sortorder++)
2421
2394
{
2422
- DBUG_ASSERT (!sortorder->need_strnxfrm );
2423
2395
DBUG_ASSERT (sortorder->suffix_length == 0 );
2424
2396
if (sortorder->field )
2425
2397
{
@@ -2428,16 +2400,12 @@ sortlength(THD *thd, st_sort_field *sortorder, uint s_length,
2428
2400
sortorder->length = field->sort_length ();
2429
2401
sortorder->is_varlen = field->sort_key_is_varlen ();
2430
2402
2431
- if (use_strnxfrm (cs))
2432
- {
2433
- // How many bytes do we need (including sort weights) for strnxfrm()?
2434
- sortorder->length = cs->coll ->strnxfrmlen (cs, sortorder->length );
2435
- sortorder->need_strnxfrm = true ;
2436
- *multi_byte_charset= 1 ;
2437
- }
2403
+ // How many bytes do we need (including sort weights) for strnxfrm()?
2404
+ sortorder->length = cs->coll ->strnxfrmlen (cs, sortorder->length );
2405
+
2438
2406
/*
2439
2407
NOTE: The corresponding test below also has a check for
2440
- cs == &my_charset_bin to sort truncated blobs deterministically;
2408
+ NO PAD collations to sort truncated blobs deterministically;
2441
2409
however, that part is dealt with in Field_blob/Field_varstring,
2442
2410
so we don't need it here.
2443
2411
*/
@@ -2468,16 +2436,20 @@ sortlength(THD *thd, st_sort_field *sortorder, uint s_length,
2468
2436
const CHARSET_INFO *cs= item->collation .collation ;
2469
2437
sortorder->length = item->max_length ;
2470
2438
set_if_smaller (sortorder->length , max_sort_length_even);
2471
- if (use_strnxfrm (cs))
2472
- {
2473
- // How many bytes do we need (including sort weights) for strnxfrm()?
2474
- sortorder->length = cs->coll ->strnxfrmlen (cs, sortorder->length );
2475
- sortorder->need_strnxfrm = true ;
2476
- *multi_byte_charset= 1 ;
2477
- }
2478
- else if (cs->pad_attribute == NO_PAD)
2439
+
2440
+ // How many bytes do we need (including sort weights) for strnxfrm()?
2441
+ sortorder->length = cs->coll ->strnxfrmlen (cs, sortorder->length );
2442
+
2443
+ if (cs->pad_attribute == NO_PAD)
2479
2444
{
2480
- /* Store length last to be able to sort blob/varbinary */
2445
+ /*
2446
+ Store length last, which makes it into a tie-breaker. This is
2447
+ so that e.g. 'a' < 'a\0' for the binary collation, even though
2448
+ the field is fixed-width and pads with '\0'. The utf8mb4_0900_*
2449
+ collations technically don't need this, since they pad with 0
2450
+ (which does not match any real weight), but we'd like not to
2451
+ rely on such implementation details in filesort.
2452
+ */
2481
2453
sortorder->suffix_length = suffix_length (sortorder->length );
2482
2454
sortorder->length += sortorder->suffix_length ;
2483
2455
}
0 commit comments