@@ -60,10 +60,11 @@ MY_UCA_INFO my_uca_v400=
60
60
0xFFFF , /* maxchar */
61
61
uca_length,
62
62
uca_weight,
63
- { /* Contractions: */
64
- 0 , /* nitems */
65
- NULL , /* item */
66
- NULL /* flags */
63
+ { /* Contractions: */
64
+ false , /* has_contractions */
65
+ {0 }, /* nitems */
66
+ {nullptr }, /* item */
67
+ nullptr /* flags */
67
68
},
68
69
69
70
/* Logical positions */
@@ -96,10 +97,11 @@ MY_UCA_INFO my_uca_v520=
96
97
0x10FFFF , /* maxchar */
97
98
uca520_length,
98
99
uca520_weight,
99
- { /* Contractions: */
100
- 0 , /* nitems */
101
- NULL , /* item */
102
- NULL /* flags */
100
+ { /* Contractions: */
101
+ false , /* has_contractions */
102
+ {0 }, /* nitems */
103
+ {nullptr }, /* item */
104
+ nullptr /* flags */
103
105
},
104
106
105
107
0x0009 , /* first_non_ignorable p != ignore */
@@ -849,15 +851,15 @@ static MY_CONTRACTION *
849
851
my_uca_add_contraction (MY_CONTRACTIONS *list, my_wc_t *wc, size_t len,
850
852
my_bool with_context)
851
853
{
852
- MY_CONTRACTION *next= &list->item [list->nitems ];
853
- size_t i;
854
854
/*
855
855
Contraction is always at least two code points.
856
856
Contraction is never longer than MY_UCA_MAX_CONTRACTION,
857
857
which is guaranteed by using my_coll_rule_expand() with proper limit.
858
858
*/
859
859
DBUG_ASSERT (len > 1 && len <= MY_UCA_MAX_CONTRACTION);
860
- for (i= 0 ; i < len; i++)
860
+
861
+ MY_CONTRACTION *next= &list->item [len][list->nitems [len]];
862
+ for (size_t i= 0 ; i < len; i++)
861
863
{
862
864
/*
863
865
We don't support contractions with U+0000.
@@ -866,10 +868,10 @@ my_uca_add_contraction(MY_CONTRACTIONS *list, my_wc_t *wc, size_t len,
866
868
DBUG_ASSERT (wc[i] != 0 );
867
869
next->ch [i]= wc[i];
868
870
}
869
- if (i < MY_UCA_MAX_CONTRACTION)
870
- next->ch [i ]= 0 ; /* Add end-of-line marker */
871
+ if (len < MY_UCA_MAX_CONTRACTION)
872
+ next->ch [len ]= 0 ; /* Add end-of-line marker */
871
873
next->with_context = with_context;
872
- list->nitems ++;
874
+ list->nitems [len] ++;
873
875
return next;
874
876
}
875
877
@@ -879,24 +881,35 @@ my_uca_add_contraction(MY_CONTRACTIONS *list, my_wc_t *wc, size_t len,
879
881
880
882
@param contractions Pointer to UCA data
881
883
@param loader Pointer to charset loader
882
- @param n Number of contractions
884
+ @param ncontractions Array of contraction counts, indexed by contraction length
883
885
884
886
@return Error code
885
887
@retval 0 - memory allocated successfully
886
888
@retval 1 - not enough memory
887
889
*/
888
890
889
- static my_bool
891
+ static bool
890
892
my_uca_alloc_contractions (MY_CONTRACTIONS *contractions,
891
- MY_CHARSET_LOADER *loader, size_t n )
893
+ MY_CHARSET_LOADER *loader, size_t *ncontractions )
892
894
{
893
- size_t size= n * sizeof (MY_CONTRACTION);
894
- if (!(contractions->item = static_cast <MY_CONTRACTION*>((loader->once_alloc )(size))) ||
895
- !(contractions->flags = (char *) (loader->once_alloc )(MY_UCA_CNT_FLAG_SIZE)))
896
- return 1 ;
897
- memset (contractions->item , 0 , size);
895
+ for (size_t contraction_len= 2 ; contraction_len <= MY_UCA_MAX_CONTRACTION;
896
+ contraction_len++)
897
+ {
898
+ if (ncontractions[contraction_len])
899
+ {
900
+ size_t size= ncontractions[contraction_len] * sizeof (MY_CONTRACTION);
901
+ contractions->item [contraction_len]=
902
+ static_cast <MY_CONTRACTION*>((loader->once_alloc )(size));
903
+ if (!contractions->item [contraction_len])
904
+ return true ;
905
+ memset (contractions->item [contraction_len], 0 , size);
906
+ }
907
+ }
908
+ if (!(contractions->flags =
909
+ (char *)(loader->once_alloc )(MY_UCA_CNT_FLAG_SIZE)))
910
+ return true ;
898
911
memset (contractions->flags , 0 , MY_UCA_CNT_FLAG_SIZE);
899
- return 0 ;
912
+ return false ;
900
913
}
901
914
902
915
@@ -911,7 +924,7 @@ my_uca_alloc_contractions(MY_CONTRACTIONS *contractions,
911
924
const MY_CONTRACTIONS *
912
925
my_charset_get_contractions (const CHARSET_INFO *cs)
913
926
{
914
- return (cs->uca != NULL ) && (cs->uca ->contractions .nitems > 0 ) ?
927
+ return (cs->uca != NULL ) && (cs->uca ->contractions .has_contractions ) ?
915
928
&cs->uca ->contractions : NULL ;
916
929
}
917
930
@@ -929,7 +942,7 @@ my_charset_get_contractions(const CHARSET_INFO *cs)
929
942
static inline my_bool
930
943
my_uca_have_contractions (const MY_UCA_INFO *uca)
931
944
{
932
- return ( uca->contractions .nitems > 0 ) ;
945
+ return uca->contractions .has_contractions ;
933
946
}
934
947
935
948
@@ -1002,9 +1015,9 @@ uint16 *
1002
1015
my_uca_contraction2_weight (const MY_CONTRACTIONS *list, my_wc_t wc1, my_wc_t wc2)
1003
1016
{
1004
1017
MY_CONTRACTION *c, *last;
1005
- for (c= list->item , last= c + list->nitems ; c < last; c++)
1018
+ for (c= list->item [ 2 ] , last= c + list->nitems [ 2 ] ; c < last; c++)
1006
1019
{
1007
- if (c->ch [0 ] == wc1 && c->ch [1 ] == wc2 && c-> ch [ 2 ] == 0 )
1020
+ if (c->ch [0 ] == wc1 && c->ch [1 ] == wc2)
1008
1021
{
1009
1022
return c->weight ;
1010
1023
}
@@ -1085,7 +1098,7 @@ static inline const uint16 *
1085
1098
my_uca_contraction_weight (const MY_CONTRACTIONS *list, const my_wc_t *wc, size_t len)
1086
1099
{
1087
1100
MY_CONTRACTION *c, *last;
1088
- for (c= list->item , last= c + list->nitems ; c < last; c++)
1101
+ for (c= list->item [len] , last= c + list->nitems [len] ; c < last; c++)
1089
1102
{
1090
1103
if ((len == MY_UCA_MAX_CONTRACTION || c->ch [len] == 0 ) &&
1091
1104
!c->with_context &&
@@ -1148,10 +1161,7 @@ my_uca_scanner::contraction_find(my_wc_t wc0, size_t *chars_skipped)
1148
1161
{
1149
1162
size_t clen= 1 ;
1150
1163
int flag;
1151
- uchar *s, *beg= nullptr ;
1152
- const MY_CONTRACTION *contraction_begin= cs->uca ->contractions .item ;
1153
- const MY_CONTRACTION *contraction_end=
1154
- contraction_begin + cs->uca ->contractions .nitems ;
1164
+ uchar *s, *beg;
1155
1165
MY_CONTRACTION tofind;
1156
1166
memset (&tofind, 0 , sizeof (tofind));
1157
1167
tofind.ch [0 ]= wc0;
@@ -1192,21 +1202,24 @@ my_uca_scanner::contraction_find(my_wc_t wc0, size_t *chars_skipped)
1192
1202
to looking for new character sequence which adds one more character,
1193
1203
which is obviously greater than the current one.
1194
1204
*/
1205
+ const MY_CONTRACTION *contraction_begin=
1206
+ cs->uca ->contractions .item [clen + 1 ];
1207
+ const MY_CONTRACTION *contraction_end=
1208
+ contraction_begin + cs->uca ->contractions .nitems [clen + 1 ];
1195
1209
auto candidate= std::lower_bound (contraction_begin,
1196
1210
contraction_end,
1197
1211
tofind,
1198
1212
contraction_chars_cmp);
1199
- if (candidate == contraction_end)
1200
- break ;
1201
- if (!contraction_chars_cmp (tofind, *candidate))
1213
+ if (candidate != contraction_end &&
1214
+ !contraction_chars_cmp (tofind, *candidate))
1202
1215
{
1203
1216
/*
1204
1217
std::lower_bound() ensures *candidate is greater than or equal to
1205
1218
tofind. And contraction_chars_cmp() returns false which means
1206
1219
tofind is greater than or equal to *candidate. So tofind has to
1207
1220
be equal to *candidate.
1208
1221
*/
1209
- contraction_begin= longest_contraction= candidate;
1222
+ longest_contraction= candidate;
1210
1223
beg= s;
1211
1224
*chars_skipped= clen;
1212
1225
}
@@ -1263,11 +1276,12 @@ my_uca_scanner::previous_context_find(my_wc_t wc0, my_wc_t wc1)
1263
1276
memset (&tofind, 0 , sizeof (tofind));
1264
1277
tofind.ch [0 ]= wc0;
1265
1278
tofind.ch [1 ]= wc1;
1266
- MY_CONTRACTION *contraction_end= contractions->item + contractions->nitems ;
1267
- MY_CONTRACTION *c= std::lower_bound (contractions->item ,
1279
+ MY_CONTRACTION *contraction_end=
1280
+ contractions->item [2 ] + contractions->nitems [2 ];
1281
+ MY_CONTRACTION *c= std::lower_bound (contractions->item [2 ],
1268
1282
contraction_end,
1269
1283
tofind, contraction_chars_cmp);
1270
- if (c == contraction_end || c->ch [0 ] != wc0 || c->ch [1 ] != wc1 || c-> ch [ 2 ] )
1284
+ if (c == contraction_end || c->ch [0 ] != wc0 || c->ch [1 ] != wc1)
1271
1285
return NULL ;
1272
1286
if (c->with_context )
1273
1287
{
@@ -4358,11 +4372,12 @@ apply_one_rule(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
4358
4372
r->with_context )->weight ;
4359
4373
to_stride= 1 ;
4360
4374
to_num_ce= &to[MY_UCA_MAX_WEIGHT_SIZE - 1 ];
4375
+ /* Temporarily hide - it's incomplete */
4376
+ dst->contractions .nitems [nshift]--;
4361
4377
/* Store weights of the "reset to" character */
4362
- dst->contractions .nitems --; /* Temporarily hide - it's incomplete */
4363
4378
nweights= my_char_weight_put (dst, to, to_stride, MY_UCA_MAX_WEIGHT_SIZE - 1 ,
4364
4379
to_num_ce, r, nreset, rules->uca ->version );
4365
- dst->contractions .nitems ++; /* Activate, now it's complete */
4380
+ dst->contractions .nitems [nshift] ++; /* Activate, now it's complete */
4366
4381
}
4367
4382
else
4368
4383
{
@@ -4450,8 +4465,8 @@ init_weight_level(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
4450
4465
bool lengths_are_temporary)
4451
4466
{
4452
4467
MY_COLL_RULE *r, *rlast;
4453
- int ncontractions= 0 ;
4454
4468
size_t i, npages= (src->maxchar + 1 ) / 256 ;
4469
+ size_t ncontractions[MY_UCA_MAX_CONTRACTION + 1 ]{0 };
4455
4470
4456
4471
dst->maxchar = src->maxchar ;
4457
4472
@@ -4516,7 +4531,10 @@ init_weight_level(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
4516
4531
dst->weights [pagec]= NULL ; /* Mark that we'll overwrite this page */
4517
4532
}
4518
4533
else
4519
- ncontractions++;
4534
+ {
4535
+ ncontractions[my_wstrnlen (r->curr , MY_UCA_MAX_CONTRACTION)]++;
4536
+ dst->contractions .has_contractions = true ;
4537
+ }
4520
4538
}
4521
4539
4522
4540
/* Allocate pages that we'll overwrite and copy default weights */
@@ -4532,7 +4550,7 @@ init_weight_level(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
4532
4550
return rc;
4533
4551
}
4534
4552
4535
- if (ncontractions )
4553
+ if (dst-> contractions . has_contractions )
4536
4554
{
4537
4555
if (my_uca_alloc_contractions (&dst->contractions , loader, ncontractions))
4538
4556
return TRUE ;
@@ -4552,10 +4570,16 @@ init_weight_level(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader,
4552
4570
return TRUE ;
4553
4571
}
4554
4572
// Sort contractions by the code points.
4555
- if (ncontractions)
4556
- std::sort (dst->contractions .item ,
4557
- dst->contractions .item + dst->contractions .nitems ,
4558
- contraction_chars_cmp);
4573
+ if (dst->contractions .has_contractions )
4574
+ {
4575
+ for (size_t i= 2 ; i <= MY_UCA_MAX_CONTRACTION; i++)
4576
+ {
4577
+ if (dst->contractions .nitems [i])
4578
+ std::sort (dst->contractions .item [i],
4579
+ dst->contractions .item [i] + dst->contractions .nitems [i],
4580
+ contraction_chars_cmp);
4581
+ }
4582
+ }
4559
4583
return FALSE ;
4560
4584
}
4561
4585
0 commit comments