Skip to content

Commit 567cee5

Browse files
author
Steinar H. Gunderson
committed
Bug #25481041: WL#9106: SIGNAL 11 IN STRINGS/CTYPE-UCA.CC
Make sure hashing and strnxfrm for UCA collations accept nullptr without crashing; we would form a pointer that wrapped around. In particular, this would happen when trying to hash an empty blob (MyISAM returns a null pointer in this case). Performance on microbenchmarks is neutral or ever so slightly better; generally within measurement noise (~1%). Change-Id: I4e352e425f057b89599e32c477baea004b65aeea
1 parent 961d33a commit 567cee5

File tree

2 files changed

+38
-1
lines changed

2 files changed

+38
-1
lines changed

strings/ctype-uca.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1633,6 +1633,13 @@ ALWAYS_INLINE void uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::for_each_weight(
16331633
const uint16 *ascii_wpage= UCA900_WEIGHT_ADDR(
16341634
cs->uca->weights[0], /*level=*/weight_lv, /*subcode=*/0);
16351635

1636+
/*
1637+
Precalculate the limit for the fast path below, taking care not to form
1638+
pointers that are before sbeg, as those cannot be legally compared.
1639+
(In particular, this catches the case of sbeg == send == nullptr.)
1640+
*/
1641+
const uchar *send_local= (send - sbeg > 3) ? (send - 3) : sbeg;
1642+
16361643
for ( ;; )
16371644
{
16381645
/*
@@ -1652,7 +1659,6 @@ ALWAYS_INLINE void uca_scanner_900<Mb_wc, LEVELS_FOR_COMPARE>::for_each_weight(
16521659
we'd otherwise have to do.
16531660
*/
16541661
const uchar *sbeg_local= sbeg;
1655-
const uchar *send_local= send - (sizeof(uint32) - 1);
16561662
while (sbeg_local < send_local && preaccept_data(sizeof(uint32)))
16571663
{
16581664
/*

unittest/gunit/strings_strnxfrm-t.cc

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,20 @@ TEST(StrXfrmTest, UTF8MB4PadCorrectness_2)
315315
}
316316
}
317317

318+
TEST(StrXfrmTest, NullPointer)
319+
{
320+
CHARSET_INFO *cs= init_collation("utf8mb4_0900_ai_ci");
321+
unsigned char buf[256];
322+
323+
memset(buf, 0x33, sizeof(buf));
324+
cs->coll->strnxfrm(
325+
cs, buf, sizeof(buf), sizeof(buf), nullptr, 0, MY_STRXFRM_PAD_TO_MAXLEN);
326+
327+
for (size_t i= 0; i < sizeof(buf); ++i) {
328+
EXPECT_EQ(0, buf[i]);
329+
}
330+
}
331+
318332
// Benchmark based on reduced test case in Bug #83247 / #24788778.
319333
//
320334
// Note: This benchmark does not exercise any real multibyte characters;
@@ -1234,4 +1248,21 @@ TEST(PadCollationTest, HashSort)
12341248
EXPECT_NE(hash(as_cs, "ab c"), hash(as_cs, "abc"));
12351249
}
12361250

1251+
TEST(HashTest, NullPointer)
1252+
{
1253+
CHARSET_INFO *cs= init_collation("utf8mb4_0900_ai_ci");
1254+
ulong nr1= 1, nr2= 4;
1255+
1256+
/*
1257+
We should get the same hash from the empty string no matter what
1258+
the pointer is.
1259+
*/
1260+
cs->coll->hash_sort(cs, nullptr, 0, &nr1, &nr2);
1261+
EXPECT_EQ(nr1, hash(cs, ""));
1262+
1263+
cs->coll->hash_sort(
1264+
cs, pointer_cast<const uchar *>("        "), 8, &nr1, &nr2);
1265+
// Don't care what the values are, just that we don't crash.
1266+
}
1267+
12371268
} // namespace strnxfrm_unittest

0 commit comments

Comments
 (0)