Skip to content

Commit 3ec9325

Browse files
committed
stdlib: Don't hash in the length of the string
Some characters are ignored when comparing strings: their collation elements do not participate in the ordering relation. Hashing in the length of the string would therefore produce different hash values for strings that compare equal. Swift SVN r31468
1 parent 41119cb commit 3ec9325

File tree

2 files changed

+14
-14
lines changed

2 files changed

+14
-14
lines changed

stdlib/public/runtime/UnicodeNormalization.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,10 @@ static intptr_t hashChunk(const UCollator *Collator, intptr_t HashState,
138138
Collator, Str, Length, ErrorCode);
139139
while (U_SUCCESS(*ErrorCode)) {
140140
intptr_t Elem = ucol_next(CollationIterator, ErrorCode);
141+
// Ignore zero-valued collation elements. They don't participate in the
142+
// ordering relation.
143+
if (Elem == 0)
144+
continue;
141145
if (Elem != UCOL_NULLORDER) {
142146
Elem *= HASH_M;
143147
Elem ^= Elem >> HASH_R;
@@ -162,7 +166,7 @@ static intptr_t hashFinish(intptr_t HashState) {
162166
extern "C"
163167
intptr_t _swift_stdlib_unicode_hash(const uint16_t *Str, int32_t Length) {
164168
UErrorCode ErrorCode = U_ZERO_ERROR;
165-
intptr_t HashState = HASH_SEED ^ (Length * HASH_M);
169+
intptr_t HashState = HASH_SEED;
166170
HashState = hashChunk(GetRootCollator(), HashState, Str, Length, &ErrorCode);
167171

168172
if (U_FAILURE(ErrorCode)) {
@@ -176,7 +180,7 @@ extern "C"
176180
intptr_t _swift_stdlib_unicode_hash_ascii(const char *Str, int32_t Length) {
177181
UErrorCode ErrorCode = U_ZERO_ERROR;
178182
const UCollator *Collator = GetRootCollator();
179-
intptr_t HashState = HASH_SEED ^ (Length * HASH_M);
183+
intptr_t HashState = HASH_SEED;
180184
uint16_t HashBuffer[ASCII_HASH_BUFFER_SIZE];
181185

182186
int32_t Pos = 0;

validation-test/stdlib/HashingICU.swift

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,14 @@
77
// use StdlibUnittest because that doesn't work on linux yet. May go away in
88
// favour of the more comprehensive tests that already exist once it does.
99

10-
// ASCII strings
11-
// CHECK: 5308980208032766932
12-
print("boom".hashValue)
13-
// CHECK-NEXT: 6894346571320922064
14-
print("zoom".hashValue)
15-
16-
// Unicode strings
17-
// CHECK-NEXT: 3514641426931780352
18-
print("ZOO≪M".hashValue)
19-
// CHECK-NEXT: 7349636929305805742
20-
print("moo≪m".hashValue)
21-
2210
// Let's not crash on changing case.
2311
let upper = "\u{00df}".uppercaseString
2412
let lower = "\u{0130}".lowercaseString
13+
14+
// ASCII strings
15+
// CHECK: true
16+
print("abc".hashValue == "\0abc".hashValue)
17+
18+
// Unicode strings
19+
// CHECK-NEXT: true
20+
print("abc\u{0130}".hashValue == "\0abc\u{0130}".hashValue)

0 commit comments

Comments
 (0)