Skip to content

Commit e8e8b35

Browse files
committed
stdlib: use SipHash-1-3 for string hashing on non-ObjC platforms
Part of rdar://problem/24109692
1 parent daa7bfc commit e8e8b35

File tree

7 files changed

+115
-80
lines changed

7 files changed

+115
-80
lines changed

stdlib/public/SwiftShims/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ set(sources
99
RefCount.h
1010
RuntimeShims.h
1111
RuntimeStubs.h
12+
SwiftStdbool.h
1213
SwiftStddef.h
1314
SwiftStdint.h
1415
UnicodeShims.h
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See http://swift.org/LICENSE.txt for license information
9+
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef SWIFT_STDLIB_SHIMS_SWIFTSTDBOOL_H_
14+
#define SWIFT_STDLIB_SHIMS_SWIFTSTDBOOL_H_
15+
16+
// Boolean type for use in the shim headers. This header includes nothing:
// it aliases the built-in `bool` when compiled as C++ and the `_Bool`
// keyword otherwise, so both language modes see the same name.
#ifdef __cplusplus
17+
typedef bool __swift_bool;
18+
#else
19+
typedef _Bool __swift_bool;
20+
#endif
21+
22+
#endif
23+

stdlib/public/SwiftShims/UnicodeShims.h

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#define SWIFT_STDLIB_SHIMS_UNICODESHIMS_H_
1919

2020
#include "SwiftStdint.h"
21+
#include "SwiftStdbool.h"
2122
#include "Visibility.h"
2223

2324
#if __has_feature(nullability)
@@ -83,13 +84,20 @@ _swift_stdlib_unicode_compare_utf8_utf8(const unsigned char *Left,
8384
__swift_int32_t RightLength);
8485

8586
SWIFT_RUNTIME_STDLIB_INTERFACE
86-
__attribute__((__pure__)) __swift_intptr_t
87-
_swift_stdlib_unicode_hash(const __swift_uint16_t *Str, __swift_int32_t Length);
87+
void *_swift_stdlib_unicodeCollationIterator_create(
88+
const __swift_uint16_t *Str,
89+
__swift_uint32_t Length);
8890

8991
SWIFT_RUNTIME_STDLIB_INTERFACE
90-
__attribute__((__pure__)) __swift_intptr_t
91-
_swift_stdlib_unicode_hash_ascii(const unsigned char *Str,
92-
__swift_int32_t Length);
92+
__swift_int32_t _swift_stdlib_unicodeCollationIterator_next(
93+
void *CollationIterator, __swift_bool *HitEnd);
94+
95+
SWIFT_RUNTIME_STDLIB_INTERFACE
96+
void _swift_stdlib_unicodeCollationIterator_delete(
97+
void *CollationIterator);
98+
99+
SWIFT_RUNTIME_STDLIB_INTERFACE
100+
const __swift_int32_t *_swift_stdlib_unicode_getASCIICollationTable();
93101

94102
SWIFT_RUNTIME_STDLIB_INTERFACE
95103
__swift_int32_t _swift_stdlib_unicode_strToUpper(

stdlib/public/SwiftShims/module.modulemap

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ module SwiftShims {
99
header "RefCount.h"
1010
header "RuntimeShims.h"
1111
header "RuntimeStubs.h"
12+
header "SwiftStdbool.h"
1213
header "SwiftStddef.h"
1314
header "SwiftStdint.h"
1415
header "UnicodeShims.h"

stdlib/public/core/StringHashable.swift

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,50 @@ func _stdlib_NSStringHashValue(_ str: AnyObject, _ isASCII: Bool) -> Int
2020
func _stdlib_NSStringHashValuePointer(_ str: OpaquePointer, _ isASCII: Bool) -> Int
2121
#endif
2222

23+
// String hashing helpers built on collation elements. Both functions feed
// the non-zero collation elements of their input into a `_SipHash13Context`
// keyed with `_Hashing.secretKey`.
extension _Unicode {
24+
/// Hashes a buffer of ASCII bytes by their collation elements.
///
/// Each byte is used as an index into the table returned by
/// `_swift_stdlib_unicode_getASCIICollationTable()`; every non-zero table
/// entry is appended to the hasher.
///
/// - Parameter string: The bytes to hash.
/// - Precondition: Every byte is `<= 127`.
/// - Returns: The value of the hasher's `_finalizeAndReturnIntHash()`.
internal static func hashASCII(
25+
_ string: UnsafeBufferPointer<UInt8>
26+
) -> Int {
27+
let collationTable = _swift_stdlib_unicode_getASCIICollationTable()
28+
var hasher = _SipHash13Context(key: _Hashing.secretKey)
29+
for c in string {
30+
// Guards the table lookup below; the table is declared with 128 entries
// on the C++ side.
_precondition(c <= 127)
31+
let element = collationTable[Int(c)]
32+
// Ignore zero valued collation elements. They don't participate in the
33+
// ordering relation.
34+
if element != 0 {
35+
hasher.append(element)
36+
}
37+
}
38+
return hasher._finalizeAndReturnIntHash()
39+
}
40+
41+
/// Hashes a buffer of UTF-16 code units by their collation elements.
///
/// Creates a collation iterator over the buffer, pulls elements from it
/// until the iterator reports the end, and appends every non-zero element
/// to the hasher. The iterator is deleted on every exit path via `defer`.
///
/// - Parameter string: The code units to hash.
/// - Precondition: `string.baseAddress` is non-nil (it is force-unwrapped).
/// - Returns: The value of the hasher's `_finalizeAndReturnIntHash()`.
internal static func hashUTF16(
42+
_ string: UnsafeBufferPointer<UInt16>
43+
) -> Int {
44+
let collationIterator = _swift_stdlib_unicodeCollationIterator_create(
45+
string.baseAddress!,
46+
UInt32(string.count))
47+
defer { _swift_stdlib_unicodeCollationIterator_delete(collationIterator) }
48+
49+
var hasher = _SipHash13Context(key: _Hashing.secretKey)
50+
while true {
51+
// Written by the callee: true when the iterator has no more elements.
var hitEnd = false
52+
let element =
53+
_swift_stdlib_unicodeCollationIterator_next(collationIterator, &hitEnd)
54+
// When the end is hit, `element` holds the end sentinel rather than a
// collation element, so stop before hashing it.
if hitEnd {
55+
break
56+
}
57+
// Ignore zero valued collation elements. They don't participate in the
58+
// ordering relation.
59+
if element != 0 {
60+
hasher.append(element)
61+
}
62+
}
63+
return hasher._finalizeAndReturnIntHash()
64+
}
65+
}
66+
2367
extension String : Hashable {
2468
/// The string's hash value.
2569
///
@@ -49,10 +93,12 @@ extension String : Hashable {
4993
}
5094
#else
5195
if let asciiBuffer = self._core.asciiBuffer {
52-
return _swift_stdlib_unicode_hash_ascii(
53-
asciiBuffer.baseAddress!, Int32(asciiBuffer.count))
96+
return _Unicode.hashASCII(UnsafeBufferPointer(
97+
start: asciiBuffer.baseAddress!,
98+
count: asciiBuffer.count))
5499
} else {
55-
return _swift_stdlib_unicode_hash(_core.startUTF16, Int32(_core.count))
100+
return _Unicode.hashUTF16(
101+
UnsafeBufferPointer(start: _core.startUTF16, count: _core.count))
56102
}
57103
#endif
58104
}

stdlib/public/core/Unicode.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,3 +1176,7 @@ extension UTF16 {
11761176
Builtin.unreachable()
11771177
}
11781178
}
1179+
1180+
/// A namespace for Unicode utilities.
///
/// Declared as an enum with no cases, so no value of this type can be
/// created; it exists only to hold static members added through extensions.
1181+
internal enum _Unicode {}
1182+

stdlib/public/stubs/UnicodeNormalization.cpp

Lines changed: 24 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ static const UCollator *GetRootCollator() {
5757
/// This class caches the collation element results for the ASCII subset of
5858
/// unicode.
5959
class ASCIICollation {
60-
int32_t CollationTable[128];
6160
public:
6261
friend class swift::Lazy<ASCIICollation>;
6362

@@ -66,6 +65,8 @@ class ASCIICollation {
6665
return &theTable.get();
6766
}
6867

68+
int32_t CollationTable[128];
69+
6970
/// Maps an ASCII character to a collation element priority as would be
7071
/// returned by a call to ucol_next().
7172
int32_t map(unsigned char c) const {
@@ -189,91 +190,42 @@ swift::_swift_stdlib_unicode_compare_utf8_utf8(const unsigned char *LeftString,
189190
return Diff;
190191
}
191192

192-
// These functions use murmurhash2 in its 32 and 64bit forms, which are
193-
// differentiated by the constants defined below. This seems like a good choice
194-
// for now because it operates efficiently in blocks rather than bytes, and
195-
// the data returned from the collation iterator comes in 4byte chunks.
196-
#if __arm__ || __i386__
197-
#define HASH_SEED 0x88ddcc21
198-
#define HASH_M 0x5bd1e995
199-
#define HASH_R 24
200-
#else
201-
#define HASH_SEED 0x429b126688ddcc21
202-
#define HASH_M 0xc6a4a7935bd1e995
203-
#define HASH_R 47
204-
#endif
205-
206-
static intptr_t hashChunk(const UCollator *Collator, intptr_t HashState,
207-
const uint16_t *Str, uint32_t Length,
208-
UErrorCode *ErrorCode) {
193+
/// Opens an ICU collation-element iterator over the UTF-16 text `Str` using
/// the collator returned by GetRootCollator(), and returns it as an opaque
/// pointer. `Length` is forwarded as the text length to ucol_openElements().
/// Crashes via swift::crash() if ucol_openElements() reports a failure.
/// On Cygwin/MSVC builds `Str` is reinterpret_cast to `const UChar *` before
/// the call; other builds pass it through unchanged.
/// NOTE(review): presumably `Str` must stay valid until the iterator is
/// deleted -- confirm against the ICU documentation.
void *swift::_swift_stdlib_unicodeCollationIterator_create(
194+
const __swift_uint16_t *Str, __swift_uint32_t Length) {
195+
UErrorCode ErrorCode = U_ZERO_ERROR;
209196
#if defined(__CYGWIN__) || defined(_MSC_VER)
210197
UCollationElements *CollationIterator = ucol_openElements(
211-
Collator, reinterpret_cast<const UChar *>(Str), Length, ErrorCode);
198+
GetRootCollator(), reinterpret_cast<const UChar *>(Str), Length,
199+
&ErrorCode);
212200
#else
213201
UCollationElements *CollationIterator = ucol_openElements(
214-
Collator, Str, Length, ErrorCode);
202+
GetRootCollator(), Str, Length, &ErrorCode);
215203
#endif
216-
while (U_SUCCESS(*ErrorCode)) {
217-
intptr_t Elem = ucol_next(CollationIterator, ErrorCode);
218-
// Ignore zero valued collation elements. They don't participate in the
219-
// ordering relation.
220-
if (Elem == 0)
221-
continue;
222-
if (Elem != UCOL_NULLORDER) {
223-
Elem *= HASH_M;
224-
Elem ^= Elem >> HASH_R;
225-
Elem *= HASH_M;
226-
227-
HashState *= HASH_M;
228-
HashState ^= Elem;
229-
} else {
230-
break;
231-
}
204+
if (U_FAILURE(ErrorCode)) {
205+
swift::crash("_swift_stdlib_unicodeCollationIterator_create: ucol_openElements() failed.");
232206
}
233-
ucol_closeElements(CollationIterator);
234-
return HashState;
235-
}
236-
237-
static intptr_t hashFinish(intptr_t HashState) {
238-
HashState ^= HashState >> HASH_R;
239-
HashState *= HASH_M;
240-
HashState ^= HashState >> HASH_R;
241-
return HashState;
207+
return CollationIterator;
242208
}
243209

244-
intptr_t
245-
swift::_swift_stdlib_unicode_hash(const uint16_t *Str, int32_t Length) {
210+
/// Advances the iterator with ucol_next() and returns the value it produced.
/// `*HitEnd` is always written: true when the value equals UCOL_NULLORDER,
/// false otherwise. When `*HitEnd` is true the return value is that sentinel,
/// not a collation element. Crashes via swift::crash() if ucol_next() reports
/// a failure.
/// `CollationIterator` is cast to `UCollationElements *` without any check,
/// so it must be a pointer of that type.
__swift_int32_t swift::_swift_stdlib_unicodeCollationIterator_next(
211+
void *CollationIterator, bool *HitEnd) {
246212
UErrorCode ErrorCode = U_ZERO_ERROR;
247-
intptr_t HashState = HASH_SEED;
248-
HashState = hashChunk(GetRootCollator(), HashState, Str, Length, &ErrorCode);
249-
213+
auto Result = ucol_next(
214+
static_cast<UCollationElements *>(CollationIterator), &ErrorCode);
250215
if (U_FAILURE(ErrorCode)) {
251-
swift::crash("hashChunk: Unexpected error hashing unicode string.");
216+
swift::crash("_swift_stdlib_unicodeCollationIterator_next: ucol_next() failed.");
252217
}
253-
return hashFinish(HashState);
218+
*HitEnd = (Result == UCOL_NULLORDER);
219+
return Result;
254220
}
255221

256-
intptr_t swift::_swift_stdlib_unicode_hash_ascii(const unsigned char *Str,
257-
int32_t Length) {
258-
const ASCIICollation *Table = ASCIICollation::getTable();
259-
intptr_t HashState = HASH_SEED;
260-
int32_t Pos = 0;
261-
while (Pos < Length) {
262-
const unsigned char c = Str[Pos++];
263-
assert((c & 0x80) == 0 && "This table only exists for the ASCII subset");
264-
intptr_t Elem = Table->map(c);
265-
// Ignore zero valued collation elements. They don't participate in the
266-
// ordering relation.
267-
if (Elem == 0)
268-
continue;
269-
Elem *= HASH_M;
270-
Elem ^= Elem >> HASH_R;
271-
Elem *= HASH_M;
222+
/// Closes a collation-element iterator by handing it to ucol_closeElements().
/// `CollationIterator` is cast to `UCollationElements *` without any check,
/// so it must be a pointer of that type.
void swift::_swift_stdlib_unicodeCollationIterator_delete(
223+
void *CollationIterator) {
224+
ucol_closeElements(static_cast<UCollationElements *>(CollationIterator));
225+
}
272226

273-
HashState *= HASH_M;
274-
HashState ^= Elem;
275-
}
276-
return hashFinish(HashState);
227+
/// Returns a pointer to the first entry of the 128-entry `CollationTable`
/// array held by the ASCIICollation object that ASCIICollation::getTable()
/// returns. Callers index it by ASCII code unit.
const __swift_int32_t *swift::_swift_stdlib_unicode_getASCIICollationTable() {
228+
return ASCIICollation::getTable()->CollationTable;
277229
}
278230

279231
/// Convert the unicode string to uppercase. This function will return the

0 commit comments

Comments
 (0)