Skip to content

Commit 89fdd77

Browse files
gribozavr and Max Moiseev
authored and committed
stdlib: use SipHash-1-3 for string hashing on non-ObjC platforms
Part of rdar://problem/24109692
1 parent 436c90e commit 89fdd77

File tree

7 files changed

+116
-81
lines changed

7 files changed

+116
-81
lines changed

stdlib/public/SwiftShims/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ set(sources
88
RefCount.h
99
RuntimeShims.h
1010
RuntimeStubs.h
11+
SwiftStdbool.h
1112
SwiftStddef.h
1213
SwiftStdint.h
1314
UnicodeShims.h
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See http://swift.org/LICENSE.txt for license information
9+
// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
// Include guard for the `__swift_bool` shim header.
#ifndef SWIFT_STDLIB_SHIMS_SWIFTSTDBOOL_H_
14+
#define SWIFT_STDLIB_SHIMS_SWIFTSTDBOOL_H_
15+
// `__swift_bool` aliases the built-in boolean type of whichever language is
// compiling this header: `bool` when `__cplusplus` is defined, `_Bool`
// otherwise. No standard header is included to obtain the type.
16+
#ifdef __cplusplus
17+
typedef bool __swift_bool;
18+
#else
19+
typedef _Bool __swift_bool;
20+
#endif
21+
22+
#endif
23+

stdlib/public/SwiftShims/UnicodeShims.h

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#define SWIFT_STDLIB_SHIMS_UNICODESHIMS_H_
1919

2020
#include "SwiftStdint.h"
21+
#include "SwiftStdbool.h"
2122
#include "Visibility.h"
2223

2324
#ifdef __cplusplus
@@ -79,13 +80,20 @@ _swift_stdlib_unicode_compare_utf8_utf8(const unsigned char *Left,
7980
__swift_int32_t RightLength);
8081

8182
SWIFT_RUNTIME_STDLIB_INTERFACE
82-
__attribute__((__pure__)) __swift_intptr_t
83-
_swift_stdlib_unicode_hash(const __swift_uint16_t *Str, __swift_int32_t Length);
83+
void *_swift_stdlib_unicodeCollationIterator_create(
84+
const __swift_uint16_t *Str,
85+
__swift_uint32_t Length);
8486

8587
SWIFT_RUNTIME_STDLIB_INTERFACE
86-
__attribute__((__pure__)) __swift_intptr_t
87-
_swift_stdlib_unicode_hash_ascii(const unsigned char *Str,
88-
__swift_int32_t Length);
88+
__swift_int32_t _swift_stdlib_unicodeCollationIterator_next(
89+
void *CollationIterator, __swift_bool *HitEnd);
90+
91+
SWIFT_RUNTIME_STDLIB_INTERFACE
92+
void _swift_stdlib_unicodeCollationIterator_delete(
93+
void *CollationIterator);
94+
95+
SWIFT_RUNTIME_STDLIB_INTERFACE
96+
const __swift_int32_t *_swift_stdlib_unicode_getASCIICollationTable();
8997

9098
SWIFT_RUNTIME_STDLIB_INTERFACE
9199
__swift_int32_t _swift_stdlib_unicode_strToUpper(

stdlib/public/SwiftShims/module.modulemap

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ module SwiftShims {
88
header "RefCount.h"
99
header "RuntimeShims.h"
1010
header "RuntimeStubs.h"
11+
header "SwiftStdbool.h"
1112
header "SwiftStddef.h"
1213
header "SwiftStdint.h"
1314
header "UnicodeShims.h"

stdlib/public/core/StringHashable.swift

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,50 @@ func _stdlib_NSStringHashValue(_ str: AnyObject, _ isASCII: Bool) -> Int
2020
func _stdlib_NSStringHashValuePointer(_ str: OpaquePointer, _ isASCII: Bool) -> Int
2121
#endif
2222

23+
/// String hashing helpers that feed collation elements into a SipHash-1-3
/// context keyed with `_Hashing.secretKey`.
extension _Unicode {
/// Returns a hash of an ASCII string.
///
/// Each byte is mapped to a collation element through the table returned by
/// `_swift_stdlib_unicode_getASCIICollationTable()`; every non-zero element
/// is appended to the hasher, and the finalized hash is returned.
///
/// - Parameter string: The bytes to hash. Every byte must be `<= 127`;
///   a larger value fails the `_precondition` below.
/// - Returns: The value of `_finalizeAndReturnIntHash()` on the hasher.
24+
internal static func hashASCII(
25+
_ string: UnsafeBufferPointer<UInt8>
26+
) -> Int {
27+
let collationTable = _swift_stdlib_unicode_getASCIICollationTable()
28+
var hasher = _SipHash13Context(key: _Hashing.secretKey)
29+
for c in string {
// The byte is used as an index into the collation table, so reject
// anything outside the ASCII range before indexing.
30+
_precondition(c <= 127)
31+
let element = collationTable[Int(c)]
32+
// Ignore zero valued collation elements. They don't participate in the
33+
// ordering relation.
34+
if element != 0 {
35+
hasher.append(element)
36+
}
37+
}
38+
return hasher._finalizeAndReturnIntHash()
39+
}
40+
/// Returns a hash of a UTF-16 string.
///
/// A collation iterator is created over the code units, drained with
/// `_swift_stdlib_unicodeCollationIterator_next` until it reports the end,
/// and every non-zero element it yields is appended to the hasher.
///
/// - Parameter string: The UTF-16 code units to hash.
/// - Returns: The value of `_finalizeAndReturnIntHash()` on the hasher.
41+
internal static func hashUTF16(
42+
_ string: UnsafeBufferPointer<UInt16>
43+
) -> Int {
44+
let collationIterator = _swift_stdlib_unicodeCollationIterator_create(
// NOTE(review): the force-unwrap traps if `baseAddress` is nil; presumably
// callers always pass a buffer with a non-nil base address -- confirm.
45+
string.baseAddress!,
46+
UInt32(string.count))
// Release the iterator on every exit path from this function.
47+
defer { _swift_stdlib_unicodeCollationIterator_delete(collationIterator) }
48+
49+
var hasher = _SipHash13Context(key: _Hashing.secretKey)
50+
while true {
51+
var hitEnd = false
52+
let element =
53+
_swift_stdlib_unicodeCollationIterator_next(collationIterator, &hitEnd)
// When the end is reported, the element returned alongside it is
// discarded rather than hashed.
54+
if hitEnd {
55+
break
56+
}
57+
// Ignore zero valued collation elements. They don't participate in the
58+
// ordering relation.
59+
if element != 0 {
60+
hasher.append(element)
61+
}
62+
}
63+
return hasher._finalizeAndReturnIntHash()
64+
}
65+
}
66+
2367
extension String : Hashable {
2468
/// The string's hash value.
2569
///
@@ -48,11 +92,13 @@ extension String : Hashable {
4892
return hashOffset ^ _stdlib_NSStringHashValue(cocoaString, isASCII)
4993
}
5094
#else
51-
if self._core.isASCII {
52-
return _swift_stdlib_unicode_hash_ascii(
53-
_core.startASCII, Int32(_core.count))
95+
if let asciiBuffer = self._core.asciiBuffer {
96+
return _Unicode.hashASCII(UnsafeBufferPointer(
97+
start: asciiBuffer.baseAddress!,
98+
count: asciiBuffer.count))
5499
} else {
55-
return _swift_stdlib_unicode_hash(_core.startUTF16, Int32(_core.count))
100+
return _Unicode.hashUTF16(
101+
UnsafeBufferPointer(start: _core.startUTF16, count: _core.count))
56102
}
57103
#endif
58104
}

stdlib/public/core/Unicode.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1175,3 +1175,7 @@ extension UTF16 {
11751175
Builtin.unreachable()
11761176
}
11771177
}
1178+
1179+
/// A namespace for Unicode utilities.
1180+
internal enum _Unicode {}
1181+

stdlib/public/stubs/UnicodeNormalization.cpp

Lines changed: 24 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ static const UCollator *GetRootCollator() {
7070
/// This class caches the collation element results for the ASCII subset of
7171
/// unicode.
7272
class ASCIICollation {
73-
int32_t CollationTable[128];
7473
public:
7574
friend class swift::Lazy<ASCIICollation>;
7675

@@ -79,6 +78,8 @@ class ASCIICollation {
7978
return &theTable.get();
8079
}
8180

81+
int32_t CollationTable[128];
82+
8283
/// Maps an ASCII character to a collation element priority as would be
8384
/// returned by a call to ucol_next().
8485
int32_t map(unsigned char c) const {
@@ -202,91 +203,42 @@ swift::_swift_stdlib_unicode_compare_utf8_utf8(const unsigned char *LeftString,
202203
return Diff;
203204
}
204205

205-
// These functions use murmurhash2 in its 32 and 64bit forms, which are
206-
// differentiated by the constants defined below. This seems like a good choice
207-
// for now because it operates efficiently in blocks rather than bytes, and
208-
// the data returned from the collation iterator comes in 4byte chunks.
209-
#if __arm__ || __i386__
210-
#define HASH_SEED 0x88ddcc21
211-
#define HASH_M 0x5bd1e995
212-
#define HASH_R 24
213-
#else
214-
#define HASH_SEED 0x429b126688ddcc21
215-
#define HASH_M 0xc6a4a7935bd1e995
216-
#define HASH_R 47
217-
#endif
218-
219-
static intptr_t hashChunk(const UCollator *Collator, intptr_t HashState,
220-
const uint16_t *Str, uint32_t Length,
221-
UErrorCode *ErrorCode) {
206+
void *swift::_swift_stdlib_unicodeCollationIterator_create(
207+
const __swift_uint16_t *Str, __swift_uint32_t Length) {
208+
UErrorCode ErrorCode = U_ZERO_ERROR;
222209
#if defined(__CYGWIN__) || defined(_MSC_VER)
223210
UCollationElements *CollationIterator = ucol_openElements(
224-
Collator, reinterpret_cast<const UChar *>(Str), Length, ErrorCode);
211+
GetRootCollator(), reinterpret_cast<const UChar *>(Str), Length,
212+
&ErrorCode);
225213
#else
226214
UCollationElements *CollationIterator = ucol_openElements(
227-
Collator, Str, Length, ErrorCode);
215+
GetRootCollator(), Str, Length, &ErrorCode);
228216
#endif
229-
while (U_SUCCESS(*ErrorCode)) {
230-
intptr_t Elem = ucol_next(CollationIterator, ErrorCode);
231-
// Ignore zero valued collation elements. They don't participate in the
232-
// ordering relation.
233-
if (Elem == 0)
234-
continue;
235-
if (Elem != UCOL_NULLORDER) {
236-
Elem *= HASH_M;
237-
Elem ^= Elem >> HASH_R;
238-
Elem *= HASH_M;
239-
240-
HashState *= HASH_M;
241-
HashState ^= Elem;
242-
} else {
243-
break;
244-
}
217+
if (U_FAILURE(ErrorCode)) {
218+
swift::crash("_swift_stdlib_unicodeCollationIterator_create: ucol_openElements() failed.");
245219
}
246-
ucol_closeElements(CollationIterator);
247-
return HashState;
248-
}
249-
250-
static intptr_t hashFinish(intptr_t HashState) {
251-
HashState ^= HashState >> HASH_R;
252-
HashState *= HASH_M;
253-
HashState ^= HashState >> HASH_R;
254-
return HashState;
220+
return CollationIterator;
255221
}
256222

257-
intptr_t
258-
swift::_swift_stdlib_unicode_hash(const uint16_t *Str, int32_t Length) {
223+
__swift_int32_t swift::_swift_stdlib_unicodeCollationIterator_next(
224+
void *CollationIterator, bool *HitEnd) {
259225
UErrorCode ErrorCode = U_ZERO_ERROR;
260-
intptr_t HashState = HASH_SEED;
261-
HashState = hashChunk(GetRootCollator(), HashState, Str, Length, &ErrorCode);
262-
226+
auto Result = ucol_next(
227+
static_cast<UCollationElements *>(CollationIterator), &ErrorCode);
263228
if (U_FAILURE(ErrorCode)) {
264-
swift::crash("hashChunk: Unexpected error hashing unicode string.");
229+
swift::crash("_swift_stdlib_unicodeCollationIterator_next: ucol_next() failed.");
265230
}
266-
return hashFinish(HashState);
231+
*HitEnd = (Result == UCOL_NULLORDER);
232+
return Result;
267233
}
268234

269-
intptr_t swift::_swift_stdlib_unicode_hash_ascii(const unsigned char *Str,
270-
int32_t Length) {
271-
const ASCIICollation *Table = ASCIICollation::getTable();
272-
intptr_t HashState = HASH_SEED;
273-
int32_t Pos = 0;
274-
while (Pos < Length) {
275-
const unsigned char c = Str[Pos++];
276-
assert((c & 0x80) == 0 && "This table only exists for the ASCII subset");
277-
intptr_t Elem = Table->map(c);
278-
// Ignore zero valued collation elements. They don't participate in the
279-
// ordering relation.
280-
if (Elem == 0)
281-
continue;
282-
Elem *= HASH_M;
283-
Elem ^= Elem >> HASH_R;
284-
Elem *= HASH_M;
235+
/// Closes a collation iterator by casting the opaque pointer back to
/// `UCollationElements *` and passing it to `ucol_closeElements()`.
///
/// NOTE(review): presumably \p CollationIterator must be a pointer obtained
/// from `_swift_stdlib_unicodeCollationIterator_create` -- confirm.
void swift::_swift_stdlib_unicodeCollationIterator_delete(
236+
void *CollationIterator) {
237+
ucol_closeElements(static_cast<UCollationElements *>(CollationIterator));
238+
}
285239

286-
HashState *= HASH_M;
287-
HashState ^= Elem;
288-
}
289-
return hashFinish(HashState);
240+
/// Returns a pointer to the first entry of the `CollationTable` array of the
/// `ASCIICollation` instance returned by `ASCIICollation::getTable()`.
///
/// The array is declared with 128 entries, so callers must index it only
/// with values in the range 0...127.
const __swift_int32_t *swift::_swift_stdlib_unicode_getASCIICollationTable() {
241+
return ASCIICollation::getTable()->CollationTable;
290242
}
291243

292244
/// Convert the unicode string to uppercase. This function will return the

0 commit comments

Comments
 (0)