10
10
//
11
11
//===----------------------------------------------------------------------===//
12
12
13
+ // The code units in _SmallString are always stored in memory in the same order
14
+ // that they would be stored in an array. This means that on big-endian
15
+ // platforms the order of the bytes in storage is reversed compared to
16
+ // _StringObject whereas on little-endian platforms the order is the same.
17
+ //
18
+ // Memory layout:
19
+ //
20
+ // |0 1 2 3 4 5 6 7 8 9 A B C D E F| ← hexadecimal offset in bytes
21
+ // |  _storage.0   |  _storage.1   | ← raw bits
22
+ // |          code units         | | ← encoded layout
23
+ //  ↑                             ↑
24
+ //  first (leftmost) code unit    discriminator (incl. count)
25
+ //
13
26
@_fixed_layout @usableFromInline
14
27
internal struct _SmallString {
15
28
@usableFromInline
@@ -50,16 +63,18 @@ internal struct _SmallString {
50
63
// Creates a small string from a `_StringObject` that is in its small form.
//
// The raw bits of the object are converted word-by-word with `littleEndian`,
// which is the identity on little-endian hosts and a byte swap on big-endian
// hosts, so that the code units end up in memory (array) order in `_storage`.
@inlinable @inline(__always)
internal init(_ object: _StringObject) {
  _internalInvariant(object.isSmall)
  // On big-endian platforms the byte order is the reverse of _StringObject.
  let leading = object.rawBits.0.littleEndian
  let trailing = object.rawBits.1.littleEndian
  self.init(raw: (leading, trailing))
}
55
71
56
72
// Creates an empty small string.
//
// Goes through `init(_: _StringObject)` rather than copying the raw bits
// directly, so the empty value receives the same byte-order conversion as
// every other small string built from a `_StringObject`.
@inlinable @inline(__always)
internal init() {
  self.init(_StringObject(empty: ()))
}
60
76
}
61
77
62
- // TODO
63
78
extension _SmallString {
64
79
@inlinable
65
80
internal static var capacity : Int {
@@ -72,9 +87,12 @@ extension _SmallString {
72
87
}
73
88
}
74
89
90
// Get an integer equivalent to the _StringObject.discriminatedObjectRawBits
// computed property.
//
// `_storage.1` is read through `littleEndian`, which leaves the value
// unchanged on little-endian hosts and swaps its bytes on big-endian hosts.
@inlinable @inline(__always)
internal var rawDiscriminatedObject: UInt64 {
  // Reverse the bytes on big-endian systems.
  return _storage.1.littleEndian
}
79
97
80
98
@inlinable
@@ -107,7 +125,7 @@ extension _SmallString {
107
125
// usage: it always clears the discriminator and count (in case it's full)
108
126
@inlinable @inline(__always)
internal var zeroTerminatedRawCodeUnits: RawBitPattern {
  // `UInt64(0xFF).bigEndian` has 0xFF in the last byte in memory order on
  // both little- and big-endian hosts, so its complement keeps every byte
  // except the last one. On little-endian hosts this is the same value as
  // the literal 0x00FF_FFFF_FFFF_FFFF.
  let smallStringCodeUnitMask = ~UInt64(0xFF).bigEndian // zero last byte
  return (self._storage.0, self._storage.1 & smallStringCodeUnitMask)
}
113
131
@@ -231,11 +249,12 @@ extension _SmallString {
231
249
_internalInvariant ( count <= _SmallString. capacity)
232
250
233
251
let isASCII = ( leading | trailing) & 0x8080_8080_8080_8080 == 0
234
- let countAndDiscriminator = UInt64 ( truncatingIfNeeded: count) &<< 56
235
- | _StringObject. Nibbles. small ( isASCII: isASCII)
236
- _internalInvariant ( trailing & countAndDiscriminator == 0 )
252
+ let discriminator = _StringObject. Nibbles
253
+ . small ( withCount: count, isASCII: isASCII)
254
+ . littleEndian // reversed byte order on big-endian platforms
255
+ _internalInvariant ( trailing & discriminator == 0 )
237
256
238
- self . init ( raw: ( leading, trailing | countAndDiscriminator ) )
257
+ self . init ( raw: ( leading, trailing | discriminator ) )
239
258
_internalInvariant ( self . count == count)
240
259
}
241
260
@@ -295,23 +314,31 @@ extension _SmallString {
295
314
#endif
296
315
297
316
extension UInt64 {
298
// Fetches the `i`th byte in memory order. On little-endian systems the byte
// at i=0 is the least significant byte (LSB) while on big-endian systems the
// byte at i=7 is the LSB.
//
// `i` must be in `0 ..< MemoryLayout<UInt64>.stride`; this is only checked
// through `_internalInvariant`.
@inlinable @inline(__always)
internal func _uncheckedGetByte(at i: Int) -> UInt8 {
  _internalInvariant(i >= 0 && i < MemoryLayout<UInt64>.stride)
  // Translate the memory-order index into a bit offset from the LSB.
#if _endian(big)
  let shift = (7 - UInt64(truncatingIfNeeded: i)) &* 8
#else
  let shift = UInt64(truncatingIfNeeded: i) &* 8
#endif
  // Move the selected byte into the low 8 bits and drop everything above it.
  return UInt8(truncatingIfNeeded: (self &>> shift))
}
307
330
308
// Sets the `i`th byte in memory order. On little-endian systems the byte
// at i=0 is the least significant byte (LSB) while on big-endian systems the
// byte at i=7 is the LSB.
//
// `i` must be in `0 ..< MemoryLayout<UInt64>.stride`; this is only checked
// through `_internalInvariant`.
@inlinable @inline(__always)
internal mutating func _uncheckedSetByte(at i: Int, to value: UInt8) {
  _internalInvariant(i >= 0 && i < MemoryLayout<UInt64>.stride)
  // Translate the memory-order index into a bit offset from the LSB.
#if _endian(big)
  let shift = (7 - UInt64(truncatingIfNeeded: i)) &* 8
#else
  let shift = UInt64(truncatingIfNeeded: i) &* 8
#endif
  // Clear the target byte, then OR the new value into its place.
  let valueMask: UInt64 = 0xFF &<< shift
  self = (self & ~valueMask) | (UInt64(truncatingIfNeeded: value) &<< shift)
}
@@ -331,5 +358,6 @@ internal func _bytesToUInt64(
331
358
r = r | ( UInt64 ( input [ idx] ) &<< shift)
332
359
shift = shift &+ 8
333
360
}
334
- return r
361
+ // Convert from little-endian to host byte order.
362
+ return r. littleEndian
335
363
}
0 commit comments