Skip to content

Commit 817dff3

Browse files
authored
Merge pull request #22614 from linux-on-ibm-z/s390x-smallstring-5.0-backport
[5.0] [string] Fix string implementation for big endian platforms
2 parents 15be364 + 4f7d007 commit 817dff3

File tree

2 files changed

+53
-24
lines changed

2 files changed

+53
-24
lines changed

stdlib/public/core/SmallString.swift

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,19 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
// The code units in _SmallString are always stored in memory in the same order
14+
// that they would be stored in an array. This means that on big-endian
15+
// platforms the order of the bytes in storage is reversed compared to
16+
// _StringObject whereas on little-endian platforms the order is the same.
17+
//
18+
// Memory layout:
19+
//
20+
// |0 1 2 3 4 5 6 7 8 9 A B C D E F| ← hexadecimal offset in bytes
21+
// |  _storage.0   |  _storage.1   | ← raw bits
22+
// |          code units         | | ← encoded layout
23+
//  ↑                             ↑
24+
//  first (leftmost) code unit    discriminator (incl. count)
25+
//
1326
@_fixed_layout @usableFromInline
1427
internal struct _SmallString {
1528
@usableFromInline
@@ -50,16 +63,18 @@ internal struct _SmallString {
5063
@inlinable @inline(__always)
5164
internal init(_ object: _StringObject) {
5265
_internalInvariant(object.isSmall)
53-
self.init(raw: object.rawBits)
66+
// On big-endian platforms the byte order is the reverse of _StringObject.
67+
let leading = object.rawBits.0.littleEndian
68+
let trailing = object.rawBits.1.littleEndian
69+
self.init(raw: (leading, trailing))
5470
}
5571

5672
@inlinable @inline(__always)
5773
internal init() {
58-
self.init(raw: _StringObject(empty:()).rawBits)
74+
self.init(_StringObject(empty:()))
5975
}
6076
}
6177

62-
// TODO
6378
extension _SmallString {
6479
@inlinable
6580
internal static var capacity: Int {
@@ -72,9 +87,12 @@ extension _SmallString {
7287
}
7388
}
7489

90+
// Get an integer equivalent to the _StringObject.discriminatedObjectRawBits
91+
// computed property.
7592
@inlinable @inline(__always)
7693
internal var rawDiscriminatedObject: UInt64 {
77-
return _storage.1
94+
// Reverse the bytes on big-endian systems.
95+
return _storage.1.littleEndian
7896
}
7997

8098
@inlinable
@@ -107,7 +125,7 @@ extension _SmallString {
107125
// usage: it always clears the discriminator and count (in case it's full)
108126
@inlinable @inline(__always)
109127
internal var zeroTerminatedRawCodeUnits: RawBitPattern {
110-
let smallStringCodeUnitMask: UInt64 = 0x00FF_FFFF_FFFF_FFFF
128+
let smallStringCodeUnitMask = ~UInt64(0xFF).bigEndian // zero last byte
111129
return (self._storage.0, self._storage.1 & smallStringCodeUnitMask)
112130
}
113131

@@ -231,11 +249,12 @@ extension _SmallString {
231249
_internalInvariant(count <= _SmallString.capacity)
232250

233251
let isASCII = (leading | trailing) & 0x8080_8080_8080_8080 == 0
234-
let countAndDiscriminator = UInt64(truncatingIfNeeded: count) &<< 56
235-
| _StringObject.Nibbles.small(isASCII: isASCII)
236-
_internalInvariant(trailing & countAndDiscriminator == 0)
252+
let discriminator = _StringObject.Nibbles
253+
.small(withCount: count, isASCII: isASCII)
254+
.littleEndian // reversed byte order on big-endian platforms
255+
_internalInvariant(trailing & discriminator == 0)
237256

238-
self.init(raw: (leading, trailing | countAndDiscriminator))
257+
self.init(raw: (leading, trailing | discriminator))
239258
_internalInvariant(self.count == count)
240259
}
241260

@@ -300,23 +319,31 @@ extension _SmallString {
300319
#endif
301320

302321
extension UInt64 {
303-
// Fetches the `i`th byte, from least-significant to most-significant
304-
//
305-
// TODO: endianess awareness day
322+
// Fetches the `i`th byte in memory order. On little-endian systems the byte
323+
// at i=0 is the least significant byte (LSB) while on big-endian systems the
324+
// byte at i=7 is the LSB.
306325
@inlinable @inline(__always)
307326
internal func _uncheckedGetByte(at i: Int) -> UInt8 {
308327
_internalInvariant(i >= 0 && i < MemoryLayout<UInt64>.stride)
328+
#if _endian(big)
329+
let shift = (7 - UInt64(truncatingIfNeeded: i)) &* 8
330+
#else
309331
let shift = UInt64(truncatingIfNeeded: i) &* 8
332+
#endif
310333
return UInt8(truncatingIfNeeded: (self &>> shift))
311334
}
312335

313-
// Sets the `i`th byte, from least-significant to most-significant
314-
//
315-
// TODO: endianess awareness day
336+
// Sets the `i`th byte in memory order. On little-endian systems the byte
337+
// at i=0 is the least significant byte (LSB) while on big-endian systems the
338+
// byte at i=7 is the LSB.
316339
@inlinable @inline(__always)
317340
internal mutating func _uncheckedSetByte(at i: Int, to value: UInt8) {
318341
_internalInvariant(i >= 0 && i < MemoryLayout<UInt64>.stride)
342+
#if _endian(big)
343+
let shift = (7 - UInt64(truncatingIfNeeded: i)) &* 8
344+
#else
319345
let shift = UInt64(truncatingIfNeeded: i) &* 8
346+
#endif
320347
let valueMask: UInt64 = 0xFF &<< shift
321348
self = (self & ~valueMask) | (UInt64(truncatingIfNeeded: value) &<< shift)
322349
}
@@ -336,5 +363,6 @@ internal func _bytesToUInt64(
336363
r = r | (UInt64(input[idx]) &<< shift)
337364
shift = shift &+ 8
338365
}
339-
return r
366+
// Convert from little-endian to host byte order.
367+
return r.littleEndian
340368
}

stdlib/public/core/StringObject.swift

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -516,11 +516,15 @@ extension _StringObject {
516516

517517
*/
518518
extension _StringObject {
519-
#if arch(i386) || arch(arm)
520519
@inlinable @inline(__always)
521520
internal init(_ small: _SmallString) {
521+
// Small strings are encoded as _StringObjects in reverse byte order
522+
// on big-endian platforms. This is to match the discriminator to the
523+
// spare bits (the most significant nibble) in a pointer.
524+
let word1 = small.rawBits.0.littleEndian
525+
let word2 = small.rawBits.1.littleEndian
526+
#if arch(i386) || arch(arm)
522527
// On 32-bit, we need to unpack the small string.
523-
let (word1, word2) = small.rawBits
524528
let smallStringDiscriminatorAndCount: UInt64 = 0xFF00_0000_0000_0000
525529

526530
let leadingFour = Int(truncatingIfNeeded: word1)
@@ -532,15 +536,12 @@ extension _StringObject {
532536
variant: .immortal(nextFour),
533537
discriminator: smallDiscriminatorAndCount,
534538
flags: trailingTwo)
535-
_internalInvariant(isSmall)
536-
}
537539
#else
538-
@inlinable @inline(__always)
539-
internal init(_ small: _SmallString) {
540-
self.init(rawValue: small.rawBits)
540+
// On 64-bit, we copy the raw bits (to host byte order).
541+
self.init(rawValue: (word1, word2))
542+
#endif
541543
_internalInvariant(isSmall)
542544
}
543-
#endif
544545

545546
@inlinable
546547
internal static func getSmallCount(fromRaw x: UInt64) -> Int {

0 commit comments

Comments
 (0)