Skip to content

Commit e3ee12c

Browse files
authored
Merge pull request #19498 from linux-on-ibm-z/s390x-smallstring-fix
[string] Fix small string implementation for big endian platforms
2 parents bc87e9b + 2866b4a commit e3ee12c

File tree

2 files changed

+69
-19
lines changed

2 files changed

+69
-19
lines changed

stdlib/public/core/SmallString.swift

Lines changed: 68 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,28 +30,79 @@ func unsupportedOn32bit() -> Never { _conditionallyUnreachable() }
3030
@_fixed_layout public struct _SmallUTF8String {}
3131

3232
#else
33+
//
34+
// The low byte of the first word (low) stores the first code unit. Up to 15
35+
// such code units are encodable, with the second-highest byte of the second
36+
// word (high) being the final code unit. The high byte of the final word
37+
// stores the count.
38+
//
39+
// The low and high values are automatically stored in little-endian byte order
40+
// by the _RawBitPattern struct, which reverses the byte order as needed to
41+
// convert between the little-endian storage format and the host byte order.
42+
// The memory layout of the _RawBitPattern struct will therefore be identical
43+
// on both big- and little-endian machines. The values of 'high' and 'low'
44+
// will also appear to be identical. Routines which build, manipulate and
45+
// convert small strings should therefore always assume little-endian byte
46+
// order.
47+
//
48+
// Storage layout:
49+
//
50+
// |0 1 2 3 4 5 6 7 8 9 a b c d e f| ← offset
51+
// |      low      |     high      | ← properties
52+
// |           string            | | ← encoded layout
53+
//  ↑                             ↑
54+
//  first (low) byte              count
55+
//
56+
// Examples:
57+
//      ! o l l e H 6
58+
// |H e l l o ! ░ ░ ░ ░ ░ ░ ░ ░ ░|6| → low=0x0000216f6c6c6548 high=0x0600000000000000
59+
//
60+
//  W   , o l l e H 13    ! d l r o
61+
// |H e l l o ,   W o r l d ! ░ ░|d| → low=0x57202c6f6c6c6548 high=0x0d000021646c726f
62+
//
3363
@_fixed_layout
3464
@usableFromInline
3565
internal struct _SmallUTF8String {
66+
@_fixed_layout
3667
@usableFromInline
37-
typealias _RawBitPattern = (low: UInt, high: UInt)
38-
39-
//
40-
// TODO: pretty ASCII art.
41-
//
42-
// TODO: endianess awareness day
43-
//
44-
// The low byte of the first word stores the first code unit. There is up to
45-
// 15 such code units encodable, with the second-highest byte of the second
46-
// word being the final code unit. The high byte of the final word stores the
47-
// count.
48-
//
68+
internal struct _RawBitPattern: Equatable {
69+
// high and low are stored in little-endian byte order
70+
@usableFromInline
71+
internal var _storage: (low: UInt, high: UInt)
72+
73+
@inlinable
74+
var low: UInt {
75+
@inline(__always) get { return _storage.low.littleEndian }
76+
@inline(__always) set { _storage.low = newValue.littleEndian }
77+
}
78+
79+
@inlinable
80+
var high: UInt {
81+
@inline(__always) get { return _storage.high.littleEndian }
82+
@inline(__always) set { _storage.high = newValue.littleEndian }
83+
}
84+
85+
@inlinable
86+
@inline(__always)
87+
init(low l: UInt, high h: UInt) {
88+
// host byte order to little-endian byte order
89+
_storage.low = l.littleEndian
90+
_storage.high = h.littleEndian
91+
}
92+
93+
@inlinable
94+
@inline(__always)
95+
static func == (lhs: _RawBitPattern, rhs: _RawBitPattern) -> Bool {
96+
return lhs._storage == rhs._storage
97+
}
98+
}
99+
49100
@usableFromInline
50-
var _storage: _RawBitPattern = (0,0)
101+
var _storage: _RawBitPattern
51102
@inlinable
52103
@inline(__always)
53104
init() {
54-
self._storage = (0,0)
105+
self._storage = _RawBitPattern(low: 0, high: 0)
55106
}
56107
}
57108
#endif // 64-bit
@@ -150,7 +201,7 @@ extension _SmallUTF8String {
150201
}
151202
high |= (UInt(count) &<< (8*15))
152203
let low = _bytesToUInt(addr, lowCount)
153-
_storage = (low, high)
204+
_storage = _RawBitPattern(low: low, high: high)
154205

155206
// FIXME: support transcoding
156207
if !self.isASCII { return nil }
@@ -585,7 +636,8 @@ extension _SmallUTF8String {
585636
extension _SmallUTF8String {
586637
@inlinable
587638
@inline(__always)
588-
init(_rawBits: _RawBitPattern) {
639+
init(_rawBits: (low: UInt, high: UInt)) {
640+
self.init()
589641
self._storage.low = _rawBits.low
590642
self._storage.high = _rawBits.high
591643
_invariantCheck()
@@ -831,8 +883,6 @@ func _castBufPtr<A, B>(
831883

832884
extension UInt {
833885
// Fetches the `i`th byte, from least-significant to most-significant
834-
//
835-
// TODO: endianess awareness day
836886
@inlinable
837887
@inline(__always)
838888
func _uncheckedGetByte(at i: Int) -> UInt8 {

stdlib/public/core/StringGuts.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ extension _StringGuts {
365365
unsupportedOn32bit()
366366
#else
367367
return _SmallUTF8String(
368-
_rawBits: (_otherBits, _object.asSmallUTF8SecondWord))
368+
_rawBits: (low: _otherBits, high: _object.asSmallUTF8SecondWord))
369369
#endif
370370
}
371371
}

0 commit comments

Comments
 (0)