Skip to content

Commit 5dfcb21

Browse files
committed
[Parse] Avoid 'inout' operator passing in Character.Info testing
Passing a `var` value with the `&` operator triggers TSAN's "modifying access" detection even though it doesn't actually mutate the value. That results in an "access race" sanitizer error in multi-threaded environments. Instead of using a tuple table, use a 'switch' to test the character info. Also, eliminate the unused character info values, and introduce 'IDENT_CONT'.
1 parent 7537f98 commit 5dfcb21

File tree

2 files changed

+46
-151
lines changed

2 files changed

+46
-151
lines changed

Sources/SwiftParser/CharacterInfo.swift

Lines changed: 41 additions & 146 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,10 @@ extension Character {
1818
self.rawValue = rawValue
1919
}
2020

21-
static let SPACE = Character.Info(rawValue: 0x01) // ' '
22-
static let DIGIT = Character.Info(rawValue: 0x02) // 0-9
23-
static let XLETTER = Character.Info(rawValue: 0x04) // a-f,A-F
24-
static let UPPER = Character.Info(rawValue: 0x08) // A-Z
25-
static let LOWER = Character.Info(rawValue: 0x10) // a-z
26-
static let UNDER = Character.Info(rawValue: 0x20) // _
27-
static let PERIOD = Character.Info(rawValue: 0x40) // .
28-
static let PUNCT = Character.Info(rawValue: 0x80) // `$@()
29-
30-
static let XUPPER: Character.Info = [ .XLETTER, .UPPER ]
31-
32-
static let XLOWER: Character.Info = [ .XLETTER, .LOWER ]
21+
static let IDENT_CONT: Self = .init(rawValue: 0x01)
22+
static let DECIMAL: Self = .init(rawValue: 0x02)
23+
static let HEX: Self = .init(rawValue: 0x04)
24+
static let LETTER: Self = .init(rawValue: 0x08)
3325
}
3426
}
3527

@@ -42,167 +34,70 @@ extension Unicode.Scalar {
4234
/// to be allowed to appear in a starting position in a programming language
4335
/// identifier.
4436
var isAsciiIdentifierStart: Bool {
45-
return self.testRawInfoTable { entry in
46-
!entry.intersection([ .UPPER, .LOWER, .UNDER, ]).isEmpty
47-
}
37+
self.testCharacterInfo(.IDENT_CONT) && !self.isDigit && self != "$"
4838
}
4939

5040
/// A Boolean value indicating whether this scalar is one which is recommended
5141
/// to be allowed to appear in a non-starting position in a programming
5242
/// language identifier.
5343
var isAsciiIdentifierContinue: Bool {
54-
return self.testRawInfoTable { entry in
55-
!entry.intersection([ .UPPER, .LOWER, .DIGIT, .UNDER ]).isEmpty
56-
}
44+
self.testCharacterInfo(.IDENT_CONT)
5745
}
5846

5947
/// A Boolean value indicating whether this scalar is an ASCII character used
6048
/// for the representation of base-10 numbers.
6149
var isDigit: Bool {
62-
return self.testRawInfoTable { entry in
63-
entry.contains(.DIGIT)
64-
}
50+
self.testCharacterInfo(.DECIMAL)
6551
}
6652

6753
/// A Boolean value indicating whether this scalar is considered to be either
6854
/// an uppercase or lowercase ASCII character.
6955
var isLetter: Bool {
70-
return self.testRawInfoTable { entry in
71-
!entry.intersection([ .UPPER, .LOWER ]).isEmpty
72-
}
56+
self.testCharacterInfo(.LETTER)
7357
}
7458

7559
/// A Boolean value indicating whether this scalar is an ASCII character
7660
/// commonly used for the representation of hexadecimal numbers.
7761
var isHexDigit: Bool {
78-
return self.testRawInfoTable { entry in
79-
!entry.intersection([ .DIGIT, .XLETTER ]).isEmpty
80-
}
62+
self.testCharacterInfo(.HEX)
8163
}
8264
}
8365

8466
extension Unicode.Scalar {
85-
private func testRawInfoTable(
86-
_ performTest: (Character.Info) -> Bool
67+
private func testCharacterInfo(
68+
_ match: Character.Info
8769
) -> Bool {
88-
return self.isASCII && withUnsafePointer(to: &InfoTable) { InfoTable in
89-
let infoPtr = UnsafeRawBufferPointer(start: InfoTable, count: 0x80)
90-
.assumingMemoryBound(to: Character.Info.self)
91-
return performTest(infoPtr[Int(self.value)])
92-
}
93-
}
94-
}
70+
let info: Character.Info
71+
switch self.value {
72+
case
73+
// '$', '_'
74+
36, 95:
75+
info = [.IDENT_CONT]
76+
77+
case
78+
// '0'-'9'
79+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57:
80+
info = [.IDENT_CONT, .DECIMAL, .HEX]
81+
82+
case
83+
// 'A'-'F'
84+
65, 66, 67, 68, 69, 70,
85+
// 'a'-'f'
86+
97, 98, 99, 100, 101, 102:
87+
info = [.IDENT_CONT, .HEX, .LETTER]
9588

96-
extension UnsafeRawBufferPointer {
97-
/// Returns a typed buffer to the memory referenced by this buffer,
98-
/// assuming that the memory is already bound to the specified type.
99-
///
100-
/// Use this method when you have a raw buffer to memory that has *already*
101-
/// been bound to the specified type. The memory starting at this pointer
102-
/// must be bound to the type `T`. Accessing memory through the returned
103-
/// pointer is undefined if the memory has not been bound to `T`. To bind
104-
/// memory to `T`, use `bindMemory(to:capacity:)` instead of this method.
105-
///
106-
/// - Note: The buffer's base address must match the
107-
/// alignment of `T` (as reported by `MemoryLayout<T>.alignment`).
108-
/// That is, `Int(bitPattern: self.baseAddress) % MemoryLayout<T>.alignment`
109-
/// must equal zero.
110-
///
111-
/// - Parameter to: The type `T` that the memory has already been bound to.
112-
/// - Returns: A typed pointer to the same memory as this raw pointer.
113-
fileprivate func assumingMemoryBound<T>(
114-
to: T.Type
115-
) -> UnsafeBufferPointer<T> {
116-
guard let s = self.baseAddress else {
117-
return .init(start: nil, count: 0)
89+
case
90+
// 'G'-'Z'
91+
71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
92+
89, 90,
93+
// 'g'-'z'
94+
103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
95+
118, 119, 120, 121, 122:
96+
info = [.IDENT_CONT, .LETTER]
97+
98+
default:
99+
info = []
118100
}
119-
let c = self.count
120-
let n = c / MemoryLayout<T>.stride
121-
return .init(start: s.assumingMemoryBound(to: T.self), count: n)
101+
return info.contains(match)
122102
}
123103
}
124-
125-
private var InfoTable: CharacterInfoTable = (
126-
// 0 NUL 1 SOH 2 STX 3 ETX
127-
// 4 EOT 5 ENQ 6 ACK 7 BEL
128-
[], [], [], [],
129-
[], [], [], [],
130-
// 8 BS 9 HT 10 NL 11 VT
131-
//12 NP 13 CR 14 SO 15 SI
132-
[], [], [], [],
133-
[], [], [], [],
134-
//16 DLE 17 DC1 18 DC2 19 DC3
135-
//20 DC4 21 NAK 22 SYN 23 ETB
136-
[], [], [], [],
137-
[], [], [], [],
138-
//24 CAN 25 EM 26 SUB 27 ESC
139-
//28 FS 29 GS 30 RS 31 US
140-
[], [], [], [],
141-
[], [], [], [],
142-
//32 SP 33 ! 34 " 35 #
143-
//36 $ 37 % 38 & 39 '
144-
.SPACE, [], [], [],
145-
.PUNCT, [], [], [],
146-
//40 ( 41 ) 42 * 43 +
147-
//44 , 45 - 46 . 47 /
148-
.PUNCT, .PUNCT, [] , [],
149-
[], [], .PERIOD, [],
150-
//48 0 49 1 50 2 51 3
151-
//52 4 53 5 54 6 55 7
152-
.DIGIT, .DIGIT, .DIGIT, .DIGIT,
153-
.DIGIT, .DIGIT, .DIGIT, .DIGIT,
154-
//56 8 57 9 58 : 59 ;
155-
//60 < 61 = 62 > 63 ?
156-
.DIGIT, .DIGIT, [], [],
157-
[], [], [], [],
158-
//64 @ 65 A 66 B 67 C
159-
//68 D 69 E 70 F 71 G
160-
.PUNCT, .XUPPER, .XUPPER, .XUPPER,
161-
.XUPPER, .XUPPER, .XUPPER, .UPPER,
162-
//72 H 73 I 74 J 75 K
163-
//76 L 77 M 78 N 79 O
164-
.UPPER, .UPPER, .UPPER, .UPPER,
165-
.UPPER, .UPPER, .UPPER, .UPPER,
166-
//80 P 81 Q 82 R 83 S
167-
//84 T 85 U 86 V 87 W
168-
.UPPER, .UPPER, .UPPER, .UPPER,
169-
.UPPER, .UPPER, .UPPER, .UPPER,
170-
//88 X 89 Y 90 Z 91 [
171-
//92 \ 93 ] 94 ^ 95 _
172-
.UPPER, .UPPER, .UPPER, [],
173-
.PUNCT, [], [], .UNDER,
174-
//96 ` 97 a 98 b 99 c
175-
//100 d 101 e 102 f 103 g
176-
.PUNCT, .XLOWER, .XLOWER, .XLOWER,
177-
.XLOWER, .XLOWER, .XLOWER, .LOWER,
178-
//104 h 105 i 106 j 107 k
179-
//108 l 109 m 110 n 111 o
180-
.LOWER, .LOWER, .LOWER, .LOWER,
181-
.LOWER, .LOWER, .LOWER, .LOWER,
182-
//112 p 113 q 114 r 115 s
183-
//116 t 117 u 118 v 119 w
184-
.LOWER, .LOWER, .LOWER, .LOWER,
185-
.LOWER, .LOWER, .LOWER, .LOWER,
186-
//120 x 121 y 122 z 123 {
187-
//124 | 125 } 126 ~ 127 DEL
188-
.LOWER, .LOWER, .LOWER, [],
189-
[], [] , [] , [])
190-
191-
private typealias CharacterInfoTable = (
192-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
193-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
194-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
195-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
196-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
197-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
198-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
199-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
200-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
201-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
202-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
203-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
204-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
205-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
206-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
207-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info
208-
)

Sources/SwiftParser/Lexer.swift

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2161,13 +2161,13 @@ extension Lexer.Cursor {
21612161

21622162
extension Unicode.Scalar {
21632163
var isValidIdentifierContinuationCodePoint: Bool {
2164-
let c = self.value
2165-
if c < 0x80 {
2166-
return self.isAsciiIdentifierContinue || c == UInt32(UInt8(ascii: "$"))
2164+
if self.isASCII {
2165+
return self.isAsciiIdentifierContinue
21672166
}
21682167

21692168
// N1518: Recommendations for extended identifier characters for C and C++
21702169
// Proposed Annex X.1: Ranges of characters allowed
2170+
let c = self.value
21712171
return c == 0x00A8 || c == 0x00AA || c == 0x00AD || c == 0x00AF
21722172
|| (c >= 0x00B2 && c <= 0x00B5) || (c >= 0x00B7 && c <= 0x00BA)
21732173
|| (c >= 0x00BC && c <= 0x00BE) || (c >= 0x00C0 && c <= 0x00D6)
@@ -2221,13 +2221,13 @@ extension Unicode.Scalar {
22212221
return false
22222222
}
22232223

2224-
let c = self.value
2225-
if c < 0x80 && (self.isDigit || c == UInt8(ascii: "$")) {
2224+
if self.isASCII && (self.isDigit || self == "$") {
22262225
return false
22272226
}
22282227

22292228
// N1518: Recommendations for extended identifier characters for C and C++
22302229
// Proposed Annex X.2: Ranges of characters disallowed initially
2230+
let c = self.value
22312231
if ((c >= 0x0300 && c <= 0x036F) ||
22322232
(c >= 0x1DC0 && c <= 0x1DFF) ||
22332233
(c >= 0x20D0 && c <= 0x20FF) ||

0 commit comments

Comments
 (0)