Skip to content

Commit 5296d93

Browse files
authored
Merge pull request #932 from rintaro/charinfo-rdar100906576
[Parse] Avoid 'inout' operator passing in Character.Info testing
2 parents deb75a3 + 03e7b90 commit 5296d93

File tree

2 files changed

+54
-154
lines changed

2 files changed

+54
-154
lines changed

Sources/SwiftParser/CharacterInfo.swift

Lines changed: 47 additions & 146 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,11 @@ extension Character {
1818
self.rawValue = rawValue
1919
}
2020

21-
static let SPACE = Character.Info(rawValue: 0x01) // ' '
22-
static let DIGIT = Character.Info(rawValue: 0x02) // 0-9
23-
static let XLETTER = Character.Info(rawValue: 0x04) // a-f,A-F
24-
static let UPPER = Character.Info(rawValue: 0x08) // A-Z
25-
static let LOWER = Character.Info(rawValue: 0x10) // a-z
26-
static let UNDER = Character.Info(rawValue: 0x20) // _
27-
static let PERIOD = Character.Info(rawValue: 0x40) // .
28-
static let PUNCT = Character.Info(rawValue: 0x80) // `$@()
29-
30-
static let XUPPER: Character.Info = [ .XLETTER, .UPPER ]
31-
32-
static let XLOWER: Character.Info = [ .XLETTER, .LOWER ]
21+
static let IDENT_START: Self = .init(rawValue: 0x01)
22+
static let IDENT_CONT: Self = .init(rawValue: 0x02)
23+
static let DECIMAL: Self = .init(rawValue: 0x04)
24+
static let HEX: Self = .init(rawValue: 0x08)
25+
static let LETTER: Self = .init(rawValue: 0x10)
3326
}
3427
}
3528

@@ -42,167 +35,75 @@ extension Unicode.Scalar {
4235
/// to be allowed to appear in a starting position in a programming language
4336
/// identifier.
4437
var isAsciiIdentifierStart: Bool {
45-
return self.testRawInfoTable { entry in
46-
!entry.intersection([ .UPPER, .LOWER, .UNDER, ]).isEmpty
47-
}
38+
self.testCharacterInfo(.IDENT_START)
4839
}
4940

5041
/// A Boolean value indicating whether this scalar is one which is recommended
5142
/// to be allowed to appear in a non-starting position in a programming
5243
/// language identifier.
5344
var isAsciiIdentifierContinue: Bool {
54-
return self.testRawInfoTable { entry in
55-
!entry.intersection([ .UPPER, .LOWER, .DIGIT, .UNDER ]).isEmpty
56-
}
45+
self.testCharacterInfo(.IDENT_CONT)
5746
}
5847

5948
/// A Boolean value indicating whether this scalar is an ASCII character used
6049
/// for the representation of base-10 numbers.
6150
var isDigit: Bool {
62-
return self.testRawInfoTable { entry in
63-
entry.contains(.DIGIT)
64-
}
51+
self.testCharacterInfo(.DECIMAL)
6552
}
6653

6754
/// A Boolean value indicating whether this scalar is considered to be either
6855
/// an uppercase or lowercase ASCII letter (A-Z or a-z).
6956
var isLetter: Bool {
70-
return self.testRawInfoTable { entry in
71-
!entry.intersection([ .UPPER, .LOWER ]).isEmpty
72-
}
57+
self.testCharacterInfo(.LETTER)
7358
}
7459

7560
/// A Boolean value indicating whether this scalar is an ASCII character
7661
/// commonly used for the representation of hexadecimal numbers.
7762
var isHexDigit: Bool {
78-
return self.testRawInfoTable { entry in
79-
!entry.intersection([ .DIGIT, .XLETTER ]).isEmpty
80-
}
63+
self.testCharacterInfo(.HEX)
8164
}
8265
}
8366

8467
extension Unicode.Scalar {
85-
private func testRawInfoTable(
86-
_ performTest: (Character.Info) -> Bool
68+
private func testCharacterInfo(
69+
_ match: Character.Info
8770
) -> Bool {
88-
return self.isASCII && withUnsafePointer(to: &InfoTable) { InfoTable in
89-
let infoPtr = UnsafeRawBufferPointer(start: InfoTable, count: 0x80)
90-
.assumingMemoryBound(to: Character.Info.self)
91-
return performTest(infoPtr[Int(self.value)])
92-
}
93-
}
94-
}
71+
let info: Character.Info
72+
switch self.value {
73+
case
74+
// '0'-'9'
75+
48, 49, 50, 51, 52, 53, 54, 55, 56, 57:
76+
info = [.IDENT_CONT, .DECIMAL, .HEX]
77+
78+
case
79+
// 'A'-'F'
80+
65, 66, 67, 68, 69, 70,
81+
// 'a'-'f'
82+
97, 98, 99, 100, 101, 102:
83+
info = [.IDENT_START, .IDENT_CONT, .HEX, .LETTER]
84+
85+
case
86+
// 'G'-'Z'
87+
71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
88+
89, 90,
89+
// 'g'-'z'
90+
103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
91+
118, 119, 120, 121, 122:
92+
info = [.IDENT_START, .IDENT_CONT, .LETTER]
9593

96-
extension UnsafeRawBufferPointer {
97-
/// Returns a typed buffer to the memory referenced by this buffer,
98-
/// assuming that the memory is already bound to the specified type.
99-
///
100-
/// Use this method when you have a raw buffer to memory that has *already*
101-
/// been bound to the specified type. The memory starting at this pointer
102-
/// must be bound to the type `T`. Accessing memory through the returned
103-
/// pointer is undefined if the memory has not been bound to `T`. To bind
104-
/// memory to `T`, use `bindMemory(to:capacity:)` instead of this method.
105-
///
106-
/// - Note: The buffer's base address must match the
107-
/// alignment of `T` (as reported by `MemoryLayout<T>.alignment`).
108-
/// That is, `Int(bitPattern: self.baseAddress) % MemoryLayout<T>.alignment`
109-
/// must equal zero.
110-
///
111-
/// - Parameter to: The type `T` that the memory has already been bound to.
112-
/// - Returns: A typed pointer to the same memory as this raw pointer.
113-
fileprivate func assumingMemoryBound<T>(
114-
to: T.Type
115-
) -> UnsafeBufferPointer<T> {
116-
guard let s = self.baseAddress else {
117-
return .init(start: nil, count: 0)
94+
case
95+
// '_'
96+
95:
97+
info = [.IDENT_START, .IDENT_CONT]
98+
99+
case
100+
// '$'
101+
36:
102+
info = [.IDENT_CONT]
103+
104+
default:
105+
info = []
118106
}
119-
let c = self.count
120-
let n = c / MemoryLayout<T>.stride
121-
return .init(start: s.assumingMemoryBound(to: T.self), count: n)
107+
return info.contains(match)
122108
}
123109
}
124-
125-
private var InfoTable: CharacterInfoTable = (
126-
// 0 NUL 1 SOH 2 STX 3 ETX
127-
// 4 EOT 5 ENQ 6 ACK 7 BEL
128-
[], [], [], [],
129-
[], [], [], [],
130-
// 8 BS 9 HT 10 NL 11 VT
131-
//12 NP 13 CR 14 SO 15 SI
132-
[], [], [], [],
133-
[], [], [], [],
134-
//16 DLE 17 DC1 18 DC2 19 DC3
135-
//20 DC4 21 NAK 22 SYN 23 ETB
136-
[], [], [], [],
137-
[], [], [], [],
138-
//24 CAN 25 EM 26 SUB 27 ESC
139-
//28 FS 29 GS 30 RS 31 US
140-
[], [], [], [],
141-
[], [], [], [],
142-
//32 SP 33 ! 34 " 35 #
143-
//36 $ 37 % 38 & 39 '
144-
.SPACE, [], [], [],
145-
.PUNCT, [], [], [],
146-
//40 ( 41 ) 42 * 43 +
147-
//44 , 45 - 46 . 47 /
148-
.PUNCT, .PUNCT, [] , [],
149-
[], [], .PERIOD, [],
150-
//48 0 49 1 50 2 51 3
151-
//52 4 53 5 54 6 55 7
152-
.DIGIT, .DIGIT, .DIGIT, .DIGIT,
153-
.DIGIT, .DIGIT, .DIGIT, .DIGIT,
154-
//56 8 57 9 58 : 59 ;
155-
//60 < 61 = 62 > 63 ?
156-
.DIGIT, .DIGIT, [], [],
157-
[], [], [], [],
158-
//64 @ 65 A 66 B 67 C
159-
//68 D 69 E 70 F 71 G
160-
.PUNCT, .XUPPER, .XUPPER, .XUPPER,
161-
.XUPPER, .XUPPER, .XUPPER, .UPPER,
162-
//72 H 73 I 74 J 75 K
163-
//76 L 77 M 78 N 79 O
164-
.UPPER, .UPPER, .UPPER, .UPPER,
165-
.UPPER, .UPPER, .UPPER, .UPPER,
166-
//80 P 81 Q 82 R 83 S
167-
//84 T 85 U 86 V 87 W
168-
.UPPER, .UPPER, .UPPER, .UPPER,
169-
.UPPER, .UPPER, .UPPER, .UPPER,
170-
//88 X 89 Y 90 Z 91 [
171-
//92 \ 93 ] 94 ^ 95 _
172-
.UPPER, .UPPER, .UPPER, [],
173-
.PUNCT, [], [], .UNDER,
174-
//96 ` 97 a 98 b 99 c
175-
//100 d 101 e 102 f 103 g
176-
.PUNCT, .XLOWER, .XLOWER, .XLOWER,
177-
.XLOWER, .XLOWER, .XLOWER, .LOWER,
178-
//104 h 105 i 106 j 107 k
179-
//108 l 109 m 110 n 111 o
180-
.LOWER, .LOWER, .LOWER, .LOWER,
181-
.LOWER, .LOWER, .LOWER, .LOWER,
182-
//112 p 113 q 114 r 115 s
183-
//116 t 117 u 118 v 119 w
184-
.LOWER, .LOWER, .LOWER, .LOWER,
185-
.LOWER, .LOWER, .LOWER, .LOWER,
186-
//120 x 121 y 122 z 123 {
187-
//124 | 125 } 126 ~ 127 DEL
188-
.LOWER, .LOWER, .LOWER, [],
189-
[], [] , [] , [])
190-
191-
private typealias CharacterInfoTable = (
192-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
193-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
194-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
195-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
196-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
197-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
198-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
199-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
200-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
201-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
202-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
203-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
204-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
205-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
206-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
207-
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info
208-
)

Sources/SwiftParser/Lexer.swift

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2161,13 +2161,13 @@ extension Lexer.Cursor {
21612161

21622162
extension Unicode.Scalar {
21632163
var isValidIdentifierContinuationCodePoint: Bool {
2164-
let c = self.value
2165-
if c < 0x80 {
2166-
return self.isAsciiIdentifierContinue || c == UInt32(UInt8(ascii: "$"))
2164+
if self.isASCII {
2165+
return self.isAsciiIdentifierContinue
21672166
}
21682167

21692168
// N1518: Recommendations for extended identifier characters for C and C++
21702169
// Proposed Annex X.1: Ranges of characters allowed
2170+
let c = self.value
21712171
return c == 0x00A8 || c == 0x00AA || c == 0x00AD || c == 0x00AF
21722172
|| (c >= 0x00B2 && c <= 0x00B5) || (c >= 0x00B7 && c <= 0x00BA)
21732173
|| (c >= 0x00BC && c <= 0x00BE) || (c >= 0x00C0 && c <= 0x00D6)
@@ -2217,17 +2217,16 @@ extension Unicode.Scalar {
22172217
}
22182218

22192219
var isValidIdentifierStartCodePoint: Bool {
2220-
guard self.isValidIdentifierContinuationCodePoint else {
2221-
return false
2220+
if (self.isASCII) {
2221+
return self.isAsciiIdentifierStart
22222222
}
2223-
2224-
let c = self.value
2225-
if c < 0x80 && (self.isDigit || c == UInt8(ascii: "$")) {
2223+
guard self.isValidIdentifierContinuationCodePoint else {
22262224
return false
22272225
}
22282226

22292227
// N1518: Recommendations for extended identifier characters for C and C++
22302228
// Proposed Annex X.2: Ranges of characters disallowed initially
2229+
let c = self.value
22312230
if ((c >= 0x0300 && c <= 0x036F) ||
22322231
(c >= 0x1DC0 && c <= 0x1DFF) ||
22332232
(c >= 0x20D0 && c <= 0x20FF) ||

0 commit comments

Comments
 (0)