Skip to content

[Parse] Avoid 'inout' operator passing in Character.Info testing #932

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 47 additions & 146 deletions Sources/SwiftParser/CharacterInfo.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,11 @@ extension Character {
self.rawValue = rawValue
}

static let SPACE = Character.Info(rawValue: 0x01) // ' '
static let DIGIT = Character.Info(rawValue: 0x02) // 0-9
static let XLETTER = Character.Info(rawValue: 0x04) // a-f,A-F
static let UPPER = Character.Info(rawValue: 0x08) // A-Z
static let LOWER = Character.Info(rawValue: 0x10) // a-z
static let UNDER = Character.Info(rawValue: 0x20) // _
static let PERIOD = Character.Info(rawValue: 0x40) // .
static let PUNCT = Character.Info(rawValue: 0x80) // `$@()

static let XUPPER: Character.Info = [ .XLETTER, .UPPER ]

static let XLOWER: Character.Info = [ .XLETTER, .LOWER ]
static let IDENT_START: Self = .init(rawValue: 0x01)
static let IDENT_CONT: Self = .init(rawValue: 0x02)
static let DECIMAL: Self = .init(rawValue: 0x04)
static let HEX: Self = .init(rawValue: 0x08)
static let LETTER: Self = .init(rawValue: 0x10)
}
}

Expand All @@ -42,167 +35,75 @@ extension Unicode.Scalar {
/// to be allowed to appear in a starting position in a programming language
/// identifier.
var isAsciiIdentifierStart: Bool {
return self.testRawInfoTable { entry in
!entry.intersection([ .UPPER, .LOWER, .UNDER, ]).isEmpty
}
self.testCharacterInfo(.IDENT_START)
}

/// A Boolean value indicating whether this scalar is one which is recommended
/// to be allowed to appear in a non-starting position in a programming
/// language identifier.
var isAsciiIdentifierContinue: Bool {
return self.testRawInfoTable { entry in
!entry.intersection([ .UPPER, .LOWER, .DIGIT, .UNDER ]).isEmpty
}
self.testCharacterInfo(.IDENT_CONT)
}

/// A Boolean value indicating whether this scalar is an ASCII character used
/// for the representation of base-10 numbers.
var isDigit: Bool {
return self.testRawInfoTable { entry in
entry.contains(.DIGIT)
}
self.testCharacterInfo(.DECIMAL)
}

/// A Boolean value indicating whether this scalar is an uppercase or
/// lowercase ASCII letter (`A`-`Z` or `a`-`z`).
var isLetter: Bool {
return self.testRawInfoTable { entry in
!entry.intersection([ .UPPER, .LOWER ]).isEmpty
}
self.testCharacterInfo(.LETTER)
}

/// A Boolean value indicating whether this scalar is an ASCII character
/// commonly used for the representation of hexadecimal numbers.
var isHexDigit: Bool {
return self.testRawInfoTable { entry in
!entry.intersection([ .DIGIT, .XLETTER ]).isEmpty
}
self.testCharacterInfo(.HEX)
}
}

extension Unicode.Scalar {
private func testRawInfoTable(
_ performTest: (Character.Info) -> Bool
private func testCharacterInfo(
_ match: Character.Info
) -> Bool {
return self.isASCII && withUnsafePointer(to: &InfoTable) { InfoTable in
let infoPtr = UnsafeRawBufferPointer(start: InfoTable, count: 0x80)
.assumingMemoryBound(to: Character.Info.self)
return performTest(infoPtr[Int(self.value)])
}
}
}
let info: Character.Info
switch self.value {
case
// '0'-'9'
48, 49, 50, 51, 52, 53, 54, 55, 56, 57:
info = [.IDENT_CONT, .DECIMAL, .HEX]

case
// 'A'-'F'
65, 66, 67, 68, 69, 70,
// 'a'-'f'
97, 98, 99, 100, 101, 102:
info = [.IDENT_START, .IDENT_CONT, .HEX, .LETTER]

case
// 'G'-'Z'
71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
89, 90,
// 'g'-'z'
103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
118, 119, 120, 121, 122:
info = [.IDENT_START, .IDENT_CONT, .LETTER]

extension UnsafeRawBufferPointer {
/// Returns a typed buffer to the memory referenced by this buffer,
/// assuming that the memory is already bound to the specified type.
///
/// Use this method when you have a raw buffer to memory that has *already*
/// been bound to the specified type. The memory starting at this pointer
/// must be bound to the type `T`. Accessing memory through the returned
/// pointer is undefined if the memory has not been bound to `T`. To bind
/// memory to `T`, use `bindMemory(to:capacity:)` instead of this method.
///
/// - Note: The buffer's base address must match the
/// alignment of `T` (as reported by `MemoryLayout<T>.alignment`).
/// That is, `Int(bitPattern: self.baseAddress) % MemoryLayout<T>.alignment`
/// must equal zero.
///
/// - Parameter to: The type `T` that the memory has already been bound to.
/// - Returns: A typed pointer to the same memory as this raw pointer.
fileprivate func assumingMemoryBound<T>(
to: T.Type
) -> UnsafeBufferPointer<T> {
guard let s = self.baseAddress else {
return .init(start: nil, count: 0)
case
// '_'
95:
info = [.IDENT_START, .IDENT_CONT]

case
// '$'
36:
info = [.IDENT_CONT]

default:
info = []
}
let c = self.count
let n = c / MemoryLayout<T>.stride
return .init(start: s.assumingMemoryBound(to: T.self), count: n)
return info.contains(match)
}
}

private var InfoTable: CharacterInfoTable = (
// 0 NUL 1 SOH 2 STX 3 ETX
// 4 EOT 5 ENQ 6 ACK 7 BEL
[], [], [], [],
[], [], [], [],
// 8 BS 9 HT 10 NL 11 VT
//12 NP 13 CR 14 SO 15 SI
[], [], [], [],
[], [], [], [],
//16 DLE 17 DC1 18 DC2 19 DC3
//20 DC4 21 NAK 22 SYN 23 ETB
[], [], [], [],
[], [], [], [],
//24 CAN 25 EM 26 SUB 27 ESC
//28 FS 29 GS 30 RS 31 US
[], [], [], [],
[], [], [], [],
//32 SP 33 ! 34 " 35 #
//36 $ 37 % 38 & 39 '
.SPACE, [], [], [],
.PUNCT, [], [], [],
//40 ( 41 ) 42 * 43 +
//44 , 45 - 46 . 47 /
.PUNCT, .PUNCT, [] , [],
[], [], .PERIOD, [],
//48 0 49 1 50 2 51 3
//52 4 53 5 54 6 55 7
.DIGIT, .DIGIT, .DIGIT, .DIGIT,
.DIGIT, .DIGIT, .DIGIT, .DIGIT,
//56 8 57 9 58 : 59 ;
//60 < 61 = 62 > 63 ?
.DIGIT, .DIGIT, [], [],
[], [], [], [],
//64 @ 65 A 66 B 67 C
//68 D 69 E 70 F 71 G
.PUNCT, .XUPPER, .XUPPER, .XUPPER,
.XUPPER, .XUPPER, .XUPPER, .UPPER,
//72 H 73 I 74 J 75 K
//76 L 77 M 78 N 79 O
.UPPER, .UPPER, .UPPER, .UPPER,
.UPPER, .UPPER, .UPPER, .UPPER,
//80 P 81 Q 82 R 83 S
//84 T 85 U 86 V 87 W
.UPPER, .UPPER, .UPPER, .UPPER,
.UPPER, .UPPER, .UPPER, .UPPER,
//88 X 89 Y 90 Z 91 [
//92 \ 93 ] 94 ^ 95 _
.UPPER, .UPPER, .UPPER, [],
.PUNCT, [], [], .UNDER,
//96 ` 97 a 98 b 99 c
//100 d 101 e 102 f 103 g
.PUNCT, .XLOWER, .XLOWER, .XLOWER,
.XLOWER, .XLOWER, .XLOWER, .LOWER,
//104 h 105 i 106 j 107 k
//108 l 109 m 110 n 111 o
.LOWER, .LOWER, .LOWER, .LOWER,
.LOWER, .LOWER, .LOWER, .LOWER,
//112 p 113 q 114 r 115 s
//116 t 117 u 118 v 119 w
.LOWER, .LOWER, .LOWER, .LOWER,
.LOWER, .LOWER, .LOWER, .LOWER,
//120 x 121 y 122 z 123 {
//124 | 125 } 126 ~ 127 DEL
.LOWER, .LOWER, .LOWER, [],
[], [] , [] , [])

private typealias CharacterInfoTable = (
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info,
Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info, Character.Info
)
15 changes: 7 additions & 8 deletions Sources/SwiftParser/Lexer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2161,13 +2161,13 @@ extension Lexer.Cursor {

extension Unicode.Scalar {
var isValidIdentifierContinuationCodePoint: Bool {
let c = self.value
if c < 0x80 {
return self.isAsciiIdentifierContinue || c == UInt32(UInt8(ascii: "$"))
if self.isASCII {
return self.isAsciiIdentifierContinue
}

// N1518: Recommendations for extended identifier characters for C and C++
// Proposed Annex X.1: Ranges of characters allowed
let c = self.value
return c == 0x00A8 || c == 0x00AA || c == 0x00AD || c == 0x00AF
|| (c >= 0x00B2 && c <= 0x00B5) || (c >= 0x00B7 && c <= 0x00BA)
|| (c >= 0x00BC && c <= 0x00BE) || (c >= 0x00C0 && c <= 0x00D6)
Expand Down Expand Up @@ -2217,17 +2217,16 @@ extension Unicode.Scalar {
}

var isValidIdentifierStartCodePoint: Bool {
guard self.isValidIdentifierContinuationCodePoint else {
return false
if (self.isASCII) {
return self.isAsciiIdentifierStart
}

let c = self.value
if c < 0x80 && (self.isDigit || c == UInt8(ascii: "$")) {
guard self.isValidIdentifierContinuationCodePoint else {
return false
}

// N1518: Recommendations for extended identifier characters for C and C++
// Proposed Annex X.2: Ranges of characters disallowed initially
let c = self.value
if ((c >= 0x0300 && c <= 0x036F) ||
(c >= 0x1DC0 && c <= 0x1DFF) ||
(c >= 0x20D0 && c <= 0x20FF) ||
Expand Down