|
| 1 | +//===---------- SyntaxText.swift - Unowned String Representation ---------===// |
| 2 | +// |
| 3 | +// This source file is part of the Swift.org open source project |
| 4 | +// |
| 5 | +// Copyright (c) 2014 - 2022 Apple Inc. and the Swift project authors |
| 6 | +// Licensed under Apache License v2.0 with Runtime Library Exception |
| 7 | +// |
| 8 | +// See https://swift.org/LICENSE.txt for license information |
| 9 | +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| 10 | +// |
| 11 | +//===----------------------------------------------------------------------===// |
| 12 | + |
| 13 | +#if canImport(Darwin) |
| 14 | +import Darwin |
| 15 | +#elseif canImport(Glibc) |
| 16 | +import Glibc |
| 17 | +#endif |
| 18 | + |
/// Represents a string.
///
/// This type does not own the string data. The data resides in some other
/// buffer whose lifetime extends past that of the `SyntaxText`.
///
/// `SyntaxText` is a `Collection` of `UInt8` which is _expected_ to be a UTF-8
/// encoded byte sequence. However, since it is essentially just a span of a
/// memory buffer, it may contain ill-formed UTF-8 sequences. Comparisons
/// (e.g. `==`, `hasPrefix(_:)`) are purely based on the byte sequences,
/// without any Unicode normalization.
///
/// Since it's just a byte sequence, `SyntaxText` can represent the exact source
/// buffer regardless of whether it is valid UTF-8. When creating a
/// `Swift.String`, ill-formed UTF-8 sequences are replaced with the Unicode
/// replacement character (`\u{FFFD}`).
@_spi(Testing) // SPI name is subject to change
public struct SyntaxText {
  /// The unowned span of bytes this text refers to.
  var buffer: UnsafeBufferPointer<UInt8>

  /// Creates a `SyntaxText` referring to `count` bytes starting at
  /// `baseAddress`.
  ///
  /// - Parameters:
  ///   - baseAddress: Start of the referenced memory. May be `nil` only when
  ///     `count` is zero (checked with `assert`).
  ///   - count: Number of bytes in the referenced memory.
  public init(baseAddress: UnsafePointer<UInt8>?, count: Int) {
    assert(count == 0 || baseAddress != nil,
           "If count is not zero, base address must be exist")
    buffer = .init(start: baseAddress, count: count)
  }

  /// Creates an empty `SyntaxText`.
  public init() {
    self.init(baseAddress: nil, count: 0)
  }

  /// Creates a `SyntaxText` from a `StaticString`.
  public init(_ string: StaticString) {
    self.init(baseAddress: string.utf8Start, count: string.utf8CodeUnitCount)
  }

  /// Creates a `SyntaxText` over the same memory as the given slice.
  public init(rebasing slice: SubSequence) {
    self.init(
      baseAddress: slice.base.baseAddress?.advanced(by: slice.startIndex),
      count: slice.count)
  }

  /// Base address of the memory range this string refers to.
  public var baseAddress: UnsafePointer<UInt8>? {
    buffer.baseAddress
  }

  /// Byte length of this string.
  public var count: Int {
    buffer.count
  }

  /// A Boolean value indicating whether this string has no bytes.
  public var isEmpty: Bool {
    buffer.isEmpty
  }

  /// Returns `true` if the memory range of this string is a part of `other`.
  ///
  /// `SyntaxText(rebasing: text[n ..< m]).isSlice(of: text)` is `true` as long
  /// as `n` and `m` are valid indices and `n < m`. An empty text is considered
  /// a slice only of another empty text.
  public func isSlice(of other: SyntaxText) -> Bool {
    // If either one is empty, return 'true' only if both are empty.
    guard !isEmpty && !other.isEmpty else {
      return isEmpty && other.isEmpty
    }
    // Both texts are non-empty here, so both are expected to have a base
    // address (see the assertion in 'init(baseAddress:count:)').
    guard let selfStart = baseAddress, let otherStart = other.baseAddress else {
      return false
    }
    return otherStart <= selfStart
      && selfStart + count <= otherStart + other.count
  }

  /// Returns `true` if `other` is a substring of this `SyntaxText`.
  ///
  /// An empty `other` is never considered to be contained.
  public func contains(_ other: SyntaxText) -> Bool {
    return firstRange(of: other) != nil
  }

  /// Finds and returns the range of the first occurrence of `other` within this
  /// string. Returns `nil` if `other` is empty or is not found.
  public func firstRange(of other: SyntaxText) -> Range<Index>? {
    // An empty needle never matches. Binding both base addresses once, outside
    // the loop, also covers an empty 'self' that has no base address.
    guard !other.isEmpty,
          let haystack = baseAddress,
          let needle = other.baseAddress else {
      return nil
    }
    // If 'other' is longer than 'self' there is no position left to try.
    let lastStart = count - other.count
    guard lastStart >= 0 else { return nil }
    for start in 0...lastStart
    where compareMemory(haystack + start, needle, other.count) {
      return start ..< (start + other.count)
    }
    return nil
  }

  /// Returns `true` if the string begins with the specified prefix.
  ///
  /// An empty `other` is a prefix of every string.
  public func hasPrefix(_ other: SyntaxText) -> Bool {
    guard count >= other.count else { return false }
    guard !other.isEmpty else { return true }
    return Self(rebasing: self[0 ..< other.count]) == other
  }

  /// Returns `true` if the string ends with the specified suffix.
  ///
  /// An empty `other` is a suffix of every string.
  public func hasSuffix(_ other: SyntaxText) -> Bool {
    guard count >= other.count else { return false }
    guard !other.isEmpty else { return true }
    return Self(rebasing: self[(count - other.count) ..< count]) == other
  }
}
| 131 | + |
/// `SyntaxText` is a random-access collection of `UInt8` bytes whose indices
/// and elements are forwarded from the underlying buffer.
extension SyntaxText: RandomAccessCollection {
  public typealias Element = UInt8
  public typealias Index = Int
  public typealias SubSequence = Slice<SyntaxText>

  /// Position of the first byte, as reported by the underlying buffer.
  public var startIndex: Index {
    return buffer.startIndex
  }

  /// Position one past the last byte, as reported by the underlying buffer.
  public var endIndex: Index {
    return buffer.endIndex
  }

  /// Accesses the byte stored at `position` in the underlying buffer.
  public subscript(position: Index) -> Element {
    return buffer[position]
  }
}
| 145 | + |
extension SyntaxText: Hashable {
  /// Returns `true` if both texts have the same length and the same bytes.
  public static func ==(lhs: SyntaxText, rhs: SyntaxText) -> Bool {
    // Texts of different lengths can never be equal.
    guard lhs.buffer.count == rhs.buffer.count else {
      return false
    }
    // Equal-length texts that are empty, or that start at the same address,
    // are equal without looking at the bytes.
    guard !lhs.isEmpty, lhs.buffer.baseAddress != rhs.buffer.baseAddress else {
      return true
    }
    return compareMemory(lhs.baseAddress!, rhs.baseAddress!, lhs.count)
  }

  /// Feeds the referenced bytes into `hasher`.
  public func hash(into hasher: inout Hasher) {
    let rawBytes = UnsafeRawBufferPointer(buffer)
    hasher.combine(bytes: rawBytes)
  }
}
| 161 | + |
/// Allows a `SyntaxText` to be written as a literal. Every literal kind is
/// received as a `StaticString` and forwarded to `init(_:)`.
extension SyntaxText: ExpressibleByStringLiteral {
  public init(stringLiteral value: StaticString) {
    self.init(value)
  }

  public init(unicodeScalarLiteral value: StaticString) {
    self.init(value)
  }

  public init(extendedGraphemeClusterLiteral value: StaticString) {
    self.init(value)
  }
}
| 167 | + |
extension SyntaxText: CustomStringConvertible {
  /// The text converted to a `String` via `String(syntaxText:)`.
  public var description: String {
    return String(syntaxText: self)
  }
}
| 171 | + |
extension SyntaxText: CustomDebugStringConvertible {
  /// The debug description of this text's `description` string.
  public var debugDescription: String {
    let rendered = description
    return rendered.debugDescription
  }
}
| 175 | + |
extension String {
  /// Creates a `String` from a `SyntaxText`.
  ///
  /// Ill-formed UTF-8 sequences in `syntaxText` are replaced with the Unicode
  /// replacement character `\u{FFFD}`.
  @_spi(Testing)
  public init(syntaxText: SyntaxText) {
    // An empty text maps directly to the empty string.
    if syntaxText.isEmpty {
      self = ""
      return
    }
    if #available(macOS 11.0, iOS 14.0, watchOS 7.0, tvOS 14.0, *) {
      // Copy the bytes straight into the new string's storage and report how
      // many of them were written.
      self.init(unsafeUninitializedCapacity: syntaxText.count) { destination in
        let (_, initializedCount) = destination.initialize(from: syntaxText.buffer)
        return initializedCount
      }
    } else {
      // Fallback where the capacity-based initializer is unavailable.
      self.init(decoding: syntaxText, as: UTF8.self)
    }
  }

  /// Runs `body` with a `SyntaxText` that refers to the contiguous memory of
  /// this string. Like `String.withUTF8(_:)`, this may mutate the string if
  /// it was not contiguous.
  ///
  /// - Parameter body: Closure that receives the `SyntaxText` view.
  /// - Returns: Whatever `body` returns.
  /// - Throws: Rethrows any error thrown by `body`.
  @_spi(Testing)
  public mutating func withSyntaxText<R>(
    _ body: (SyntaxText) throws -> R
  ) rethrows -> R {
    return try self.withUTF8 { bytes in
      let text = SyntaxText(baseAddress: bytes.baseAddress, count: bytes.count)
      return try body(text)
    }
  }
}
| 208 | + |
/// Returns `true` if the `count` bytes starting at `s1` are identical to the
/// `count` bytes starting at `s2`.
///
/// Uses `memcmp` when Darwin or Glibc can be imported and an element-wise
/// comparison otherwise. `count` must be greater than zero (checked with
/// `assert`).
private func compareMemory(
  _ s1: UnsafePointer<UInt8>, _ s2: UnsafePointer<UInt8>, _ count: Int
) -> Bool {
  assert(count > 0)
#if canImport(Darwin)
  let difference = Darwin.memcmp(s1, s2, count)
  return difference == 0
#elseif canImport(Glibc)
  let difference = Glibc.memcmp(s1, s2, count)
  return difference == 0
#else
  let lhsBytes = UnsafeBufferPointer(start: s1, count: count)
  let rhsBytes = UnsafeBufferPointer(start: s2, count: count)
  return lhsBytes.elementsEqual(rhsBytes)
#endif
}
0 commit comments