Skip to content

Commit 7df5051

Browse files
committed
[RFC 9651] Add support for Display String type to RawStructuredFieldValues (apple#41)
Motivation: [RFC 9651](https://www.ietf.org/rfc/rfc9651.html) added the Display String Structured Type. Modifications: - Implement the parser and serializer for Display String in the RawStructuredFieldValues module. Result: The RawStructuredFieldValues module will support the Display String type.
1 parent 8b35bf4 commit 7df5051

File tree

7 files changed

+244
-0
lines changed

7 files changed

+244
-0
lines changed

Sources/RawStructuredFieldValues/ComponentTypes.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ extension BareItem {
110110

111111
case .date:
112112
throw StructuredHeaderError.invalidItem
113+
case .displayString:
114+
throw StructuredHeaderError.invalidItem
113115
}
114116
}
115117
}
@@ -141,6 +143,9 @@ public enum RFC9651BareItem: Sendable {
141143

142144
/// A date item.
143145
case date(Int)
146+
147+
/// A display string item.
148+
case displayString(String)
144149
}
145150

146151
extension RFC9651BareItem: ExpressibleByBooleanLiteral {

Sources/RawStructuredFieldValues/Errors.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ public struct StructuredHeaderError: Error, Sendable {
2727
case invalidBoolean
2828
case invalidToken
2929
case invalidDate
30+
case invalidDisplayString
3031
case invalidList
3132
case invalidDictionary
3233
case missingKey
@@ -53,6 +54,7 @@ extension StructuredHeaderError {
5354
public static let invalidBoolean = StructuredHeaderError(.invalidBoolean)
5455
public static let invalidToken = StructuredHeaderError(.invalidToken)
5556
public static let invalidDate = StructuredHeaderError(.invalidDate)
57+
public static let invalidDisplayString = StructuredHeaderError(.invalidDisplayString)
5658
public static let invalidList = StructuredHeaderError(.invalidList)
5759
public static let invalidDictionary = StructuredHeaderError(.invalidDictionary)
5860
public static let missingKey = StructuredHeaderError(.missingKey)

Sources/RawStructuredFieldValues/FieldParser.swift

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,8 @@ extension StructuredFieldValueParser {
224224
return try self._parseAToken()
225225
case asciiAt:
226226
return try self._parseADate()
227+
case asciiPercent:
228+
return try self._parseADisplayString()
227229
default:
228230
throw StructuredHeaderError.invalidItem
229231
}
@@ -491,6 +493,67 @@ extension StructuredFieldValueParser {
491493
return try self._parseAnIntegerOrDecimal(isDate: true)
492494
}
493495

496+
/// Parses a Display String item (RFC 9651) from the front of `underlyingData`.
///
/// The expected wire form is `%"` followed by printable ASCII characters and
/// `%xx` escapes (two lowercase hex digits each), terminated by `"`. The
/// unescaped bytes are collected and must form a well-formed UTF-8 sequence.
///
/// On entry the first byte must be `%` (checked with `assert`). On success the
/// whole item, including the closing quote, has been consumed.
///
/// - Returns: `.displayString` holding the decoded text.
/// - Throws: `StructuredHeaderError.invalidDisplayString` when the opening
///   quote is missing, a control or non-ASCII byte appears unescaped, an
///   escape is truncated or is not two lowercase hex digits, the decoded
///   bytes are not valid UTF-8, or the input ends before the closing quote.
private mutating func _parseADisplayString() throws -> RFC9651BareItem {
    assert(self.underlyingData.first == asciiPercent)
    self.underlyingData.consumeFirst()

    guard self.underlyingData.first == asciiDquote else {
        throw StructuredHeaderError.invalidDisplayString
    }

    self.underlyingData.consumeFirst()

    var byteArray = [UInt8]()

    while let char = self.underlyingData.first {
        self.underlyingData.consumeFirst()

        switch char {
        case 0x00...0x1F, 0x7F...:
            // Control characters, DEL and non-ASCII bytes must be percent-escaped.
            throw StructuredHeaderError.invalidDisplayString
        case asciiPercent:
            // An escape needs exactly two more bytes after the `%`.
            if self.underlyingData.count < 2 {
                throw StructuredHeaderError.invalidDisplayString
            }

            let startIndex = self.underlyingData.startIndex
            let secondIndex = self.underlyingData.index(after: startIndex)
            let octetHex = self.underlyingData[...secondIndex]

            self.underlyingData = self.underlyingData.dropFirst(2)

            // The first check is a coarse filter (digits or lowercase letters);
            // `decodeHex` then rejects anything that is not an actual hex digit.
            guard
                octetHex.allSatisfy({ asciiDigits.contains($0) || asciiLowercases.contains($0) }),
                let octet = UInt8.decodeHex(octetHex)
            else {
                throw StructuredHeaderError.invalidDisplayString
            }

            byteArray.append(octet)
        case asciiDquote:
            // Decode without going through a C-string API: `byteArray` is not
            // NUL-terminated and may legitimately contain a decoded `%00`.
            // `String(decoding:as:)` replaces ill-formed sequences with U+FFFD,
            // so the result re-encodes to the same bytes only when the input
            // was already well-formed UTF-8.
            let decoded = String(decoding: byteArray, as: UTF8.self)

            guard decoded.utf8.elementsEqual(byteArray) else {
                throw StructuredHeaderError.invalidDisplayString
            }

            return .displayString(decoded)
        default:
            byteArray.append(char)
        }
    }

    // Ran out of input before the closing double quote.
    throw StructuredHeaderError.invalidDisplayString
}
556+
494557
private mutating func _parseParameters() throws -> OrderedMap<Key, RFC9651BareItem> {
495558
var parameters = OrderedMap<Key, RFC9651BareItem>()
496559

@@ -643,3 +706,34 @@ extension StrippingStringEscapesCollection.Index: Comparable {
643706
lhs._baseIndex < rhs._baseIndex
644707
}
645708
}
709+
710+
extension UInt8 {
    /// Decodes one or two lowercase ASCII hex digits into a byte.
    ///
    /// - Parameter bytes: ASCII bytes of the hex digits, most significant first.
    /// - Returns: The decoded value, or `nil` when `bytes` is empty, holds more
    ///   than two digits (which cannot fit in a `UInt8`), or contains a byte
    ///   that is not `0`-`9` or `a`-`f`.
    fileprivate static func decodeHex<Bytes: RandomAccessCollection>(_ bytes: Bytes) -> Self?
    where Bytes.Element == Self {
        // Bound the length up front so the accumulation below can never
        // overflow or silently drop leading digits.
        guard !bytes.isEmpty, bytes.count <= 2 else { return nil }

        var result = Self(0)

        for byte in bytes {
            guard let nibble = Self.htoi(byte) else { return nil }
            result = (result << 4) | nibble
        }

        return result
    }

    /// Converts a single lowercase ASCII hex digit to its numeric value (0-15).
    ///
    /// - Returns: `nil` for any byte outside `0`-`9` and `a`-`f`; uppercase
    ///   digits are deliberately not accepted.
    private static func htoi(_ value: Self) -> Self? {
        let digitZero = Self(ascii: "0")
        let letterA = Self(ascii: "a")

        switch value {
        case digitZero...digitZero + 9:
            return value - digitZero
        case letterA...letterA + 5:
            return value - letterA + 10
        default:
            return nil
        }
    }
}

Sources/RawStructuredFieldValues/FieldSerializer.swift

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,26 @@ extension StructuredFieldValueSerializer {
213213
}
214214

215215
self.data.append(contentsOf: String(date, radix: 10).utf8)
216+
case .displayString(let displayString):
    // Serializes a Display String: `%"`, then the UTF-8 bytes of the value
    // with `%`, `"`, control characters, DEL and non-ASCII bytes written as
    // `%xx`, then a closing `"`.

    // Lowercase hex alphabet indexed by nibble value. Emitting both nibbles
    // explicitly guarantees two digits per escape; formatting the byte with
    // `String(_:radix:)` yields a single digit for values below 0x10
    // (e.g. `%9` for a tab), which is not a valid escape.
    let hexDigits = Array("0123456789abcdef".utf8)

    self.data.append(asciiPercent)
    self.data.append(asciiDquote)

    for byte in displayString.utf8 {
        let needsEscape =
            byte == asciiPercent
            || byte == asciiDquote
            || byte <= 0x1F
            || byte >= 0x7F

        if needsEscape {
            self.data.append(asciiPercent)
            self.data.append(hexDigits[Int(byte >> 4)])
            self.data.append(hexDigits[Int(byte & 0x0F)])
        } else {
            self.data.append(byte)
        }
    }

    self.data.append(asciiDquote)
216236
}
217237
}
218238
}

Sources/sh-parser/main.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ extension RFC9651BareItem {
171171
return "decimal \(d)"
172172
case .date(let date):
173173
return "date \(date)"
174+
case .displayString(let displayString):
175+
return "display string \(displayString)"
174176
}
175177
}
176178
}

Tests/StructuredFieldValuesTests/StructuredFieldParserTests.swift

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,16 @@ final class StructuredFieldParserTests: XCTestCase {
8787

8888
XCTAssertEqual(typeName, "date", "\(fixtureName): Expected type date, got type \(typeName)")
8989
XCTAssertEqual(typeValue, baseDate, "\(fixtureName): Got \(baseDate), expected \(typeValue)")
90+
case (.displayString(let baseDisplayString), .dictionary(let typeDictionary)):
91+
guard typeDictionary.count == 2, case .string(let typeName) = typeDictionary["__type"],
92+
case .string(let typeValue) = typeDictionary["value"]
93+
else {
94+
XCTFail("\(fixtureName): Unexpected type dict \(typeDictionary)")
95+
return
96+
}
97+
98+
XCTAssertEqual(typeName, "displaystring", "\(fixtureName): Expected type displaystring, got type \(typeName)")
99+
XCTAssertEqual(typeValue, baseDisplayString, "\(fixtureName): Got \(baseDisplayString), expected \(typeValue)")
90100
default:
91101
XCTFail("\(fixtureName): Got \(bareItem), expected \(schema)")
92102
}
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
[
2+
{
3+
"name": "basic display string (ascii content)",
4+
"raw": ["%\"foo bar\""],
5+
"header_type": "item",
6+
"expected": [{"__type": "displaystring", "value": "foo bar"}, {}]
7+
},
8+
{
9+
"name": "all printable ascii",
10+
"raw": ["%\" !%22#$%25&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\""],
11+
"header_type": "item",
12+
"expected": [{"__type": "displaystring", "value": " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"}, {}]
13+
},
14+
{
15+
"name": "non-ascii display string (uppercase escaping)",
16+
"raw": ["%\"f%C3%BC%C3%BC\""],
17+
"canonical": ["%\"f%c3%bc%c3%bc\""],
18+
"header_type": "item",
19+
"must_fail": true
20+
},
21+
{
22+
"name": "non-ascii display string (lowercase escaping)",
23+
"raw": ["%\"f%c3%bc%c3%bc\""],
24+
"header_type": "item",
25+
"expected": [{"__type": "displaystring", "value": "füü"}, {}]
26+
},
27+
{
28+
"name": "tab in display string",
29+
"raw": ["%\"\t\""],
30+
"header_type": "item",
31+
"must_fail": true
32+
},
33+
{
34+
"name": "newline in display string",
35+
"raw": ["%\"\n\""],
36+
"header_type": "item",
37+
"must_fail": true
38+
},
39+
{
40+
"name": "single quoted display string",
41+
"raw": ["%'foo'"],
42+
"header_type": "item",
43+
"must_fail": true
44+
},
45+
{
46+
"name": "unquoted display string",
47+
"raw": ["%foo"],
48+
"header_type": "item",
49+
"must_fail": true
50+
},
51+
{
52+
"name": "display string missing initial quote",
53+
"raw": ["%foo\""],
54+
"header_type": "item",
55+
"must_fail": true
56+
},
57+
{
58+
"name": "unbalanced display string",
59+
"raw": ["%\"foo"],
60+
"header_type": "item",
61+
"must_fail": true
62+
},
63+
{
64+
"name": "display string quoting",
65+
"raw": ["%\"foo %22bar%22 \\ baz\""],
66+
"header_type": "item",
67+
"expected": [{"__type": "displaystring", "value": "foo \"bar\" \\ baz"}, {}]
68+
},
69+
{
70+
"name": "bad display string escaping",
71+
"raw": ["%\"foo %a"],
72+
"header_type": "item",
73+
"must_fail": true
74+
},
75+
{
76+
"name": "bad display string utf-8 (invalid 2-byte seq)",
77+
"raw": ["%\"%c3%28\""],
78+
"header_type": "item",
79+
"must_fail": true
80+
},
81+
{
82+
"name": "bad display string utf-8 (invalid sequence id)",
83+
"raw": ["%\"%a0%a1\""],
84+
"header_type": "item",
85+
"must_fail": true
86+
},
87+
{
88+
"name": "bad display string utf-8 (invalid hex)",
89+
"raw": ["%\"%g0%1w\""],
90+
"header_type": "item",
91+
"must_fail": true
92+
},
93+
{
94+
"name": "bad display string utf-8 (invalid 3-byte seq)",
95+
"raw": ["%\"%e2%28%a1\""],
96+
"header_type": "item",
97+
"must_fail": true
98+
},
99+
{
100+
"name": "bad display string utf-8 (invalid 4-byte seq)",
101+
"raw": ["%\"%f0%28%8c%28\""],
102+
"header_type": "item",
103+
"must_fail": true
104+
},
105+
{
106+
"name": "BOM in display string",
107+
"raw": ["%\"BOM: %ef%bb%bf\""],
108+
"header_type": "item",
109+
"expected": [{"__type": "displaystring", "value": "BOM: \uFEFF"}, {}]
110+
}
111+
]

0 commit comments

Comments
 (0)