Skip to content

Commit ccd9926

Browse files
committed
CSV parsing example
1 parent 406a025 commit ccd9926

File tree

1 file changed

+144
-0
lines changed

1 file changed

+144
-0
lines changed
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
/// Error thrown by the CSV parsing functions when the input is malformed.
struct ParseError: Error {
    /// Human-readable description of what the parser expected to find.
    var message: String
}
4+
5+
// UTF-16 code units of the characters the parser treats specially.
// The force-unwraps are safe: every literal below is non-empty.
let comma = ",".utf16.first!
let newline = "\n".utf16.first!
// Must be "\r" (U+000D); building it from "\n" would make it identical to
// `newline`, so carriage returns would never be recognised.
let carriageReturn = "\r".utf16.first!
let quote = "\"".utf16.first!
9+
10+
/// Consume the rest of a quoted field from `remainder`.
///
/// The caller must already have removed the opening quote. Text is copied
/// up to each `"`; a doubled quote (`""`) is an escaped quote and becomes a
/// single `"` in the result. A quote followed by a comma ends the field and
/// the comma is consumed. A quote followed by anything else (or by the end
/// of the input) also ends the field, and whatever follows is left in
/// `remainder` for the caller.
///
/// - Parameter remainder: the unparsed input; advanced past the field.
/// - Returns: the unescaped field contents. This implementation never
///   returns `nil`; it either returns a value or throws.
/// - Throws: `ParseError` when the input ends before a closing quote.
@inline(__always) func parseQuotedField(_ remainder: inout Substring) throws -> Substring? {
    var result: Substring = "" // we accumulate the result

    while !remainder.isEmpty {
        // `firstIndex(of:)` replaces the deprecated `index(of:)`.
        guard let nextQuoteIndex = remainder.firstIndex(of: "\"") else {
            throw ParseError(message: "Expected a closing \"")
        }

        // Append until the next quote
        result += remainder.prefix(upTo: nextQuoteIndex)
        remainder.remove(upToAndIncluding: nextQuoteIndex)

        if let peek = remainder.utf16.first {
            switch peek {
            case quote: // two quotes after each other is an escaped quote
                remainder.removeFirst()
                result.append("\"")
            case comma: // field ending
                remainder.removeFirst()
                return result
            default: // anything else stays in `remainder` for the caller
                return result
            }
        } else {
            // End of the string
            return result
        }
    }

    // Reached when the input runs out while still inside the quotes
    // (nothing after the opening quote, or right after an escaped quote).
    throw ParseError(message: "Expected a closing quote")
}
41+
42+
/// Consume a single field from `remainder`.
///
/// A field starting with `"` is handed to `parseQuotedField`. Otherwise the
/// field runs up to the next comma (which is consumed), the next line
/// terminator (which is left in place), or the end of the input.
///
/// Both newline and carriage return count as line terminators, so the
/// fields of a CRLF-terminated line do not end up with a trailing "\r".
///
/// Note: a comma directly followed by a line terminator or the end of the
/// input does not produce a final empty field, because the next call sees
/// the terminator (or nothing) and returns `nil`.
///
/// - Parameter remainder: the unparsed input; advanced past the field.
/// - Returns: the field, or `nil` when `remainder` is empty or starts with
///   a line terminator (i.e. the current line has no more fields).
/// - Throws: whatever `parseQuotedField` throws for a malformed quoted field.
@inline(__always) func parseField(_ remainder: inout Substring) throws -> Substring? {
    guard let start = remainder.utf16.first else { return nil }
    switch start {
    case quote:
        remainder.removeFirst() // remove the first quote
        return try parseQuotedField(&remainder)
    case newline, carriageReturn:
        return nil
    default:
        // This is the most common case and should ideally be super fast...
        var index = remainder.utf16.startIndex
        while index < remainder.utf16.endIndex {
            switch remainder.utf16[index] {
            case comma:
                // Return the text before the comma, then drop it and the comma.
                defer { remainder.remove(upToAndIncluding: index) }
                return remainder.prefix(upTo: index)
            case newline, carriageReturn:
                // Leave the terminator in place so the caller can detect the end of the line.
                let result = remainder.prefix(upTo: index)
                remainder.remove(upTo: index)
                return result
            default:
                remainder.utf16.formIndex(after: &index)
            }
        }
        // No comma or line terminator left: the rest of the input is the field.
        let result = remainder
        remainder.removeAll()
        return result
    }
}
72+
73+
extension Substring {
    /// Drops everything before `index`, so the character at `index`
    /// becomes the first one.
    mutating func remove(upTo index: Index) {
        self = self[index...]
    }

    /// Drops everything up to and including the character at `index`.
    mutating func remove(upToAndIncluding index: Index) {
        let next = self.index(after: index)
        self = self[next...]
    }
}
82+
83+
/// Consume a single line from `remainder`.
///
/// Fields are read with `parseField` until it returns `nil`. Each field is
/// passed to `processField` together with `result` and the field's
/// zero-based position within the line. Afterwards all leading newline / CR
/// code units are stripped from `remainder`.
///
/// - Parameters:
///   - remainder: the unparsed input; advanced past the line and its line break.
///   - result: caller-owned state handed to `processField` for every field.
///   - processField: called once per field with the state, field number and field text.
/// - Throws: whatever `parseField` throws, or a `ParseError` when input
///   remains that does not begin with a newline or CR.
/// - Returns: `true` only when at least one field was read and `remainder`
///   is still non-empty after the line break has been consumed.
func parseLine<State>(_ remainder: inout Substring, result: inout State, processField: (inout State, Int, Substring) -> ()) throws -> Bool {
    func isLineBreak(_ unit: UInt16) -> Bool {
        return unit == carriageReturn || unit == newline
    }

    var fieldNumber = 0
    while true {
        guard let field = try parseField(&remainder) else { break }
        processField(&result, fieldNumber, field)
        fieldNumber += 1
    }

    if let next = remainder.utf16.first {
        if !isLineBreak(next) {
            throw ParseError(message: "Expected a newline or CR, got \(next)")
        }

        // Swallow the whole run of line-break code units.
        while let unit = remainder.utf16.first, isLineBreak(unit) {
            remainder.utf16.removeFirst()
        }
    }

    let hasMoreInput = !remainder.isEmpty
    return hasMoreInput && fieldNumber > 0
}
105+
106+
import Foundation
107+
108+
/// Runs `f`, prints how long it took in seconds, and returns its result.
///
/// - Parameters:
///   - name: label for the printed measurement; defaults to the calling function.
///   - line: line number for the printed measurement; defaults to the call site.
///   - f: the work to measure.
/// - Returns: whatever `f` returns.
/// - Throws: rethrows any error from `f`; nothing is printed in that case.
func time<Result>(name: StaticString = #function, line: Int = #line, _ f: () throws -> Result) rethrows -> Result {
    let began = DispatchTime.now()
    let value = try f()
    let finished = DispatchTime.now()

    let elapsedNanoseconds = finished.uptimeNanoseconds - began.uptimeNanoseconds
    let diff: Double = Double(elapsedNanoseconds) / 1_000_000_000
    print("\(name) (line \(line)): \(diff) sec")
    return value
}
116+
117+
// Benchmark: read the whole file, parse it line by line while discarding
// every field, and print progress plus the number of lines parsed.
try time {
    // URL for this file: http://www.maxmind.com/download/worldcities/worldcitiespop.txt.gz
    // let file = URL(fileURLWithPath: "/Users/chris/Downloads/1489325/stops.txt")
    let file = URL(fileURLWithPath: "worldcitiespop.txt")

    // Appending a string literal (here `+ ""`) is a trick by Ole Begemann, which forces the
    // String to be a Swift String (not an NSString). It makes it more than twice as fast on my computer...
    let contents = try String(contentsOf: file, encoding: .isoLatin1) + ""

    var remainder = contents[...]

    var lineCount = 0
    var state: () = () // no per-field state is collected; fields are just discarded

    while !remainder.isEmpty {
        _ = try parseLine(&remainder, result: &state) { _, _, _ in
            ()
        }

        // Show the counter for the first ten lines...
        if lineCount < 10 {
            print("result: \(lineCount)")
        }
        lineCount += 1
        // ...and then a progress tick every 100,000 lines.
        if lineCount % 100_000 == 0 {
            print(lineCount)
        }
    }
    print(lineCount)

}

0 commit comments

Comments
 (0)