Skip to content

Commit 7dba88d

Browse files
authored
Merge pull request #2374 from drodriguez/cookies-comma-handling
Improve cookie parsing algorithm.
2 parents 16df512 + 0faf933 commit 7dba88d

File tree

2 files changed

+314
-66
lines changed

2 files changed

+314
-66
lines changed

Foundation/HTTPCookie.swift

Lines changed: 181 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,28 @@ extension HTTPCookiePropertyKey {
6969
internal static let created = HTTPCookiePropertyKey(rawValue: "Created")
7070
}
7171

72+
internal extension HTTPCookiePropertyKey {
73+
internal static let httpOnly = HTTPCookiePropertyKey(rawValue: "HttpOnly")
74+
75+
static private let _setCookieAttributes: [String: HTTPCookiePropertyKey] = {
76+
// Only some attributes are valid in the Set-Cookie header.
77+
let validProperties: [HTTPCookiePropertyKey] = [
78+
.expires, .maximumAge, .domain, .path, .secure, .comment,
79+
.commentURL, .discard, .port, .version, .httpOnly
80+
]
81+
let canonicalNames = validProperties.map { $0.rawValue.lowercased() }
82+
return Dictionary(uniqueKeysWithValues: zip(canonicalNames, validProperties))
83+
}()
84+
85+
init?(attributeName: String) {
86+
let canonical = attributeName.lowercased()
87+
switch HTTPCookiePropertyKey._setCookieAttributes[canonical] {
88+
case let property?: self = property
89+
case nil: return nil
90+
}
91+
}
92+
}
93+
7294
/// `HTTPCookie` represents an http cookie.
7395
///
7496
/// An `HTTPCookie` instance represents a single http cookie. It is
@@ -123,11 +145,6 @@ open class HTTPCookie : NSObject {
123145
static let _allFormatters: [DateFormatter]
124146
= [_formatter1, _formatter2, _formatter3]
125147

126-
static let _attributes: [HTTPCookiePropertyKey]
127-
= [.name, .value, .originURL, .version, .domain,
128-
.path, .secure, .expires, .comment, .commentURL,
129-
.discard, .maximumAge, .port]
130-
131148
/// Initialize a HTTPCookie object with a dictionary of parameters
132149
///
133150
/// - Parameter properties: The dictionary of properties to be used to
@@ -348,8 +365,12 @@ open class HTTPCookie : NSObject {
348365
} else {
349366
_commentURL = nil
350367
}
351-
_HTTPOnly = false
352368

369+
if let httpOnlyString = properties[.httpOnly] as? String {
370+
_HTTPOnly = httpOnlyString == "TRUE"
371+
} else {
372+
_HTTPOnly = false
373+
}
353374

354375
_properties = [
355376
.created : Date().timeIntervalSinceReferenceDate, // Cocoa Compatibility
@@ -412,30 +433,74 @@ open class HTTPCookie : NSObject {
412433
/// - Returns: An array of HTTPCookie objects
413434
open class func cookies(withResponseHeaderFields headerFields: [String : String], for URL: URL) -> [HTTPCookie] {
414435

415-
//HTTP Cookie parsing based on RFC 6265: https://tools.ietf.org/html/rfc6265
416-
//Though RFC6265 suggests that multiple cookies cannot be folded into a single Set-Cookie field, this is
417-
//pretty common. It also suggests that commas and semicolons among other characters, cannot be a part of
436+
// HTTP Cookie parsing based on RFC 6265: https://tools.ietf.org/html/rfc6265
437+
// Though RFC6265 suggests that multiple cookies cannot be folded into a single Set-Cookie field, this is
438+
// pretty common. It also suggests that commas and semicolons among other characters, cannot be a part of
418439
// names and values. This implementation takes care of multiple cookies in the same field, however it doesn't
419-
//support commas and semicolons in names and values(except for dates)
440+
// support commas and semicolons in names and values(except for dates)
420441

421442
guard let cookies: String = headerFields["Set-Cookie"] else { return [] }
422443

423-
let nameValuePairs = cookies.components(separatedBy: ";") //split the name/value and attribute/value pairs
424-
.map({$0.trim()}) //trim whitespaces
425-
.map({removeCommaFromDate($0)}) //get rid of commas in dates
426-
.flatMap({$0.components(separatedBy: ",")}) //cookie boundaries are marked by commas
427-
.map({$0.trim()}) //trim again
428-
.filter({$0.caseInsensitiveCompare("HTTPOnly") != .orderedSame}) //we don't use HTTPOnly, do we?
429-
.flatMap({createNameValuePair(pair: $0)}) //create Name and Value properties
444+
var httpCookies: [HTTPCookie] = []
430445

431-
//mark cookie boundaries in the name-value array
432-
var cookieIndices = (0..<nameValuePairs.count).filter({nameValuePairs[$0].hasPrefix("Name")})
433-
cookieIndices.append(nameValuePairs.count)
446+
// Let's do old school parsing, which should allow us to handle the
447+
// embedded commas correctly.
448+
var idx: String.Index = cookies.startIndex
449+
let end: String.Index = cookies.endIndex
450+
while idx < end {
451+
// Skip leading spaces.
452+
while idx < end && cookies[idx].isSpace {
453+
idx = cookies.index(after: idx)
454+
}
455+
let cookieStartIdx: String.Index = idx
456+
var cookieEndIdx: String.Index = idx
457+
458+
while idx < end {
459+
// Scan to the next comma, but check that the comma is not a
460+
// legal comma in a value, by looking ahead for the token,
461+
// which indicates the comma was separating cookies.
462+
let cookiesRest = cookies[idx..<end]
463+
if let commaIdx = cookiesRest.firstIndex(of: ",") {
464+
// We are looking for WSP* TOKEN_CHAR+ WSP* '='
465+
var lookaheadIdx = cookies.index(after: commaIdx)
466+
// Skip whitespace
467+
while lookaheadIdx < end && cookies[lookaheadIdx].isSpace {
468+
lookaheadIdx = cookies.index(after: lookaheadIdx)
469+
}
470+
// Skip over the token characters
471+
var tokenLength = 0
472+
while lookaheadIdx < end && cookies[lookaheadIdx].isTokenCharacter {
473+
lookaheadIdx = cookies.index(after: lookaheadIdx)
474+
tokenLength += 1
475+
}
476+
// Skip whitespace
477+
while lookaheadIdx < end && cookies[lookaheadIdx].isSpace {
478+
lookaheadIdx = cookies.index(after: lookaheadIdx)
479+
}
480+
// Check there was a token, and there's an equals.
481+
if lookaheadIdx < end && cookies[lookaheadIdx] == "=" && tokenLength > 0 {
482+
// We found a token after the comma, this is a cookie
483+
// separator, and not an embedded comma.
484+
idx = cookies.index(after: commaIdx)
485+
cookieEndIdx = commaIdx
486+
break
487+
}
488+
// Otherwise, keep scanning from the comma.
489+
idx = cookies.index(after: commaIdx)
490+
cookieEndIdx = idx
491+
} else {
492+
// No more commas, skip to the end.
493+
idx = end
494+
cookieEndIdx = end
495+
break
496+
}
497+
}
434498

435-
//bake the cookies
436-
var httpCookies: [HTTPCookie] = []
437-
for i in 0..<cookieIndices.count-1 {
438-
if let aCookie = createHttpCookie(url: URL, pairs: nameValuePairs[cookieIndices[i]..<cookieIndices[i+1]]) {
499+
if cookieEndIdx <= cookieStartIdx {
500+
continue
501+
}
502+
503+
if let aCookie = createHttpCookie(url: URL, cookie: String(cookies[cookieStartIdx..<cookieEndIdx])) {
439504
httpCookies.append(aCookie)
440505
}
441506
}
@@ -444,62 +509,101 @@ open class HTTPCookie : NSObject {
444509
}
445510

446511
//Bake a cookie
447-
private class func createHttpCookie(url: URL, pairs: ArraySlice<String>) -> HTTPCookie? {
512+
private class func createHttpCookie(url: URL, cookie: String) -> HTTPCookie? {
448513
var properties: [HTTPCookiePropertyKey : Any] = [:]
449-
for pair in pairs {
450-
let name = pair.components(separatedBy: "=")[0]
451-
var value = pair.components(separatedBy: "\(name)=")[1] //a value can have an "="
452-
if canonicalize(name) == .expires {
453-
value = value.unmaskCommas() //re-insert the comma
514+
let scanner = Scanner(string: cookie)
515+
516+
guard let nameValuePair = scanner.scanUpToString(";") else {
517+
// if the scanner does not read anything, there's no cookie
518+
return nil
519+
}
520+
521+
guard case (let name?, let value?) = splitNameValue(nameValuePair) else {
522+
return nil
523+
}
524+
525+
properties[.name] = name
526+
properties[.value] = value
527+
properties[.originURL] = url
528+
529+
while scanner.scanString(";") != nil {
530+
if let attribute = scanner.scanUpToString(";") {
531+
switch splitNameValue(attribute) {
532+
case (nil, _):
533+
// ignore empty attribute names
534+
break
535+
case (let name?, nil):
536+
switch HTTPCookiePropertyKey(attributeName: name) {
537+
case .secure?:
538+
properties[.secure] = "TRUE"
539+
case .discard?:
540+
properties[.discard] = "TRUE"
541+
case .httpOnly?:
542+
properties[.httpOnly] = "TRUE"
543+
default:
544+
// ignore unknown attributes
545+
break
546+
}
547+
case (let name?, let value?):
548+
switch HTTPCookiePropertyKey(attributeName: name) {
549+
case .comment?:
550+
properties[.comment] = value
551+
case .commentURL?:
552+
properties[.commentURL] = value
553+
case .domain?:
554+
properties[.domain] = value
555+
case .maximumAge?:
556+
properties[.maximumAge] = value
557+
case .path?:
558+
properties[.path] = value
559+
case .port?:
560+
properties[.port] = value
561+
case .version?:
562+
properties[.version] = value
563+
case .expires?:
564+
properties[.expires] = value
565+
default:
566+
// ignore unknown attributes
567+
break
568+
}
569+
}
454570
}
455-
properties[canonicalize(name)] = value
456571
}
457572

458573
// If domain wasn't provided, extract it from the URL
459574
if properties[.domain] == nil {
460575
properties[.domain] = url.host
461576
}
462577

463-
//the default Path is "/"
464-
if properties[.path] == nil {
578+
// the default Path is "/"
579+
if let path = properties[.path] as? String, path.first == "/" {
580+
// do nothing
581+
} else {
465582
properties[.path] = "/"
466583
}
467584

468585
return HTTPCookie(properties: properties)
469586
}
470587

471-
//we pass this to a map()
472-
private class func removeCommaFromDate(_ value: String) -> String {
473-
if value.hasPrefix("Expires") || value.hasPrefix("expires") {
474-
return value.maskCommas()
588+
private class func splitNameValue(_ pair: String) -> (name: String?, value: String?) {
589+
let scanner = Scanner(string: pair)
590+
591+
guard let name = scanner.scanUpToString("=")?.trim(),
592+
!name.isEmpty else {
593+
// if the scanner does not read anything, or the trimmed name is
594+
// empty, there's no name=value
595+
return (nil, nil)
475596
}
476-
return value
477-
}
478597

479-
//These cookie attributes are defined in RFC 6265 and 2965(which is obsolete)
480-
//HTTPCookie supports these
481-
private class func isCookieAttribute(_ string: String) -> Bool {
482-
return _attributes.first(where: {$0.rawValue.caseInsensitiveCompare(string) == .orderedSame}) != nil
483-
}
598+
guard scanner.scanString("=") != nil else {
599+
// if the scanner does not find =, there's no value
600+
return (name, nil)
601+
}
484602

485-
//Cookie attribute names are case-insensitive as per RFC6265: https://tools.ietf.org/html/rfc6265
486-
//but HTTPCookie needs only the first letter of each attribute in uppercase
487-
private class func canonicalize(_ name: String) -> HTTPCookiePropertyKey {
488-
let idx = _attributes.firstIndex(where: {$0.rawValue.caseInsensitiveCompare(name) == .orderedSame})!
489-
return _attributes[idx]
490-
}
603+
let location = scanner.scanLocation
604+
let value = String(pair[pair.index(pair.startIndex, offsetBy: location)..<pair.endIndex]).trim()
491605

492-
//A name=value pair should be translated to two properties, Name=name and Value=value
493-
private class func createNameValuePair(pair: String) -> [String] {
494-
if pair.caseInsensitiveCompare(HTTPCookiePropertyKey.secure.rawValue) == .orderedSame {
495-
return ["Secure=TRUE"]
496-
}
497-
let name = pair.components(separatedBy: "=")[0]
498-
let value = pair.components(separatedBy: "\(name)=")[1]
499-
if !isCookieAttribute(name) {
500-
return ["Name=\(name)", "Value=\(value)"]
501-
}
502-
return [pair]
606+
return (name, value)
503607
}
504608

505609
/// Returns a dictionary representation of the receiver.
@@ -654,12 +758,24 @@ fileprivate extension String {
654758
func trim() -> String {
655759
return self.trimmingCharacters(in: .whitespacesAndNewlines)
656760
}
761+
}
657762

658-
func maskCommas() -> String {
659-
return self.replacingOccurrences(of: ",", with: "&comma")
763+
fileprivate extension Character {
764+
var isSpace: Bool {
765+
return self == " " || self == "\t" || self == "\n" || self == "\r"
660766
}
661767

662-
func unmaskCommas() -> String {
663-
return self.replacingOccurrences(of: "&comma", with: ",")
768+
var isTokenCharacter: Bool {
769+
guard let asciiValue = self.asciiValue else {
770+
return false
771+
}
772+
773+
// CTL, 0-31 and DEL (127)
774+
if asciiValue <= 31 || asciiValue >= 127 {
775+
return false
776+
}
777+
778+
let nonTokenCharacters = "()<>@,;:\\\"/[]?={} \t"
779+
return !nonTokenCharacters.contains(self)
664780
}
665781
}

0 commit comments

Comments
 (0)