Skip to content

Commit 2c913fe

Browse files
Improving edit distance string extension
1 parent 060d523 commit 2c913fe

File tree

2 files changed

+80
-19
lines changed

2 files changed

+80
-19
lines changed

Sources/ArgumentParser/Utilities/StringExtensions.swift

Lines changed: 75 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -141,30 +141,86 @@ extension StringProtocol where SubSequence == Substring {
141141
return Swift.max(rows, columns)
142142
}
143143

144-
var matrix = Array(repeating: Array(repeating: 0, count: columns + 1), count: rows + 1)
145-
146-
for row in 1...rows {
147-
matrix[row][0] = row
144+
// Trim common prefix and suffix
145+
var selfStartTrim = self.startIndex
146+
var targetStartTrim = target.startIndex
147+
while selfStartTrim < self.endIndex &&
148+
targetStartTrim < target.endIndex &&
149+
self[selfStartTrim] == target[targetStartTrim] {
150+
self.formIndex(after: &selfStartTrim)
151+
target.formIndex(after: &targetStartTrim)
152+
}
153+
154+
var selfEndTrim = self.endIndex
155+
var targetEndTrim = target.endIndex
156+
157+
while selfEndTrim > selfStartTrim &&
158+
targetEndTrim > targetStartTrim {
159+
let selfIdx = self.index(before: selfEndTrim)
160+
let targetIdx = self.index(before: targetEndTrim)
161+
162+
guard self[selfIdx] == target[targetIdx] else {
163+
break
164+
}
165+
166+
self.formIndex(before: &selfEndTrim)
167+
target.formIndex(before: &targetEndTrim)
148168
}
149-
for column in 1...columns {
150-
matrix[0][column] = column
169+
170+
// Equal strings
171+
guard !(selfStartTrim == self.endIndex &&
172+
targetStartTrim == target.endIndex) else {
173+
return 0
151174
}
152175

153-
for row in 1...rows {
154-
for column in 1...columns {
155-
let source = self[self.index(self.startIndex, offsetBy: row - 1)]
156-
let target = target[target.index(target.startIndex, offsetBy: column - 1)]
157-
let cost = source == target ? 0 : 1
158-
159-
matrix[row][column] = Swift.min(
160-
matrix[row - 1][column] + 1,
161-
matrix[row][column - 1] + 1,
162-
matrix[row - 1][column - 1] + cost
163-
)
176+
// After trimming common prefix and suffix, self is empty.
177+
guard selfStartTrim < selfEndTrim else {
178+
return target.distance(from: targetStartTrim,
179+
to: targetEndTrim)
180+
}
181+
182+
// After trimming common prefix and suffix, target is empty.
183+
guard targetStartTrim < targetEndTrim else {
184+
return distance(from: selfStartTrim,
185+
to: selfEndTrim)
186+
}
187+
188+
let newSelf = self[selfStartTrim..<selfEndTrim]
189+
let newTarget = target[targetStartTrim..<targetEndTrim]
190+
191+
let m = newSelf.count
192+
let n = newTarget.count
193+
194+
// Initialize the levenshtein matrix with only two rows
195+
// current and previous.
196+
var previousRow = [Int](repeating: 0, count: n + 1)
197+
var currentRow = [Int](0...n)
198+
199+
var sourceIdx = newSelf.startIndex
200+
for i in 1...m {
201+
swap(&previousRow, &currentRow)
202+
currentRow[0] = i
203+
204+
var targetIdx = newTarget.startIndex
205+
for j in 1...n {
206+
// If the characters are equal, for the Levenshtein algorithm the
207+
// minimum will always be the substitution cost, so we can fast
208+
// path here in order to avoid min calls.
209+
if newSelf[sourceIdx] == newTarget[targetIdx] {
210+
currentRow[j] = previousRow[j - 1]
211+
} else {
212+
let deletion = previousRow[j]
213+
let insertion = currentRow[j - 1]
214+
let substitution = previousRow[j - 1]
215+
currentRow[j] = Swift.min(deletion, Swift.min(insertion, substitution)) + 1
216+
}
217+
// j += 1
218+
newTarget.formIndex(after: &targetIdx)
164219
}
220+
// i += 1
221+
newSelf.formIndex(after: &sourceIdx)
165222
}
166-
167-
return matrix.last!.last!
223+
return currentRow[n]
168224
}
169225

170226
func indentingEachLine(by n: Int) -> String {

Tests/ArgumentParserUnitTests/StringEditDistanceTests.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,10 @@ extension StringEditDistanceTests {
2323
XCTAssertEqual("bar".editDistance(to: "foo"), 3)
2424
XCTAssertEqual("bar".editDistance(to: "baz"), 1)
2525
XCTAssertEqual("baz".editDistance(to: "bar"), 1)
26+
XCTAssertEqual("friend".editDistance(to: "fresh"), 3)
27+
XCTAssertEqual("friend".editDistance(to: "friend"), 0)
28+
XCTAssertEqual("friend".editDistance(to: "fried"), 1)
29+
XCTAssertEqual("friend".editDistance(to: "friendly"), 2)
30+
XCTAssertEqual("friendly".editDistance(to: "friend"), 2)
2631
}
2732
}

0 commit comments

Comments
 (0)