|
/// Determine the edit distance between two sequences.
///
/// - Parameters:
///   - fa: The first sequence to compare.
///   - ta: The second sequence to compare.
///   - allowReplacements: Whether to allow element replacements (change one
///     element into another) as a single operation, rather than as two
///     operations (an insertion and a removal).
///   - maxEditDistance: If non-zero, the maximum edit distance that this
///     routine is allowed to compute. After each element of `fa` is
///     processed, if every candidate distance already exceeds this maximum,
///     the routine stops early and returns `maxEditDistance + 1`. Callers
///     should treat any result greater than `maxEditDistance` as "too far".
///
/// - Returns: The minimum number of element insertions, removals, or (if
///   `allowReplacements` is `true`) replacements needed to transform one of
///   the given sequences into the other. If zero, the sequences are identical.
func editDistance<T: Equatable>(from fa: [T], to ta: [T], allowReplacements: Bool = true, maxEditDistance: Int = 0) -> Int {
    // The algorithm implemented below is the "classic"
    // dynamic-programming algorithm for computing the Levenshtein
    // distance, which is described here:
    //
    //   http://en.wikipedia.org/wiki/Levenshtein_distance
    //
    // Although the algorithm is typically described using an m x n
    // array, only one row plus one element are used at a time, so this
    // implementation just keeps one vector for the row. To update one entry,
    // only the entries to the left, top, and top-left are needed. The left
    // entry is in `row[x - 1]`, the top entry is what's in `row[x]` from the
    // last iteration, and the top-left entry is stored in `previous`.
    let n = ta.count

    // Base case: turning an empty prefix of `fa` into the first `x` elements
    // of `ta` takes exactly `x` insertions, so the row starts as 0, 1, ..., n.
    var row = [Int](0...n)

    // Iterating the elements directly (instead of `1...m` / `1...n`) keeps the
    // loops well-defined when either sequence is empty.
    for (sourceIndex, sourceElement) in fa.enumerated() {
        let y = sourceIndex + 1

        // Column 0: removing the first `y` elements of `fa` to reach an
        // empty prefix of `ta`.
        row[0] = y
        var bestThisRow = row[0]

        // Top-left neighbour of `row[1]`, i.e. the previous row's `row[0]`.
        var previous = y - 1
        for (targetIndex, targetElement) in ta.enumerated() {
            let x = targetIndex + 1
            let oldRow = row[x]
            if allowReplacements {
                row[x] = min(
                    previous + (sourceElement == targetElement ? 0 : 1),
                    min(row[x - 1], row[x]) + 1
                )
            } else if sourceElement == targetElement {
                row[x] = previous
            } else {
                row[x] = min(row[x - 1], row[x]) + 1
            }
            // The value just overwritten is the top-left neighbour of the
            // next column.
            previous = oldRow
            bestThisRow = min(bestThisRow, row[x])
        }

        // Row minima never decrease from one row to the next, so once the
        // whole row is above the cap the final answer must be as well.
        if maxEditDistance != 0 && bestThisRow > maxEditDistance {
            return maxEditDistance + 1
        }
    }

    return row[n]
}
0 commit comments