Skip to content

Commit d2e7cfb

Browse files
committed
Auto merge of #11963 - jhpratt:master, r=epage
Use restricted Damerau-Levenshtein algorithm

This uses the same implementation as the one used in rustc, so review should be simple. As with rust-lang/rust#108200, the module and function names have been changed to be implementation-agnostic. See the [reference](https://github.com/rust-lang/rust/blob/13d1802b8882452f7d9d1bf514a096c5c8a22303/compiler/rustc_span/src/edit_distance.rs) for rustc's current implementation.
2 parents b0742b2 + ccd77a3 commit d2e7cfb

File tree

7 files changed

+164
-114
lines changed

7 files changed

+164
-114
lines changed

src/cargo/core/package_id_spec.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ use serde::{de, ser};
77
use url::Url;
88

99
use crate::core::PackageId;
10+
use crate::util::edit_distance;
1011
use crate::util::errors::CargoResult;
1112
use crate::util::interning::InternedString;
12-
use crate::util::lev_distance;
1313
use crate::util::{validate_package_name, IntoUrl, ToSemver};
1414

1515
/// Some or all of the data required to identify a package:
@@ -88,7 +88,7 @@ impl PackageIdSpec {
8888
{
8989
let i: Vec<_> = i.into_iter().collect();
9090
let spec = PackageIdSpec::parse(spec).with_context(|| {
91-
let suggestion = lev_distance::closest_msg(spec, i.iter(), |id| id.name().as_str());
91+
let suggestion = edit_distance::closest_msg(spec, i.iter(), |id| id.name().as_str());
9292
format!("invalid package ID specification: `{}`{}", spec, suggestion)
9393
})?;
9494
spec.query(i)
@@ -229,7 +229,7 @@ impl PackageIdSpec {
229229
);
230230
}
231231
if suggestion.is_empty() {
232-
suggestion.push_str(&lev_distance::closest_msg(
232+
suggestion.push_str(&edit_distance::closest_msg(
233233
&self.name,
234234
all_ids.iter(),
235235
|id| id.name().as_str(),

src/cargo/core/resolver/errors.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::fmt;
22
use std::task::Poll;
33

44
use crate::core::{Dependency, PackageId, QueryKind, Registry, Summary};
5-
use crate::util::lev_distance::lev_distance;
5+
use crate::util::edit_distance::edit_distance;
66
use crate::util::{Config, VersionExt};
77
use anyhow::Error;
88

@@ -308,8 +308,7 @@ pub(super) fn activation_error(
308308
candidates.dedup_by(|a, b| a.name() == b.name());
309309
let mut candidates: Vec<_> = candidates
310310
.iter()
311-
.map(|n| (lev_distance(&*new_dep.package_name(), &*n.name()), n))
312-
.filter(|&(d, _)| d < 4)
311+
.filter_map(|n| Some((edit_distance(&*new_dep.package_name(), &*n.name(), 3)?, n)))
313312
.collect();
314313
candidates.sort_by_key(|o| o.0);
315314
let mut msg: String;

src/cargo/core/workspace.rs

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ use crate::core::{Dependency, FeatureValue, PackageId, PackageIdSpec};
1919
use crate::core::{EitherManifest, Package, SourceId, VirtualManifest};
2020
use crate::ops;
2121
use crate::sources::{PathSource, CRATES_IO_INDEX, CRATES_IO_REGISTRY};
22+
use crate::util::edit_distance;
2223
use crate::util::errors::{CargoResult, ManifestError};
2324
use crate::util::interning::InternedString;
24-
use crate::util::lev_distance;
2525
use crate::util::toml::{read_manifest, InheritableFields, TomlDependency, TomlProfiles};
2626
use crate::util::{config::ConfigRelativePath, Config, Filesystem, IntoUrl};
2727
use cargo_util::paths;
@@ -1245,8 +1245,9 @@ impl<'cfg> Workspace<'cfg> {
12451245
optional_dependency_names_per_member.insert(member, optional_dependency_names_raw);
12461246
}
12471247

1248-
let levenshtein_test =
1249-
|a: InternedString, b: InternedString| lev_distance(a.as_str(), b.as_str()) < 4;
1248+
let edit_distance_test = |a: InternedString, b: InternedString| {
1249+
edit_distance(a.as_str(), b.as_str(), 3).is_some()
1250+
};
12501251

12511252
let suggestions: Vec<_> = cli_features
12521253
.features
@@ -1257,12 +1258,12 @@ impl<'cfg> Workspace<'cfg> {
12571258
// Finds member features which are similar to the requested feature.
12581259
let summary_features = summary_features
12591260
.iter()
1260-
.filter(move |feature| levenshtein_test(**feature, *typo));
1261+
.filter(move |feature| edit_distance_test(**feature, *typo));
12611262

12621263
// Finds optional dependencies which name is similar to the feature
12631264
let optional_dependency_features = optional_dependency_names
12641265
.iter()
1265-
.filter(move |feature| levenshtein_test(**feature, *typo));
1266+
.filter(move |feature| edit_distance_test(**feature, *typo));
12661267

12671268
summary_features
12681269
.chain(optional_dependency_features)
@@ -1278,13 +1279,13 @@ impl<'cfg> Workspace<'cfg> {
12781279
// Finds set of `pkg/feat` that are very similar to current `pkg/feat`.
12791280
let pkg_feat_similar = dependencies_features
12801281
.iter()
1281-
.filter(|(name, _)| levenshtein_test(**name, *dep_name))
1282+
.filter(|(name, _)| edit_distance_test(**name, *dep_name))
12821283
.map(|(name, features)| {
12831284
(
12841285
name,
12851286
features
12861287
.iter()
1287-
.filter(|feature| levenshtein_test(**feature, *dep_feature))
1288+
.filter(|feature| edit_distance_test(**feature, *dep_feature))
12881289
.collect::<Vec<_>>(),
12891290
)
12901291
})
@@ -1298,12 +1299,12 @@ impl<'cfg> Workspace<'cfg> {
12981299
// Finds set of `member/optional_dep` features which name is similar to current `pkg/feat`.
12991300
let optional_dependency_features = optional_dependency_names_per_member
13001301
.iter()
1301-
.filter(|(package, _)| levenshtein_test(package.name(), *dep_name))
1302+
.filter(|(package, _)| edit_distance_test(package.name(), *dep_name))
13021303
.map(|(package, optional_dependencies)| {
13031304
optional_dependencies
13041305
.into_iter()
13051306
.filter(|optional_dependency| {
1306-
levenshtein_test(**optional_dependency, *dep_name)
1307+
edit_distance_test(**optional_dependency, *dep_name)
13071308
})
13081309
.map(move |optional_dependency| {
13091310
format!("{}/{}", package.name(), optional_dependency)
@@ -1314,12 +1315,12 @@ impl<'cfg> Workspace<'cfg> {
13141315
// Finds set of `member/feat` features which name is similar to current `pkg/feat`.
13151316
let summary_features = summary_features_per_member
13161317
.iter()
1317-
.filter(|(package, _)| levenshtein_test(package.name(), *dep_name))
1318+
.filter(|(package, _)| edit_distance_test(package.name(), *dep_name))
13181319
.map(|(package, summary_features)| {
13191320
summary_features
13201321
.into_iter()
13211322
.filter(|summary_feature| {
1322-
levenshtein_test(**summary_feature, *dep_feature)
1323+
edit_distance_test(**summary_feature, *dep_feature)
13231324
})
13241325
.map(move |summary_feature| {
13251326
format!("{}/{}", package.name(), summary_feature)

src/cargo/ops/cargo_clean.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ use crate::core::compiler::{CompileKind, CompileMode, Layout, RustcTargetData};
22
use crate::core::profiles::Profiles;
33
use crate::core::{PackageIdSpec, TargetKind, Workspace};
44
use crate::ops;
5+
use crate::util::edit_distance;
56
use crate::util::errors::CargoResult;
67
use crate::util::interning::InternedString;
7-
use crate::util::lev_distance;
88
use crate::util::{Config, Progress, ProgressStyle};
99

1010
use anyhow::Context as _;
@@ -118,7 +118,7 @@ pub fn clean(ws: &Workspace<'_>, opts: &CleanOptions<'_>) -> CargoResult<()> {
118118
let matches: Vec<_> = resolve.iter().filter(|id| spec.matches(*id)).collect();
119119
if matches.is_empty() {
120120
let mut suggestion = String::new();
121-
suggestion.push_str(&lev_distance::closest_msg(
121+
suggestion.push_str(&edit_distance::closest_msg(
122122
&spec.name(),
123123
resolve.iter(),
124124
|id| id.name().as_str(),

src/cargo/util/edit_distance.rs

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
use std::{cmp, mem};
2+
3+
/// Finds the [edit distance] between two strings.
4+
///
5+
/// Returns `None` if the distance exceeds the limit.
6+
///
7+
/// [edit distance]: https://en.wikipedia.org/wiki/Edit_distance
8+
pub fn edit_distance(a: &str, b: &str, limit: usize) -> Option<usize> {
9+
// Comparing the strings lowercased will result in a difference in capitalization being less distance away
10+
// than being a completely different letter. Otherwise `CHECK` is as far away from `check` as it
11+
// is from `build` (both with a distance of 5). For a single letter shortcut (e.g. `b` or `c`), they will
12+
// all be as far away from any capital single letter entry (all with a distance of 1).
13+
// By first lowercasing the strings, `C` and `c` are closer than `C` and `b`, for example.
14+
let a = a.to_lowercase();
15+
let b = b.to_lowercase();
16+
17+
let mut a = &a.chars().collect::<Vec<_>>()[..];
18+
let mut b = &b.chars().collect::<Vec<_>>()[..];
19+
20+
// Ensure that `b` is the shorter string, minimizing memory use.
21+
if a.len() < b.len() {
22+
mem::swap(&mut a, &mut b);
23+
}
24+
25+
let min_dist = a.len() - b.len();
26+
// If we know the limit will be exceeded, we can return early.
27+
if min_dist > limit {
28+
return None;
29+
}
30+
31+
// Strip common prefix.
32+
while let Some(((b_char, b_rest), (a_char, a_rest))) = b.split_first().zip(a.split_first()) {
33+
if a_char != b_char {
34+
break;
35+
}
36+
a = a_rest;
37+
b = b_rest;
38+
}
39+
// Strip common suffix.
40+
while let Some(((b_char, b_rest), (a_char, a_rest))) = b.split_last().zip(a.split_last()) {
41+
if a_char != b_char {
42+
break;
43+
}
44+
a = a_rest;
45+
b = b_rest;
46+
}
47+
48+
// If either string is empty, the distance is the length of the other.
49+
// We know that `b` is the shorter string, so we don't need to check `a`.
50+
if b.len() == 0 {
51+
return Some(min_dist);
52+
}
53+
54+
let mut prev_prev = vec![usize::MAX; b.len() + 1];
55+
let mut prev = (0..=b.len()).collect::<Vec<_>>();
56+
let mut current = vec![0; b.len() + 1];
57+
58+
// row by row
59+
for i in 1..=a.len() {
60+
current[0] = i;
61+
let a_idx = i - 1;
62+
63+
// column by column
64+
for j in 1..=b.len() {
65+
let b_idx = j - 1;
66+
67+
// There is no cost to substitute a character with itself.
68+
let substitution_cost = if a[a_idx] == b[b_idx] { 0 } else { 1 };
69+
70+
current[j] = cmp::min(
71+
// deletion
72+
prev[j] + 1,
73+
cmp::min(
74+
// insertion
75+
current[j - 1] + 1,
76+
// substitution
77+
prev[j - 1] + substitution_cost,
78+
),
79+
);
80+
81+
if (i > 1) && (j > 1) && (a[a_idx] == b[b_idx - 1]) && (a[a_idx - 1] == b[b_idx]) {
82+
// transposition
83+
current[j] = cmp::min(current[j], prev_prev[j - 2] + 1);
84+
}
85+
}
86+
87+
// Rotate the buffers, reusing the memory.
88+
[prev_prev, prev, current] = [prev, current, prev_prev];
89+
}
90+
91+
// `prev` because we already rotated the buffers.
92+
let distance = prev[b.len()];
93+
(distance <= limit).then_some(distance)
94+
}
95+
96+
/// Find the closest element from `iter` matching `choice`. The `key` callback
97+
/// is used to select a `&str` from the iterator to compare against `choice`.
98+
pub fn closest<'a, T>(
99+
choice: &str,
100+
iter: impl Iterator<Item = T>,
101+
key: impl Fn(&T) -> &'a str,
102+
) -> Option<T> {
103+
// Only consider candidates with an edit distance of 3 or less so we don't
104+
// suggest out-of-the-blue options.
105+
iter.filter_map(|e| Some((edit_distance(choice, key(&e), 3)?, e)))
106+
.min_by_key(|t| t.0)
107+
.map(|t| t.1)
108+
}
109+
110+
/// Version of `closest` that returns a common "suggestion" that can be tacked
111+
/// onto the end of an error message.
112+
pub fn closest_msg<'a, T>(
113+
choice: &str,
114+
iter: impl Iterator<Item = T>,
115+
key: impl Fn(&T) -> &'a str,
116+
) -> String {
117+
match closest(choice, iter, &key) {
118+
Some(e) => format!("\n\n\tDid you mean `{}`?", key(&e)),
119+
None => String::new(),
120+
}
121+
}
122+
123+
#[test]
124+
fn test_edit_distance() {
125+
use std::char::{from_u32, MAX};
126+
// Test bytelength agnosticity
127+
for c in (0u32..MAX as u32)
128+
.filter_map(from_u32)
129+
.map(|i| i.to_string())
130+
{
131+
assert_eq!(edit_distance(&c, &c, usize::MAX), Some(0));
132+
}
133+
134+
let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
135+
let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
136+
let c = "Mary häd ä little lämb\n\nLittle lämb\n";
137+
assert_eq!(edit_distance(a, b, usize::MAX), Some(1));
138+
assert_eq!(edit_distance(b, a, usize::MAX), Some(1));
139+
assert_eq!(edit_distance(a, c, usize::MAX), Some(2));
140+
assert_eq!(edit_distance(c, a, usize::MAX), Some(2));
141+
assert_eq!(edit_distance(b, c, usize::MAX), Some(1));
142+
assert_eq!(edit_distance(c, b, usize::MAX), Some(1));
143+
}

src/cargo/util/lev_distance.rs

Lines changed: 0 additions & 93 deletions
This file was deleted.

0 commit comments

Comments
 (0)