Skip to content

Commit 8aee5c2

Browse files
committed
Implement keyword suggestion routine
`suggestion.rs` is largely a port of the implementations in [this](python/cpython#16856) and [this](python/cpython#25397). Signed-off-by: snowapril <[email protected]>
1 parent a4e69dc commit 8aee5c2

File tree

3 files changed

+179
-1
lines changed

3 files changed

+179
-1
lines changed

vm/src/builtins/code.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ impl PyRef<PyCode> {
256256
}
257257

258258
#[pyproperty]
259-
fn co_varnames(self, vm: &VirtualMachine) -> PyTupleRef {
259+
pub fn co_varnames(self, vm: &VirtualMachine) -> PyTupleRef {
260260
let varnames = self
261261
.code
262262
.varnames

vm/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ mod sequence;
7070
mod signal;
7171
pub mod sliceable;
7272
pub mod stdlib;
73+
pub mod suggestion;
7374
pub mod types;
7475
pub mod utils;
7576
pub mod version;

vm/src/suggestion.rs

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
use crate::{
2+
builtins::{PyList, PyStrRef},
3+
exceptions::types::PyBaseExceptionRef,
4+
sliceable::PySliceableSequence,
5+
IdProtocol, PyObjectRef, TryFromObject, TypeProtocol, VirtualMachine,
6+
};
7+
use std::cell::RefCell;
8+
use std::thread_local;
9+
10+
// Candidate lists with at least this many entries are not searched for suggestions.
const MAX_CANDIDATE_ITEMS: usize = 750;
11+
// Length of the scratch row used by `levenshtein_distance`; after common
// prefix/suffix trimming, longer inputs are reported as exceeding the budget.
const MAX_STRING_SIZE: usize = 40;
12+
13+
// Cost of an insertion, a deletion, or a substitution between unrelated bytes.
const MOVE_COST: usize = 2;
14+
// Cost of a substitution between two bytes that differ only by ASCII letter case.
const CASE_COST: usize = 1;
15+
16+
fn substitution_cost(mut a: u8, mut b: u8) -> usize {
17+
if (a & 31) != (b & 31) {
18+
return MOVE_COST;
19+
}
20+
if a == b {
21+
return 0usize;
22+
}
23+
if (b'A'..=b'Z').contains(&a) {
24+
a += b'a' - b'A';
25+
}
26+
if (b'A'..=b'Z').contains(&b) {
27+
b += b'a' - b'A';
28+
}
29+
if a == b {
30+
CASE_COST
31+
} else {
32+
MOVE_COST
33+
}
34+
}
35+
36+
fn levenshtein_distance(a: &str, b: &str, max_cost: usize) -> usize {
37+
thread_local! {
38+
static BUFFER: RefCell<[usize; MAX_STRING_SIZE]> = RefCell::new([0usize; MAX_STRING_SIZE]);
39+
}
40+
41+
if a == b {
42+
return 0;
43+
}
44+
45+
let (mut a_bytes, mut b_bytes) = (a.as_bytes(), b.as_bytes());
46+
let (mut a_begin, mut a_end) = (0usize, a.len());
47+
let (mut b_begin, mut b_end) = (0usize, b.len());
48+
49+
while a_end > 0 && b_end > 0 && (a_bytes[a_begin] == b_bytes[b_begin]) {
50+
a_begin += 1;
51+
b_begin += 1;
52+
a_end -= 1;
53+
b_end -= 1;
54+
}
55+
while a_end > 0 && b_end > 0 && (a_bytes[a_begin + a_end - 1] == b_bytes[b_begin + b_end - 1]) {
56+
a_end -= 1;
57+
b_end -= 1;
58+
}
59+
if a_end == 0 || b_end == 0 {
60+
return (a_end + b_end) * MOVE_COST;
61+
}
62+
if a_end > MAX_STRING_SIZE || b_end > MAX_STRING_SIZE {
63+
return max_cost + 1;
64+
}
65+
66+
if b_end < a_end {
67+
std::mem::swap(&mut a_bytes, &mut b_bytes);
68+
std::mem::swap(&mut a_begin, &mut b_begin);
69+
std::mem::swap(&mut a_end, &mut b_end);
70+
}
71+
72+
if (b_end - a_end) * MOVE_COST > max_cost {
73+
return max_cost + 1;
74+
}
75+
76+
BUFFER.with(|buffer| {
77+
let mut buffer = buffer.borrow_mut();
78+
for i in 0..a_end {
79+
buffer[i] = (i + 1) * MOVE_COST;
80+
}
81+
82+
let mut result = 0usize;
83+
for (b_index, b_code) in b_bytes[b_begin..(b_begin + b_end)].iter().enumerate() {
84+
result = b_index * MOVE_COST;
85+
let mut distance = result;
86+
let mut minimum = usize::MAX;
87+
for (a_index, a_code) in a_bytes[a_begin..(a_begin + a_end)].iter().enumerate() {
88+
let substitute = distance + substitution_cost(*b_code, *a_code);
89+
distance = buffer[a_index];
90+
let insert_delete = usize::min(result, distance) + MOVE_COST;
91+
result = usize::min(insert_delete, substitute);
92+
93+
buffer[a_index] = result;
94+
if result < minimum {
95+
minimum = result;
96+
}
97+
}
98+
if minimum > max_cost {
99+
return max_cost + 1;
100+
}
101+
}
102+
result
103+
})
104+
}
105+
106+
fn calculate_suggestions(dir: PyList, name: &PyObjectRef, vm: &VirtualMachine) -> Option<PyStrRef> {
107+
let dir = dir.borrow_vec();
108+
if dir.len() >= MAX_CANDIDATE_ITEMS {
109+
return None;
110+
}
111+
112+
let mut suggestion: Option<PyStrRef> = None;
113+
let mut suggestion_distance = usize::MAX;
114+
let name = match PyStrRef::try_from_object(vm, name.clone()) {
115+
Ok(name) => name,
116+
Err(_) => return None,
117+
};
118+
119+
for item in dir.iter() {
120+
let item_name = match PyStrRef::try_from_object(vm, item.clone()) {
121+
Ok(name) => name,
122+
Err(_) => return None,
123+
};
124+
if name.to_string() == item_name.to_string() {
125+
continue;
126+
}
127+
let max_distance = usize::min(
128+
(name.len() + item_name.len() + 3) * MOVE_COST / 6,
129+
suggestion_distance - 1,
130+
);
131+
let current_distance =
132+
levenshtein_distance(name.as_str(), item_name.as_str(), max_distance);
133+
if current_distance > max_distance {
134+
continue;
135+
}
136+
if suggestion.is_none() || current_distance < suggestion_distance {
137+
suggestion = Some(item_name);
138+
suggestion_distance = current_distance;
139+
}
140+
}
141+
suggestion
142+
}
143+
144+
pub fn offer_suggestions(exc: &PyBaseExceptionRef, vm: &VirtualMachine) -> Option<PyStrRef> {
145+
if exc.class().is(&vm.ctx.exceptions.attribute_error) {
146+
let name = vm.get_attribute(exc.as_object().clone(), "name").unwrap();
147+
let obj = vm.get_attribute(exc.as_object().clone(), "obj").unwrap();
148+
149+
match vm.dir(Some(obj)) {
150+
Ok(dir_list) => calculate_suggestions(dir_list, &name, vm),
151+
Err(_) => None,
152+
}
153+
} else if exc.class().is(&vm.ctx.exceptions.name_error) {
154+
let name = vm.get_attribute(exc.as_object().clone(), "name").unwrap();
155+
let mut tb = exc.traceback().unwrap();
156+
while let Some(traceback) = tb.next.clone() {
157+
tb = traceback;
158+
}
159+
160+
let frame = tb.frame.clone();
161+
let code = frame.code.clone();
162+
let varnames = PyList::from(code.co_varnames(vm).as_slice().to_vec());
163+
if let Some(suggestions) = calculate_suggestions(varnames, &name, vm) {
164+
return Some(suggestions);
165+
};
166+
167+
let globals = PyList::from(vm.extract_elements(frame.globals.as_object()).unwrap());
168+
if let Some(suggestions) = calculate_suggestions(globals, &name, vm) {
169+
return Some(suggestions);
170+
};
171+
172+
let builtins = PyList::from(vm.extract_elements(frame.builtins.as_object()).unwrap());
173+
calculate_suggestions(builtins, &name, vm)
174+
} else {
175+
None
176+
}
177+
}

0 commit comments

Comments
 (0)