Skip to content

Commit 37075f4

Browse files
committed
Implement keyword suggestion routine
`suggestions.rs` is almost a direct port of the implementations in [this](python/cpython#16856) and [this](python/cpython#25397). Signed-off-by: snowapril <[email protected]>
1 parent a86769e commit 37075f4

File tree

3 files changed

+183
-1
lines changed

3 files changed

+183
-1
lines changed

vm/src/builtins/code.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ impl PyRef<PyCode> {
249249
}
250250

251251
#[pyproperty]
252-
fn co_varnames(self, vm: &VirtualMachine) -> PyObjectRef {
252+
pub fn co_varnames(self, vm: &VirtualMachine) -> PyObjectRef {
253253
let varnames = self
254254
.code
255255
.varnames

vm/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ mod signal;
7171
pub mod sliceable;
7272
pub mod slots;
7373
pub mod stdlib;
74+
pub mod suggestions;
7475
pub mod types;
7576
pub mod utils;
7677
pub mod version;

vm/src/suggestions.rs

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
use crate::common::lock::PyRwLock;
2+
use crate::sliceable::PySliceableSequence;
3+
use crate::{
4+
builtins::{PyBaseObject, PyList, PyListRef, PyStrRef},
5+
exceptions::types::PyBaseExceptionRef,
6+
IdProtocol, PyObjectRef, PyValue, TryFromObject, TypeProtocol, VirtualMachine,
7+
};
8+
9+
/// Candidate lists with this many entries or more are not searched at all
/// (`calculate_suggestions` returns `None` for them).
const MAX_CANDIDATE_ITEMS: usize = 750;
10+
/// Length of the shared distance buffer. After common prefix/suffix trimming,
/// `levelshtein_distance` refuses to compare strings longer than this.
const MAX_STRING_SIZE: usize = 40;
11+
12+
/// Cost of one insertion or deletion, and of substituting two unrelated bytes.
const MOVE_COST: usize = 2;
13+
/// Cost of substituting two bytes that differ only by ASCII letter case.
const CASE_COST: usize = 1;
14+
15+
fn substitution_cost(mut a: u8, mut b: u8) -> usize {
16+
if (a & 31) != (b & 31) {
17+
return MOVE_COST;
18+
}
19+
if a == b {
20+
return 0usize;
21+
}
22+
if b'A' <= a && a <= b'Z' {
23+
a += b'a' - b'A';
24+
}
25+
if b'A' <= b && b <= b'Z' {
26+
b += b'a' - b'A';
27+
}
28+
if a == b {
29+
return CASE_COST;
30+
}
31+
MOVE_COST
32+
}
33+
34+
/// Returns the shared scratch row used by `levelshtein_distance`.
///
/// The buffer is a `static` holding `MAX_STRING_SIZE` counters behind a
/// `PyRwLock`; it is created zero-filled by `get_or_init` the first time this
/// function is called and the same reference is handed out afterwards.
fn global_levelshtein_buffer() -> &'static PyRwLock<[usize; MAX_STRING_SIZE]> {
35+
rustpython_common::static_cell! {
36+
static BUFFER: PyRwLock<[usize; MAX_STRING_SIZE]>;
37+
};
38+
BUFFER.get_or_init(|| PyRwLock::new([0usize; MAX_STRING_SIZE]))
39+
}
40+
41+
fn levelshtein_distance(a: &str, b: &str, max_cost: usize) -> usize {
42+
if a == b {
43+
return 0;
44+
}
45+
46+
let (mut a_bytes, mut b_bytes) = (a.as_bytes(), b.as_bytes());
47+
let (mut a_begin, mut a_end) = (0usize, a.len());
48+
let (mut b_begin, mut b_end) = (0usize, b.len());
49+
50+
while a_end > 0 && b_end > 0 && a_bytes[a_begin] == b_bytes[b_begin] {
51+
a_begin += 1;
52+
b_begin += 1;
53+
a_end -= 1;
54+
b_end -= 1;
55+
}
56+
while a_end > 0 && b_end > 0 && a_bytes[a_end - 1] == b_bytes[b_end - 1] {
57+
a_end -= 1;
58+
b_end -= 1;
59+
}
60+
if a_end == 0 || b_end == 0 {
61+
return (a_end + b_end) * MOVE_COST;
62+
}
63+
if a_end > MAX_STRING_SIZE || b_end > MAX_STRING_SIZE {
64+
return max_cost + 1;
65+
}
66+
67+
if b_end < a_end {
68+
std::mem::swap(&mut a_bytes, &mut b_bytes);
69+
std::mem::swap(&mut a_begin, &mut b_begin);
70+
std::mem::swap(&mut a_end, &mut b_end);
71+
}
72+
73+
if (b_end - a_end) * MOVE_COST > max_cost {
74+
return max_cost + 1;
75+
}
76+
77+
for i in 0..a_end {
78+
global_levelshtein_buffer().write()[i] = (i + 1) * MOVE_COST;
79+
}
80+
81+
let mut result = 0usize;
82+
for b_index in 0..b_end {
83+
let code = b_bytes[b_index];
84+
result = b_index * MOVE_COST;
85+
let mut distance = result;
86+
let mut minimum = usize::MAX;
87+
for index in 0..a_end {
88+
let substitute = distance + substitution_cost(code, a_bytes[index]);
89+
distance = global_levelshtein_buffer().read()[index];
90+
let insert_delete = usize::min(result, distance) + MOVE_COST;
91+
result = usize::min(insert_delete, substitute);
92+
93+
global_levelshtein_buffer().write()[index] = result;
94+
if result > minimum {
95+
minimum = result;
96+
}
97+
}
98+
if minimum > max_cost {
99+
return max_cost + 1;
100+
}
101+
}
102+
result
103+
}
104+
105+
fn calculate_suggestions(
106+
dir: PyListRef,
107+
name: &PyObjectRef,
108+
vm: &VirtualMachine,
109+
) -> Option<PyObjectRef> {
110+
let dir = dir.borrow_vec();
111+
if dir.len() >= MAX_CANDIDATE_ITEMS {
112+
return None;
113+
}
114+
115+
let mut suggestion: Option<PyObjectRef> = None;
116+
let mut suggestion_distance = usize::MAX;
117+
let name = if let Ok(name) = PyStrRef::try_from_object(vm, name.clone()) {
118+
name
119+
} else {
120+
return None;
121+
};
122+
for item in dir.into_iter() {
123+
let item_str = if let Ok(item_str) = PyStrRef::try_from_object(vm, item.clone()) {
124+
item_str
125+
} else {
126+
return None;
127+
};
128+
if name.to_string() == item_str.to_string() {
129+
continue;
130+
}
131+
let max_distance = usize::min(
132+
(name.len() + item_str.len() + 3) * MOVE_COST / 6,
133+
suggestion_distance,
134+
);
135+
let current_distance = levelshtein_distance(name.as_str(), item_str.as_str(), max_distance);
136+
if current_distance > max_distance {
137+
continue;
138+
}
139+
if suggestion.is_none() || current_distance < suggestion_distance {
140+
suggestion = Some(item.clone());
141+
suggestion_distance = current_distance;
142+
}
143+
}
144+
suggestion
145+
}
146+
147+
pub fn offer_suggestions(exc: &PyBaseExceptionRef, vm: &VirtualMachine) -> Option<PyObjectRef> {
148+
if exc.class().is(&vm.ctx.exceptions.attribute_error) {
149+
let name = exc.class().get_attr("name").unwrap();
150+
let obj = exc.class().get_attr("obj").unwrap();
151+
152+
calculate_suggestions(PyBaseObject::dir(obj, vm).unwrap().into_ref(vm), &name, vm)
153+
} else if exc.class().is(&vm.ctx.exceptions.name_error) {
154+
let name = exc.class().get_attr("name").unwrap();
155+
let mut tb = exc.traceback().unwrap();
156+
157+
while let Some(traceback) = tb.next.clone() {
158+
tb = traceback;
159+
}
160+
161+
let frame = tb.frame.clone();
162+
let code = frame.code.clone();
163+
164+
let dir = PyListRef::try_from_object(vm, code.co_varnames(vm)).unwrap();
165+
if let Some(suggestions) = calculate_suggestions(dir, &name, vm) {
166+
return Some(suggestions);
167+
};
168+
169+
let dir =
170+
PyList::from(vm.extract_elements(frame.globals.as_object()).unwrap()).into_ref(vm);
171+
if let Some(suggestions) = calculate_suggestions(dir, &name, vm) {
172+
return Some(suggestions);
173+
};
174+
175+
let dir =
176+
PyList::from(vm.extract_elements(frame.builtins.as_object()).unwrap()).into_ref(vm);
177+
calculate_suggestions(dir, &name, vm)
178+
} else {
179+
None
180+
}
181+
}

0 commit comments

Comments
 (0)