Skip to content

Commit 71e8766

Browse files
committed
Simplify the algorithm implementation
1 parent c760303 commit 71e8766

File tree

1 file changed

+72
-128
lines changed

1 file changed

+72
-128
lines changed

Python/suggestions.c

Lines changed: 72 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -2,157 +2,102 @@
22

33
#include "pycore_pyerrors.h"
44

5-
#define MAX_GETATTR_PREDICT_DIST 3
6-
#define MAX_GETATTR_PREDICT_ITEMS 100
7-
#define MAX_GETATTR_STRING_SIZE 20
5+
#define MAX_DISTANCE 3
6+
#define MAX_CANDIDATE_ITEMS 100
7+
#define MAX_STRING_SIZE 20
88

99
/* Calculate the Levenshtein distance between string1 and string2 */
10-
static Py_ssize_t
11-
levenshtein_distance(const char *string1, const char *string2)
12-
{
13-
Py_ssize_t len1 = strlen(string1);
14-
Py_ssize_t len2 = strlen(string2);
15-
Py_ssize_t i;
16-
Py_ssize_t half;
17-
size_t *row;
18-
size_t *end;
19-
20-
/* Get rid of the common prefix */
21-
while (len1 > 0 && len2 > 0 && *string1 == *string2) {
22-
len1--;
23-
len2--;
24-
string1++;
25-
string2++;
10+
static size_t
11+
levenshtein_distance(const char *a, const char *b) {
12+
if (a == NULL || b == NULL) {
13+
return 0;
2614
}
2715

28-
/* strip common suffix */
29-
while (len1 > 0 && len2 > 0 && string1[len1-1] == string2[len2-1]) {
30-
len1--;
31-
len2--;
32-
}
16+
const size_t a_size = strlen(a);
17+
const size_t b_size = strlen(b);
3318

34-
/* catch trivial cases */
35-
if (len1 == 0) {
36-
return len2;
19+
if (a_size > MAX_STRING_SIZE || b_size > MAX_STRING_SIZE) {
20+
return 0;
3721
}
38-
if (len2 == 0) {
39-
return len1;
22+
23+
// Both strings are the same (by identity)
24+
if (a == b) {
25+
return 0;
4026
}
4127

42-
/* make the inner cycle (i.e. string2) the longer one */
43-
if (len1 > len2) {
44-
size_t nx = len1;
45-
const char *sx = string1;
46-
len1 = len2;
47-
len2 = nx;
48-
string1 = string2;
49-
string2 = sx;
28+
// The first string is empty
29+
if (a_size == 0) {
30+
return b_size;
5031
}
51-
/* check len1 == 1 separately */
52-
if (len1 == 1) {
53-
return len2 - (memchr(string2, *string1, len2) != NULL);
32+
33+
// The second string is empty
34+
if (b_size == 0) {
35+
return a_size;
5436
}
55-
len1++;
56-
len2++;
57-
half = len1 >> 1;
58-
59-
/* initialize first row */
60-
row = (size_t*)PyMem_Malloc(len2*sizeof(size_t));
61-
if (!row) {
62-
return (Py_ssize_t)(-1);
37+
38+
size_t *buffer = PyMem_Calloc(a_size, sizeof(size_t));
39+
if (buffer == NULL) {
40+
return 0;
6341
}
64-
end = row + len2 - 1;
65-
for (i = 0; i < len2 - half; i++) {
66-
row[i] = i;
42+
43+
// Initialize the buffer row
44+
size_t index = 0;
45+
while (index < a_size) {
46+
buffer[index] = index + 1;
47+
index++;
6748
}
6849

69-
/* We don't have to scan two corner triangles (of size len1/2)
70-
* in the matrix because no best path can go through them. This is
71-
* not true when len1 == len2 == 2 so the memchr() special case above is
72-
* necessary */
73-
row[0] = len1 - half - 1;
74-
for (i = 1; i < len1; i++) {
75-
size_t *scan_ptr;
76-
const char char1 = string1[i - 1];
77-
const char *char2p;
78-
size_t D, x;
79-
/* skip the upper triangle */
80-
if (i >= len1 - half) {
81-
size_t offset = i - (len1 - half);
82-
size_t c3;
83-
84-
char2p = string2 + offset;
85-
scan_ptr = row + offset;
86-
c3 = *(scan_ptr++) + (char1 != *(char2p++));
87-
x = *scan_ptr;
88-
x++;
89-
D = x;
90-
if (x > c3) {
91-
x = c3;
50+
size_t b_index = 0;
51+
size_t result = 0;
52+
while (b_index < b_size) {
53+
char code = b[b_index];
54+
size_t distance = result = b_index++;
55+
index = SIZE_MAX;
56+
while (++index < a_size) {
57+
size_t b_distance = code == a[index] ? distance : distance + 1;
58+
distance = buffer[index];
59+
if (distance > result) {
60+
if (b_distance > result) {
61+
result = result + 1;
62+
} else {
63+
result = b_distance;
64+
}
65+
} else {
66+
if (b_distance > distance) {
67+
result = distance + 1;
68+
} else {
69+
result = b_distance;
70+
}
9271
}
93-
*(scan_ptr++) = x;
94-
}
95-
else {
96-
scan_ptr = row + 1;
97-
char2p = string2;
98-
D = x = i;
99-
}
100-
/* skip the lower triangle */
101-
if (i <= half + 1) {
102-
end = row + len2 + i - half - 2;
103-
}
104-
/* main */
105-
while (scan_ptr <= end) {
106-
size_t c3 = --D + (char1 != *(char2p++));
107-
x++;
108-
if (x > c3) {
109-
x = c3;
110-
}
111-
D = *scan_ptr;
112-
D++;
113-
if (x > D)
114-
x = D;
115-
*(scan_ptr++) = x;
116-
}
117-
/* lower triangle sentinel */
118-
if (i <= half) {
119-
size_t c3 = --D + (char1 != *char2p);
120-
x++;
121-
if (x > c3) {
122-
x = c3;
123-
}
124-
*scan_ptr = x;
72+
buffer[index] = result;
12573
}
12674
}
127-
i = *end;
128-
PyMem_Free(row);
129-
return i;
75+
PyMem_Free(buffer);
76+
return result;
13077
}
13178

132-
static inline PyObject*
133-
calculate_suggestions(PyObject* dir,
134-
PyObject* name)
135-
{
79+
static inline PyObject *
80+
calculate_suggestions(PyObject *dir,
81+
PyObject *name) {
13682
assert(!PyErr_Occurred());
13783
assert(PyList_CheckExact(dir));
13884

13985
Py_ssize_t dir_size = PyList_GET_SIZE(dir);
140-
if (dir_size >= MAX_GETATTR_PREDICT_ITEMS) {
86+
if (dir_size >= MAX_CANDIDATE_ITEMS) {
14187
return NULL;
14288
}
14389

14490
Py_ssize_t suggestion_distance = PyUnicode_GetLength(name);
145-
PyObject* suggestion = NULL;
91+
PyObject *suggestion = NULL;
14692
for (int i = 0; i < dir_size; ++i) {
14793
PyObject *item = PyList_GET_ITEM(dir, i);
14894
const char *name_str = PyUnicode_AsUTF8(name);
14995
if (name_str == NULL) {
15096
PyErr_Clear();
15197
continue;
15298
}
153-
Py_ssize_t current_distance = levenshtein_distance(PyUnicode_AsUTF8(name),
154-
PyUnicode_AsUTF8(item));
155-
if (current_distance > MAX_GETATTR_PREDICT_DIST){
99+
Py_ssize_t current_distance = levenshtein_distance(PyUnicode_AsUTF8(name), PyUnicode_AsUTF8(item));
100+
if (current_distance == 0 || current_distance > MAX_DISTANCE) {
156101
continue;
157102
}
158103
if (!suggestion || current_distance < suggestion_distance) {
@@ -167,34 +112,33 @@ calculate_suggestions(PyObject* dir,
167112
return suggestion;
168113
}
169114

170-
static PyObject*
171-
offer_suggestions_for_attribute_error(PyAttributeErrorObject* exc) {
172-
PyObject* name = exc->name; // borrowed reference
173-
PyObject* obj = exc->obj; // borrowed reference
115+
static PyObject *
116+
offer_suggestions_for_attribute_error(PyAttributeErrorObject *exc) {
117+
PyObject *name = exc->name; // borrowed reference
118+
PyObject *obj = exc->obj; // borrowed reference
174119

175120
// Abort if we don't have an attribute name or we have an invalid one
176121
if (name == NULL || obj == NULL || !PyUnicode_CheckExact(name)) {
177122
return NULL;
178123
}
179124

180-
PyObject* dir = PyObject_Dir(obj);
125+
PyObject *dir = PyObject_Dir(obj);
181126
if (dir == NULL) {
182127
return NULL;
183128
}
184129

185-
PyObject* suggestions = calculate_suggestions(dir, name);
130+
PyObject *suggestions = calculate_suggestions(dir, name);
186131
Py_DECREF(dir);
187132
return suggestions;
188133
}
189134

190-
191135
// Offer suggestions for a given exception. Returns a Python string object containing the
192136
// suggestions. This function does not raise exceptions and returns NULL if no suggestion was found.
193-
PyObject* _Py_Offer_Suggestions(PyObject* exception) {
194-
PyObject* result = NULL;
137+
PyObject *_Py_Offer_Suggestions(PyObject *exception) {
138+
PyObject *result = NULL;
195139
assert(!PyErr_Occurred()); // Check that we are not going to clean any existing exception
196140
if (PyErr_GivenExceptionMatches(exception, PyExc_AttributeError)) {
197-
result = offer_suggestions_for_attribute_error((PyAttributeErrorObject*) exception);
141+
result = offer_suggestions_for_attribute_error((PyAttributeErrorObject *) exception);
198142
}
199143
assert(!PyErr_Occurred());
200144
return result;

0 commit comments

Comments
 (0)