2
2
3
3
#include "pycore_pyerrors.h"
4
4
5
- #define MAX_GETATTR_PREDICT_DIST 3
6
- #define MAX_GETATTR_PREDICT_ITEMS 100
7
- #define MAX_GETATTR_STRING_SIZE 20
5
+ #define MAX_DISTANCE 3
6
+ #define MAX_CANDIDATE_ITEMS 100
7
+ #define MAX_STRING_SIZE 20
8
8
9
9
/* Calculate the Levenshtein distance between string1 and string2 */
10
- static Py_ssize_t
11
- levenshtein_distance (const char * string1 , const char * string2 )
12
- {
13
- Py_ssize_t len1 = strlen (string1 );
14
- Py_ssize_t len2 = strlen (string2 );
15
- Py_ssize_t i ;
16
- Py_ssize_t half ;
17
- size_t * row ;
18
- size_t * end ;
19
-
20
- /* Get rid of the common prefix */
21
- while (len1 > 0 && len2 > 0 && * string1 == * string2 ) {
22
- len1 -- ;
23
- len2 -- ;
24
- string1 ++ ;
25
- string2 ++ ;
10
+ static size_t
11
+ levenshtein_distance (const char * a , const char * b ) {
12
+ if (a == NULL || b == NULL ) {
13
+ return 0 ;
26
14
}
27
15
28
- /* strip common suffix */
29
- while (len1 > 0 && len2 > 0 && string1 [len1 - 1 ] == string2 [len2 - 1 ]) {
30
- len1 -- ;
31
- len2 -- ;
32
- }
16
+ const size_t a_size = strlen (a );
17
+ const size_t b_size = strlen (b );
33
18
34
- /* catch trivial cases */
35
- if (len1 == 0 ) {
36
- return len2 ;
19
+ if (a_size > MAX_STRING_SIZE || b_size > MAX_STRING_SIZE ) {
20
+ return 0 ;
37
21
}
38
- if (len2 == 0 ) {
39
- return len1 ;
22
+
23
+ // Both strings are the same (by identity)
24
+ if (a == b ) {
25
+ return 0 ;
40
26
}
41
27
42
- /* make the inner cycle (i.e. string2) the longer one */
43
- if (len1 > len2 ) {
44
- size_t nx = len1 ;
45
- const char * sx = string1 ;
46
- len1 = len2 ;
47
- len2 = nx ;
48
- string1 = string2 ;
49
- string2 = sx ;
28
+ // The first string is empty
29
+ if (a_size == 0 ) {
30
+ return b_size ;
50
31
}
51
- /* check len1 == 1 separately */
52
- if (len1 == 1 ) {
53
- return len2 - (memchr (string2 , * string1 , len2 ) != NULL );
32
+
33
+ // The second string is empty
34
+ if (b_size == 0 ) {
35
+ return a_size ;
54
36
}
55
- len1 ++ ;
56
- len2 ++ ;
57
- half = len1 >> 1 ;
58
-
59
- /* initalize first row */
60
- row = (size_t * )PyMem_Malloc (len2 * sizeof (size_t ));
61
- if (!row ) {
62
- return (Py_ssize_t )(-1 );
37
+
38
+ size_t * buffer = PyMem_Calloc (a_size , sizeof (size_t ));
39
+ if (buffer == NULL ) {
40
+ return 0 ;
63
41
}
64
- end = row + len2 - 1 ;
65
- for (i = 0 ; i < len2 - half ; i ++ ) {
66
- row [i ] = i ;
42
+
43
+ // Initialize the buffer row
44
+ size_t index = 0 ;
45
+ while (index < a_size ) {
46
+ buffer [index ] = index + 1 ;
47
+ index ++ ;
67
48
}
68
49
69
- /* We don't have to scan two corner triangles (of size len1/2)
70
- * in the matrix because no best path can go throught them. This is
71
- * not true when len1 == len2 == 2 so the memchr() special case above is
72
- * necessary */
73
- row [0 ] = len1 - half - 1 ;
74
- for (i = 1 ; i < len1 ; i ++ ) {
75
- size_t * scan_ptr ;
76
- const char char1 = string1 [i - 1 ];
77
- const char * char2p ;
78
- size_t D , x ;
79
- /* skip the upper triangle */
80
- if (i >= len1 - half ) {
81
- size_t offset = i - (len1 - half );
82
- size_t c3 ;
83
-
84
- char2p = string2 + offset ;
85
- scan_ptr = row + offset ;
86
- c3 = * (scan_ptr ++ ) + (char1 != * (char2p ++ ));
87
- x = * scan_ptr ;
88
- x ++ ;
89
- D = x ;
90
- if (x > c3 ) {
91
- x = c3 ;
50
+ size_t b_index = 0 ;
51
+ size_t result = 0 ;
52
+ while (b_index < b_size ) {
53
+ char code = b [b_index ];
54
+ size_t distance = result = b_index ++ ;
55
+ index = SIZE_MAX ;
56
+ while (++ index < a_size ) {
57
+ size_t b_distance = code == a [index ] ? distance : distance + 1 ;
58
+ distance = buffer [index ];
59
+ if (distance > result ) {
60
+ if (b_distance > result ) {
61
+ result = result + 1 ;
62
+ } else {
63
+ result = b_distance ;
64
+ }
65
+ } else {
66
+ if (b_distance > distance ) {
67
+ result = distance + 1 ;
68
+ } else {
69
+ result = b_distance ;
70
+ }
92
71
}
93
- * (scan_ptr ++ ) = x ;
94
- }
95
- else {
96
- scan_ptr = row + 1 ;
97
- char2p = string2 ;
98
- D = x = i ;
99
- }
100
- /* skip the lower triangle */
101
- if (i <= half + 1 ) {
102
- end = row + len2 + i - half - 2 ;
103
- }
104
- /* main */
105
- while (scan_ptr <= end ) {
106
- size_t c3 = -- D + (char1 != * (char2p ++ ));
107
- x ++ ;
108
- if (x > c3 ) {
109
- x = c3 ;
110
- }
111
- D = * scan_ptr ;
112
- D ++ ;
113
- if (x > D )
114
- x = D ;
115
- * (scan_ptr ++ ) = x ;
116
- }
117
- /* lower triangle sentinel */
118
- if (i <= half ) {
119
- size_t c3 = -- D + (char1 != * char2p );
120
- x ++ ;
121
- if (x > c3 ) {
122
- x = c3 ;
123
- }
124
- * scan_ptr = x ;
72
+ buffer [index ] = result ;
125
73
}
126
74
}
127
- i = * end ;
128
- PyMem_Free (row );
129
- return i ;
75
+ PyMem_Free (buffer );
76
+ return result ;
130
77
}
131
78
132
- static inline PyObject *
133
- calculate_suggestions (PyObject * dir ,
134
- PyObject * name )
135
- {
79
+ static inline PyObject *
80
+ calculate_suggestions (PyObject * dir ,
81
+ PyObject * name ) {
136
82
assert (!PyErr_Occurred ());
137
83
assert (PyList_CheckExact (dir ));
138
84
139
85
Py_ssize_t dir_size = PyList_GET_SIZE (dir );
140
- if (dir_size >= MAX_GETATTR_PREDICT_ITEMS ) {
86
+ if (dir_size >= MAX_CANDIDATE_ITEMS ) {
141
87
return NULL ;
142
88
}
143
89
144
90
Py_ssize_t suggestion_distance = PyUnicode_GetLength (name );
145
- PyObject * suggestion = NULL ;
91
+ PyObject * suggestion = NULL ;
146
92
for (int i = 0 ; i < dir_size ; ++ i ) {
147
93
PyObject * item = PyList_GET_ITEM (dir , i );
148
94
const char * name_str = PyUnicode_AsUTF8 (name );
149
95
if (name_str == NULL ) {
150
96
PyErr_Clear ();
151
97
continue ;
152
98
}
153
- Py_ssize_t current_distance = levenshtein_distance (PyUnicode_AsUTF8 (name ),
154
- PyUnicode_AsUTF8 (item ));
155
- if (current_distance > MAX_GETATTR_PREDICT_DIST ){
99
+ Py_ssize_t current_distance = levenshtein_distance (PyUnicode_AsUTF8 (name ), PyUnicode_AsUTF8 (item ));
100
+ if (current_distance == 0 || current_distance > MAX_DISTANCE ) {
156
101
continue ;
157
102
}
158
103
if (!suggestion || current_distance < suggestion_distance ) {
@@ -167,34 +112,33 @@ calculate_suggestions(PyObject* dir,
167
112
return suggestion ;
168
113
}
169
114
170
- static PyObject *
171
- offer_suggestions_for_attribute_error (PyAttributeErrorObject * exc ) {
172
- PyObject * name = exc -> name ; // borrowed reference
173
- PyObject * obj = exc -> obj ; // borrowed reference
115
+ static PyObject *
116
+ offer_suggestions_for_attribute_error (PyAttributeErrorObject * exc ) {
117
+ PyObject * name = exc -> name ; // borrowed reference
118
+ PyObject * obj = exc -> obj ; // borrowed reference
174
119
175
120
// Abort if we don't have an attribute name or we have an invalid one
176
121
if (name == NULL || obj == NULL || !PyUnicode_CheckExact (name )) {
177
122
return NULL ;
178
123
}
179
124
180
- PyObject * dir = PyObject_Dir (obj );
125
+ PyObject * dir = PyObject_Dir (obj );
181
126
if (dir == NULL ) {
182
127
return NULL ;
183
128
}
184
129
185
- PyObject * suggestions = calculate_suggestions (dir , name );
130
+ PyObject * suggestions = calculate_suggestions (dir , name );
186
131
Py_DECREF (dir );
187
132
return suggestions ;
188
133
}
189
134
190
-
191
135
// Offer suggestions for a given exception. Returns a python string object containing the
192
136
// suggestions. This function does not raise exceptions and returns NULL if no suggestion was found.
193
- PyObject * _Py_Offer_Suggestions (PyObject * exception ) {
194
- PyObject * result = NULL ;
137
+ PyObject * _Py_Offer_Suggestions (PyObject * exception ) {
138
+ PyObject * result = NULL ;
195
139
assert (!PyErr_Occurred ()); // Check that we are not going to clean any existing exception
196
140
if (PyErr_GivenExceptionMatches (exception , PyExc_AttributeError )) {
197
- result = offer_suggestions_for_attribute_error ((PyAttributeErrorObject * ) exception );
141
+ result = offer_suggestions_for_attribute_error ((PyAttributeErrorObject * ) exception );
198
142
}
199
143
assert (!PyErr_Occurred ());
200
144
return result ;
0 commit comments