/home/mdboom/Work/builds/cpython/Python/suggestions.c
Line | Count | Source (jump to first uncovered line) |
1 | #include "Python.h" |
2 | #include "pycore_frame.h" |
3 | |
4 | #include "pycore_pyerrors.h" |
5 | #include "pycore_code.h" // _PyCode_GetVarnames() |
6 | |
7 | #define MAX_CANDIDATE_ITEMS 750 |
8 | #define MAX_STRING_SIZE 40 |
9 | |
10 | #define MOVE_COST 2 |
11 | #define CASE_COST 1 |
12 | |
13 | #define LEAST_FIVE_BITS(n) ((n) & 31) |
14 | |
15 | static inline int |
16 | substitution_cost(char a, char b) |
17 | { |
18 | if (LEAST_FIVE_BITS(a) != LEAST_FIVE_BITS(b)) { Branch (18:9): [True: 23.3k, False: 1.46k]
|
19 | // Not the same, not a case flip. |
20 | return MOVE_COST; |
21 | } |
22 | if (a == b) { Branch (22:9): [True: 874, False: 589]
|
23 | return 0; |
24 | } |
25 | if ('A' <= a && a <= 'Z') { Branch (25:9): [True: 589, False: 0]
Branch (25:21): [True: 352, False: 237]
|
26 | a += ('a' - 'A'); |
27 | } |
28 | if ('A' <= b && b <= 'Z'587 ) { Branch (28:9): [True: 587, False: 2]
Branch (28:21): [True: 235, False: 352]
|
29 | b += ('a' - 'A'); |
30 | } |
31 | if (a == b) { Branch (31:9): [True: 587, False: 2]
|
32 | return CASE_COST; |
33 | } |
34 | return MOVE_COST; |
35 | } |
36 | |
37 | /* Calculate the Levenshtein distance between string1 and string2 */ |
38 | static Py_ssize_t |
39 | levenshtein_distance(const char *a, size_t a_size, |
40 | const char *b, size_t b_size, |
41 | size_t max_cost) |
42 | { |
43 | static size_t buffer[MAX_STRING_SIZE]; |
44 | |
45 | // Both strings are the same (by identity) |
46 | if (a == b) { Branch (46:9): [True: 2, False: 3.45k]
|
47 | return 0; |
48 | } |
49 | |
50 | // Trim away common affixes. |
51 | while (3.45k a_size && b_size3.84k && a[0] == b[0]3.83k ) { Branch (51:12): [True: 3.84k, False: 38]
Branch (51:22): [True: 3.83k, False: 4]
Branch (51:32): [True: 425, False: 3.41k]
|
52 | a++; a_size--; |
53 | b++; b_size--; |
54 | } |
55 | while (a_size && b_size3.58k && a[a_size-1] == b[b_size-1]3.57k ) { Branch (55:12): [True: 3.58k, False: 41]
Branch (55:22): [True: 3.57k, False: 14]
Branch (55:32): [True: 175, False: 3.39k]
|
56 | a_size--; |
57 | b_size--; |
58 | } |
59 | if (a_size == 0 || b_size == 03.41k ) { Branch (59:9): [True: 41, False: 3.41k]
Branch (59:24): [True: 14, False: 3.39k]
|
60 | return (a_size + b_size) * MOVE_COST; |
61 | } |
62 | if (a_size > MAX_STRING_SIZE || b_size > 3.18k MAX_STRING_SIZE3.18k ) { Branch (62:9): [True: 211, False: 3.18k]
Branch (62:37): [True: 0, False: 3.18k]
|
63 | return max_cost + 1; |
64 | } |
65 | |
66 | // Prefer shorter buffer |
67 | if (b_size < a_size) { Branch (67:9): [True: 684, False: 2.50k]
|
68 | const char *t = a; a = b; b = t; |
69 | size_t t_size = a_size; a_size = b_size; b_size = t_size; |
70 | } |
71 | |
72 | // quick fail when a match is impossible. |
73 | if ((b_size - a_size) * MOVE_COST > max_cost) { Branch (73:9): [True: 2.64k, False: 544]
|
74 | return max_cost + 1; |
75 | } |
76 | |
77 | // Instead of producing the whole traditional len(a)-by-len(b) |
78 | // matrix, we can update just one row in place. |
79 | // Initialize the buffer row |
80 | size_t tmp = MOVE_COST; |
81 | for (size_t i = 0; i < a_size; i++3.94k ) { Branch (81:24): [True: 3.94k, False: 544]
|
82 | // cost from b[:0] to a[:i+1] |
83 | buffer[i] = tmp; |
84 | tmp += MOVE_COST; |
85 | } |
86 | |
87 | size_t result = 0; |
88 | for (size_t b_index = 0; b_index < b_size; b_index++1.69k ) { Branch (88:30): [True: 2.18k, False: 46]
|
89 | char code = b[b_index]; |
90 | // cost(b[:b_index], a[:0]) == b_index * MOVE_COST |
91 | size_t distance = result = b_index * MOVE_COST; |
92 | size_t minimum = SIZE_MAX; |
93 | for (size_t index = 0; index < a_size; index++24.8k ) { Branch (93:32): [True: 24.8k, False: 2.18k]
|
94 | |
95 | // cost(b[:b_index+1], a[:index+1]) = min( |
96 | // // 1) substitute |
97 | // cost(b[:b_index], a[:index]) |
98 | // + substitution_cost(b[b_index], a[index]), |
99 | // // 2) delete from b |
100 | // cost(b[:b_index], a[:index+1]) + MOVE_COST, |
101 | // // 3) delete from a |
102 | // cost(b[:b_index+1], a[index]) + MOVE_COST |
103 | // ) |
104 | |
105 | // 1) Previous distance in this row is cost(b[:b_index], a[:index]) |
106 | size_t substitute = distance + substitution_cost(code, a[index]); |
107 | // 2) cost(b[:b_index], a[:index+1]) from previous row |
108 | distance = buffer[index]; |
109 | // 3) existing result is cost(b[:b_index+1], a[index]) |
110 | |
111 | size_t insert_delete = Py_MIN(result, distance) + MOVE_COST; |
112 | result = Py_MIN(insert_delete, substitute); |
113 | |
114 | // cost(b[:b_index+1], a[:index+1]) |
115 | buffer[index] = result; |
116 | if (result < minimum) { Branch (116:17): [True: 3.45k, False: 21.3k]
|
117 | minimum = result; |
118 | } |
119 | } |
120 | if (minimum > max_cost) { Branch (120:13): [True: 498, False: 1.69k]
|
121 | // Everything in this row is too big, so bail early. |
122 | return max_cost + 1; |
123 | } |
124 | } |
125 | return result; |
126 | } |
127 | |
128 | static inline PyObject * |
129 | calculate_suggestions(PyObject *dir, |
130 | PyObject *name) |
131 | { |
132 | assert(!PyErr_Occurred()); |
133 | assert(PyList_CheckExact(dir)); |
134 | |
135 | Py_ssize_t dir_size = PyList_GET_SIZE(dir); |
136 | if (dir_size >= MAX_CANDIDATE_ITEMS) { Branch (136:9): [True: 2, False: 67]
|
137 | return NULL; |
138 | } |
139 | |
140 | Py_ssize_t suggestion_distance = PY_SSIZE_T_MAX; |
141 | PyObject *suggestion = NULL; |
142 | Py_ssize_t name_size; |
143 | const char *name_str = PyUnicode_AsUTF8AndSize(name, &name_size); |
144 | if (name_str == NULL) { Branch (144:9): [True: 0, False: 67]
|
145 | return NULL; |
146 | } |
147 | |
148 | for (int i = 0; 67 i < dir_size; ++i3.36k ) { Branch (148:21): [True: 3.36k, False: 67]
|
149 | PyObject *item = PyList_GET_ITEM(dir, i); |
150 | Py_ssize_t item_size; |
151 | const char *item_str = PyUnicode_AsUTF8AndSize(item, &item_size); |
152 | if (item_str == NULL) { Branch (152:13): [True: 0, False: 3.36k]
|
153 | return NULL; |
154 | } |
155 | if (PyUnicode_CompareWithASCIIString(name, item_str) == 0) { Branch (155:13): [True: 1, False: 3.36k]
|
156 | continue; |
157 | } |
158 | // No more than 1/3 of the involved characters should need changed. |
159 | Py_ssize_t max_distance = (name_size + item_size + 3) * MOVE_COST / 6; |
160 | // Don't take matches we've already beaten. |
161 | max_distance = Py_MIN(max_distance, suggestion_distance - 1); |
162 | Py_ssize_t current_distance = |
163 | levenshtein_distance(name_str, name_size, |
164 | item_str, item_size, max_distance); |
165 | if (current_distance > max_distance) { Branch (165:13): [True: 3.33k, False: 27]
|
166 | continue; |
167 | } |
168 | if (!suggestion || current_distance < suggestion_distance0 ) { Branch (168:13): [True: 27, False: 0]
Branch (168:28): [True: 0, False: 0]
|
169 | suggestion = item; |
170 | suggestion_distance = current_distance; |
171 | } |
172 | } |
173 | Py_XINCREF(suggestion); |
174 | return suggestion; |
175 | } |
176 | |
177 | static PyObject * |
178 | offer_suggestions_for_attribute_error(PyAttributeErrorObject *exc) |
179 | { |
180 | PyObject *name = exc->name; // borrowed reference |
181 | PyObject *obj = exc->obj; // borrowed reference |
182 | |
183 | // Abort if we don't have an attribute name or we have an invalid one |
184 | if (name == NULL || obj == NULL || !PyUnicode_CheckExact(name)) { Branch (184:9): [True: 0, False: 24]
Branch (184:25): [True: 0, False: 24]
Branch (184:40): [True: 1, False: 23]
|
185 | return NULL; |
186 | } |
187 | |
188 | PyObject *dir = PyObject_Dir(obj); |
189 | if (dir == NULL) { Branch (189:9): [True: 1, False: 22]
|
190 | return NULL; |
191 | } |
192 | |
193 | PyObject *suggestions = calculate_suggestions(dir, name); |
194 | Py_DECREF(dir); |
195 | return suggestions; |
196 | } |
197 | |
198 | |
199 | static PyObject * |
200 | offer_suggestions_for_name_error(PyNameErrorObject *exc) |
201 | { |
202 | PyObject *name = exc->name; // borrowed reference |
203 | PyTracebackObject *traceback = (PyTracebackObject *) exc->traceback; // borrowed reference |
204 | // Abort if we don't have a variable name or we have an invalid one |
205 | // or if we don't have a traceback to work with |
206 | if (name == NULL || !22 PyUnicode_CheckExact(name) || Branch (206:9): [True: 2, False: 22]
Branch (206:25): [True: 0, False: 22]
|
207 | traceback == NULL22 || !22 Py_IS_TYPE22 (traceback, &PyTraceBack_Type) Branch (207:9): [True: 0, False: 22]
Branch (207:30): [True: 2, False: 20]
|
208 | ) { |
209 | return NULL; |
210 | } |
211 | |
212 | // Move to the traceback of the exception |
213 | while (20 1) { Branch (213:12): [Folded - Ignored]
|
214 | PyTracebackObject *next = traceback->tb_next; |
215 | if (next == NULL || !16 Py_IS_TYPE16 (next, &PyTraceBack_Type)) { Branch (215:13): [True: 20, False: 16]
Branch (215:29): [True: 0, False: 16]
|
216 | break; |
217 | } |
218 | else { |
219 | traceback = next; |
220 | } |
221 | } |
222 | |
223 | PyFrameObject *frame = traceback->tb_frame; |
224 | assert(frame != NULL); |
225 | PyCodeObject *code = PyFrame_GetCode(frame); |
226 | assert(code != NULL && code->co_localsplusnames != NULL); |
227 | PyObject *varnames = _PyCode_GetVarnames(code); |
228 | if (varnames == NULL) { Branch (228:9): [True: 0, False: 20]
|
229 | return NULL; |
230 | } |
231 | PyObject *dir = PySequence_List(varnames); |
232 | Py_DECREF(varnames); |
233 | Py_DECREF(code); |
234 | if (dir == NULL) { Branch (234:9): [True: 0, False: 20]
|
235 | return NULL; |
236 | } |
237 | |
238 | PyObject *suggestions = calculate_suggestions(dir, name); |
239 | Py_DECREF(dir); |
240 | if (suggestions != NULL) { Branch (240:9): [True: 6, False: 14]
|
241 | return suggestions; |
242 | } |
243 | |
244 | dir = PySequence_List(frame->f_frame->f_globals); |
245 | if (dir == NULL) { Branch (245:9): [True: 0, False: 14]
|
246 | return NULL; |
247 | } |
248 | suggestions = calculate_suggestions(dir, name); |
249 | Py_DECREF(dir); |
250 | if (suggestions != NULL) { Branch (250:9): [True: 1, False: 13]
|
251 | return suggestions; |
252 | } |
253 | |
254 | dir = PySequence_List(frame->f_frame->f_builtins); |
255 | if (dir == NULL) { Branch (255:9): [True: 0, False: 13]
|
256 | return NULL; |
257 | } |
258 | suggestions = calculate_suggestions(dir, name); |
259 | Py_DECREF(dir); |
260 | |
261 | return suggestions; |
262 | } |
263 | |
264 | // Offer suggestions for a given exception. Returns a python string object containing the |
265 | // suggestions. This function returns NULL if no suggestion was found or if an exception happened, |
266 | // users must call PyErr_Occurred() to disambiguate. |
267 | PyObject * |
268 | _Py_Offer_Suggestions(PyObject *exception) |
269 | { |
270 | PyObject *result = NULL; |
271 | assert(!PyErr_Occurred()); |
272 | if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_AttributeError)) { |
273 | result = offer_suggestions_for_attribute_error((PyAttributeErrorObject *) exception); |
274 | } else if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_NameError)) { |
275 | result = offer_suggestions_for_name_error((PyNameErrorObject *) exception); |
276 | } |
277 | return result; |
278 | } |
279 | |
280 | Py_ssize_t |
281 | _Py_UTF8_Edit_Cost(PyObject *a, PyObject *b, Py_ssize_t max_cost) |
282 | { |
283 | assert(PyUnicode_Check(a) && PyUnicode_Check(b)); |
284 | Py_ssize_t size_a, size_b; |
285 | const char *utf8_a = PyUnicode_AsUTF8AndSize(a, &size_a); |
286 | if (utf8_a == NULL) { Branch (286:9): [True: 0, False: 90]
|
287 | return -1; |
288 | } |
289 | const char *utf8_b = PyUnicode_AsUTF8AndSize(b, &size_b); |
290 | if (utf8_b == NULL) { Branch (290:9): [True: 0, False: 90]
|
291 | return -1; |
292 | } |
293 | if (max_cost == -1) { Branch (293:9): [True: 19, False: 71]
|
294 | max_cost = MOVE_COST * Py_MAX(size_a, size_b); |
295 | } |
296 | return levenshtein_distance(utf8_a, size_a, utf8_b, size_b, max_cost); |
297 | } |
298 | |