Coverage Report

Created: 2022-07-08 09:39

/home/mdboom/Work/builds/cpython/Python/suggestions.c
Line
Count
Source (jump to first uncovered line)
1
#include "Python.h"
2
#include "pycore_frame.h"
3
4
#include "pycore_pyerrors.h"
5
#include "pycore_code.h"        // _PyCode_GetVarnames()
6
7
#define MAX_CANDIDATE_ITEMS 750
8
#define MAX_STRING_SIZE 40
9
10
#define MOVE_COST 2
11
#define CASE_COST 1
12
13
#define LEAST_FIVE_BITS(n) ((n) & 31)
14
15
static inline int
16
substitution_cost(char a, char b)
17
{
18
    if (LEAST_FIVE_BITS(a) != LEAST_FIVE_BITS(b)) {
  Branch (18:9): [True: 23.3k, False: 1.46k]
19
        // Not the same, not a case flip.
20
        return MOVE_COST;
21
    }
22
    if (a == b) {
  Branch (22:9): [True: 874, False: 589]
23
        return 0;
24
    }
25
    if ('A' <= a && a <= 'Z') {
  Branch (25:9): [True: 589, False: 0]
  Branch (25:21): [True: 352, False: 237]
26
        a += ('a' - 'A');
27
    }
28
    if ('A' <= b && 
b <= 'Z'587
) {
  Branch (28:9): [True: 587, False: 2]
  Branch (28:21): [True: 235, False: 352]
29
        b += ('a' - 'A');
30
    }
31
    if (a == b) {
  Branch (31:9): [True: 587, False: 2]
32
        return CASE_COST;
33
    }
34
    return MOVE_COST;
35
}
36
37
/* Calculate the Levenshtein distance between string1 and string2 */
38
static Py_ssize_t
39
levenshtein_distance(const char *a, size_t a_size,
40
                     const char *b, size_t b_size,
41
                     size_t max_cost)
42
{
43
    static size_t buffer[MAX_STRING_SIZE];
44
45
    // Both strings are the same (by identity)
46
    if (a == b) {
  Branch (46:9): [True: 2, False: 3.45k]
47
        return 0;
48
    }
49
50
    // Trim away common affixes.
51
    
while (3.45k
a_size &&
b_size3.84k
&&
a[0] == b[0]3.83k
) {
  Branch (51:12): [True: 3.84k, False: 38]
  Branch (51:22): [True: 3.83k, False: 4]
  Branch (51:32): [True: 425, False: 3.41k]
52
        a++; a_size--;
53
        b++; b_size--;
54
    }
55
    while (a_size && 
b_size3.58k
&&
a[a_size-1] == b[b_size-1]3.57k
) {
  Branch (55:12): [True: 3.58k, False: 41]
  Branch (55:22): [True: 3.57k, False: 14]
  Branch (55:32): [True: 175, False: 3.39k]
56
        a_size--;
57
        b_size--;
58
    }
59
    if (a_size == 0 || 
b_size == 03.41k
) {
  Branch (59:9): [True: 41, False: 3.41k]
  Branch (59:24): [True: 14, False: 3.39k]
60
        return (a_size + b_size) * MOVE_COST;
61
    }
62
    if (a_size > MAX_STRING_SIZE || 
b_size > 3.18k
MAX_STRING_SIZE3.18k
) {
  Branch (62:9): [True: 211, False: 3.18k]
  Branch (62:37): [True: 0, False: 3.18k]
63
        return max_cost + 1;
64
    }
65
66
    // Prefer shorter buffer
67
    if (b_size < a_size) {
  Branch (67:9): [True: 684, False: 2.50k]
68
        const char *t = a; a = b; b = t;
69
        size_t t_size = a_size; a_size = b_size; b_size = t_size;
70
    }
71
72
    // quick fail when a match is impossible.
73
    if ((b_size - a_size) * MOVE_COST > max_cost) {
  Branch (73:9): [True: 2.64k, False: 544]
74
        return max_cost + 1;
75
    }
76
77
    // Instead of producing the whole traditional len(a)-by-len(b)
78
    // matrix, we can update just one row in place.
79
    // Initialize the buffer row
80
    size_t tmp = MOVE_COST;
81
    for (size_t i = 0; i < a_size; 
i++3.94k
) {
  Branch (81:24): [True: 3.94k, False: 544]
82
        // cost from b[:0] to a[:i+1]
83
        buffer[i] = tmp;
84
        tmp += MOVE_COST;
85
    }
86
87
    size_t result = 0;
88
    for (size_t b_index = 0; b_index < b_size; 
b_index++1.69k
) {
  Branch (88:30): [True: 2.18k, False: 46]
89
        char code = b[b_index];
90
        // cost(b[:b_index], a[:0]) == b_index * MOVE_COST
91
        size_t distance = result = b_index * MOVE_COST;
92
        size_t minimum = SIZE_MAX;
93
        for (size_t index = 0; index < a_size; 
index++24.8k
) {
  Branch (93:32): [True: 24.8k, False: 2.18k]
94
95
            // cost(b[:b_index+1], a[:index+1]) = min(
96
            //     // 1) substitute
97
            //     cost(b[:b_index], a[:index])
98
            //         + substitution_cost(b[b_index], a[index]),
99
            //     // 2) delete from b
100
            //     cost(b[:b_index], a[:index+1]) + MOVE_COST,
101
            //     // 3) delete from a
102
            //     cost(b[:b_index+1], a[index]) + MOVE_COST
103
            // )
104
105
            // 1) Previous distance in this row is cost(b[:b_index], a[:index])
106
            size_t substitute = distance + substitution_cost(code, a[index]);
107
            // 2) cost(b[:b_index], a[:index+1]) from previous row
108
            distance = buffer[index];
109
            // 3) existing result is cost(b[:b_index+1], a[index])
110
111
            size_t insert_delete = Py_MIN(result, distance) + MOVE_COST;
112
            result = Py_MIN(insert_delete, substitute);
113
114
            // cost(b[:b_index+1], a[:index+1])
115
            buffer[index] = result;
116
            if (result < minimum) {
  Branch (116:17): [True: 3.45k, False: 21.3k]
117
                minimum = result;
118
            }
119
        }
120
        if (minimum > max_cost) {
  Branch (120:13): [True: 498, False: 1.69k]
121
            // Everything in this row is too big, so bail early.
122
            return max_cost + 1;
123
        }
124
    }
125
    return result;
126
}
127
128
static inline PyObject *
129
calculate_suggestions(PyObject *dir,
130
                      PyObject *name)
131
{
132
    assert(!PyErr_Occurred());
133
    assert(PyList_CheckExact(dir));
134
135
    Py_ssize_t dir_size = PyList_GET_SIZE(dir);
136
    if (dir_size >= MAX_CANDIDATE_ITEMS) {
  Branch (136:9): [True: 2, False: 67]
137
        return NULL;
138
    }
139
140
    Py_ssize_t suggestion_distance = PY_SSIZE_T_MAX;
141
    PyObject *suggestion = NULL;
142
    Py_ssize_t name_size;
143
    const char *name_str = PyUnicode_AsUTF8AndSize(name, &name_size);
144
    if (name_str == NULL) {
  Branch (144:9): [True: 0, False: 67]
145
        return NULL;
146
    }
147
148
    
for (int i = 0; 67
i < dir_size;
++i3.36k
) {
  Branch (148:21): [True: 3.36k, False: 67]
149
        PyObject *item = PyList_GET_ITEM(dir, i);
150
        Py_ssize_t item_size;
151
        const char *item_str = PyUnicode_AsUTF8AndSize(item, &item_size);
152
        if (item_str == NULL) {
  Branch (152:13): [True: 0, False: 3.36k]
153
            return NULL;
154
        }
155
        if (PyUnicode_CompareWithASCIIString(name, item_str) == 0) {
  Branch (155:13): [True: 1, False: 3.36k]
156
            continue;
157
        }
158
        // No more than 1/3 of the involved characters should need changed.
159
        Py_ssize_t max_distance = (name_size + item_size + 3) * MOVE_COST / 6;
160
        // Don't take matches we've already beaten.
161
        max_distance = Py_MIN(max_distance, suggestion_distance - 1);
162
        Py_ssize_t current_distance =
163
            levenshtein_distance(name_str, name_size,
164
                                 item_str, item_size, max_distance);
165
        if (current_distance > max_distance) {
  Branch (165:13): [True: 3.33k, False: 27]
166
            continue;
167
        }
168
        if (!suggestion || 
current_distance < suggestion_distance0
) {
  Branch (168:13): [True: 27, False: 0]
  Branch (168:28): [True: 0, False: 0]
169
            suggestion = item;
170
            suggestion_distance = current_distance;
171
        }
172
    }
173
    Py_XINCREF(suggestion);
174
    return suggestion;
175
}
176
177
static PyObject *
178
offer_suggestions_for_attribute_error(PyAttributeErrorObject *exc)
179
{
180
    PyObject *name = exc->name; // borrowed reference
181
    PyObject *obj = exc->obj; // borrowed reference
182
183
    // Abort if we don't have an attribute name or we have an invalid one
184
    if (name == NULL || obj == NULL || !PyUnicode_CheckExact(name)) {
  Branch (184:9): [True: 0, False: 24]
  Branch (184:25): [True: 0, False: 24]
  Branch (184:40): [True: 1, False: 23]
185
        return NULL;
186
    }
187
188
    PyObject *dir = PyObject_Dir(obj);
189
    if (dir == NULL) {
  Branch (189:9): [True: 1, False: 22]
190
        return NULL;
191
    }
192
193
    PyObject *suggestions = calculate_suggestions(dir, name);
194
    Py_DECREF(dir);
195
    return suggestions;
196
}
197
198
199
static PyObject *
200
offer_suggestions_for_name_error(PyNameErrorObject *exc)
201
{
202
    PyObject *name = exc->name; // borrowed reference
203
    PyTracebackObject *traceback = (PyTracebackObject *) exc->traceback; // borrowed reference
204
    // Abort if we don't have a variable name or we have an invalid one
205
    // or if we don't have a traceback to work with
206
    if (name == NULL || 
!22
PyUnicode_CheckExact(name) ||
  Branch (206:9): [True: 2, False: 22]
  Branch (206:25): [True: 0, False: 22]
207
        
traceback == NULL22
||
!22
Py_IS_TYPE22
(traceback, &PyTraceBack_Type)
  Branch (207:9): [True: 0, False: 22]
  Branch (207:30): [True: 2, False: 20]
208
    ) {
209
        return NULL;
210
    }
211
212
    // Move to the traceback of the exception
213
    
while (20
1) {
  Branch (213:12): [Folded - Ignored]
214
        PyTracebackObject *next = traceback->tb_next;
215
        if (next == NULL || 
!16
Py_IS_TYPE16
(next, &PyTraceBack_Type)) {
  Branch (215:13): [True: 20, False: 16]
  Branch (215:29): [True: 0, False: 16]
216
            break;
217
        }
218
        else {
219
            traceback = next;
220
        }
221
    }
222
223
    PyFrameObject *frame = traceback->tb_frame;
224
    assert(frame != NULL);
225
    PyCodeObject *code = PyFrame_GetCode(frame);
226
    assert(code != NULL && code->co_localsplusnames != NULL);
227
    PyObject *varnames = _PyCode_GetVarnames(code);
228
    if (varnames == NULL) {
  Branch (228:9): [True: 0, False: 20]
229
        return NULL;
230
    }
231
    PyObject *dir = PySequence_List(varnames);
232
    Py_DECREF(varnames);
233
    Py_DECREF(code);
234
    if (dir == NULL) {
  Branch (234:9): [True: 0, False: 20]
235
        return NULL;
236
    }
237
238
    PyObject *suggestions = calculate_suggestions(dir, name);
239
    Py_DECREF(dir);
240
    if (suggestions != NULL) {
  Branch (240:9): [True: 6, False: 14]
241
        return suggestions;
242
    }
243
244
    dir = PySequence_List(frame->f_frame->f_globals);
245
    if (dir == NULL) {
  Branch (245:9): [True: 0, False: 14]
246
        return NULL;
247
    }
248
    suggestions = calculate_suggestions(dir, name);
249
    Py_DECREF(dir);
250
    if (suggestions != NULL) {
  Branch (250:9): [True: 1, False: 13]
251
        return suggestions;
252
    }
253
254
    dir = PySequence_List(frame->f_frame->f_builtins);
255
    if (dir == NULL) {
  Branch (255:9): [True: 0, False: 13]
256
        return NULL;
257
    }
258
    suggestions = calculate_suggestions(dir, name);
259
    Py_DECREF(dir);
260
261
    return suggestions;
262
}
263
264
// Offer suggestions for a given exception. Returns a python string object containing the
265
// suggestions. This function returns NULL if no suggestion was found or if an exception happened,
266
// users must call PyErr_Occurred() to disambiguate.
267
PyObject *
268
_Py_Offer_Suggestions(PyObject *exception)
269
{
270
    PyObject *result = NULL;
271
    assert(!PyErr_Occurred());
272
    if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_AttributeError)) {
273
        result = offer_suggestions_for_attribute_error((PyAttributeErrorObject *) exception);
274
    } else if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_NameError)) {
275
        result = offer_suggestions_for_name_error((PyNameErrorObject *) exception);
276
    }
277
    return result;
278
}
279
280
Py_ssize_t
281
_Py_UTF8_Edit_Cost(PyObject *a, PyObject *b, Py_ssize_t max_cost)
282
{
283
    assert(PyUnicode_Check(a) && PyUnicode_Check(b));
284
    Py_ssize_t size_a, size_b;
285
    const char *utf8_a = PyUnicode_AsUTF8AndSize(a, &size_a);
286
    if (utf8_a == NULL) {
  Branch (286:9): [True: 0, False: 90]
287
        return -1;
288
    }
289
    const char *utf8_b = PyUnicode_AsUTF8AndSize(b, &size_b);
290
    if (utf8_b == NULL) {
  Branch (290:9): [True: 0, False: 90]
291
        return -1;
292
    }
293
    if (max_cost == -1) {
  Branch (293:9): [True: 19, False: 71]
294
        max_cost = MOVE_COST * Py_MAX(size_a, size_b);
295
    }
296
    return levenshtein_distance(utf8_a, size_a, utf8_b, size_b, max_cost);
297
}
298