LCOV - code coverage report
Current view: top level - Python - suggestions.c (source / functions) Hit Total Coverage
Test: CPython lcov report Lines: 139 147 94.6 %
Date: 2022-07-07 18:19:46 Functions: 7 7 100.0 %

          Line data    Source code
       1             : #include "Python.h"
       2             : #include "pycore_frame.h"
       3             : 
       4             : #include "pycore_pyerrors.h"
       5             : #include "pycore_code.h"        // _PyCode_GetVarnames()
       6             : 
       7             : #define MAX_CANDIDATE_ITEMS 750
       8             : #define MAX_STRING_SIZE 40
       9             : 
      10             : #define MOVE_COST 2
      11             : #define CASE_COST 1
      12             : 
      13             : #define LEAST_FIVE_BITS(n) ((n) & 31)
      14             : 
      15             : static inline int
      16       30181 : substitution_cost(char a, char b)
      17             : {
      18       30181 :     if (LEAST_FIVE_BITS(a) != LEAST_FIVE_BITS(b)) {
      19             :         // Not the same, not a case flip.
      20       28426 :         return MOVE_COST;
      21             :     }
      22        1755 :     if (a == b) {
      23        1131 :         return 0;
      24             :     }
      25         624 :     if ('A' <= a && a <= 'Z') {
      26         367 :         a += ('a' - 'A');
      27             :     }
      28         624 :     if ('A' <= b && b <= 'Z') {
      29         244 :         b += ('a' - 'A');
      30             :     }
      31         624 :     if (a == b) {
      32         610 :         return CASE_COST;
      33             :     }
      34          14 :     return MOVE_COST;
      35             : }
      36             : 
      37             : /* Calculate the Levenshtein distance between string1 and string2 */
      38             : static Py_ssize_t
      39        4173 : levenshtein_distance(const char *a, size_t a_size,
      40             :                      const char *b, size_t b_size,
      41             :                      size_t max_cost)
      42             : {
      43             :     static size_t buffer[MAX_STRING_SIZE];
      44             : 
      45             :     // Both strings are the same (by identity)
      46        4173 :     if (a == b) {
      47           2 :         return 0;
      48             :     }
      49             : 
      50             :     // Trim away common affixes.
      51        4797 :     while (a_size && b_size && a[0] == b[0]) {
      52         626 :         a++; a_size--;
      53         626 :         b++; b_size--;
      54             :     }
      55        4388 :     while (a_size && b_size && a[a_size-1] == b[b_size-1]) {
      56         217 :         a_size--;
      57         217 :         b_size--;
      58             :     }
      59        4171 :     if (a_size == 0 || b_size == 0) {
      60          56 :         return (a_size + b_size) * MOVE_COST;
      61             :     }
      62        4115 :     if (a_size > MAX_STRING_SIZE || b_size > MAX_STRING_SIZE) {
      63         209 :         return max_cost + 1;
      64             :     }
      65             : 
      66             :     // Prefer shorter buffer
      67        3906 :     if (b_size < a_size) {
      68         808 :         const char *t = a; a = b; b = t;
      69         808 :         size_t t_size = a_size; a_size = b_size; b_size = t_size;
      70             :     }
      71             : 
      72             :     // quick fail when a match is impossible.
      73        3906 :     if ((b_size - a_size) * MOVE_COST > max_cost) {
      74        3126 :         return max_cost + 1;
      75             :     }
      76             : 
      77             :     // Instead of producing the whole traditional len(a)-by-len(b)
      78             :     // matrix, we can update just one row in place.
      79             :     // Initialize the buffer row
      80         780 :     size_t tmp = MOVE_COST;
      81        6107 :     for (size_t i = 0; i < a_size; i++) {
      82             :         // cost from b[:0] to a[:i+1]
      83        5327 :         buffer[i] = tmp;
      84        5327 :         tmp += MOVE_COST;
      85             :     }
      86             : 
      87         780 :     size_t result = 0;
      88        3039 :     for (size_t b_index = 0; b_index < b_size; b_index++) {
      89        2991 :         char code = b[b_index];
      90             :         // cost(b[:b_index], a[:0]) == b_index * MOVE_COST
      91        2991 :         size_t distance = result = b_index * MOVE_COST;
      92        2991 :         size_t minimum = SIZE_MAX;
      93       33172 :         for (size_t index = 0; index < a_size; index++) {
      94             : 
      95             :             // cost(b[:b_index+1], a[:index+1]) = min(
      96             :             //     // 1) substitute
      97             :             //     cost(b[:b_index], a[:index])
      98             :             //         + substitution_cost(b[b_index], a[index]),
      99             :             //     // 2) delete from b
     100             :             //     cost(b[:b_index], a[:index+1]) + MOVE_COST,
     101             :             //     // 3) delete from a
     102             :             //     cost(b[:b_index+1], a[index]) + MOVE_COST
     103             :             // )
     104             : 
     105             :             // 1) Previous distance in this row is cost(b[:b_index], a[:index])
     106       30181 :             size_t substitute = distance + substitution_cost(code, a[index]);
     107             :             // 2) cost(b[:b_index], a[:index+1]) from previous row
     108       30181 :             distance = buffer[index];
     109             :             // 3) existing result is cost(b[:b_index+1], a[index])
     110             : 
     111       30181 :             size_t insert_delete = Py_MIN(result, distance) + MOVE_COST;
     112       30181 :             result = Py_MIN(insert_delete, substitute);
     113             : 
     114             :             // cost(b[:b_index+1], a[:index+1])
     115       30181 :             buffer[index] = result;
     116       30181 :             if (result < minimum) {
     117        4415 :                 minimum = result;
     118             :             }
     119             :         }
     120        2991 :         if (minimum > max_cost) {
     121             :             // Everything in this row is too big, so bail early.
     122         732 :             return max_cost + 1;
     123             :         }
     124             :     }
     125          48 :     return result;
     126             : }
     127             : 
     128             : static inline PyObject *
     129          79 : calculate_suggestions(PyObject *dir,
     130             :                       PyObject *name)
     131             : {
     132          79 :     assert(!PyErr_Occurred());
     133          79 :     assert(PyList_CheckExact(dir));
     134             : 
     135          79 :     Py_ssize_t dir_size = PyList_GET_SIZE(dir);
     136          79 :     if (dir_size >= MAX_CANDIDATE_ITEMS) {
     137           2 :         return NULL;
     138             :     }
     139             : 
     140          77 :     Py_ssize_t suggestion_distance = PY_SSIZE_T_MAX;
     141          77 :     PyObject *suggestion = NULL;
     142             :     Py_ssize_t name_size;
     143          77 :     const char *name_str = PyUnicode_AsUTF8AndSize(name, &name_size);
     144          77 :     if (name_str == NULL) {
     145           0 :         return NULL;
     146             :     }
     147             : 
     148        4161 :     for (int i = 0; i < dir_size; ++i) {
     149        4084 :         PyObject *item = PyList_GET_ITEM(dir, i);
     150             :         Py_ssize_t item_size;
     151        4084 :         const char *item_str = PyUnicode_AsUTF8AndSize(item, &item_size);
     152        4084 :         if (item_str == NULL) {
     153           0 :             return NULL;
     154             :         }
     155        4084 :         if (PyUnicode_CompareWithASCIIString(name, item_str) == 0) {
     156        4055 :             continue;
     157             :         }
     158             :         // No more than 1/3 of the involved characters should need changed.
     159        4083 :         Py_ssize_t max_distance = (name_size + item_size + 3) * MOVE_COST / 6;
     160             :         // Don't take matches we've already beaten.
     161        4083 :         max_distance = Py_MIN(max_distance, suggestion_distance - 1);
     162             :         Py_ssize_t current_distance =
     163        4083 :             levenshtein_distance(name_str, name_size,
     164             :                                  item_str, item_size, max_distance);
     165        4083 :         if (current_distance > max_distance) {
     166        4054 :             continue;
     167             :         }
     168          29 :         if (!suggestion || current_distance < suggestion_distance) {
     169          29 :             suggestion = item;
     170          29 :             suggestion_distance = current_distance;
     171             :         }
     172             :     }
     173          77 :     Py_XINCREF(suggestion);
     174          77 :     return suggestion;
     175             : }
     176             : 
     177             : static PyObject *
     178          26 : offer_suggestions_for_attribute_error(PyAttributeErrorObject *exc)
     179             : {
     180          26 :     PyObject *name = exc->name; // borrowed reference
     181          26 :     PyObject *obj = exc->obj; // borrowed reference
     182             : 
     183             :     // Abort if we don't have an attribute name or we have an invalid one
     184          26 :     if (name == NULL || obj == NULL || !PyUnicode_CheckExact(name)) {
     185           2 :         return NULL;
     186             :     }
     187             : 
     188          24 :     PyObject *dir = PyObject_Dir(obj);
     189          24 :     if (dir == NULL) {
     190           1 :         return NULL;
     191             :     }
     192             : 
     193          23 :     PyObject *suggestions = calculate_suggestions(dir, name);
     194          23 :     Py_DECREF(dir);
     195          23 :     return suggestions;
     196             : }
     197             : 
     198             : 
     199             : static PyObject *
     200          28 : offer_suggestions_for_name_error(PyNameErrorObject *exc)
     201             : {
     202          28 :     PyObject *name = exc->name; // borrowed reference
     203          28 :     PyTracebackObject *traceback = (PyTracebackObject *) exc->traceback; // borrowed reference
     204             :     // Abort if we don't have a variable name or we have an invalid one
     205             :     // or if we don't have a traceback to work with
     206          28 :     if (name == NULL || !PyUnicode_CheckExact(name) ||
     207          25 :         traceback == NULL || !Py_IS_TYPE(traceback, &PyTraceBack_Type)
     208             :     ) {
     209           5 :         return NULL;
     210             :     }
     211             : 
     212             :     // Move to the traceback of the exception
     213          17 :     while (1) {
     214          40 :         PyTracebackObject *next = traceback->tb_next;
     215          40 :         if (next == NULL || !Py_IS_TYPE(next, &PyTraceBack_Type)) {
     216             :             break;
     217             :         }
     218             :         else {
     219          17 :             traceback = next;
     220             :         }
     221             :     }
     222             : 
     223          23 :     PyFrameObject *frame = traceback->tb_frame;
     224          23 :     assert(frame != NULL);
     225          23 :     PyCodeObject *code = PyFrame_GetCode(frame);
     226          23 :     assert(code != NULL && code->co_localsplusnames != NULL);
     227          23 :     PyObject *varnames = _PyCode_GetVarnames(code);
     228          23 :     if (varnames == NULL) {
     229           0 :         return NULL;
     230             :     }
     231          23 :     PyObject *dir = PySequence_List(varnames);
     232          23 :     Py_DECREF(varnames);
     233          23 :     Py_DECREF(code);
     234          23 :     if (dir == NULL) {
     235           0 :         return NULL;
     236             :     }
     237             : 
     238          23 :     PyObject *suggestions = calculate_suggestions(dir, name);
     239          23 :     Py_DECREF(dir);
     240          23 :     if (suggestions != NULL) {
     241           6 :         return suggestions;
     242             :     }
     243             : 
     244          17 :     dir = PySequence_List(frame->f_frame->f_globals);
     245          17 :     if (dir == NULL) {
     246           0 :         return NULL;
     247             :     }
     248          17 :     suggestions = calculate_suggestions(dir, name);
     249          17 :     Py_DECREF(dir);
     250          17 :     if (suggestions != NULL) {
     251           1 :         return suggestions;
     252             :     }
     253             : 
     254          16 :     dir = PySequence_List(frame->f_frame->f_builtins);
     255          16 :     if (dir == NULL) {
     256           0 :         return NULL;
     257             :     }
     258          16 :     suggestions = calculate_suggestions(dir, name);
     259          16 :     Py_DECREF(dir);
     260             : 
     261          16 :     return suggestions;
     262             : }
     263             : 
     264             : // Offer suggestions for a given exception. Returns a python string object containing the
     265             : // suggestions. This function returns NULL if no suggestion was found or if an exception happened,
     266             : // users must call PyErr_Occurred() to disambiguate.
     267             : PyObject *
     268         444 : _Py_Offer_Suggestions(PyObject *exception)
     269             : {
     270         444 :     PyObject *result = NULL;
     271         444 :     assert(!PyErr_Occurred());
     272         444 :     if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_AttributeError)) {
     273          26 :         result = offer_suggestions_for_attribute_error((PyAttributeErrorObject *) exception);
     274         418 :     } else if (Py_IS_TYPE(exception, (PyTypeObject*)PyExc_NameError)) {
     275          28 :         result = offer_suggestions_for_name_error((PyNameErrorObject *) exception);
     276             :     }
     277         444 :     return result;
     278             : }
     279             : 
     280             : Py_ssize_t
     281          90 : _Py_UTF8_Edit_Cost(PyObject *a, PyObject *b, Py_ssize_t max_cost)
     282             : {
     283          90 :     assert(PyUnicode_Check(a) && PyUnicode_Check(b));
     284             :     Py_ssize_t size_a, size_b;
     285          90 :     const char *utf8_a = PyUnicode_AsUTF8AndSize(a, &size_a);
     286          90 :     if (utf8_a == NULL) {
     287           0 :         return -1;
     288             :     }
     289          90 :     const char *utf8_b = PyUnicode_AsUTF8AndSize(b, &size_b);
     290          90 :     if (utf8_b == NULL) {
     291           0 :         return -1;
     292             :     }
     293          90 :     if (max_cost == -1) {
     294          19 :         max_cost = MOVE_COST * Py_MAX(size_a, size_b);
     295             :     }
     296          90 :     return levenshtein_distance(utf8_a, size_a, utf8_b, size_b, max_cost);
     297             : }
     298             : 

Generated by: LCOV version 1.14