/home/mdboom/Work/builds/cpython/Parser/pegen_errors.c
Line | Count | Source (jump to first uncovered line) |
1 | #include <Python.h> |
2 | #include <errcode.h> |
3 | |
4 | #include "tokenizer.h" |
5 | #include "pegen.h" |
6 | |
7 | // TOKENIZER ERRORS |
8 | |
9 | void |
10 | _PyPegen_raise_tokenizer_init_error(PyObject *filename) |
11 | { |
12 | if (!(PyErr_ExceptionMatches(PyExc_LookupError) Branch (12:11): [True: 5, False: 21]
|
13 | || PyErr_ExceptionMatches(PyExc_SyntaxError)21 Branch (13:14): [True: 20, False: 1]
|
14 | || PyErr_ExceptionMatches(PyExc_ValueError)1 Branch (14:14): [True: 1, False: 0]
|
15 | || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)0 )) { Branch (15:14): [True: 0, False: 0]
|
16 | return; |
17 | } |
18 | PyObject *errstr = NULL; |
19 | PyObject *tuple = NULL; |
20 | PyObject *type; |
21 | PyObject *value; |
22 | PyObject *tback; |
23 | PyErr_Fetch(&type, &value, &tback); |
24 | errstr = PyObject_Str(value); |
25 | if (!errstr) { Branch (25:9): [True: 0, False: 26]
|
26 | goto error; |
27 | } |
28 | |
29 | PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None); |
30 | if (!tmp) { Branch (30:9): [True: 0, False: 26]
|
31 | goto error; |
32 | } |
33 | |
34 | tuple = PyTuple_Pack(2, errstr, tmp); |
35 | Py_DECREF(tmp); |
36 | if (!value) { Branch (36:9): [True: 0, False: 26]
|
37 | goto error; |
38 | } |
39 | PyErr_SetObject(PyExc_SyntaxError, tuple); |
40 | |
41 | error: |
42 | Py_XDECREF(type); |
43 | Py_XDECREF(value); |
44 | Py_XDECREF(tback); |
45 | Py_XDECREF(errstr); |
46 | Py_XDECREF(tuple); |
47 | } |
48 | |
49 | static inline void |
50 | raise_unclosed_parentheses_error(Parser *p) { |
51 | int error_lineno = p->tok->parenlinenostack[p->tok->level-1]; |
52 | int error_col = p->tok->parencolstack[p->tok->level-1]; |
53 | RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, |
54 | error_lineno, error_col, error_lineno, -1, |
55 | "'%c' was never closed", |
56 | p->tok->parenstack[p->tok->level-1]); |
57 | } |
58 | |
59 | int |
60 | _Pypegen_tokenizer_error(Parser *p) |
61 | { |
62 | if (PyErr_Occurred()) { Branch (62:9): [True: 198, False: 178]
|
63 | return -1; |
64 | } |
65 | |
66 | const char *msg = NULL; |
67 | PyObject* errtype = PyExc_SyntaxError; |
68 | Py_ssize_t col_offset = -1; |
69 | switch (p->tok->done) { |
70 | case E_TOKEN: Branch (70:9): [True: 0, False: 178]
|
71 | msg = "invalid token"; |
72 | break; |
73 | case E_EOF: Branch (73:9): [True: 163, False: 15]
|
74 | if (p->tok->level) { Branch (74:17): [True: 155, False: 8]
|
75 | raise_unclosed_parentheses_error(p); |
76 | } else { |
77 | RAISE_SYNTAX_ERROR("unexpected EOF while parsing"); |
78 | } |
79 | return -1; |
80 | case E_DEDENT: Branch (80:9): [True: 6, False: 172]
|
81 | RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level"); |
82 | return -1; |
83 | case E_INTR: Branch (83:9): [True: 0, False: 178]
|
84 | if (!PyErr_Occurred()) { Branch (84:17): [True: 0, False: 0]
|
85 | PyErr_SetNone(PyExc_KeyboardInterrupt); |
86 | } |
87 | return -1; |
88 | case E_NOMEM: Branch (88:9): [True: 0, False: 178]
|
89 | PyErr_NoMemory(); |
90 | return -1; |
91 | case E_TABSPACE: Branch (91:9): [True: 2, False: 176]
|
92 | errtype = PyExc_TabError; |
93 | msg = "inconsistent use of tabs and spaces in indentation"; |
94 | break; |
95 | case E_TOODEEP: Branch (95:9): [True: 0, False: 178]
|
96 | errtype = PyExc_IndentationError; |
97 | msg = "too many levels of indentation"; |
98 | break; |
99 | case E_LINECONT: { Branch (99:9): [True: 7, False: 171]
|
100 | col_offset = p->tok->cur - p->tok->buf - 1; |
101 | msg = "unexpected character after line continuation character"; |
102 | break; |
103 | } |
104 | default: Branch (104:9): [True: 0, False: 178]
|
105 | msg = "unknown parsing error"; |
106 | } |
107 | |
108 | RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno, |
109 | col_offset >= 0 ? col_offset7 : 02 , Branch (109:32): [True: 7, False: 2]
|
110 | p->tok->lineno, -1, msg); |
111 | return -1; |
112 | } |
113 | |
114 | int |
115 | _Pypegen_raise_decode_error(Parser *p) |
116 | { |
117 | assert(PyErr_Occurred()); |
118 | const char *errtype = NULL; |
119 | if (PyErr_ExceptionMatches(PyExc_UnicodeError)) { Branch (119:9): [True: 40, False: 265]
|
120 | errtype = "unicode error"; |
121 | } |
122 | else if (PyErr_ExceptionMatches(PyExc_ValueError)) { Branch (122:14): [True: 2, False: 263]
|
123 | errtype = "value error"; |
124 | } |
125 | if (errtype) { Branch (125:9): [True: 42, False: 263]
|
126 | PyObject *type; |
127 | PyObject *value; |
128 | PyObject *tback; |
129 | PyObject *errstr; |
130 | PyErr_Fetch(&type, &value, &tback); |
131 | errstr = PyObject_Str(value); |
132 | if (errstr) { Branch (132:13): [True: 42, False: 0]
|
133 | RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr); |
134 | Py_DECREF(errstr); |
135 | } |
136 | else { |
137 | PyErr_Clear(); |
138 | RAISE_SYNTAX_ERROR("(%s) unknown error", errtype); |
139 | } |
140 | Py_XDECREF(type); |
141 | Py_XDECREF(value); |
142 | Py_XDECREF(tback); |
143 | } |
144 | |
145 | return -1; |
146 | } |
147 | |
148 | static int |
149 | _PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) { |
150 | // Tokenize the whole input to see if there are any tokenization |
151 | // errors such as mistmatching parentheses. These will get priority |
152 | // over generic syntax errors only if the line number of the error is |
153 | // before the one that we had for the generic error. |
154 | |
155 | // We don't want to tokenize to the end for interactive input |
156 | if (p->tok->prompt != NULL) { Branch (156:9): [True: 0, False: 954]
|
157 | return 0; |
158 | } |
159 | |
160 | PyObject *type, *value, *traceback; |
161 | PyErr_Fetch(&type, &value, &traceback); |
162 | |
163 | Token *current_token = p->known_err_token != NULL ? p->known_err_token0 : p->tokens[p->fill - 1]; Branch (163:28): [True: 0, False: 954]
|
164 | Py_ssize_t current_err_line = current_token->lineno; |
165 | |
166 | int ret = 0; |
167 | |
168 | for (;;) { |
169 | const char *start; |
170 | const char *end; |
171 | switch (_PyTokenizer_Get(p->tok, &start, &end)) { |
172 | case ERRORTOKEN: Branch (172:13): [True: 31, False: 4.05k]
|
173 | if (p->tok->level != 0) { Branch (173:21): [True: 31, False: 0]
|
174 | int error_lineno = p->tok->parenlinenostack[p->tok->level-1]; |
175 | if (current_err_line > error_lineno) { Branch (175:25): [True: 3, False: 28]
|
176 | raise_unclosed_parentheses_error(p); |
177 | ret = -1; |
178 | goto exit; |
179 | } |
180 | } |
181 | break; |
182 | case ENDMARKER: Branch (182:13): [True: 923, False: 3.16k]
|
183 | break; |
184 | default: Branch (184:13): [True: 3.13k, False: 954]
|
185 | continue; |
186 | } |
187 | break; |
188 | } |
189 | |
190 | |
191 | exit: |
192 | if (PyErr_Occurred()) { Branch (192:9): [True: 3, False: 951]
|
193 | Py_XDECREF(value); |
194 | Py_XDECREF(type); |
195 | Py_XDECREF(traceback); |
196 | } else { |
197 | PyErr_Restore(type, value, traceback); |
198 | } |
199 | return ret; |
200 | } |
201 | |
202 | // PARSER ERRORS |
203 | |
204 | void * |
205 | _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...) |
206 | { |
207 | if (p->fill == 0) { Branch (207:9): [True: 0, False: 634]
|
208 | va_list va; |
209 | va_start(va, errmsg); |
210 | _PyPegen_raise_error_known_location(p, errtype, 0, 0, 0, -1, errmsg, va); |
211 | va_end(va); |
212 | return NULL; |
213 | } |
214 | |
215 | Token *t = p->known_err_token != NULL ? p->known_err_token4 : p->tokens[p->fill - 1]630 ; Branch (215:16): [True: 4, False: 630]
|
216 | Py_ssize_t col_offset; |
217 | Py_ssize_t end_col_offset = -1; |
218 | if (t->col_offset == -1) { Branch (218:9): [True: 538, False: 96]
|
219 | if (p->tok->cur == p->tok->buf) { Branch (219:13): [True: 3, False: 535]
|
220 | col_offset = 0; |
221 | } else { |
222 | const char* start = p->tok->buf ? p->tok->line_start : p->tok->buf0 ; Branch (222:33): [True: 535, False: 0]
|
223 | col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int); |
224 | } |
225 | } else { |
226 | col_offset = t->col_offset + 1; |
227 | } |
228 | |
229 | if (t->end_col_offset != -1) { Branch (229:9): [True: 96, False: 538]
|
230 | end_col_offset = t->end_col_offset + 1; |
231 | } |
232 | |
233 | va_list va; |
234 | va_start(va, errmsg); |
235 | _PyPegen_raise_error_known_location(p, errtype, t->lineno, col_offset, t->end_lineno, end_col_offset, errmsg, va); |
236 | va_end(va); |
237 | |
238 | return NULL; |
239 | } |
240 | |
241 | static PyObject * |
242 | get_error_line_from_tokenizer_buffers(Parser *p, Py_ssize_t lineno) |
243 | { |
244 | /* If the file descriptor is interactive, the source lines of the current |
245 | * (multi-line) statement are stored in p->tok->interactive_src_start. |
246 | * If not, we're parsing from a string, which means that the whole source |
247 | * is stored in p->tok->str. */ |
248 | assert((p->tok->fp == NULL && p->tok->str != NULL) || p->tok->fp == stdin); |
249 | |
250 | char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start0 : p->tok->str; Branch (250:22): [True: 0, False: 25]
|
251 | if (cur_line == NULL) { Branch (251:9): [True: 0, False: 25]
|
252 | assert(p->tok->fp_interactive); |
253 | // We can reach this point if the tokenizer buffers for interactive source have not been |
254 | // initialized because we failed to decode the original source with the given locale. |
255 | return PyUnicode_FromStringAndSize("", 0); |
256 | } |
257 | |
258 | Py_ssize_t relative_lineno = p->starting_lineno ? lineno - p->starting_lineno + 11 : lineno24 ; Branch (258:34): [True: 1, False: 24]
|
259 | const char* buf_end = p->tok->fp_interactive ? p->tok->interactive_src_end0 : p->tok->inp; Branch (259:27): [True: 0, False: 25]
|
260 | |
261 | for (int i = 0; i < relative_lineno - 1; i++16 ) { Branch (261:21): [True: 16, False: 25]
|
262 | char *new_line = strchr(cur_line, '\n'); |
263 | // The assert is here for debug builds but the conditional that |
264 | // follows is there so in release builds we do not crash at the cost |
265 | // to report a potentially wrong line. |
266 | assert(new_line != NULL && new_line + 1 < buf_end); |
267 | if (new_line == NULL || new_line + 1 > buf_end) { Branch (267:13): [True: 0, False: 16]
Branch (267:33): [True: 0, False: 16]
|
268 | break; |
269 | } |
270 | cur_line = new_line + 1; |
271 | } |
272 | |
273 | char *next_newline; |
274 | if ((next_newline = strchr(cur_line, '\n')) == NULL) { // This is the last line Branch (274:9): [True: 2, False: 23]
|
275 | next_newline = cur_line + strlen(cur_line); |
276 | } |
277 | return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace"); |
278 | } |
279 | |
280 | void * |
281 | _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, |
282 | Py_ssize_t lineno, Py_ssize_t col_offset, |
283 | Py_ssize_t end_lineno, Py_ssize_t end_col_offset, |
284 | const char *errmsg, va_list va) |
285 | { |
286 | PyObject *value = NULL; |
287 | PyObject *errstr = NULL; |
288 | PyObject *error_line = NULL; |
289 | PyObject *tmp = NULL; |
290 | p->error_indicator = 1; |
291 | |
292 | if (end_lineno == CURRENT_POS) { Branch (292:9): [True: 11, False: 1.64k]
|
293 | end_lineno = p->tok->lineno; |
294 | } |
295 | if (end_col_offset == CURRENT_POS) { Branch (295:9): [True: 11, False: 1.64k]
|
296 | end_col_offset = p->tok->cur - p->tok->line_start; |
297 | } |
298 | |
299 | if (p->start_rule == Py_fstring_input) { Branch (299:9): [True: 11, False: 1.64k]
|
300 | const char *fstring_msg = "f-string: "; |
301 | Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg); |
302 | |
303 | char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character |
304 | if (!new_errmsg) { Branch (304:13): [True: 0, False: 11]
|
305 | return (void *) PyErr_NoMemory(); |
306 | } |
307 | |
308 | // Copy both strings into new buffer |
309 | memcpy(new_errmsg, fstring_msg, strlen(fstring_msg)); |
310 | memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg)); |
311 | new_errmsg[len] = 0; |
312 | errmsg = new_errmsg; |
313 | } |
314 | errstr = PyUnicode_FromFormatV(errmsg, va); |
315 | if (!errstr) { Branch (315:9): [True: 0, False: 1.65k]
|
316 | goto error; |
317 | } |
318 | |
319 | if (p->tok->fp_interactive && p->tok->interactive_src_start != NULL0 ) { Branch (319:9): [True: 0, False: 1.65k]
Branch (319:35): [True: 0, False: 0]
|
320 | error_line = get_error_line_from_tokenizer_buffers(p, lineno); |
321 | } |
322 | else if (p->start_rule == Py_file_input) { Branch (322:14): [True: 555, False: 1.10k]
|
323 | error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename, |
324 | (int) lineno, p->tok->encoding); |
325 | } |
326 | |
327 | if (!error_line) { Branch (327:9): [True: 1.63k, False: 24]
|
328 | /* PyErr_ProgramTextObject was not called or returned NULL. If it was not called, |
329 | then we need to find the error line from some other source, because |
330 | p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly |
331 | failed or we're parsing from a string or the REPL. There's a third edge case where |
332 | we're actually parsing from a file, which has an E_EOF SyntaxError and in that case |
333 | `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which |
334 | does not physically exist */ |
335 | assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF); |
336 | |
337 | if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf1.61k ) { Branch (337:13): [True: 1.61k, False: 23]
Branch (337:41): [True: 1.60k, False: 2]
|
338 | Py_ssize_t size = p->tok->inp - p->tok->buf; |
339 | error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace"); |
340 | } |
341 | else if (p->tok->fp == NULL || p->tok->fp == stdin0 ) { Branch (341:18): [True: 25, False: 0]
Branch (341:40): [True: 0, False: 0]
|
342 | error_line = get_error_line_from_tokenizer_buffers(p, lineno); |
343 | } |
344 | else { |
345 | error_line = PyUnicode_FromStringAndSize("", 0); |
346 | } |
347 | if (!error_line) { Branch (347:13): [True: 0, False: 1.63k]
|
348 | goto error; |
349 | } |
350 | } |
351 | |
352 | if (p->start_rule == Py_fstring_input) { Branch (352:9): [True: 11, False: 1.64k]
|
353 | col_offset -= p->starting_col_offset; |
354 | end_col_offset -= p->starting_col_offset; |
355 | } |
356 | |
357 | Py_ssize_t col_number = col_offset; |
358 | Py_ssize_t end_col_number = end_col_offset; |
359 | |
360 | if (p->tok->encoding != NULL) { Branch (360:9): [True: 1.61k, False: 41]
|
361 | col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset); |
362 | if (col_number < 0) { Branch (362:13): [True: 0, False: 1.61k]
|
363 | goto error; |
364 | } |
365 | if (end_col_number > 0) { Branch (365:13): [True: 922, False: 695]
|
366 | Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number); |
367 | if (end_col_offset < 0) { Branch (367:17): [True: 0, False: 922]
|
368 | goto error; |
369 | } else { |
370 | end_col_number = end_col_offset; |
371 | } |
372 | } |
373 | } |
374 | tmp = Py_BuildValue("(OiiNii)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number); |
375 | if (!tmp) { Branch (375:9): [True: 0, False: 1.65k]
|
376 | goto error; |
377 | } |
378 | value = PyTuple_Pack(2, errstr, tmp); |
379 | Py_DECREF(tmp); |
380 | if (!value) { Branch (380:9): [True: 0, False: 1.65k]
|
381 | goto error; |
382 | } |
383 | PyErr_SetObject(errtype, value); |
384 | |
385 | Py_DECREF(errstr); |
386 | Py_DECREF(value); |
387 | if (p->start_rule == Py_fstring_input) { Branch (387:9): [True: 11, False: 1.64k]
|
388 | PyMem_Free((void *)errmsg); |
389 | } |
390 | return NULL; |
391 | |
392 | error: |
393 | Py_XDECREF(errstr); |
394 | Py_XDECREF(error_line); |
395 | if (p->start_rule == Py_fstring_input) { Branch (395:9): [True: 0, False: 0]
|
396 | PyMem_Free((void *)errmsg); |
397 | } |
398 | return NULL; |
399 | } |
400 | |
// Decide which syntax error to report after the parser has failed:
// an already-set exception, an initialization error, an unexpected EOF,
// a tokenizer indentation error, or a generic "invalid syntax".
void
_Pypegen_set_syntax_error(Parser* p, Token* last_token) {
    // Existing syntax error
    if (PyErr_Occurred()) {
        // Prioritize tokenizer errors to custom syntax errors raised
        // on the second phase only if the errors come from the parser.
        int is_tok_ok = (p->tok->done == E_DONE || p->tok->done == E_OK);
        if (is_tok_ok && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
            _PyPegen_tokenize_full_source_to_check_for_errors(p);
        }
        // Propagate the existing syntax error.
        return;
    }
    // Initialization error
    if (p->fill == 0) {
        RAISE_SYNTAX_ERROR("error at start before reading any input");
    }
    // Parser encountered EOF (End of File) unexpectedly
    if (last_token->type == ERRORTOKEN && p->tok->done == E_EOF) {
        if (p->tok->level) {
            raise_unclosed_parentheses_error(p);
        } else {
            RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
        }
        return;
    }
    // Indentation error in the tokenizer
    if (last_token->type == INDENT || last_token->type == DEDENT) {
        RAISE_INDENTATION_ERROR(last_token->type == INDENT ? "unexpected indent" : "unexpected unindent");
        return;
    }
    // Unknown error (generic case)

    // Use the last token we found on the first pass to avoid reporting
    // incorrect locations for generic syntax errors just because we reached
    // further away when trying to find specific syntax errors in the second
    // pass.
    RAISE_SYNTAX_ERROR_KNOWN_LOCATION(last_token, "invalid syntax");
    // _PyPegen_tokenize_full_source_to_check_for_errors will override the existing
    // generic SyntaxError we just raised if errors are found.
    _PyPegen_tokenize_full_source_to_check_for_errors(p);
}