Coverage Report

Created: 2022-07-08 09:39

/home/mdboom/Work/builds/cpython/Parser/string_parser.c
Line
Count
Source (jump to first uncovered line)
1
#include <stdbool.h>
2
3
#include <Python.h>
4
5
#include "tokenizer.h"
6
#include "pegen.h"
7
#include "string_parser.h"
8
9
//// STRING HANDLING FUNCTIONS ////
10
11
static int
12
warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
13
{
14
    unsigned char c = *first_invalid_escape;
15
    int octal = ('4' <= c && 
c <= '7'645
);
  Branch (15:18): [True: 645, False: 86]
  Branch (15:30): [True: 516, False: 129]
16
    PyObject *msg =
17
        octal
  Branch (17:9): [True: 516, False: 215]
18
        ? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'",
19
                               first_invalid_escape)
20
        : 
PyUnicode_FromFormat("invalid escape sequence '\\%c'", c)215
;
21
    if (msg == NULL) {
  Branch (21:9): [True: 0, False: 731]
22
        return -1;
23
    }
24
    if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename,
  Branch (24:9): [True: 4, False: 727]
25
                                 t->lineno, NULL, NULL) < 0) {
26
        if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
  Branch (26:13): [True: 4, False: 0]
27
            /* Replace the DeprecationWarning exception with a SyntaxError
28
               to get a more accurate error report */
29
            PyErr_Clear();
30
31
            /* This is needed, in order for the SyntaxError to point to the token t,
32
               since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
33
               error location, if p->known_err_token is not set. */
34
            p->known_err_token = t;
35
            if (octal) {
  Branch (35:17): [True: 2, False: 2]
36
                RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'",
37
                                   first_invalid_escape);
38
            }
39
            else {
40
                RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
41
            }
42
        }
43
        Py_DECREF(msg);
44
        return -1;
45
    }
46
    Py_DECREF(msg);
47
    return 0;
48
}
49
50
/* Decode the run of bytes with the high bit set that starts at *sPtr and
   stops at the first byte without it (or at end, whichever comes first).

   *sPtr is advanced past the run; the run is decoded as UTF-8 and returned
   (NULL if PyUnicode_DecodeUTF8 fails). */
static PyObject *
decode_utf8(const char **sPtr, const char *end)
{
    const char *run_start = *sPtr;
    const char *cursor = run_start;

    for (; cursor < end && (*cursor & 0x80); cursor++) {
        /* just skip over every non-ASCII byte */
    }
    *sPtr = cursor;
    return PyUnicode_DecodeUTF8(run_start, cursor - run_start, NULL);
}
62
63
static PyObject *
64
decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
65
{
66
    PyObject *v;
67
    PyObject *u;
68
    char *buf;
69
    char *p;
70
    const char *end;
71
72
    /* check for integer overflow */
73
    if (len > SIZE_MAX / 6) {
  Branch (73:9): [True: 0, False: 86.2k]
74
        return NULL;
75
    }
76
    /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
77
       "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
78
    u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
79
    if (u == NULL) {
  Branch (79:9): [True: 0, False: 86.2k]
80
        return NULL;
81
    }
82
    p = buf = PyBytes_AsString(u);
83
    if (p == NULL) {
  Branch (83:9): [True: 0, False: 86.2k]
84
        return NULL;
85
    }
86
    end = s + len;
87
    while (s < end) {
  Branch (87:12): [True: 396k, False: 86.2k]
88
        if (*s == '\\') {
  Branch (88:13): [True: 16.3k, False: 379k]
89
            *p++ = *s++;
90
            if (s >= end || 
*s & 0x8016.3k
) {
  Branch (90:17): [True: 1, False: 16.3k]
  Branch (90:29): [True: 0, False: 16.3k]
91
                strcpy(p, "u005c");
92
                p += 5;
93
                if (s >= end) {
  Branch (93:21): [True: 1, False: 0]
94
                    break;
95
                }
96
            }
97
        }
98
        if (*s & 0x80) {
  Branch (98:13): [True: 1.59k, False: 394k]
99
            PyObject *w;
100
            int kind;
101
            const void *data;
102
            Py_ssize_t w_len;
103
            Py_ssize_t i;
104
            w = decode_utf8(&s, end);
105
            if (w == NULL) {
  Branch (105:17): [True: 0, False: 1.59k]
106
                Py_DECREF(u);
107
                return NULL;
108
            }
109
            kind = PyUnicode_KIND(w);
110
            data = PyUnicode_DATA(w);
111
            w_len = PyUnicode_GET_LENGTH(w);
112
            for (i = 0; i < w_len; 
i++1.63k
) {
  Branch (112:25): [True: 1.63k, False: 1.59k]
113
                Py_UCS4 chr = PyUnicode_READ(kind, data, i);
114
                sprintf(p, "\\U%08x", chr);
115
                p += 10;
116
            }
117
            /* Should be impossible to overflow */
118
            assert(p - buf <= PyBytes_GET_SIZE(u));
119
            Py_DECREF(w);
120
        }
121
        else {
122
            *p++ = *s++;
123
        }
124
    }
125
    len = p - buf;
126
    s = buf;
127
128
    const char *first_invalid_escape;
129
    v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape);
130
131
    if (v != NULL && 
first_invalid_escape != NULL86.1k
) {
  Branch (131:9): [True: 86.1k, False: 33]
  Branch (131:22): [True: 364, False: 85.8k]
132
        if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) {
  Branch (132:13): [True: 2, False: 362]
133
            /* We have not decref u before because first_invalid_escape points
134
               inside u. */
135
            Py_XDECREF(u);
136
            Py_DECREF(v);
137
            return NULL;
138
        }
139
    }
140
    Py_XDECREF(u);
141
    return v;
142
}
143
144
static PyObject *
145
decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
146
{
147
    const char *first_invalid_escape;
148
    PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
149
    if (result == NULL) {
  Branch (149:9): [True: 2, False: 2.69k]
150
        return NULL;
151
    }
152
153
    if (first_invalid_escape != NULL) {
  Branch (153:9): [True: 366, False: 2.32k]
154
        if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) {
  Branch (154:13): [True: 2, False: 364]
155
            Py_DECREF(result);
156
            return NULL;
157
        }
158
    }
159
    return result;
160
}
161
162
/* s must include the bracketing quote characters, and r, b, u,
163
   &/or f prefixes (if any), and embedded escape sequences (if any).
164
   _PyPegen_parsestr parses it, and sets *result to decoded Python string object.
165
   If the string is an f-string, set *fstr and *fstrlen to the unparsed
166
   string object.  Return 0 if no errors occurred.  */
167
int
168
_PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result,
169
                  const char **fstr, Py_ssize_t *fstrlen, Token *t)
170
{
171
    const char *s = PyBytes_AsString(t->bytes);
172
    if (s == NULL) {
  Branch (172:9): [True: 0, False: 205k]
173
        return -1;
174
    }
175
176
    size_t len;
177
    int quote = Py_CHARMASK(*s);
178
    int fmode = 0;
179
    *bytesmode = 0;
180
    *rawmode = 0;
181
    *result = NULL;
182
    *fstr = NULL;
183
    if (Py_ISALPHA(quote)) {
184
        while (!*bytesmode || 
!*rawmode4.43k
) {
  Branch (184:16): [True: 15.0k, False: 4.43k]
  Branch (184:31): [True: 4.38k, False: 51]
185
            if (quote == 'b' || 
quote == 'B'15.0k
) {
  Branch (185:17): [True: 4.39k, False: 15.0k]
  Branch (185:33): [True: 0, False: 15.0k]
186
                quote =(unsigned char)*++s;
187
                *bytesmode = 1;
188
            }
189
            else if (quote == 'u' || 
quote == 'U'14.9k
) {
  Branch (189:22): [True: 27, False: 14.9k]
  Branch (189:38): [True: 1, False: 14.9k]
190
                quote = (unsigned char)*++s;
191
            }
192
            else if (quote == 'r' || 
quote == 'R'14.0k
) {
  Branch (192:22): [True: 910, False: 14.0k]
  Branch (192:38): [True: 0, False: 14.0k]
193
                quote = (unsigned char)*++s;
194
                *rawmode = 1;
195
            }
196
            else if (quote == 'f' || 
quote == 'F'9.64k
) {
  Branch (196:22): [True: 4.43k, False: 9.64k]
  Branch (196:38): [True: 0, False: 9.64k]
197
                quote = (unsigned char)*++s;
198
                fmode = 1;
199
            }
200
            else {
201
                break;
202
            }
203
        }
204
    }
205
206
    /* fstrings are only allowed in Python 3.6 and greater */
207
    if (fmode && 
p->feature_version < 64.43k
) {
  Branch (207:9): [True: 4.43k, False: 201k]
  Branch (207:18): [True: 2, False: 4.43k]
208
        p->error_indicator = 1;
209
        RAISE_SYNTAX_ERROR("Format strings are only supported in Python 3.6 and greater");
210
        return -1;
211
    }
212
213
    if (fmode && 
*bytesmode4.43k
) {
  Branch (213:9): [True: 4.43k, False: 201k]
  Branch (213:18): [True: 0, False: 4.43k]
214
        PyErr_BadInternalCall();
215
        return -1;
216
    }
217
    if (quote != '\'' && 
quote != '\"'98.6k
) {
  Branch (217:9): [True: 98.6k, False: 107k]
  Branch (217:26): [True: 0, False: 98.6k]
218
        PyErr_BadInternalCall();
219
        return -1;
220
    }
221
    /* Skip the leading quote char. */
222
    s++;
223
    len = strlen(s);
224
    if (len > INT_MAX) {
  Branch (224:9): [True: 0, False: 205k]
225
        PyErr_SetString(PyExc_OverflowError, "string to parse is too long");
226
        return -1;
227
    }
228
    if (s[--len] != quote) {
  Branch (228:9): [True: 0, False: 205k]
229
        /* Last quote char must match the first. */
230
        PyErr_BadInternalCall();
231
        return -1;
232
    }
233
    if (len >= 4 && 
s[0] == quote111k
&&
s[1] == quote8.36k
) {
  Branch (233:9): [True: 111k, False: 94.7k]
  Branch (233:21): [True: 8.36k, False: 102k]
  Branch (233:38): [True: 8.36k, False: 0]
234
        /* A triple quoted string. We've already skipped one quote at
235
           the start and one at the end of the string. Now skip the
236
           two at the start. */
237
        s += 2;
238
        len -= 2;
239
        /* And check that the last two match. */
240
        if (s[--len] != quote || s[--len] != quote) {
  Branch (240:13): [True: 0, False: 8.36k]
  Branch (240:34): [True: 0, False: 8.36k]
241
            PyErr_BadInternalCall();
242
            return -1;
243
        }
244
    }
245
246
    if (fmode) {
  Branch (246:9): [True: 4.43k, False: 201k]
247
        /* Just return the bytes. The caller will parse the resulting
248
           string. */
249
        *fstr = s;
250
        *fstrlen = len;
251
        return 0;
252
    }
253
254
    /* Not an f-string. */
255
    /* Avoid invoking escape decoding routines if possible. */
256
    *rawmode = *rawmode || 
strchr(s, '\\') == NULL200k
;
  Branch (256:16): [True: 892, False: 200k]
  Branch (256:28): [True: 184k, False: 16.1k]
257
    if (*bytesmode) {
  Branch (257:9): [True: 4.39k, False: 196k]
258
        /* Disallow non-ASCII characters. */
259
        const char *ch;
260
        for (ch = s; *ch; 
ch++53.1k
) {
  Branch (260:22): [True: 53.2k, False: 4.25k]
261
            if (Py_CHARMASK(*ch) >= 0x80) {
  Branch (261:17): [True: 138, False: 53.1k]
262
                RAISE_SYNTAX_ERROR(
263
                                   "bytes can only contain ASCII "
264
                                   "literal characters");
265
                return -1;
266
            }
267
        }
268
        if (*rawmode) {
  Branch (268:13): [True: 1.56k, False: 2.69k]
269
            *result = PyBytes_FromStringAndSize(s, len);
270
        }
271
        else {
272
            *result = decode_bytes_with_escapes(p, s, len, t);
273
        }
274
    }
275
    else {
276
        if (*rawmode) {
  Branch (276:13): [True: 183k, False: 13.4k]
277
            *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
278
        }
279
        else {
280
            *result = decode_unicode_with_escapes(p, s, len, t);
281
        }
282
    }
283
    return *result == NULL ? 
-137
:
0201k
;
  Branch (283:12): [True: 37, False: 201k]
284
}
285
286
287
288
// FSTRING STUFF
289
290
/* Fix locations for the given node and its children.
291
292
   `parent` is the enclosing node.
293
   `expr_start` is the starting position of the expression (pointing to the open brace).
294
   `n` is the node which locations are going to be fixed relative to parent.
295
   `expr_str` is the child node's string representation, including braces.
296
*/
297
static bool
298
fstring_find_expr_location(Token *parent, const char* expr_start, char *expr_str, int *p_lines, int *p_cols)
299
{
300
    *p_lines = 0;
301
    *p_cols = 0;
302
    assert(expr_start != NULL && *expr_start == '{');
303
    if (parent && parent->bytes) {
  Branch (303:9): [True: 73.0k, False: 0]
  Branch (303:19): [True: 73.0k, False: 0]
304
        const char *parent_str = PyBytes_AsString(parent->bytes);
305
        if (!parent_str) {
  Branch (305:13): [True: 0, False: 73.0k]
306
            return false;
307
        }
308
        // The following is needed, in order to correctly shift the column
309
        // offset, in the case that (disregarding any whitespace) a newline
310
        // immediately follows the opening curly brace of the fstring expression.
311
        bool newline_after_brace = 1;
312
        const char *start = expr_start + 1;
313
        while (start && *start != '}' && *start != '\n') {
  Branch (313:16): [True: 73.0k, False: 0]
  Branch (313:25): [True: 73.0k, False: 0]
  Branch (313:42): [True: 73.0k, False: 6]
314
            if (*start != ' ' && 
*start != '\t'73.0k
&&
*start != '\f'73.0k
) {
  Branch (314:17): [True: 73.0k, False: 22]
  Branch (314:34): [True: 73.0k, False: 0]
  Branch (314:52): [True: 73.0k, False: 0]
315
                newline_after_brace = 0;
316
                break;
317
            }
318
            start++;
319
        }
320
321
        // Account for the characters from the last newline character to our
322
        // left until the beginning of expr_start.
323
        if (!newline_after_brace) {
  Branch (323:13): [True: 73.0k, False: 6]
324
            start = expr_start;
325
            while (start > parent_str && 
*start != '\n'35.5M
) {
  Branch (325:20): [True: 35.5M, False: 72.9k]
  Branch (325:42): [True: 35.5M, False: 64]
326
                start--;
327
            }
328
            *p_cols += (int)(expr_start - start);
329
        }
330
        /* adjust the start based on the number of newlines encountered
331
           before the f-string expression */
332
        for (const char *p = parent_str; p < expr_start; 
p++35.5M
) {
  Branch (332:42): [True: 35.5M, False: 73.0k]
333
            if (*p == '\n') {
  Branch (333:17): [True: 140, False: 35.5M]
334
                (*p_lines)++;
335
            }
336
        }
337
    }
338
    return true;
339
}
340
341
342
/* Compile this expression in to an expr_ty.  Add parens around the
343
   expression, in order to allow leading spaces in the expression. */
344
static expr_ty
345
fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
346
                     Token *t)
347
{
348
    expr_ty expr = NULL;
349
    char *str;
350
    Py_ssize_t len;
351
    const char *s;
352
    expr_ty result = NULL;
353
354
    assert(expr_end >= expr_start);
355
    assert(*(expr_start-1) == '{');
356
    assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':' ||
357
           *expr_end == '=');
358
359
    /* If the substring is all whitespace, it's an error.  We need to catch this
360
       here, and not when we call PyParser_SimpleParseStringFlagsFilename,
361
       because turning the expression '' in to '()' would go from being invalid
362
       to valid. */
363
    for (s = expr_start; s != expr_end; 
s++101
) {
  Branch (363:26): [True: 73.1k, False: 28]
364
        char c = *s;
365
        /* The Python parser ignores only the following whitespace
366
           characters (\r already is converted to \n). */
367
        if (!(c == ' ' || 
c == '\t'73.0k
||
c == '\n'73.0k
||
c == '\f'73.0k
)) {
  Branch (367:15): [True: 83, False: 73.0k]
  Branch (367:27): [True: 4, False: 73.0k]
  Branch (367:40): [True: 10, False: 73.0k]
  Branch (367:53): [True: 4, False: 73.0k]
368
            break;
369
        }
370
    }
371
372
    if (s == expr_end) {
  Branch (372:9): [True: 28, False: 73.0k]
373
        if (*expr_end == '!' || 
*expr_end == ':'16
||
*expr_end == '='11
) {
  Branch (373:13): [True: 12, False: 16]
  Branch (373:33): [True: 5, False: 11]
  Branch (373:53): [True: 6, False: 5]
374
            RAISE_SYNTAX_ERROR("f-string: expression required before '%c'", *expr_end);
375
            return NULL;
376
        }
377
        RAISE_SYNTAX_ERROR("f-string: empty expression not allowed");
378
        return NULL;
379
    }
380
381
    len = expr_end - expr_start;
382
    /* Allocate 3 extra bytes: open paren, close paren, null byte. */
383
    str = PyMem_Calloc(len + 3, sizeof(char));
384
    if (str == NULL) {
  Branch (384:9): [True: 0, False: 73.0k]
385
        PyErr_NoMemory();
386
        return NULL;
387
    }
388
389
    // The call to fstring_find_expr_location is responsible for finding the column offset
390
    // the generated AST nodes need to be shifted to the right, which is equal to the number
391
    // of the f-string characters before the expression starts.
392
    memcpy(str+1, expr_start, len);
393
    int lines, cols;
394
    if (!fstring_find_expr_location(t, expr_start-1, str+1, &lines, &cols)) {
  Branch (394:9): [True: 0, False: 73.0k]
395
        PyMem_Free(str);
396
        return NULL;
397
    }
398
399
    // The parentheses are needed in order to allow for leading whitespace within
400
    // the f-string expression. This consequently gets parsed as a group (see the
401
    // group rule in python.gram).
402
    str[0] = '(';
403
    str[len+1] = ')';
404
405
    struct tok_state* tok = _PyTokenizer_FromString(str, 1);
406
    if (tok == NULL) {
  Branch (406:9): [True: 0, False: 73.0k]
407
        PyMem_Free(str);
408
        return NULL;
409
    }
410
    Py_INCREF(p->tok->filename);
411
412
    tok->filename = p->tok->filename;
413
    tok->lineno = t->lineno + lines - 1;
414
415
    Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version,
416
                                     NULL, p->arena);
417
418
    p2->starting_lineno = t->lineno + lines;
419
    p2->starting_col_offset = t->col_offset + cols;
420
421
    expr = _PyPegen_run_parser(p2);
422
423
    if (expr == NULL) {
  Branch (423:9): [True: 15, False: 73.0k]
424
        goto exit;
425
    }
426
    result = expr;
427
428
exit:
429
    PyMem_Free(str);
430
    _PyPegen_Parser_Free(p2);
431
    _PyTokenizer_Free(tok);
432
    return result;
433
}
434
435
/* Return -1 on error.
436
437
   Return 0 if we reached the end of the literal.
438
439
   Return 1 if we haven't reached the end of the literal, but we want
440
   the caller to process the literal up to this point. Used for
441
   doubled braces.
442
*/
443
static int
444
fstring_find_literal(Parser *p, const char **str, const char *end, int raw,
445
                     PyObject **literal, int recurse_lvl, Token *t)
446
{
447
    /* Get any literal string. It ends when we hit an un-doubled left
448
       brace (which isn't part of a unicode name escape such as
449
       "\N{EULER CONSTANT}"), or the end of the string. */
450
451
    const char *s = *str;
452
    const char *literal_start = s;
453
    int result = 0;
454
455
    assert(*literal == NULL);
456
    while (s < end) {
  Branch (456:12): [True: 199k, False: 4.31k]
457
        char ch = *s++;
458
        if (!raw && 
ch == '\\'198k
&&
s < end366
) {
  Branch (458:13): [True: 198k, False: 1.06k]
  Branch (458:21): [True: 366, False: 198k]
  Branch (458:35): [True: 366, False: 0]
459
            ch = *s++;
460
            if (ch == 'N') {
  Branch (460:17): [True: 16, False: 350]
461
                /* We need to look at and skip matching braces for "\N{name}"
462
                   sequences because otherwise we'll think the opening '{'
463
                   starts an expression, which is not the case with "\N".
464
                   Keep looking for either a matched '{' '}' pair, or the end
465
                   of the string. */
466
467
                if (s < end && 
*s++ == '{'15
) {
  Branch (467:21): [True: 15, False: 1]
  Branch (467:32): [True: 13, False: 2]
468
                    while (s < end && 
*s++ != '}'274
) {
  Branch (468:28): [True: 274, False: 2]
  Branch (468:39): [True: 263, False: 11]
469
                    }
470
                    continue;
471
                }
472
473
                /* This is an invalid "\N" sequence, since it's a "\N" not
474
                   followed by a "{".  Just keep parsing this literal.  This
475
                   error will be caught later by
476
                   decode_unicode_with_escapes(). */
477
                continue;
478
            }
479
            if (ch == '{' && 
warn_invalid_escape_sequence(p, s-1, t) < 01
) {
  Branch (479:17): [True: 1, False: 349]
  Branch (479:30): [True: 0, False: 1]
480
                return -1;
481
            }
482
        }
483
        if (ch == '{' || 
ch == '}'126k
) {
  Branch (483:13): [True: 73.2k, False: 126k]
  Branch (483:26): [True: 290, False: 125k]
484
            /* Check for doubled braces, but only at the top level. If
485
               we checked at every level, then f'{0:{3}}' would fail
486
               with the two closing braces. */
487
            if (recurse_lvl == 0) {
  Branch (487:17): [True: 73.2k, False: 281]
488
                if (s < end && 
*s == ch73.2k
) {
  Branch (488:21): [True: 73.2k, False: 10]
  Branch (488:32): [True: 155, False: 73.0k]
489
                    /* We're going to tell the caller that the literal ends
490
                       here, but that they should continue scanning. But also
491
                       skip over the second brace when we resume scanning. */
492
                    *str = s + 1;
493
                    result = 1;
494
                    goto done;
495
                }
496
497
                /* Where a single '{' is the start of a new expression, a
498
                   single '}' is not allowed. */
499
                if (ch == '}') {
  Branch (499:21): [True: 8, False: 73.0k]
500
                    *str = s - 1;
501
                    RAISE_SYNTAX_ERROR("f-string: single '}' is not allowed");
502
                    return -1;
503
                }
504
            }
505
            /* We're either at a '{', which means we're starting another
506
               expression; or a '}', which means we're at the end of this
507
               f-string (for a nested format_spec). */
508
            s--;
509
            break;
510
        }
511
    }
512
    *str = s;
513
    assert(s <= end);
514
    assert(s == end || *s == '{' || *s == '}');
515
done:
516
    if (literal_start != s) {
  Branch (516:9): [True: 72.8k, False: 5.00k]
517
        if (raw) {
  Branch (517:13): [True: 35, False: 72.7k]
518
            *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
519
                                                    s - literal_start,
520
                                                    NULL, NULL);
521
        }
522
        else {
523
            *literal = decode_unicode_with_escapes(p, literal_start,
524
                                                   s - literal_start, t);
525
        }
526
        if (!*literal) {
  Branch (526:13): [True: 5, False: 72.7k]
527
            return -1;
528
        }
529
    }
530
    return result;
531
}
532
533
/* Forward declaration because parsing is recursive. */
534
static expr_ty
535
fstring_parse(Parser *p, const char **str, const char *end, int raw, int recurse_lvl,
536
              Token *first_token, Token* t, Token *last_token);
537
538
/* Parse the f-string at *str, ending at end.  We know *str starts an
539
   expression (so it must be a '{'). Returns the FormattedValue node, which
540
   includes the expression, conversion character, format_spec expression, and
541
   optionally the text of the expression (if = is used).
542
543
   Note that I don't do a perfect job here: I don't make sure that a
544
   closing brace doesn't match an opening paren, for example. It
545
   doesn't need to error on all invalid expressions, just correctly
546
   find the end of all valid ones. Any errors inside the expression
547
   will be caught when we parse it later.
548
549
   *expression is set to the expression.  For an '=' "debug" expression,
550
   *expr_text is set to the debug text (the original text of the expression,
551
   including the '=' and any whitespace around it, as a string object).  If
552
   not a debug expression, *expr_text set to NULL. */
553
static int
554
fstring_find_expr(Parser *p, const char **str, const char *end, int raw, int recurse_lvl,
555
                  PyObject **expr_text, expr_ty *expression, Token *first_token,
556
                  Token *t, Token *last_token)
557
{
558
    /* Return -1 on error, else 0. */
559
560
    const char *expr_start;
561
    const char *expr_end;
562
    expr_ty simple_expression;
563
    expr_ty format_spec = NULL; /* Optional format specifier. */
564
    int conversion = -1; /* The conversion char.  Use default if not
565
                            specified, or !r if using = and no format
566
                            spec. */
567
568
    /* 0 if we're not in a string, else the quote char we're trying to
569
       match (single or double quote). */
570
    char quote_char = 0;
571
572
    /* If we're inside a string, 1=normal, 3=triple-quoted. */
573
    int string_type = 0;
574
575
    /* Keep track of nesting level for braces/parens/brackets in
576
       expressions. */
577
    Py_ssize_t nested_depth = 0;
578
    char parenstack[MAXLEVEL];
579
580
    *expr_text = NULL;
581
582
    /* Can only nest one level deep. */
583
    if (recurse_lvl >= 2) {
  Branch (583:9): [True: 1, False: 73.1k]
584
        RAISE_SYNTAX_ERROR("f-string: expressions nested too deeply");
585
        goto error;
586
    }
587
588
    /* The first char must be a left brace, or we wouldn't have gotten
589
       here. Skip over it. */
590
    assert(**str == '{');
591
    *str += 1;
592
593
    expr_start = *str;
594
    for (; *str < end; 
(*str)++102k
) {
  Branch (594:12): [True: 175k, False: 15]
595
        char ch;
596
597
        /* Loop invariants. */
598
        assert(nested_depth >= 0);
599
        assert(*str >= expr_start && *str < end);
600
        if (quote_char) {
  Branch (600:13): [True: 1.03k, False: 174k]
601
            assert(string_type == 1 || string_type == 3);
602
        } else {
603
            assert(string_type == 0);
604
        }
605
606
        ch = **str;
607
        /* Nowhere inside an expression is a backslash allowed. */
608
        if (ch == '\\') {
  Branch (608:13): [True: 8, False: 175k]
609
            /* Error: can't include a backslash character, inside
610
               parens or strings or not. */
611
            RAISE_SYNTAX_ERROR(
612
                      "f-string expression part "
613
                      "cannot include a backslash");
614
            goto error;
615
        }
616
        if (quote_char) {
  Branch (616:13): [True: 1.03k, False: 174k]
617
            /* We're inside a string. See if we're at the end. */
618
            /* This code needs to implement the same non-error logic
619
               as tok_get from tokenizer.c, at the letter_quote
620
               label. To actually share that code would be a
621
               nightmare. But, it's unlikely to change and is small,
622
               so duplicate it here. Note we don't need to catch all
623
               of the errors, since they'll be caught when parsing the
624
               expression. We just need to match the non-error
625
               cases. Thus we can ignore \n in single-quoted strings,
626
               for example. Or non-terminated strings. */
627
            if (ch == quote_char) {
  Branch (627:17): [True: 236, False: 797]
628
                /* Does this match the string_type (single or triple
629
                   quoted)? */
630
                if (string_type == 3) {
  Branch (630:21): [True: 42, False: 194]
631
                    if (*str+2 < end && *(*str+1) == ch && 
*(*str+2) == ch25
) {
  Branch (631:25): [True: 42, False: 0]
  Branch (631:41): [True: 25, False: 17]
  Branch (631:60): [True: 25, False: 0]
632
                        /* We're at the end of a triple quoted string. */
633
                        *str += 2;
634
                        string_type = 0;
635
                        quote_char = 0;
636
                        continue;
637
                    }
638
                } else {
639
                    /* We're at the end of a normal string. */
640
                    quote_char = 0;
641
                    string_type = 0;
642
                    continue;
643
                }
644
            }
645
        } else if (ch == '\'' || 
ch == '"'174k
) {
  Branch (645:20): [True: 102, False: 174k]
  Branch (645:34): [True: 122, False: 174k]
646
            /* Is this a triple quoted string? */
647
            if (*str+2 < end && 
*(*str+1) == ch222
&&
*(*str+2) == ch55
) {
  Branch (647:17): [True: 222, False: 2]
  Branch (647:33): [True: 55, False: 167]
  Branch (647:52): [True: 25, False: 30]
648
                string_type = 3;
649
                *str += 2;
650
            } else {
651
                /* Start of a normal string. */
652
                string_type = 1;
653
            }
654
            /* Start looking for the end of the string. */
655
            quote_char = ch;
656
        } else if (ch == '[' || 
ch == '{'174k
||
ch == '('174k
) {
  Branch (656:20): [True: 80, False: 174k]
  Branch (656:33): [True: 20, False: 174k]
  Branch (656:46): [True: 475, False: 174k]
657
            if (nested_depth >= MAXLEVEL) {
  Branch (657:17): [True: 1, False: 574]
658
                RAISE_SYNTAX_ERROR("f-string: too many nested parenthesis");
659
                goto error;
660
            }
661
            parenstack[nested_depth] = ch;
662
            nested_depth++;
663
        } else if (ch == '#') {
  Branch (663:20): [True: 3, False: 174k]
664
            /* Error: can't include a comment character, inside parens
665
               or not. */
666
            RAISE_SYNTAX_ERROR("f-string expression part cannot include '#'");
667
            goto error;
668
        } else if (nested_depth == 0 &&
  Branch (668:20): [True: 171k, False: 2.33k]
669
                   
(171k
ch == '!'171k
||
ch == ':'170k
||
ch == '}'170k
||
  Branch (669:21): [True: 1.51k, False: 170k]
  Branch (669:34): [True: 148, False: 170k]
  Branch (669:47): [True: 71.3k, False: 98.8k]
670
                    
ch == '='98.8k
||
ch == '>'98.7k
||
ch == '<'98.6k
)) {
  Branch (670:21): [True: 99, False: 98.7k]
  Branch (670:34): [True: 15, False: 98.6k]
  Branch (670:47): [True: 3, False: 98.6k]
671
            /* See if there's a next character. */
672
            if (*str+1 < end) {
  Branch (672:17): [True: 71.3k, False: 1.76k]
673
                char next = *(*str+1);
674
675
                /* For "!=". since '=' is not an allowed conversion character,
676
                   nothing is lost in this test. */
677
                if ((ch == '!' && 
next == '='1.51k
) || /* != */
  Branch (677:22): [True: 1.51k, False: 69.8k]
  Branch (677:35): [True: 12, False: 1.49k]
678
                    
(71.3k
ch == '='71.3k
&&
next == '='96
) || /* == */
  Branch (678:22): [True: 96, False: 71.2k]
  Branch (678:35): [True: 4, False: 92]
679
                    
(71.3k
ch == '<'71.3k
&&
next == '='2
) || /* <= */
  Branch (679:22): [True: 2, False: 71.3k]
  Branch (679:35): [True: 2, False: 0]
680
                    
(71.3k
ch == '>'71.3k
&&
next == '='14
) /* >= */
  Branch (680:22): [True: 14, False: 71.3k]
  Branch (680:35): [True: 2, False: 12]
681
                    ) {
682
                    *str += 1;
683
                    continue;
684
                }
685
            }
686
            /* Don't get out of the loop for these, if they're single
687
               chars (not part of 2-char tokens). If by themselves, they
688
               don't end an expression (unlike say '!'). */
689
            if (ch == '>' || 
ch == '<'73.0k
) {
  Branch (689:17): [True: 13, False: 73.0k]
  Branch (689:30): [True: 1, False: 73.0k]
690
                continue;
691
            }
692
693
            /* Normal way out of this loop. */
694
            break;
695
        } else if (ch == ']' || 
ch == '}'100k
||
ch == ')'100k
) {
  Branch (695:20): [True: 79, False: 100k]
  Branch (695:33): [True: 23, False: 100k]
  Branch (695:46): [True: 270, False: 100k]
696
            if (!nested_depth) {
  Branch (696:17): [True: 2, False: 370]
697
                RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", ch);
698
                goto error;
699
            }
700
            nested_depth--;
701
            int opening = (unsigned char)parenstack[nested_depth];
702
            if (!((opening == '(' && 
ch == ')'270
) ||
  Branch (702:20): [True: 270, False: 100]
  Branch (702:38): [True: 267, False: 3]
703
                  
(103
opening == '['103
&&
ch == ']'80
) ||
  Branch (703:20): [True: 80, False: 23]
  Branch (703:38): [True: 78, False: 2]
704
                  
(25
opening == '{'25
&&
ch == '}'20
)))
  Branch (704:20): [True: 20, False: 5]
  Branch (704:38): [True: 20, False: 0]
705
            {
706
                RAISE_SYNTAX_ERROR(
707
                          "f-string: closing parenthesis '%c' "
708
                          "does not match opening parenthesis '%c'",
709
                          ch, opening);
710
                goto error;
711
            }
712
        } else {
713
            /* Just consume this char and loop around. */
714
        }
715
    }
716
    expr_end = *str;
717
    /* If we leave the above loop in a string or with mismatched parens, we
718
       don't really care. We'll get a syntax error when compiling the
719
       expression. But, we can produce a better error message, so let's just
720
       do that.*/
721
    if (quote_char) {
  Branch (721:9): [True: 4, False: 73.1k]
722
        RAISE_SYNTAX_ERROR("f-string: unterminated string");
723
        goto error;
724
    }
725
    if (nested_depth) {
  Branch (725:9): [True: 0, False: 73.1k]
726
        int opening = (unsigned char)parenstack[nested_depth - 1];
727
        RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", opening);
728
        goto error;
729
    }
730
731
    if (*str >= end) {
  Branch (731:9): [True: 11, False: 73.0k]
732
        goto unexpected_end_of_string;
733
    }
734
735
    /* Compile the expression as soon as possible, so we show errors
736
       related to the expression before errors related to the
737
       conversion or format_spec. */
738
    simple_expression = fstring_compile_expr(p, expr_start, expr_end, t);
739
    if (!simple_expression) {
  Branch (739:9): [True: 43, False: 73.0k]
740
        goto error;
741
    }
742
743
    /* Check for =, which puts the text value of the expression in
744
       expr_text. */
745
    if (**str == '=') {
  Branch (745:9): [True: 88, False: 72.9k]
746
        if (p->feature_version < 8) {
  Branch (746:13): [True: 1, False: 87]
747
            RAISE_SYNTAX_ERROR("f-string: self documenting expressions are "
748
                               "only supported in Python 3.8 and greater");
749
            goto error;
750
        }
751
        *str += 1;
752
753
        /* Skip over ASCII whitespace.  No need to test for end of string
754
           here, since we know there's at least a trailing quote somewhere
755
           ahead. */
756
        while (Py_ISSPACE(**str)) {
757
            *str += 1;
758
        }
759
        if (*str >= end) {
  Branch (759:13): [True: 1, False: 86]
760
            goto unexpected_end_of_string;
761
        }
762
        /* Set *expr_text to the text of the expression. */
763
        *expr_text = PyUnicode_FromStringAndSize(expr_start, *str-expr_start);
764
        if (!*expr_text) {
  Branch (764:13): [True: 0, False: 86]
765
            goto error;
766
        }
767
    }
768
769
    /* Check for a conversion char, if present. */
770
    if (**str == '!') {
  Branch (770:9): [True: 1.53k, False: 71.5k]
771
        *str += 1;
772
        const char *conv_start = *str;
773
        while (1) {
  Branch (773:16): [Folded - Ignored]
774
            if (*str >= end) {
  Branch (774:17): [True: 5, False: 3.07k]
775
                goto unexpected_end_of_string;
776
            }
777
            if (**str == '}' || 
**str == ':'1.59k
) {
  Branch (777:17): [True: 1.47k, False: 1.59k]
  Branch (777:33): [True: 52, False: 1.54k]
778
                break;
779
            }
780
            *str += 1;
781
        }
782
        if (*str == conv_start) {
  Branch (782:13): [True: 3, False: 1.52k]
783
            RAISE_SYNTAX_ERROR(
784
                      "f-string: missed conversion character");
785
            goto error;
786
        }
787
788
        conversion = (unsigned char)*conv_start;
789
        /* Validate the conversion. */
790
        if ((*str != conv_start + 1) ||
  Branch (790:13): [True: 11, False: 1.51k]
791
            
!(1.51k
conversion == 's'1.51k
||
conversion == 'r'1.48k
||
conversion == 'a'39
))
  Branch (791:15): [True: 35, False: 1.48k]
  Branch (791:36): [True: 1.44k, False: 39]
  Branch (791:57): [True: 34, False: 5]
792
        {
793
            PyObject *conv_obj = PyUnicode_FromStringAndSize(conv_start,
794
                                                             *str-conv_start);
795
            if (conv_obj) {
  Branch (795:17): [True: 16, False: 0]
796
                RAISE_SYNTAX_ERROR(
797
                        "f-string: invalid conversion character %R: "
798
                        "expected 's', 'r', or 'a'",
799
                        conv_obj);
800
                Py_DECREF(conv_obj);
801
            }
802
            goto error;
803
        }
804
805
    }
806
807
    /* Check for the format spec, if present. */
808
    assert(*str < end);
809
    if (**str == ':') {
  Branch (809:9): [True: 214, False: 72.8k]
810
        *str += 1;
811
        if (*str >= end) {
  Branch (811:13): [True: 2, False: 212]
812
            goto unexpected_end_of_string;
813
        }
814
815
        /* Parse the format spec. */
816
        format_spec = fstring_parse(p, str, end, raw, recurse_lvl+1,
817
                                    first_token, t, last_token);
818
        if (!format_spec) {
  Branch (818:13): [True: 7, False: 205]
819
            goto error;
820
        }
821
    }
822
823
    if (*str >= end || **str != '}') {
  Branch (823:9): [True: 0, False: 73.0k]
  Branch (823:24): [True: 1, False: 73.0k]
824
        goto unexpected_end_of_string;
825
    }
826
827
    /* We're at a right brace. Consume it. */
828
    assert(*str < end);
829
    assert(**str == '}');
830
    *str += 1;
831
832
    /* If we're in = mode (detected by non-NULL expr_text), and have no format
833
       spec and no explicit conversion, set the conversion to 'r'. */
834
    if (*expr_text && 
format_spec == NULL85
&&
conversion == -147
) {
  Branch (834:9): [True: 85, False: 72.9k]
  Branch (834:23): [True: 47, False: 38]
  Branch (834:46): [True: 16, False: 31]
835
        conversion = 'r';
836
    }
837
838
    /* And now create the FormattedValue node that represents this
839
       entire expression with the conversion and format spec. */
840
    //TODO: Fix this
841
    *expression = _PyAST_FormattedValue(simple_expression, conversion,
842
                                        format_spec, first_token->lineno,
843
                                        first_token->col_offset,
844
                                        last_token->end_lineno,
845
                                        last_token->end_col_offset, p->arena);
846
    if (!*expression) {
  Branch (846:9): [True: 0, False: 73.0k]
847
        goto error;
848
    }
849
850
    return 0;
851
852
unexpected_end_of_string:
853
    RAISE_SYNTAX_ERROR("f-string: expecting '}'");
854
    /* Falls through to error. */
855
856
error:
857
    Py_XDECREF(*expr_text);
858
    return -1;
859
860
}
861
862
/* Return -1 on error.
863
864
   Return 0 if we have a literal (possible zero length) and an
865
   expression (zero length if at the end of the string.
866
867
   Return 1 if we have a literal, but no expression, and we want the
868
   caller to call us again. This is used to deal with doubled
869
   braces.
870
871
   When called multiple times on the string 'a{{b{0}c', this function
872
   will return:
873
874
   1. the literal 'a{' with no expression, and a return value
875
      of 1. Despite the fact that there's no expression, the return
876
      value of 1 means we're not finished yet.
877
878
   2. the literal 'b' and the expression '0', with a return value of
879
      0. The fact that there's an expression means we're not finished.
880
881
   3. literal 'c' with no expression and a return value of 0. The
882
      combination of the return value of 0 with no expression means
883
      we're finished.
884
*/
885
static int
886
fstring_find_literal_and_expr(Parser *p, const char **str, const char *end, int raw,
887
                              int recurse_lvl, PyObject **literal,
888
                              PyObject **expr_text, expr_ty *expression,
889
                              Token *first_token, Token *t, Token *last_token)
890
{
891
    int result;
892
893
    assert(*literal == NULL && *expression == NULL);
894
895
    /* Get any literal string. */
896
    result = fstring_find_literal(p, str, end, raw, literal, recurse_lvl, t);
897
    if (result < 0) {
  Branch (897:9): [True: 13, False: 77.8k]
898
        goto error;
899
    }
900
901
    assert(result == 0 || result == 1);
902
903
    if (result == 1) {
  Branch (903:9): [True: 155, False: 77.6k]
904
        /* We have a literal, but don't look at the expression. */
905
        return 1;
906
    }
907
908
    if (*str >= end || 
**str == '}'73.3k
) {
  Branch (908:9): [True: 4.31k, False: 73.3k]
  Branch (908:24): [True: 205, False: 73.1k]
909
        /* We're at the end of the string or the end of a nested
910
           f-string: no expression. The top-level error case where we
911
           expect to be at the end of the string but we're at a '}' is
912
           handled later. */
913
        return 0;
914
    }
915
916
    /* We must now be the start of an expression, on a '{'. */
917
    assert(**str == '{');
918
919
    if (fstring_find_expr(p, str, end, raw, recurse_lvl, expr_text,
  Branch (919:9): [True: 114, False: 73.0k]
920
                          expression, first_token, t, last_token) < 0) {
921
        goto error;
922
    }
923
924
    return 0;
925
926
error:
927
    Py_CLEAR(*literal);
928
    return -1;
929
}
930
931
#ifdef NDEBUG
932
#define ExprList_check_invariants(l)
933
#else
934
static void
935
ExprList_check_invariants(ExprList *l)
936
{
937
    /* Check our invariants. Make sure this object is "live", and
938
       hasn't been deallocated. */
939
    assert(l->size >= 0);
940
    assert(l->p != NULL);
941
    if (l->size <= EXPRLIST_N_CACHED) {
942
        assert(l->data == l->p);
943
    }
944
}
945
#endif
946
947
static void
948
ExprList_Init(ExprList *l)
949
{
950
    l->allocated = EXPRLIST_N_CACHED;
951
    l->size = 0;
952
953
    /* Until we start allocating dynamically, p points to data. */
954
    l->p = l->data;
955
956
    ExprList_check_invariants(l);
957
}
958
959
static int
960
ExprList_Append(ExprList *l, expr_ty exp)
961
{
962
    ExprList_check_invariants(l);
963
    if (l->size >= l->allocated) {
  Branch (963:9): [True: 58, False: 146k]
964
        /* We need to alloc (or realloc) the memory. */
965
        Py_ssize_t new_size = l->allocated * 2;
966
967
        /* See if we've ever allocated anything dynamically. */
968
        if (l->p == l->data) {
  Branch (968:13): [True: 15, False: 43]
969
            Py_ssize_t i;
970
            /* We're still using the cached data. Switch to
971
               alloc-ing. */
972
            l->p = PyMem_Malloc(sizeof(expr_ty) * new_size);
973
            if (!l->p) {
  Branch (973:17): [True: 0, False: 15]
974
                return -1;
975
            }
976
            /* Copy the cached data into the new buffer. */
977
            
for (i = 0; 15
i < l->size;
i++960
) {
  Branch (977:25): [True: 960, False: 15]
978
                l->p[i] = l->data[i];
979
            }
980
        } else {
981
            /* Just realloc. */
982
            expr_ty *tmp = PyMem_Realloc(l->p, sizeof(expr_ty) * new_size);
983
            if (!tmp) {
  Branch (983:17): [True: 0, False: 43]
984
                PyMem_Free(l->p);
985
                l->p = NULL;
986
                return -1;
987
            }
988
            l->p = tmp;
989
        }
990
991
        l->allocated = new_size;
992
        assert(l->allocated == 2 * l->size);
993
    }
994
995
    l->p[l->size++] = exp;
996
997
    ExprList_check_invariants(l);
998
    return 0;
999
}
1000
1001
static void
1002
ExprList_Dealloc(ExprList *l)
1003
{
1004
    ExprList_check_invariants(l);
1005
1006
    /* If there's been an error, or we've never dynamically allocated,
1007
       do nothing. */
1008
    if (!l->p || l->p == l->data) {
  Branch (1008:9): [True: 0, False: 3.32k]
  Branch (1008:18): [True: 3.30k, False: 15]
1009
        /* Do nothing. */
1010
    } else {
1011
        /* We have dynamically allocated. Free the memory. */
1012
        PyMem_Free(l->p);
1013
    }
1014
    l->p = NULL;
1015
    l->size = -1;
1016
}
1017
1018
static asdl_expr_seq *
1019
ExprList_Finish(ExprList *l, PyArena *arena)
1020
{
1021
    asdl_expr_seq *seq;
1022
1023
    ExprList_check_invariants(l);
1024
1025
    /* Allocate the asdl_seq and copy the expressions in to it. */
1026
    seq = _Py_asdl_expr_seq_new(l->size, arena);
1027
    if (seq) {
  Branch (1027:9): [True: 3.01k, False: 0]
1028
        Py_ssize_t i;
1029
        for (i = 0; i < l->size; 
i++146k
) {
  Branch (1029:21): [True: 146k, False: 3.01k]
1030
            asdl_seq_SET(seq, i, l->p[i]);
1031
        }
1032
    }
1033
    ExprList_Dealloc(l);
1034
    return seq;
1035
}
1036
1037
#ifdef NDEBUG
1038
#define FstringParser_check_invariants(state)
1039
#else
1040
static void
1041
FstringParser_check_invariants(FstringParser *state)
1042
{
1043
    if (state->last_str) {
1044
        assert(PyUnicode_CheckExact(state->last_str));
1045
    }
1046
    ExprList_check_invariants(&state->expr_list);
1047
}
1048
#endif
1049
1050
void
1051
_PyPegen_FstringParser_Init(FstringParser *state)
1052
{
1053
    state->last_str = NULL;
1054
    state->fmode = 0;
1055
    ExprList_Init(&state->expr_list);
1056
    FstringParser_check_invariants(state);
1057
}
1058
1059
void
1060
_PyPegen_FstringParser_Dealloc(FstringParser *state)
1061
{
1062
    FstringParser_check_invariants(state);
1063
1064
    Py_XDECREF(state->last_str);
1065
    ExprList_Dealloc(&state->expr_list);
1066
}
1067
1068
/* Make a Constant node, but decref the PyUnicode object being added. */
1069
static expr_ty
1070
make_str_node_and_del(Parser *p, PyObject **str, Token* first_token, Token *last_token)
1071
{
1072
    PyObject *s = *str;
1073
    PyObject *kind = NULL;
1074
    *str = NULL;
1075
    assert(PyUnicode_CheckExact(s));
1076
    if (_PyArena_AddPyObject(p->arena, s) < 0) {
  Branch (1076:9): [True: 0, False: 248k]
1077
        Py_DECREF(s);
1078
        return NULL;
1079
    }
1080
    const char* the_str = PyBytes_AsString(first_token->bytes);
1081
    if (the_str && the_str[0] == 'u') {
  Branch (1081:9): [True: 248k, False: 0]
  Branch (1081:20): [True: 27, False: 248k]
1082
        kind = _PyPegen_new_identifier(p, "u");
1083
    }
1084
1085
    if (kind == NULL && 
PyErr_Occurred()248k
) {
  Branch (1085:9): [True: 248k, False: 27]
  Branch (1085:25): [True: 0, False: 248k]
1086
        return NULL;
1087
    }
1088
1089
    return _PyAST_Constant(s, kind, first_token->lineno, first_token->col_offset,
1090
                           last_token->end_lineno, last_token->end_col_offset,
1091
                           p->arena);
1092
1093
}
1094
1095
1096
/* Add a non-f-string (that is, a regular literal string). str is
1097
   decref'd. */
1098
int
1099
_PyPegen_FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
1100
{
1101
    FstringParser_check_invariants(state);
1102
1103
    assert(PyUnicode_CheckExact(str));
1104
1105
    if (PyUnicode_GET_LENGTH(str) == 0) {
  Branch (1105:9): [True: 67.3k, False: 202k]
1106
        Py_DECREF(str);
1107
        return 0;
1108
    }
1109
1110
    if (!state->last_str) {
  Branch (1110:9): [True: 181k, False: 21.3k]
1111
        /* We didn't have a string before, so just remember this one. */
1112
        state->last_str = str;
1113
    } else {
1114
        /* Concatenate this with the previous string. */
1115
        PyUnicode_AppendAndDel(&state->last_str, str);
1116
        if (!state->last_str) {
  Branch (1116:13): [True: 0, False: 21.3k]
1117
            return -1;
1118
        }
1119
    }
1120
    FstringParser_check_invariants(state);
1121
    return 0;
1122
}
1123
1124
/* Parse an f-string. The f-string is in *str to end, with no
1125
   'f' or quotes. */
1126
int
1127
_PyPegen_FstringParser_ConcatFstring(Parser *p, FstringParser *state, const char **str,
1128
                            const char *end, int raw, int recurse_lvl,
1129
                            Token *first_token, Token* t, Token *last_token)
1130
{
1131
    FstringParser_check_invariants(state);
1132
    state->fmode = 1;
1133
1134
    /* Parse the f-string. */
1135
    while (1) {
  Branch (1135:12): [Folded - Ignored]
1136
        PyObject *literal = NULL;
1137
        PyObject *expr_text = NULL;
1138
        expr_ty expression = NULL;
1139
1140
        /* If there's a zero length literal in front of the
1141
           expression, literal will be NULL. If we're at the end of
1142
           the f-string, expression will be NULL (unless result == 1,
1143
           see below). */
1144
        int result = fstring_find_literal_and_expr(p, str, end, raw, recurse_lvl,
1145
                                                   &literal, &expr_text,
1146
                                                   &expression, first_token, t, last_token);
1147
        if (result < 0) {
  Branch (1147:13): [True: 127, False: 77.6k]
1148
            return -1;
1149
        }
1150
1151
        /* Add the literal, if any. */
1152
        if (literal && 
_PyPegen_FstringParser_ConcatAndDel(state, literal) < 072.7k
) {
  Branch (1152:13): [True: 72.7k, False: 4.90k]
  Branch (1152:24): [True: 0, False: 72.7k]
1153
            Py_XDECREF(expr_text);
1154
            return -1;
1155
        }
1156
        /* Add the expr_text, if any. */
1157
        if (expr_text && 
_PyPegen_FstringParser_ConcatAndDel(state, expr_text) < 085
) {
  Branch (1157:13): [True: 85, False: 77.6k]
  Branch (1157:26): [True: 0, False: 85]
1158
            return -1;
1159
        }
1160
1161
        /* We've dealt with the literal and expr_text, their ownership has
1162
           been transferred to the state object.  Don't look at them again. */
1163
1164
        /* See if we should just loop around to get the next literal
1165
           and expression, while ignoring the expression this
1166
           time. This is used for un-doubling braces, as an
1167
           optimization. */
1168
        if (result == 1) {
  Branch (1168:13): [True: 155, False: 77.5k]
1169
            continue;
1170
        }
1171
1172
        if (!expression) {
  Branch (1172:13): [True: 4.51k, False: 73.0k]
1173
            /* We're done with this f-string. */
1174
            break;
1175
        }
1176
1177
        /* We know we have an expression. Convert any existing string
1178
           to a Constant node. */
1179
        if (state->last_str) {
  Branch (1179:13): [True: 71.9k, False: 1.09k]
1180
            /* Convert the existing last_str literal to a Constant node. */
1181
            expr_ty last_str = make_str_node_and_del(p, &state->last_str, first_token, last_token);
1182
            if (!last_str || ExprList_Append(&state->expr_list, last_str) < 0) {
  Branch (1182:17): [True: 0, False: 71.9k]
  Branch (1182:30): [True: 0, False: 71.9k]
1183
                return -1;
1184
            }
1185
        }
1186
1187
        if (ExprList_Append(&state->expr_list, expression) < 0) {
  Branch (1187:13): [True: 0, False: 73.0k]
1188
            return -1;
1189
        }
1190
    }
1191
1192
    /* If recurse_lvl is zero, then we must be at the end of the
1193
       string. Otherwise, we must be at a right brace. */
1194
1195
    if (recurse_lvl == 0 && 
*str < end-14.30k
) {
  Branch (1195:9): [True: 4.30k, False: 207]
  Branch (1195:29): [True: 0, False: 4.30k]
1196
        RAISE_SYNTAX_ERROR("f-string: unexpected end of string");
1197
        return -1;
1198
    }
1199
    if (recurse_lvl != 0 && 
**str != '}'207
) {
  Branch (1199:9): [True: 207, False: 4.30k]
  Branch (1199:29): [True: 2, False: 205]
1200
        RAISE_SYNTAX_ERROR("f-string: expecting '}'");
1201
        return -1;
1202
    }
1203
1204
    FstringParser_check_invariants(state);
1205
    return 0;
1206
}
1207
1208
/* Convert the partial state reflected in last_str and expr_list to an
1209
   expr_ty. The expr_ty can be a Constant, or a JoinedStr. */
1210
expr_ty
1211
_PyPegen_FstringParser_Finish(Parser *p, FstringParser *state, Token* first_token,
1212
                     Token *last_token)
1213
{
1214
    asdl_expr_seq *seq;
1215
1216
    FstringParser_check_invariants(state);
1217
1218
    /* If we're just a constant string with no expressions, return
1219
       that. */
1220
    if (!state->fmode) {
  Branch (1220:9): [True: 174k, False: 3.01k]
1221
        assert(!state->expr_list.size);
1222
        if (!state->last_str) {
  Branch (1222:13): [True: 67.3k, False: 107k]
1223
            /* Create a zero length string. */
1224
            state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
1225
            if (!state->last_str) {
  Branch (1225:17): [True: 0, False: 67.3k]
1226
                goto error;
1227
            }
1228
        }
1229
        return make_str_node_and_del(p, &state->last_str, first_token, last_token);
1230
    }
1231
1232
    /* Create a Constant node out of last_str, if needed. It will be the
1233
       last node in our expression list. */
1234
    if (state->last_str) {
  Branch (1234:9): [True: 1.71k, False: 1.29k]
1235
        expr_ty str = make_str_node_and_del(p, &state->last_str, first_token, last_token);
1236
        if (!str || ExprList_Append(&state->expr_list, str) < 0) {
  Branch (1236:13): [True: 0, False: 1.71k]
  Branch (1236:21): [True: 0, False: 1.71k]
1237
            goto error;
1238
        }
1239
    }
1240
    /* This has already been freed. */
1241
    assert(state->last_str == NULL);
1242
1243
    seq = ExprList_Finish(&state->expr_list, p->arena);
1244
    if (!seq) {
  Branch (1244:9): [True: 0, False: 3.01k]
1245
        goto error;
1246
    }
1247
1248
    return _PyAST_JoinedStr(seq, first_token->lineno, first_token->col_offset,
1249
                            last_token->end_lineno, last_token->end_col_offset,
1250
                            p->arena);
1251
1252
error:
1253
    _PyPegen_FstringParser_Dealloc(state);
1254
    return NULL;
1255
}
1256
1257
/* Given an f-string (with no 'f' or quotes) that's in *str and ends
1258
   at end, parse it into an expr_ty.  Return NULL on error.  Adjust
1259
   str to point past the parsed portion. */
1260
static expr_ty
1261
fstring_parse(Parser *p, const char **str, const char *end, int raw,
1262
              int recurse_lvl, Token *first_token, Token* t, Token *last_token)
1263
{
1264
    FstringParser state;
1265
1266
    _PyPegen_FstringParser_Init(&state);
1267
    if (_PyPegen_FstringParser_ConcatFstring(p, &state, str, end, raw, recurse_lvl,
  Branch (1267:9): [True: 7, False: 205]
1268
                                    first_token, t, last_token) < 0) {
1269
        _PyPegen_FstringParser_Dealloc(&state);
1270
        return NULL;
1271
    }
1272
1273
    return _PyPegen_FstringParser_Finish(p, &state, t, t);
1274
}