LCOV - CPython lcov report - Parser/string

LCOV - code coverage report

Current view:	top level - Parser - string_parser.c (source / functions)		Hit	Total	Coverage
Test:	CPython lcov report	Lines:	510	561	90.9 %
Date:	2022-07-07 18:19:46	Functions:	23	23	100.0 %

          Line data    Source code

       1             : #include <stdbool.h>
       2             : 
       3             : #include <Python.h>
       4             : 
       5             : #include "tokenizer.h"
       6             : #include "pegen.h"
       7             : #include "string_parser.h"
       8             : 
       9             : //// STRING HANDLING FUNCTIONS ////
      10             : 
      11             : static int
      12         732 : warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
      13             : {
      14         732 :     unsigned char c = *first_invalid_escape;
      15         732 :     int octal = ('4' <= c && c <= '7');
      16         732 :     PyObject *msg =
      17             :         octal
      18         516 :         ? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'",
      19             :                                first_invalid_escape)
      20         732 :         : PyUnicode_FromFormat("invalid escape sequence '\\%c'", c);
      21         732 :     if (msg == NULL) {
      22           0 :         return -1;
      23             :     }
      24         732 :     if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename,
      25             :                                  t->lineno, NULL, NULL) < 0) {
      26           5 :         if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
      27             :             /* Replace the DeprecationWarning exception with a SyntaxError
      28             :                to get a more accurate error report */
      29           5 :             PyErr_Clear();
      30             : 
      31             :             /* This is needed, in order for the SyntaxError to point to the token t,
      32             :                since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
      33             :                error location, if p->known_err_token is not set. */
      34           5 :             p->known_err_token = t;
      35           5 :             if (octal) {
      36           2 :                 RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'",
      37             :                                    first_invalid_escape);
      38             :             }
      39             :             else {
      40           3 :                 RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
      41             :             }
      42             :         }
      43           5 :         Py_DECREF(msg);
      44           5 :         return -1;
      45             :     }
      46         727 :     Py_DECREF(msg);
      47         727 :     return 0;
      48             : }
      49             : 
      50             : static PyObject *
      51       22168 : decode_utf8(const char **sPtr, const char *end)
      52             : {
      53             :     const char *s;
      54             :     const char *t;
      55       22168 :     t = s = *sPtr;
      56      152655 :     while (s < end && (*s & 0x80)) {
      57      130487 :         s++;
      58             :     }
      59       22168 :     *sPtr = s;
      60       22168 :     return PyUnicode_DecodeUTF8(t, s - t, NULL);
      61             : }
      62             : 
      63             : static PyObject *
      64      240000 : decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
      65             : {
      66             :     PyObject *v;
      67             :     PyObject *u;
      68             :     char *buf;
      69             :     char *p;
      70             :     const char *end;
      71             : 
      72             :     /* check for integer overflow */
      73      240000 :     if (len > SIZE_MAX / 6) {
      74           0 :         return NULL;
      75             :     }
      76             :     /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
      77             :        "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
      78      240000 :     u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
      79      240000 :     if (u == NULL) {
      80           0 :         return NULL;
      81             :     }
      82      240000 :     p = buf = PyBytes_AsString(u);
      83      240000 :     if (p == NULL) {
      84           0 :         return NULL;
      85             :     }
      86      240000 :     end = s + len;
      87     4389100 :     while (s < end) {
      88     4149100 :         if (*s == '\\') {
      89      227883 :             *p++ = *s++;
      90      227883 :             if (s >= end || *s & 0x80) {
      91           5 :                 strcpy(p, "u005c");
      92           5 :                 p += 5;
      93           5 :                 if (s >= end) {
      94           1 :                     break;
      95             :                 }
      96             :             }
      97             :         }
      98     4149100 :         if (*s & 0x80) {
      99             :             PyObject *w;
     100             :             int kind;
     101             :             const void *data;
     102             :             Py_ssize_t w_len;
     103             :             Py_ssize_t i;
     104       22168 :             w = decode_utf8(&s, end);
     105       22168 :             if (w == NULL) {
     106           0 :                 Py_DECREF(u);
     107           0 :                 return NULL;
     108             :             }
     109       22168 :             kind = PyUnicode_KIND(w);
     110       22168 :             data = PyUnicode_DATA(w);
     111       22168 :             w_len = PyUnicode_GET_LENGTH(w);
     112       60555 :             for (i = 0; i < w_len; i++) {
     113       38387 :                 Py_UCS4 chr = PyUnicode_READ(kind, data, i);
     114       38387 :                 sprintf(p, "\\U%08x", chr);
     115       38387 :                 p += 10;
     116             :             }
     117             :             /* Should be impossible to overflow */
     118       22168 :             assert(p - buf <= PyBytes_GET_SIZE(u));
     119       22168 :             Py_DECREF(w);
     120             :         }
     121             :         else {
     122     4126930 :             *p++ = *s++;
     123             :         }
     124             :     }
     125      240000 :     len = p - buf;
     126      240000 :     s = buf;
     127             : 
     128             :     const char *first_invalid_escape;
     129      240000 :     v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape);
     130             : 
     131      240000 :     if (v != NULL && first_invalid_escape != NULL) {
     132         365 :         if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) {
     133             :             /* We have not decref u before because first_invalid_escape points
     134             :                inside u. */
     135           3 :             Py_XDECREF(u);
     136           3 :             Py_DECREF(v);
     137           3 :             return NULL;
     138             :         }
     139             :     }
     140      239997 :     Py_XDECREF(u);
     141      239997 :     return v;
     142             : }
     143             : 
     144             : static PyObject *
     145       19186 : decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
     146             : {
     147             :     const char *first_invalid_escape;
     148       19186 :     PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
     149       19186 :     if (result == NULL) {
     150           2 :         return NULL;
     151             :     }
     152             : 
     153       19184 :     if (first_invalid_escape != NULL) {
     154         366 :         if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) {
     155           2 :             Py_DECREF(result);
     156           2 :             return NULL;
     157             :         }
     158             :     }
     159       19182 :     return result;
     160             : }
     161             : 
     162             : /* s must include the bracketing quote characters, and r, b, u,
     163             :    &/or f prefixes (if any), and embedded escape sequences (if any).
     164             :    _PyPegen_parsestr parses it, and sets *result to decoded Python string object.
     165             :    If the string is an f-string, set *fstr and *fstrlen to the unparsed
     166             :    string object.  Return 0 if no errors occurred.  */
     167             : int
     168     1691440 : _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result,
     169             :                   const char **fstr, Py_ssize_t *fstrlen, Token *t)
     170             : {
     171     1691440 :     const char *s = PyBytes_AsString(t->bytes);
     172     1691440 :     if (s == NULL) {
     173           0 :         return -1;
     174             :     }
     175             : 
     176             :     size_t len;
     177     1691440 :     int quote = Py_CHARMASK(*s);
     178     1691440 :     int fmode = 0;
     179     1691440 :     *bytesmode = 0;
     180     1691440 :     *rawmode = 0;
     181     1691440 :     *result = NULL;
     182     1691440 :     *fstr = NULL;
     183     1691440 :     if (Py_ISALPHA(quote)) {
     184      182880 :         while (!*bytesmode || !*rawmode) {
     185      182111 :             if (quote == 'b' || quote == 'B') {
     186       52450 :                 quote =(unsigned char)*++s;
     187       52450 :                 *bytesmode = 1;
     188             :             }
     189      129661 :             else if (quote == 'u' || quote == 'U') {
     190         909 :                 quote = (unsigned char)*++s;
     191             :             }
     192      128752 :             else if (quote == 'r' || quote == 'R') {
     193       16220 :                 quote = (unsigned char)*++s;
     194       16220 :                 *rawmode = 1;
     195             :             }
     196      112532 :             else if (quote == 'f' || quote == 'F') {
     197       22299 :                 quote = (unsigned char)*++s;
     198       22299 :                 fmode = 1;
     199             :             }
     200             :             else {
     201             :                 break;
     202             :             }
     203             :         }
     204             :     }
     205             : 
     206             :     /* fstrings are only allowed in Python 3.6 and greater */
     207     1691440 :     if (fmode && p->feature_version < 6) {
     208           2 :         p->error_indicator = 1;
     209           2 :         RAISE_SYNTAX_ERROR("Format strings are only supported in Python 3.6 and greater");
     210           2 :         return -1;
     211             :     }
     212             : 
     213     1691440 :     if (fmode && *bytesmode) {
     214           0 :         PyErr_BadInternalCall();
     215           0 :         return -1;
     216             :     }
     217     1691440 :     if (quote != '\'' && quote != '\"') {
     218           0 :         PyErr_BadInternalCall();
     219           0 :         return -1;
     220             :     }
     221             :     /* Skip the leading quote char. */
     222     1691440 :     s++;
     223     1691440 :     len = strlen(s);
     224     1691440 :     if (len > INT_MAX) {
     225           0 :         PyErr_SetString(PyExc_OverflowError, "string to parse is too long");
     226           0 :         return -1;
     227             :     }
     228     1691440 :     if (s[--len] != quote) {
     229             :         /* Last quote char must match the first. */
     230           0 :         PyErr_BadInternalCall();
     231           0 :         return -1;
     232             :     }
     233     1691440 :     if (len >= 4 && s[0] == quote && s[1] == quote) {
     234             :         /* A triple quoted string. We've already skipped one quote at
     235             :            the start and one at the end of the string. Now skip the
     236             :            two at the start. */
     237      122154 :         s += 2;
     238      122154 :         len -= 2;
     239             :         /* And check that the last two match. */
     240      122154 :         if (s[--len] != quote || s[--len] != quote) {
     241           0 :             PyErr_BadInternalCall();
     242           0 :             return -1;
     243             :         }
     244             :     }
     245             : 
     246     1691440 :     if (fmode) {
     247             :         /* Just return the bytes. The caller will parse the resulting
     248             :            string. */
     249       22297 :         *fstr = s;
     250       22297 :         *fstrlen = len;
     251       22297 :         return 0;
     252             :     }
     253             : 
     254             :     /* Not an f-string. */
     255             :     /* Avoid invoking escape decoding routines if possible. */
     256     1669140 :     *rawmode = *rawmode || strchr(s, '\\') == NULL;
     257     1669140 :     if (*bytesmode) {
     258             :         /* Disallow non-ASCII characters. */
     259             :         const char *ch;
     260      739404 :         for (ch = s; *ch; ch++) {
     261      687092 :             if (Py_CHARMASK(*ch) >= 0x80) {
     262         138 :                 RAISE_SYNTAX_ERROR(
     263             :                                    "bytes can only contain ASCII "
     264             :                                    "literal characters");
     265         138 :                 return -1;
     266             :             }
     267             :         }
     268       52312 :         if (*rawmode) {
     269       33126 :             *result = PyBytes_FromStringAndSize(s, len);
     270             :         }
     271             :         else {
     272       19186 :             *result = decode_bytes_with_escapes(p, s, len, t);
     273             :         }
     274             :     }
     275             :     else {
     276     1616690 :         if (*rawmode) {
     277     1477780 :             *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
     278             :         }
     279             :         else {
     280      138910 :             *result = decode_unicode_with_escapes(p, s, len, t);
     281             :         }
     282             :     }
     283     1669000 :     return *result == NULL ? -1 : 0;
     284             : }
     285             : 
     286             : 
     287             : 
     288             : // FSTRING STUFF
     289             : 
     290             : /* Fix locations for the given node and its children.
     291             : 
     292             :    `parent` is the enclosing node.
     293             :    `expr_start` is the starting position of the expression (pointing to the open brace).
     294             :    `n` is the node which locations are going to be fixed relative to parent.
     295             :    `expr_str` is the child node's string representation, including braces.
     296             : */
     297             : static bool
     298       96110 : fstring_find_expr_location(Token *parent, const char* expr_start, char *expr_str, int *p_lines, int *p_cols)
     299             : {
     300       96110 :     *p_lines = 0;
     301       96110 :     *p_cols = 0;
     302       96110 :     assert(expr_start != NULL && *expr_start == '{');
     303       96110 :     if (parent && parent->bytes) {
     304       96110 :         const char *parent_str = PyBytes_AsString(parent->bytes);
     305       96110 :         if (!parent_str) {
     306           0 :             return false;
     307             :         }
     308             :         // The following is needed, in order to correctly shift the column
     309             :         // offset, in the case that (disregarding any whitespace) a newline
     310             :         // immediately follows the opening curly brace of the fstring expression.
     311       96110 :         bool newline_after_brace = 1;
     312       96110 :         const char *start = expr_start + 1;
     313       96152 :         while (start && *start != '}' && *start != '\n') {
     314       96145 :             if (*start != ' ' && *start != '\t' && *start != '\f') {
     315       96103 :                 newline_after_brace = 0;
     316       96103 :                 break;
     317             :             }
     318          42 :             start++;
     319             :         }
     320             : 
     321             :         // Account for the characters from the last newline character to our
     322             :         // left until the beginning of expr_start.
     323       96110 :         if (!newline_after_brace) {
     324       96103 :             start = expr_start;
     325    36140700 :             while (start > parent_str && *start != '\n') {
     326    36044600 :                 start--;
     327             :             }
     328       96103 :             *p_cols += (int)(expr_start - start);
     329             :         }
     330             :         /* adjust the start based on the number of newlines encountered
     331             :            before the f-string expression */
     332    36266400 :         for (const char *p = parent_str; p < expr_start; p++) {
     333    36170200 :             if (*p == '\n') {
     334        4706 :                 (*p_lines)++;
     335             :             }
     336             :         }
     337             :     }
     338       96110 :     return true;
     339             : }
     340             : 
     341             : 
     342             : /* Compile this expression in to an expr_ty.  Add parens around the
     343             :    expression, in order to allow leading spaces in the expression. */
     344             : static expr_ty
     345       96139 : fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
     346             :                      Token *t)
     347             : {
     348       96139 :     expr_ty expr = NULL;
     349             :     char *str;
     350             :     Py_ssize_t len;
     351             :     const char *s;
     352       96139 :     expr_ty result = NULL;
     353             : 
     354       96139 :     assert(expr_end >= expr_start);
     355       96139 :     assert(*(expr_start-1) == '{');
     356       96139 :     assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':' ||
     357             :            *expr_end == '=');
     358             : 
     359             :     /* If the substring is all whitespace, it's an error.  We need to catch this
     360             :        here, and not when we call PyParser_SimpleParseStringFlagsFilename,
     361             :        because turning the expression '' in to '()' would go from being invalid
     362             :        to valid. */
     363       96261 :     for (s = expr_start; s != expr_end; s++) {
     364       96232 :         char c = *s;
     365             :         /* The Python parser ignores only the following whitespace
     366             :            characters (\r already is converted to \n). */
     367       96232 :         if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) {
     368       96110 :             break;
     369             :         }
     370             :     }
     371             : 
     372       96139 :     if (s == expr_end) {
     373          29 :         if (*expr_end == '!' || *expr_end == ':' || *expr_end == '=') {
     374          23 :             RAISE_SYNTAX_ERROR("f-string: expression required before '%c'", *expr_end);
     375          23 :             return NULL;
     376             :         }
     377           6 :         RAISE_SYNTAX_ERROR("f-string: empty expression not allowed");
     378           6 :         return NULL;
     379             :     }
     380             : 
     381       96110 :     len = expr_end - expr_start;
     382             :     /* Allocate 3 extra bytes: open paren, close paren, null byte. */
     383       96110 :     str = PyMem_Calloc(len + 3, sizeof(char));
     384       96110 :     if (str == NULL) {
     385           0 :         PyErr_NoMemory();
     386           0 :         return NULL;
     387             :     }
     388             : 
     389             :     // The call to fstring_find_expr_location is responsible for finding the column offset
     390             :     // the generated AST nodes need to be shifted to the right, which is equal to the number
     391             :     // of the f-string characters before the expression starts.
     392       96110 :     memcpy(str+1, expr_start, len);
     393             :     int lines, cols;
     394       96110 :     if (!fstring_find_expr_location(t, expr_start-1, str+1, &lines, &cols)) {
     395           0 :         PyMem_Free(str);
     396           0 :         return NULL;
     397             :     }
     398             : 
     399             :     // The parentheses are needed in order to allow for leading whitespace within
     400             :     // the f-string expression. This consequently gets parsed as a group (see the
     401             :     // group rule in python.gram).
     402       96110 :     str[0] = '(';
     403       96110 :     str[len+1] = ')';
     404             : 
     405       96110 :     struct tok_state* tok = _PyTokenizer_FromString(str, 1);
     406       96110 :     if (tok == NULL) {
     407           0 :         PyMem_Free(str);
     408           0 :         return NULL;
     409             :     }
     410       96110 :     Py_INCREF(p->tok->filename);
     411             : 
     412       96110 :     tok->filename = p->tok->filename;
     413       96110 :     tok->lineno = t->lineno + lines - 1;
     414             : 
     415       96110 :     Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version,
     416             :                                      NULL, p->arena);
     417             : 
     418       96110 :     p2->starting_lineno = t->lineno + lines;
     419       96110 :     p2->starting_col_offset = t->col_offset + cols;
     420             : 
     421       96110 :     expr = _PyPegen_run_parser(p2);
     422             : 
     423       96110 :     if (expr == NULL) {
     424          16 :         goto exit;
     425             :     }
     426       96094 :     result = expr;
     427             : 
     428       96110 : exit:
     429       96110 :     PyMem_Free(str);
     430       96110 :     _PyPegen_Parser_Free(p2);
     431       96110 :     _PyTokenizer_Free(tok);
     432       96110 :     return result;
     433             : }
     434             : 
     435             : /* Return -1 on error.
     436             : 
     437             :    Return 0 if we reached the end of the literal.
     438             : 
     439             :    Return 1 if we haven't reached the end of the literal, but we want
     440             :    the caller to process the literal up to this point. Used for
     441             :    doubled braces.
     442             : */
     443             : static int
     444      119840 : fstring_find_literal(Parser *p, const char **str, const char *end, int raw,
     445             :                      PyObject **literal, int recurse_lvl, Token *t)
     446             : {
     447             :     /* Get any literal string. It ends when we hit an un-doubled left
     448             :        brace (which isn't part of a unicode name escape such as
     449             :        "\N{EULER CONSTANT}"), or the end of the string. */
     450             : 
     451      119840 :     const char *s = *str;
     452      119840 :     const char *literal_start = s;
     453      119840 :     int result = 0;
     454             : 
     455      119840 :     assert(*literal == NULL);
     456      672380 :     while (s < end) {
     457      650201 :         char ch = *s++;
     458      650201 :         if (!raw && ch == '\\' && s < end) {
     459        3182 :             ch = *s++;
     460        3182 :             if (ch == 'N') {
     461             :                 /* We need to look at and skip matching braces for "\N{name}"
     462             :                    sequences because otherwise we'll think the opening '{'
     463             :                    starts an expression, which is not the case with "\N".
     464             :                    Keep looking for either a matched '{' '}' pair, or the end
     465             :                    of the string. */
     466             : 
     467          27 :                 if (s < end && *s++ == '{') {
     468         524 :                     while (s < end && *s++ != '}') {
     469             :                     }
     470          24 :                     continue;
     471             :                 }
     472             : 
     473             :                 /* This is an invalid "\N" sequence, since it's a "\N" not
     474             :                    followed by a "{".  Just keep parsing this literal.  This
     475             :                    error will be caught later by
     476             :                    decode_unicode_with_escapes(). */
     477           3 :                 continue;
     478             :             }
     479        3155 :             if (ch == '{' && warn_invalid_escape_sequence(p, s-1, t) < 0) {
     480           0 :                 return -1;
     481             :             }
     482             :         }
     483      650174 :         if (ch == '{' || ch == '}') {
     484             :             /* Check for doubled braces, but only at the top level. If
     485             :                we checked at every level, then f'{0:{3}}' would fail
     486             :                with the two closing braces. */
     487       97661 :             if (recurse_lvl == 0) {
     488       96553 :                 if (s < end && *s == ch) {
     489             :                     /* We're going to tell the caller that the literal ends
     490             :                        here, but that they should continue scanning. But also
     491             :                        skip over the second brace when we resume scanning. */
     492         518 :                     *str = s + 1;
     493         518 :                     result = 1;
     494         518 :                     goto done;
     495             :                 }
     496             : 
     497             :                 /* Where a single '{' is the start of a new expression, a
     498             :                    single '}' is not allowed. */
     499       96035 :                 if (ch == '}') {
     500           8 :                     *str = s - 1;
     501           8 :                     RAISE_SYNTAX_ERROR("f-string: single '}' is not allowed");
     502           8 :                     return -1;
     503             :                 }
     504             :             }
     505             :             /* We're either at a '{', which means we're starting another
     506             :                expression; or a '}', which means we're at the end of this
     507             :                f-string (for a nested format_spec). */
     508       97135 :             s--;
     509       97135 :             break;
     510             :         }
     511             :     }
     512      119314 :     *str = s;
     513      119314 :     assert(s <= end);
     514      119314 :     assert(s == end || *s == '{' || *s == '}');
     515      119314 : done:
     516      119832 :     if (literal_start != s) {
     517      101446 :         if (raw) {
     518         356 :             *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
     519             :                                                     s - literal_start,
     520             :                                                     NULL, NULL);
     521             :         }
     522             :         else {
     523      101090 :             *literal = decode_unicode_with_escapes(p, literal_start,
     524      101090 :                                                    s - literal_start, t);
     525             :         }
     526      101446 :         if (!*literal) {
     527           5 :             return -1;
     528             :         }
     529             :     }
     530      119827 :     return result;
     531             : }
     532             : 
     533             : /* Forward declaration because parsing is recursive. */
     534             : static expr_ty
     535             : fstring_parse(Parser *p, const char **str, const char *end, int raw, int recurse_lvl,
     536             :               Token *first_token, Token* t, Token *last_token);
     537             : 
     538             : /* Parse the f-string at *str, ending at end.  We know *str starts an
     539             :    expression (so it must be a '{'). Returns the FormattedValue node, which
     540             :    includes the expression, conversion character, format_spec expression, and
     541             :    optionally the text of the expression (if = is used).
     542             : 
     543             :    Note that I don't do a perfect job here: I don't make sure that a
     544             :    closing brace doesn't match an opening paren, for example. It
     545             :    doesn't need to error on all invalid expressions, just correctly
     546             :    find the end of all valid ones. Any errors inside the expression
     547             :    will be caught when we parse it later.
     548             : 
     549             :    *expression is set to the expression.  For an '=' "debug" expression,
     550             :    *expr_text is set to the debug text (the original text of the expression,
     551             :    including the '=' and any whitespace around it, as a string object).  If
     552             :    not a debug expression, *expr_text set to NULL. */
     553             : static int
     554       96174 : fstring_find_expr(Parser *p, const char **str, const char *end, int raw, int recurse_lvl,
     555             :                   PyObject **expr_text, expr_ty *expression, Token *first_token,
     556             :                   Token *t, Token *last_token)
     557             : {
     558             :     /* Return -1 on error, else 0. */
     559             : 
     560             :     const char *expr_start;
     561             :     const char *expr_end;
     562             :     expr_ty simple_expression;
     563       96174 :     expr_ty format_spec = NULL; /* Optional format specifier. */
     564       96174 :     int conversion = -1; /* The conversion char.  Use default if not
     565             :                             specified, or !r if using = and no format
     566             :                             spec. */
     567             : 
     568             :     /* 0 if we're not in a string, else the quote char we're trying to
     569             :        match (single or double quote). */
     570       96174 :     char quote_char = 0;
     571             : 
     572             :     /* If we're inside a string, 1=normal, 3=triple-quoted. */
     573       96174 :     int string_type = 0;
     574             : 
     575             :     /* Keep track of nesting level for braces/parens/brackets in
     576             :        expressions. */
     577       96174 :     Py_ssize_t nested_depth = 0;
     578             :     char parenstack[MAXLEVEL];
     579             : 
     580       96174 :     *expr_text = NULL;
     581             : 
     582             :     /* Can only nest one level deep. */
     583       96174 :     if (recurse_lvl >= 2) {
     584           1 :         RAISE_SYNTAX_ERROR("f-string: expressions nested too deeply");
     585           1 :         goto error;
     586             :     }
     587             : 
     588             :     /* The first char must be a left brace, or we wouldn't have gotten
     589             :        here. Skip over it. */
     590       96173 :     assert(**str == '{');
     591       96173 :     *str += 1;
     592             : 
     593       96173 :     expr_start = *str;
     594      429328 :     for (; *str < end; (*str)++) {
     595             :         char ch;
     596             : 
     597             :         /* Loop invariants. */
     598      429313 :         assert(nested_depth >= 0);
     599      429313 :         assert(*str >= expr_start && *str < end);
     600      429313 :         if (quote_char) {
     601        3288 :             assert(string_type == 1 || string_type == 3);
     602             :         } else {
     603      426025 :             assert(string_type == 0);
     604             :         }
     605             : 
     606      429313 :         ch = **str;
     607             :         /* Nowhere inside an expression is a backslash allowed. */
     608      429313 :         if (ch == '\\') {
     609             :             /* Error: can't include a backslash character, inside
     610             :                parens or strings or not. */
     611           8 :             RAISE_SYNTAX_ERROR(
     612             :                       "f-string expression part "
     613             :                       "cannot include a backslash");
     614           8 :             goto error;
     615             :         }
     616      429305 :         if (quote_char) {
     617             :             /* We're inside a string. See if we're at the end. */
     618             :             /* This code needs to implement the same non-error logic
     619             :                as tok_get from tokenizer.c, at the letter_quote
     620             :                label. To actually share that code would be a
     621             :                nightmare. But, it's unlikely to change and is small,
     622             :                so duplicate it here. Note we don't need to catch all
     623             :                of the errors, since they'll be caught when parsing the
     624             :                expression. We just need to match the non-error
     625             :                cases. Thus we can ignore \n in single-quoted strings,
     626             :                for example. Or non-terminated strings. */
     627        3287 :             if (ch == quote_char) {
     628             :                 /* Does this match the string_type (single or triple
     629             :                    quoted)? */
     630         921 :                 if (string_type == 3) {
     631          62 :                     if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
     632             :                         /* We're at the end of a triple quoted string. */
     633          38 :                         *str += 2;
     634          38 :                         string_type = 0;
     635          38 :                         quote_char = 0;
     636          38 :                         continue;
     637             :                     }
     638             :                 } else {
     639             :                     /* We're at the end of a normal string. */
     640         859 :                     quote_char = 0;
     641         859 :                     string_type = 0;
     642         859 :                     continue;
     643             :                 }
     644             :             }
     645      426018 :         } else if (ch == '\'' || ch == '"') {
     646             :             /* Is this a triple quoted string? */
     647         902 :             if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
     648          38 :                 string_type = 3;
     649          38 :                 *str += 2;
     650             :             } else {
     651             :                 /* Start of a normal string. */
     652         864 :                 string_type = 1;
     653             :             }
     654             :             /* Start looking for the end of the string. */
     655         902 :             quote_char = ch;
     656      425116 :         } else if (ch == '[' || ch == '{' || ch == '(') {
     657        3682 :             if (nested_depth >= MAXLEVEL) {
     658           1 :                 RAISE_SYNTAX_ERROR("f-string: too many nested parenthesis");
     659           1 :                 goto error;
     660             :             }
     661        3681 :             parenstack[nested_depth] = ch;
     662        3681 :             nested_depth++;
     663      421434 :         } else if (ch == '#') {
     664             :             /* Error: can't include a comment character, inside parens
     665             :                or not. */
     666           3 :             RAISE_SYNTAX_ERROR("f-string expression part cannot include '#'");
     667           3 :             goto error;
     668      421431 :         } else if (nested_depth == 0 &&
     669      391385 :                    (ch == '!' || ch == ':' || ch == '}' ||
     670      302144 :                     ch == '=' || ch == '>' || ch == '<')) {
     671             :             /* See if there's a next character. */
     672       96252 :             if (*str+1 < end) {
     673       89444 :                 char next = *(*str+1);
     674             : 
     675             :                 /* For "!=". since '=' is not an allowed conversion character,
     676             :                    nothing is lost in this test. */
     677       89444 :                 if ((ch == '!' && next == '=') ||   /* != */
     678       89426 :                     (ch == '=' && next == '=') ||   /* == */
     679       89419 :                     (ch == '<' && next == '=') ||   /* <= */
     680          83 :                     (ch == '>' && next == '=')      /* >= */
     681             :                     ) {
     682          31 :                     *str += 1;
     683          31 :                     continue;
     684             :                 }
     685             :             }
     686             :             /* Don't get out of the loop for these, if they're single
     687             :                chars (not part of 2-char tokens). If by themselves, they
     688             :                don't end an expression (unlike say '!'). */
     689       96221 :             if (ch == '>' || ch == '<') {
     690          82 :                 continue;
     691             :             }
     692             : 
     693             :             /* Normal way out of this loop. */
     694       96139 :             break;
     695      325179 :         } else if (ch == ']' || ch == '}' || ch == ')') {
     696        3479 :             if (!nested_depth) {
     697           2 :                 RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", ch);
     698           2 :                 goto error;
     699             :             }
     700        3477 :             nested_depth--;
     701        3477 :             int opening = (unsigned char)parenstack[nested_depth];
     702        3479 :             if (!((opening == '(' && ch == ')') ||
     703         924 :                   (opening == '[' && ch == ']') ||
     704          23 :                   (opening == '{' && ch == '}')))
     705             :             {
     706           5 :                 RAISE_SYNTAX_ERROR(
     707             :                           "f-string: closing parenthesis '%c' "
     708             :                           "does not match opening parenthesis '%c'",
     709             :                           ch, opening);
     710           5 :                 goto error;
     711             :             }
     712             :         } else {
     713             :             /* Just consume this char and loop around. */
     714             :         }
     715             :     }
     716       96154 :     expr_end = *str;
     717             :     /* If we leave the above loop in a string or with mismatched parens, we
     718             :        don't really care. We'll get a syntax error when compiling the
     719             :        expression. But, we can produce a better error message, so let's just
     720             :        do that.*/
     721       96154 :     if (quote_char) {
     722           4 :         RAISE_SYNTAX_ERROR("f-string: unterminated string");
     723           4 :         goto error;
     724             :     }
     725       96150 :     if (nested_depth) {
     726           0 :         int opening = (unsigned char)parenstack[nested_depth - 1];
     727           0 :         RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", opening);
     728           0 :         goto error;
     729             :     }
     730             : 
     731       96150 :     if (*str >= end) {
     732          11 :         goto unexpected_end_of_string;
     733             :     }
     734             : 
     735             :     /* Compile the expression as soon as possible, so we show errors
     736             :        related to the expression before errors related to the
     737             :        conversion or format_spec. */
     738       96139 :     simple_expression = fstring_compile_expr(p, expr_start, expr_end, t);
     739       96139 :     if (!simple_expression) {
     740          45 :         goto error;
     741             :     }
     742             : 
     743             :     /* Check for =, which puts the text value of the expression in
     744             :        expr_text. */
     745       96094 :     if (**str == '=') {
     746         169 :         if (p->feature_version < 8) {
     747           1 :             RAISE_SYNTAX_ERROR("f-string: self documenting expressions are "
     748             :                                "only supported in Python 3.8 and greater");
     749           1 :             goto error;
     750             :         }
     751         168 :         *str += 1;
     752             : 
     753             :         /* Skip over ASCII whitespace.  No need to test for end of string
     754             :            here, since we know there's at least a trailing quote somewhere
     755             :            ahead. */
     756         178 :         while (Py_ISSPACE(**str)) {
     757          10 :             *str += 1;
     758             :         }
     759         168 :         if (*str >= end) {
     760           1 :             goto unexpected_end_of_string;
     761             :         }
     762             :         /* Set *expr_text to the text of the expression. */
     763         167 :         *expr_text = PyUnicode_FromStringAndSize(expr_start, *str-expr_start);
     764         167 :         if (!*expr_text) {
     765           0 :             goto error;
     766             :         }
     767             :     }
     768             : 
     769             :     /* Check for a conversion char, if present. */
     770       96092 :     if (**str == '!') {
     771        6945 :         *str += 1;
     772        6945 :         const char *conv_start = *str;
     773             :         while (1) {
     774       13900 :             if (*str >= end) {
     775           5 :                 goto unexpected_end_of_string;
     776             :             }
     777       13895 :             if (**str == '}' || **str == ':') {
     778             :                 break;
     779             :             }
     780        6955 :             *str += 1;
     781             :         }
     782        6940 :         if (*str == conv_start) {
     783           3 :             RAISE_SYNTAX_ERROR(
     784             :                       "f-string: missed conversion character");
     785           3 :             goto error;
     786             :         }
     787             : 
     788        6937 :         conversion = (unsigned char)*conv_start;
     789             :         /* Validate the conversion. */
     790        6937 :         if ((*str != conv_start + 1) ||
     791        6840 :             !(conversion == 's' || conversion == 'r' || conversion == 'a'))
     792             :         {
     793          16 :             PyObject *conv_obj = PyUnicode_FromStringAndSize(conv_start,
     794          16 :                                                              *str-conv_start);
     795          16 :             if (conv_obj) {
     796          16 :                 RAISE_SYNTAX_ERROR(
     797             :                         "f-string: invalid conversion character %R: "
     798             :                         "expected 's', 'r', or 'a'",
     799             :                         conv_obj);
     800          16 :                 Py_DECREF(conv_obj);
     801             :             }
     802          16 :             goto error;
     803             :         }
     804             : 
     805             :     }
     806             : 
     807             :     /* Check for the format spec, if present. */
     808       96068 :     assert(*str < end);
     809       96068 :     if (**str == ':') {
     810         970 :         *str += 1;
     811         970 :         if (*str >= end) {
     812           2 :             goto unexpected_end_of_string;
     813             :         }
     814             : 
     815             :         /* Parse the format spec. */
     816         968 :         format_spec = fstring_parse(p, str, end, raw, recurse_lvl+1,
     817             :                                     first_token, t, last_token);
     818         968 :         if (!format_spec) {
     819           7 :             goto error;
     820             :         }
     821             :     }
     822             : 
     823       96059 :     if (*str >= end || **str != '}') {
     824           1 :         goto unexpected_end_of_string;
     825             :     }
     826             : 
     827             :     /* We're at a right brace. Consume it. */
     828       96058 :     assert(*str < end);
     829       96058 :     assert(**str == '}');
     830       96058 :     *str += 1;
     831             : 
     832             :     /* If we're in = mode (detected by non-NULL expr_text), and have no format
     833             :        spec and no explicit conversion, set the conversion to 'r'. */
     834       96058 :     if (*expr_text && format_spec == NULL && conversion == -1) {
     835          62 :         conversion = 'r';
     836             :     }
     837             : 
     838             :     /* And now create the FormattedValue node that represents this
     839             :        entire expression with the conversion and format spec. */
     840             :     //TODO: Fix this
     841       96058 :     *expression = _PyAST_FormattedValue(simple_expression, conversion,
     842             :                                         format_spec, first_token->lineno,
     843             :                                         first_token->col_offset,
     844             :                                         last_token->end_lineno,
     845             :                                         last_token->end_col_offset, p->arena);
     846       96058 :     if (!*expression) {
     847           0 :         goto error;
     848             :     }
     849             : 
     850       96058 :     return 0;
     851             : 
     852          20 : unexpected_end_of_string:
     853          20 :     RAISE_SYNTAX_ERROR("f-string: expecting '}'");
     854             :     /* Falls through to error. */
     855             : 
     856         116 : error:
     857         116 :     Py_XDECREF(*expr_text);
     858         116 :     return -1;
     859             : 
     860             : }
     861             : 
     862             : /* Return -1 on error.
     863             : 
     864             :    Return 0 if we have a literal (possible zero length) and an
     865             :    expression (zero length if at the end of the string.
     866             : 
     867             :    Return 1 if we have a literal, but no expression, and we want the
     868             :    caller to call us again. This is used to deal with doubled
     869             :    braces.
     870             : 
     871             :    When called multiple times on the string 'a{{b{0}c', this function
     872             :    will return:
     873             : 
     874             :    1. the literal 'a{' with no expression, and a return value
     875             :       of 1. Despite the fact that there's no expression, the return
     876             :       value of 1 means we're not finished yet.
     877             : 
     878             :    2. the literal 'b' and the expression '0', with a return value of
     879             :       0. The fact that there's an expression means we're not finished.
     880             : 
     881             :    3. literal 'c' with no expression and a return value of 0. The
     882             :       combination of the return value of 0 with no expression means
     883             :       we're finished.
     884             : */
     885             : static int
     886      119840 : fstring_find_literal_and_expr(Parser *p, const char **str, const char *end, int raw,
     887             :                               int recurse_lvl, PyObject **literal,
     888             :                               PyObject **expr_text, expr_ty *expression,
     889             :                               Token *first_token, Token *t, Token *last_token)
     890             : {
     891             :     int result;
     892             : 
     893      119840 :     assert(*literal == NULL && *expression == NULL);
     894             : 
     895             :     /* Get any literal string. */
     896      119840 :     result = fstring_find_literal(p, str, end, raw, literal, recurse_lvl, t);
     897      119840 :     if (result < 0) {
     898          13 :         goto error;
     899             :     }
     900             : 
     901      119827 :     assert(result == 0 || result == 1);
     902             : 
     903      119827 :     if (result == 1) {
     904             :         /* We have a literal, but don't look at the expression. */
     905         518 :         return 1;
     906             :     }
     907             : 
     908      119309 :     if (*str >= end || **str == '}') {
     909             :         /* We're at the end of the string or the end of a nested
     910             :            f-string: no expression. The top-level error case where we
     911             :            expect to be at the end of the string but we're at a '}' is
     912             :            handled later. */
     913       23135 :         return 0;
     914             :     }
     915             : 
     916             :     /* We must now be the start of an expression, on a '{'. */
     917       96174 :     assert(**str == '{');
     918             : 
     919       96174 :     if (fstring_find_expr(p, str, end, raw, recurse_lvl, expr_text,
     920             :                           expression, first_token, t, last_token) < 0) {
     921         116 :         goto error;
     922             :     }
     923             : 
     924       96058 :     return 0;
     925             : 
     926         129 : error:
     927         129 :     Py_CLEAR(*literal);
     928         129 :     return -1;
     929             : }
     930             : 
     931             : #ifdef NDEBUG
     932             : #define ExprList_check_invariants(l)
     933             : #else
     934             : static void
     935     8590440 : ExprList_check_invariants(ExprList *l)
     936             : {
     937             :     /* Check our invariants. Make sure this object is "live", and
     938             :        hasn't been deallocated. */
     939     8590440 :     assert(l->size >= 0);
     940     8590440 :     assert(l->p != NULL);
     941     8590440 :     if (l->size <= EXPRLIST_N_CACHED) {
     942     8172420 :         assert(l->data == l->p);
     943             :     }
     944     8590440 : }
     945             : #endif
     946             : 
     947             : static void
     948     1608640 : ExprList_Init(ExprList *l)
     949             : {
     950     1608640 :     l->allocated = EXPRLIST_N_CACHED;
     951     1608640 :     l->size = 0;
     952             : 
     953             :     /* Until we start allocating dynamically, p points to data. */
     954     1608640 :     l->p = l->data;
     955             : 
     956     1608640 :     ExprList_check_invariants(l);
     957     1608640 : }
     958             : 
     959             : static int
     960      196241 : ExprList_Append(ExprList *l, expr_ty exp)
     961             : {
     962      196241 :     ExprList_check_invariants(l);
     963      196241 :     if (l->size >= l->allocated) {
     964             :         /* We need to alloc (or realloc) the memory. */
     965          58 :         Py_ssize_t new_size = l->allocated * 2;
     966             : 
     967             :         /* See if we've ever allocated anything dynamically. */
     968          58 :         if (l->p == l->data) {
     969             :             Py_ssize_t i;
     970             :             /* We're still using the cached data. Switch to
     971             :                alloc-ing. */
     972          15 :             l->p = PyMem_Malloc(sizeof(expr_ty) * new_size);
     973          15 :             if (!l->p) {
     974           0 :                 return -1;
     975             :             }
     976             :             /* Copy the cached data into the new buffer. */
     977         975 :             for (i = 0; i < l->size; i++) {
     978         960 :                 l->p[i] = l->data[i];
     979             :             }
     980             :         } else {
     981             :             /* Just realloc. */
     982          43 :             expr_ty *tmp = PyMem_Realloc(l->p, sizeof(expr_ty) * new_size);
     983          43 :             if (!tmp) {
     984           0 :                 PyMem_Free(l->p);
     985           0 :                 l->p = NULL;
     986           0 :                 return -1;
     987             :             }
     988          43 :             l->p = tmp;
     989             :         }
     990             : 
     991          58 :         l->allocated = new_size;
     992          58 :         assert(l->allocated == 2 * l->size);
     993             :     }
     994             : 
     995      196241 :     l->p[l->size++] = exp;
     996             : 
     997      196241 :     ExprList_check_invariants(l);
     998      196241 :     return 0;
     999             : }
    1000             : 
    1001             : static void
    1002       20124 : ExprList_Dealloc(ExprList *l)
    1003             : {
    1004       20124 :     ExprList_check_invariants(l);
    1005             : 
    1006             :     /* If there's been an error, or we've never dynamically allocated,
    1007             :        do nothing. */
    1008       20124 :     if (!l->p || l->p == l->data) {
    1009             :         /* Do nothing. */
    1010             :     } else {
    1011             :         /* We have dynamically allocated. Free the memory. */
    1012          15 :         PyMem_Free(l->p);
    1013             :     }
    1014       20124 :     l->p = NULL;
    1015       20124 :     l->size = -1;
    1016       20124 : }
    1017             : 
    1018             : static asdl_expr_seq *
    1019       19350 : ExprList_Finish(ExprList *l, PyArena *arena)
    1020             : {
    1021             :     asdl_expr_seq *seq;
    1022             : 
    1023       19350 :     ExprList_check_invariants(l);
    1024             : 
    1025             :     /* Allocate the asdl_seq and copy the expressions in to it. */
    1026       19350 :     seq = _Py_asdl_expr_seq_new(l->size, arena);
    1027       19350 :     if (seq) {
    1028             :         Py_ssize_t i;
    1029      215589 :         for (i = 0; i < l->size; i++) {
    1030      196239 :             asdl_seq_SET(seq, i, l->p[i]);
    1031             :         }
    1032             :     }
    1033       19350 :     ExprList_Dealloc(l);
    1034       19350 :     return seq;
    1035             : }
    1036             : 
    1037             : #ifdef NDEBUG
    1038             : #define FstringParser_check_invariants(state)
    1039             : #else
    1040             : static void
    1041     6549840 : FstringParser_check_invariants(FstringParser *state)
    1042             : {
    1043     6549840 :     if (state->last_str) {
    1044     3161850 :         assert(PyUnicode_CheckExact(state->last_str));
    1045             :     }
    1046     6549840 :     ExprList_check_invariants(&state->expr_list);
    1047     6549840 : }
    1048             : #endif
    1049             : 
    1050             : void
    1051     1608640 : _PyPegen_FstringParser_Init(FstringParser *state)
    1052             : {
    1053     1608640 :     state->last_str = NULL;
    1054     1608640 :     state->fmode = 0;
    1055     1608640 :     ExprList_Init(&state->expr_list);
    1056     1608640 :     FstringParser_check_invariants(state);
    1057     1608640 : }
    1058             : 
    1059             : void
    1060         774 : _PyPegen_FstringParser_Dealloc(FstringParser *state)
    1061             : {
    1062         774 :     FstringParser_check_invariants(state);
    1063             : 
    1064         774 :     Py_XDECREF(state->last_str);
    1065         774 :     ExprList_Dealloc(&state->expr_list);
    1066         774 : }
    1067             : 
    1068             : /* Make a Constant node, but decref the PyUnicode object being added. */
    1069             : static expr_ty
    1070     1639960 : make_str_node_and_del(Parser *p, PyObject **str, Token* first_token, Token *last_token)
    1071             : {
    1072     1639960 :     PyObject *s = *str;
    1073     1639960 :     PyObject *kind = NULL;
    1074     1639960 :     *str = NULL;
    1075     1639960 :     assert(PyUnicode_CheckExact(s));
    1076     1639960 :     if (_PyArena_AddPyObject(p->arena, s) < 0) {
    1077           0 :         Py_DECREF(s);
    1078           0 :         return NULL;
    1079             :     }
    1080     1639960 :     const char* the_str = PyBytes_AsString(first_token->bytes);
    1081     1639960 :     if (the_str && the_str[0] == 'u') {
    1082         844 :         kind = _PyPegen_new_identifier(p, "u");
    1083             :     }
    1084             : 
    1085     1639960 :     if (kind == NULL && PyErr_Occurred()) {
    1086           0 :         return NULL;
    1087             :     }
    1088             : 
    1089     1639960 :     return _PyAST_Constant(s, kind, first_token->lineno, first_token->col_offset,
    1090             :                            last_token->end_lineno, last_token->end_col_offset,
    1091             :                            p->arena);
    1092             : 
    1093             : }
    1094             : 
    1095             : 
    1096             : /* Add a non-f-string (that is, a regular literal string). str is
    1097             :    decref'd. */
    1098             : int
    1099     1717790 : _PyPegen_FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
    1100             : {
    1101     1717790 :     FstringParser_check_invariants(state);
    1102             : 
    1103     1717790 :     assert(PyUnicode_CheckExact(str));
    1104             : 
    1105     1717790 :     if (PyUnicode_GET_LENGTH(str) == 0) {
    1106      100678 :         Py_DECREF(str);
    1107      100678 :         return 0;
    1108             :     }
    1109             : 
    1110     1617110 :     if (!state->last_str) {
    1111             :         /* We didn't have a string before, so just remember this one. */
    1112     1539340 :         state->last_str = str;
    1113             :     } else {
    1114             :         /* Concatenate this with the previous string. */
    1115       77766 :         PyUnicode_AppendAndDel(&state->last_str, str);
    1116       77766 :         if (!state->last_str) {
    1117           0 :             return -1;
    1118             :         }
    1119             :     }
    1120     1617110 :     FstringParser_check_invariants(state);
    1121     1617110 :     return 0;
    1122             : }
    1123             : 
    1124             : /* Parse an f-string. The f-string is in *str to end, with no
    1125             :    'f' or quotes.
                     :
                     :    Marks the state as an f-string (state->fmode = 1), then loops over
                     :    fstring_find_literal_and_expr(): each literal and expr_text it
                     :    yields is handed to _PyPegen_FstringParser_ConcatAndDel(), and each
                     :    expression is appended to state->expr_list, preceded by a node
                     :    built by make_str_node_and_del() from any pending state->last_str.
                     :
                     :    p, raw, recurse_lvl, first_token, t and last_token are forwarded to
                     :    fstring_find_literal_and_expr(); first_token and last_token are
                     :    also passed to make_str_node_and_del() for the last_str node.
                     :
                     :    Returns 0 on success and -1 on failure.  After the loop, a syntax
                     :    error is raised if recurse_lvl is 0 and *str < end-1, or if
                     :    recurse_lvl is non-zero and **str is not '}'.  This function never
                     :    deallocates the state itself, not even on failure; in this file
                     :    fstring_parse() does so when it sees a negative return. */
    1126             : int
    1127       23264 : _PyPegen_FstringParser_ConcatFstring(Parser *p, FstringParser *state, const char **str,
    1128             :                             const char *end, int raw, int recurse_lvl,
    1129             :                             Token *first_token, Token* t, Token *last_token)
    1130             : {
    1131       23264 :     FstringParser_check_invariants(state);
    1132       23264 :     state->fmode = 1;
    1133             : 
    1134             :     /* Parse the f-string. */
    1135       96576 :     while (1) {
    1136      119840 :         PyObject *literal = NULL;
    1137      119840 :         PyObject *expr_text = NULL;
    1138      119840 :         expr_ty expression = NULL;
    1139             : 
    1140             :         /* If there's a zero length literal in front of the
    1141             :            expression, literal will be NULL. If we're at the end of
    1142             :            the f-string, expression will be NULL (unless result == 1,
    1143             :            see below). */
    1144      119840 :         int result = fstring_find_literal_and_expr(p, str, end, raw, recurse_lvl,
    1145             :                                                    &literal, &expr_text,
    1146             :                                                    &expression, first_token, t, last_token);
    1147      119840 :         if (result < 0) {
    1148         129 :             return -1;
    1149             :         }
    1150             : 
    1151             :         /* Add the literal, if any.  If that fails, expr_text has not
                     :            been handed over yet, so it is released here.
                     :            NOTE(review): literal itself is not released on this
                     :            path; presumably ConcatAndDel disposes of its argument
                     :            even on failure -- confirm. */
    1152      119711 :         if (literal && _PyPegen_FstringParser_ConcatAndDel(state, literal) < 0) {
    1153           0 :             Py_XDECREF(expr_text);
    1154           0 :             return -1;
    1155             :         }
    1156             :         /* Add the expr_text, if any. */
    1157      119711 :         if (expr_text && _PyPegen_FstringParser_ConcatAndDel(state, expr_text) < 0) {
    1158           0 :             return -1;
    1159             :         }
    1160             : 
    1161             :         /* We've dealt with the literal and expr_text, their ownership has
    1162             :            been transferred to the state object.  Don't look at them again. */
    1163             : 
    1164             :         /* See if we should just loop around to get the next literal
    1165             :            and expression, while ignoring the expression this
    1166             :            time. This is used for un-doubling braces, as an
    1167             :            optimization. */
    1168      119711 :         if (result == 1) {
    1169         518 :             continue;
    1170             :         }
    1171             : 
    1172      119193 :         if (!expression) {
    1173             :             /* We're done with this f-string. */
    1174       23135 :             break;
    1175             :         }
    1176             : 
    1177             :         /* We know we have an expression. Convert any existing string
    1178             :            to a Constant node. */
    1179       96058 :         if (state->last_str) {
    1180             :             /* Convert the existing last_str literal to a Constant node. */
    1181       89599 :             expr_ty last_str = make_str_node_and_del(p, &state->last_str, first_token, last_token);
    1182       89599 :             if (!last_str || ExprList_Append(&state->expr_list, last_str) < 0) {
    1183           0 :                 return -1;
    1184             :             }
    1185             :         }
    1186             : 
    1187       96058 :         if (ExprList_Append(&state->expr_list, expression) < 0) {
    1188           0 :             return -1;
    1189             :         }
    1190             :     }
    1191             : 
    1192             :     /* If recurse_lvl is zero, then we must be at the end of the
    1193             :        string. Otherwise, we must be at a right brace. */
    1194             : 
    1195       23135 :     if (recurse_lvl == 0 && *str < end-1) {
    1196           0 :         RAISE_SYNTAX_ERROR("f-string: unexpected end of string");
    1197           0 :         return -1;
    1198             :     }
    1199       23135 :     if (recurse_lvl != 0 && **str != '}') {
    1200           2 :         RAISE_SYNTAX_ERROR("f-string: expecting '}'");
    1201           2 :         return -1;
    1202             :     }
    1203             : 
    1204       23133 :     FstringParser_check_invariants(state);
    1205       23133 :     return 0;
    1206             : }
    1207             : 
    1208             : /* Convert the partial state reflected in last_str and expr_list to an
    1209             :    expr_ty. The expr_ty can be a Constant, or a JoinedStr.
                     :
                     :    When state->fmode is not set, the result is whatever
                     :    make_str_node_and_del() builds from last_str (an empty string
                     :    object is created first if last_str is NULL).  Otherwise any
                     :    pending last_str is turned into a node and appended to expr_list,
                     :    and the finished list is wrapped in a JoinedStr located from
                     :    first_token's lineno/col_offset to last_token's
                     :    end_lineno/end_col_offset.
                     :
                     :    If creating the empty string, building or appending the last_str
                     :    node, or ExprList_Finish() fails, control jumps to the error
                     :    label, which releases the state with
                     :    _PyPegen_FstringParser_Dealloc() and returns NULL.  The results of
                     :    the final make_str_node_and_del() call (non-f-string path) and of
                     :    _PyAST_JoinedStr() are returned as-is, without further checks. */
    1210             : expr_ty
    1211     1559130 : _PyPegen_FstringParser_Finish(Parser *p, FstringParser *state, Token* first_token,
    1212             :                      Token *last_token)
    1213             : {
    1214             :     asdl_expr_seq *seq;
    1215             : 
    1216     1559130 :     FstringParser_check_invariants(state);
    1217             : 
    1218             :     /* If we're just a constant string with no expressions, return
    1219             :        that. */
    1220     1559130 :     if (!state->fmode) {
    1221     1539780 :         assert(!state->expr_list.size);
    1222     1539780 :         if (!state->last_str) {
    1223             :             /* Create a zero length string. */
    1224      100622 :             state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
    1225      100622 :             if (!state->last_str) {
    1226           0 :                 goto error;
    1227             :             }
    1228             :         }
    1229     1539780 :         return make_str_node_and_del(p, &state->last_str, first_token, last_token);
    1230             :     }
    1231             : 
    1232             :     /* Create a Constant node out of last_str, if needed. It will be the
    1233             :        last node in our expression list. */
    1234       19350 :     if (state->last_str) {
    1235       10584 :         expr_ty str = make_str_node_and_del(p, &state->last_str, first_token, last_token);
    1236       10584 :         if (!str || ExprList_Append(&state->expr_list, str) < 0) {
    1237           0 :             goto error;
    1238             :         }
    1239             :     }
    1240             :     /* This has already been freed. */
    1241       19350 :     assert(state->last_str == NULL);
    1242             : 
    1243       19350 :     seq = ExprList_Finish(&state->expr_list, p->arena);
    1244       19350 :     if (!seq) {
    1245           0 :         goto error;
    1246             :     }
    1247             : 
    1248       19350 :     return _PyAST_JoinedStr(seq, first_token->lineno, first_token->col_offset,
    1249             :                             last_token->end_lineno, last_token->end_col_offset,
    1250             :                             p->arena);
    1251             : 
    1252           0 : error:
    1253           0 :     _PyPegen_FstringParser_Dealloc(state);
    1254           0 :     return NULL;
    1255             : }
    1256             : 
    1257             : /* Given an f-string (with no 'f' or quotes) that's in *str and ends
    1258             :    at end, parse it into an expr_ty.  Return NULL on error.  Adjust
    1259             :    str to point past the parsed portion.
                     :
                     :    Uses a local FstringParser: it is initialized, filled by
                     :    _PyPegen_FstringParser_ConcatFstring() and turned into the result
                     :    by _PyPegen_FstringParser_Finish().  If ConcatFstring fails, the
                     :    state is deallocated here before returning NULL.
                     :
                     :    NOTE(review): Finish is given t as both its first and last token
                     :    rather than first_token/last_token, which are only forwarded to
                     :    ConcatFstring; presumably intentional so that the resulting
                     :    node's location is that of t -- confirm. */
    1260             : static expr_ty
    1261         968 : fstring_parse(Parser *p, const char **str, const char *end, int raw,
    1262             :               int recurse_lvl, Token *first_token, Token* t, Token *last_token)
    1263             : {
    1264             :     FstringParser state;
    1265             : 
    1266         968 :     _PyPegen_FstringParser_Init(&state);
    1267         968 :     if (_PyPegen_FstringParser_ConcatFstring(p, &state, str, end, raw, recurse_lvl,
    1268             :                                     first_token, t, last_token) < 0) {
    1269           7 :         _PyPegen_FstringParser_Dealloc(&state);
    1270           7 :         return NULL;
    1271             :     }
    1272             : 
    1273         961 :     return _PyPegen_FstringParser_Finish(p, &state, t, t);  /* t is passed as both first and last token */
    1274             : }

Generated by: LCOV version 1.14