/home/mdboom/Work/builds/cpython/Parser/pegen.c
Line | Count | Source (jump to first uncovered line) |
1 | #include <Python.h> |
2 | #include "pycore_ast.h" // _PyAST_Validate(), |
3 | #include <errcode.h> |
4 | |
5 | #include "tokenizer.h" |
6 | #include "pegen.h" |
7 | |
8 | // Internal parser functions |
9 | |
10 | asdl_stmt_seq* |
11 | _PyPegen_interactive_exit(Parser *p) |
12 | { |
13 | if (p->errcode) { Branch (13:9): [True: 0, False: 0]
|
14 | *(p->errcode) = E_EOF; |
15 | } |
16 | return NULL; |
17 | } |
18 | |
19 | Py_ssize_t |
20 | _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset) |
21 | { |
22 | const char *str = PyUnicode_AsUTF8(line); |
23 | if (!str) { Branch (23:9): [True: 0, False: 3.04k]
|
24 | return -1; |
25 | } |
26 | Py_ssize_t len = strlen(str); |
27 | if (col_offset > len + 1) { Branch (27:9): [True: 6, False: 3.04k]
|
28 | col_offset = len + 1; |
29 | } |
30 | assert(col_offset >= 0); |
31 | PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace"); |
32 | if (!text) { Branch (32:9): [True: 0, False: 3.04k]
|
33 | return -1; |
34 | } |
35 | Py_ssize_t size = PyUnicode_GET_LENGTH(text); |
36 | Py_DECREF(text); |
37 | return size; |
38 | } |
39 | |
40 | // Here, mark is the start of the node, while p->mark is the end. |
41 | // If node==NULL, they should be the same. |
42 | int |
43 | _PyPegen_insert_memo(Parser *p, int mark, int type, void *node) |
44 | { |
45 | // Insert in front |
46 | Memo *m = _PyArena_Malloc(p->arena, sizeof(Memo)); |
47 | if (m == NULL) { Branch (47:9): [True: 0, False: 35.2M]
|
48 | return -1; |
49 | } |
50 | m->type = type; |
51 | m->node = node; |
52 | m->mark = p->mark; |
53 | m->next = p->tokens[mark]->memo; |
54 | p->tokens[mark]->memo = m; |
55 | return 0; |
56 | } |
57 | |
58 | // Like _PyPegen_insert_memo(), but updates an existing node if found. |
59 | int |
60 | _PyPegen_update_memo(Parser *p, int mark, int type, void *node) |
61 | { |
62 | for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next120M ) { Branch (62:43): [True: 135M, False: 16.9M]
|
63 | if (m->type == type) { Branch (63:13): [True: 15.3M, False: 120M]
|
64 | // Update existing node. |
65 | m->node = node; |
66 | m->mark = p->mark; |
67 | return 0; |
68 | } |
69 | } |
70 | // Insert new node. |
71 | return _PyPegen_insert_memo(p, mark, type, node); |
72 | } |
73 | |
74 | static int |
75 | init_normalization(Parser *p) |
76 | { |
77 | if (p->normalize) { Branch (77:9): [True: 63, False: 34]
|
78 | return 1; |
79 | } |
80 | p->normalize = _PyImport_GetModuleAttrString("unicodedata", "normalize"); |
81 | if (!p->normalize) Branch (81:9): [True: 0, False: 34]
|
82 | { |
83 | return 0; |
84 | } |
85 | return 1; |
86 | } |
87 | |
88 | static int |
89 | growable_comment_array_init(growable_comment_array *arr, size_t initial_size) { |
90 | assert(initial_size > 0); |
91 | arr->items = PyMem_Malloc(initial_size * sizeof(*arr->items)); |
92 | arr->size = initial_size; |
93 | arr->num_items = 0; |
94 | |
95 | return arr->items != NULL; |
96 | } |
97 | |
98 | static int |
99 | growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) { |
100 | if (arr->num_items >= arr->size) { Branch (100:9): [True: 0, False: 74]
|
101 | size_t new_size = arr->size * 2; |
102 | void *new_items_array = PyMem_Realloc(arr->items, new_size * sizeof(*arr->items)); |
103 | if (!new_items_array) { Branch (103:13): [True: 0, False: 0]
|
104 | return 0; |
105 | } |
106 | arr->items = new_items_array; |
107 | arr->size = new_size; |
108 | } |
109 | |
110 | arr->items[arr->num_items].lineno = lineno; |
111 | arr->items[arr->num_items].comment = comment; // Take ownership |
112 | arr->num_items++; |
113 | return 1; |
114 | } |
115 | |
116 | static void |
117 | growable_comment_array_deallocate(growable_comment_array *arr) { |
118 | for (unsigned i = 0; i < arr->num_items; i++74 ) { Branch (118:26): [True: 74, False: 132k]
|
119 | PyMem_Free(arr->items[i].comment); |
120 | } |
121 | PyMem_Free(arr->items); |
122 | } |
123 | |
124 | static int |
125 | _get_keyword_or_name_type(Parser *p, const char *name, int name_len) |
126 | { |
127 | assert(name_len > 0); |
128 | if (name_len >= p->n_keyword_lists || Branch (128:9): [True: 151k, False: 2.06M]
|
129 | p->keywords[name_len] == NULL2.06M || Branch (129:9): [True: 0, False: 2.06M]
|
130 | p->keywords[name_len]->type == -12.06M ) { Branch (130:9): [True: 1.24M, False: 811k]
|
131 | return NAME; |
132 | } |
133 | for (KeywordToken *k = p->keywords[name_len]; 811k k != NULL && k->type != -1; k++2.63M ) { Branch (133:51): [True: 3.44M, False: 0]
Branch (133:64): [True: 3.02M, False: 420k]
|
134 | if (strncmp(k->str, name, name_len) == 0) { Branch (134:13): [True: 390k, False: 2.63M]
|
135 | return k->type; |
136 | } |
137 | } |
138 | return NAME; |
139 | } |
140 | |
141 | static int |
142 | initialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) { |
143 | assert(token != NULL); |
144 | |
145 | token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start))2.21M : token_type5.20M ; Branch (145:19): [True: 2.21M, False: 5.20M]
|
146 | token->bytes = PyBytes_FromStringAndSize(start, end - start); |
147 | if (token->bytes == NULL) { Branch (147:9): [True: 0, False: 7.41M]
|
148 | return -1; |
149 | } |
150 | |
151 | if (_PyArena_AddPyObject(p->arena, token->bytes) < 0) { Branch (151:9): [True: 0, False: 7.41M]
|
152 | Py_DECREF(token->bytes); |
153 | return -1; |
154 | } |
155 | |
156 | token->level = p->tok->level; |
157 | |
158 | const char *line_start = token_type == STRING ? p->tok->multi_line_start205k : p->tok->line_start7.21M ; Branch (158:30): [True: 205k, False: 7.21M]
|
159 | int lineno = token_type == STRING ? p->tok->first_lineno205k : p->tok->lineno7.21M ; Branch (159:18): [True: 205k, False: 7.21M]
|
160 | int end_lineno = p->tok->lineno; |
161 | |
162 | int col_offset = (start != NULL && start >= line_start7.19M ) ? (int)(start - line_start)7.19M : -1217k ; Branch (162:23): [True: 7.19M, False: 217k]
Branch (162:40): [True: 7.19M, False: 0]
|
163 | int end_col_offset = (end != NULL && end >= p->tok->line_start7.19M ) ? (int)(end - p->tok->line_start)7.19M : -1217k ; Branch (163:27): [True: 7.19M, False: 217k]
Branch (163:42): [True: 7.19M, False: 0]
|
164 | |
165 | token->lineno = lineno; |
166 | token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + col_offset296k : col_offset7.11M ; Branch (166:25): [True: 296k, False: 7.11M]
|
167 | token->end_lineno = end_lineno; |
168 | token->end_col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + end_col_offset296k : end_col_offset7.11M ; Branch (168:29): [True: 296k, False: 7.11M]
|
169 | |
170 | p->fill += 1; |
171 | |
172 | if (token_type == ERRORTOKEN && p->tok->done == 380 E_DECODE380 ) { Branch (172:9): [True: 380, False: 7.41M]
Branch (172:37): [True: 4, False: 376]
|
173 | return _Pypegen_raise_decode_error(p); |
174 | } |
175 | |
176 | return (token_type == ERRORTOKEN ? _Pypegen_tokenizer_error(p)376 : 07.41M ); Branch (176:13): [True: 376, False: 7.41M]
|
177 | } |
178 | |
179 | static int |
180 | _resize_tokens_array(Parser *p) { |
181 | int newsize = p->size * 2; |
182 | Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *)); |
183 | if (new_tokens == NULL) { Branch (183:9): [True: 0, False: 335k]
|
184 | PyErr_NoMemory(); |
185 | return -1; |
186 | } |
187 | p->tokens = new_tokens; |
188 | |
189 | for (int i = p->size; i < newsize; i++10.6M ) { Branch (189:27): [True: 10.6M, False: 335k]
|
190 | p->tokens[i] = PyMem_Calloc(1, sizeof(Token)); |
191 | if (p->tokens[i] == NULL) { Branch (191:13): [True: 0, False: 10.6M]
|
192 | p->size = i; // Needed, in order to cleanup correctly after parser fails |
193 | PyErr_NoMemory(); |
194 | return -1; |
195 | } |
196 | } |
197 | p->size = newsize; |
198 | return 0; |
199 | } |
200 | |
201 | int |
202 | _PyPegen_fill_token(Parser *p) |
203 | { |
204 | const char *start; |
205 | const char *end; |
206 | int type = _PyTokenizer_Get(p->tok, &start, &end); |
207 | |
208 | // Record and skip '# type: ignore' comments |
209 | while (type == TYPE_IGNORE) { Branch (209:12): [True: 74, False: 7.41M]
|
210 | Py_ssize_t len = end - start; |
211 | char *tag = PyMem_Malloc(len + 1); |
212 | if (tag == NULL) { Branch (212:13): [True: 0, False: 74]
|
213 | PyErr_NoMemory(); |
214 | return -1; |
215 | } |
216 | strncpy(tag, start, len); |
217 | tag[len] = '\0'; |
218 | // Ownership of tag passes to the growable array |
219 | if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) { Branch (219:13): [True: 0, False: 74]
|
220 | PyErr_NoMemory(); |
221 | return -1; |
222 | } |
223 | type = _PyTokenizer_Get(p->tok, &start, &end); |
224 | } |
225 | |
226 | // If we have reached the end and we are in single input mode we need to insert a newline and reset the parsing |
227 | if (p->start_rule == Py_single_input && type == 2.84M ENDMARKER2.84M && p->parsing_started836 ) { Branch (227:9): [True: 2.84M, False: 4.57M]
Branch (227:45): [True: 836, False: 2.84M]
Branch (227:66): [True: 809, False: 27]
|
228 | type = NEWLINE; /* Add an extra newline */ |
229 | p->parsing_started = 0; |
230 | |
231 | if (p->tok->indent && !(p->flags & 15 PyPARSE_DONT_IMPLY_DEDENT15 )) { Branch (231:13): [True: 15, False: 794]
Branch (231:31): [True: 9, False: 6]
|
232 | p->tok->pendin = -p->tok->indent; |
233 | p->tok->indent = 0; |
234 | } |
235 | } |
236 | else { |
237 | p->parsing_started = 1; |
238 | } |
239 | |
240 | // Check if we are at the limit of the token array capacity and resize if needed |
241 | if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)335k ) { Branch (241:9): [True: 335k, False: 7.07M]
Branch (241:33): [True: 0, False: 335k]
|
242 | return -1; |
243 | } |
244 | |
245 | Token *t = p->tokens[p->fill]; |
246 | return initialize_token(p, t, start, end, type); |
247 | } |
248 | |
#if defined(Py_DEBUG)
// Instrumentation to count the effectiveness of memoization.
// The array counts the number of tokens skipped by memoization,
// indexed by type.

#define NSTATISTICS 2000
static long memo_statistics[NSTATISTICS];

// Reset all memoization counters to zero.
// FIX: declare as (void) — an empty parameter list is an obsolescent
// non-prototype declaration in C and accepts any arguments unchecked.
void
_PyPegen_clear_memo_statistics(void)
{
    for (int i = 0; i < NSTATISTICS; i++) {
        memo_statistics[i] = 0;
    }
}

// Return the counters as a fresh Python list of ints, or NULL on error.
PyObject *
_PyPegen_get_memo_statistics(void)
{
    PyObject *ret = PyList_New(NSTATISTICS);
    if (ret == NULL) {
        return NULL;
    }
    for (int i = 0; i < NSTATISTICS; i++) {
        PyObject *value = PyLong_FromLong(memo_statistics[i]);
        if (value == NULL) {
            Py_DECREF(ret);
            return NULL;
        }
        // PyList_SetItem borrows a reference to value.
        if (PyList_SetItem(ret, i, value) < 0) {
            Py_DECREF(ret);
            return NULL;
        }
    }
    return ret;
}
#endif
287 | |
288 | int // bool |
289 | _PyPegen_is_memoized(Parser *p, int type, void *pres) |
290 | { |
291 | if (p->mark == p->fill) { Branch (291:9): [True: 822k, False: 140M]
|
292 | if (_PyPegen_fill_token(p) < 0) { Branch (292:13): [True: 40, False: 822k]
|
293 | p->error_indicator = 1; |
294 | return -1; |
295 | } |
296 | } |
297 | |
298 | Token *t = p->tokens[p->mark]; |
299 | |
300 | for (Memo *m = t->memo; m != NULL; m = m->next273M ) { Branch (300:29): [True: 379M, False: 34.4M]
|
301 | if (m->type == type) { Branch (301:13): [True: 106M, False: 273M]
|
302 | #if defined(PY_DEBUG) |
303 | if (0 <= type && type < NSTATISTICS) { |
304 | long count = m->mark - p->mark; |
305 | // A memoized negative result counts for one. |
306 | if (count <= 0) { |
307 | count = 1; |
308 | } |
309 | memo_statistics[type] += count; |
310 | } |
311 | #endif |
312 | p->mark = m->mark; |
313 | *(void **)(pres) = m->node; |
314 | return 1; |
315 | } |
316 | } |
317 | return 0; |
318 | } |
319 | |
320 | int |
321 | _PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p) |
322 | { |
323 | int mark = p->mark; |
324 | void *res = func(p); |
325 | p->mark = mark; |
326 | return (res != NULL) == positive; |
327 | } |
328 | |
329 | int |
330 | _PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg) |
331 | { |
332 | int mark = p->mark; |
333 | void *res = func(p, arg); |
334 | p->mark = mark; |
335 | return (res != NULL) == positive; |
336 | } |
337 | |
338 | int |
339 | _PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg) |
340 | { |
341 | int mark = p->mark; |
342 | void *res = func(p, arg); |
343 | p->mark = mark; |
344 | return (res != NULL) == positive; |
345 | } |
346 | |
347 | int |
348 | _PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p) |
349 | { |
350 | int mark = p->mark; |
351 | void *res = (void*)func(p); |
352 | p->mark = mark; |
353 | return (res != NULL) == positive; |
354 | } |
355 | |
356 | Token * |
357 | _PyPegen_expect_token(Parser *p, int type) |
358 | { |
359 | if (p->mark == p->fill) { Branch (359:9): [True: 4.60M, False: 133M]
|
360 | if (_PyPegen_fill_token(p) < 0) { Branch (360:13): [True: 148, False: 4.60M]
|
361 | p->error_indicator = 1; |
362 | return NULL; |
363 | } |
364 | } |
365 | Token *t = p->tokens[p->mark]; |
366 | if (t->type != type) { Branch (366:9): [True: 119M, False: 18.8M]
|
367 | return NULL; |
368 | } |
369 | p->mark += 1; |
370 | return t; |
371 | } |
372 | |
373 | void* |
374 | _PyPegen_expect_forced_result(Parser *p, void* result, const char* expected) { |
375 |
|
376 | if (p->error_indicator == 1) { Branch (376:9): [True: 0, False: 0]
|
377 | return NULL; |
378 | } |
379 | if (result == NULL) { Branch (379:9): [True: 0, False: 0]
|
380 | RAISE_SYNTAX_ERROR("expected (%s)", expected); |
381 | return NULL; |
382 | } |
383 | return result; |
384 | } |
385 | |
386 | Token * |
387 | _PyPegen_expect_forced_token(Parser *p, int type, const char* expected) { |
388 | |
389 | if (p->error_indicator == 1) { Branch (389:9): [True: 0, False: 79.3k]
|
390 | return NULL; |
391 | } |
392 | |
393 | if (p->mark == p->fill) { Branch (393:9): [True: 42.7k, False: 36.5k]
|
394 | if (_PyPegen_fill_token(p) < 0) { Branch (394:13): [True: 0, False: 42.7k]
|
395 | p->error_indicator = 1; |
396 | return NULL; |
397 | } |
398 | } |
399 | Token *t = p->tokens[p->mark]; |
400 | if (t->type != type) { Branch (400:9): [True: 10, False: 79.3k]
|
401 | RAISE_SYNTAX_ERROR_KNOWN_LOCATION(t, "expected '%s'", expected); |
402 | return NULL; |
403 | } |
404 | p->mark += 1; |
405 | return t; |
406 | } |
407 | |
408 | expr_ty |
409 | _PyPegen_expect_soft_keyword(Parser *p, const char *keyword) |
410 | { |
411 | if (p->mark == p->fill) { Branch (411:9): [True: 2.52k, False: 279k]
|
412 | if (_PyPegen_fill_token(p) < 0) { Branch (412:13): [True: 0, False: 2.52k]
|
413 | p->error_indicator = 1; |
414 | return NULL; |
415 | } |
416 | } |
417 | Token *t = p->tokens[p->mark]; |
418 | if (t->type != NAME) { Branch (418:9): [True: 153k, False: 128k]
|
419 | return NULL; |
420 | } |
421 | const char *s = PyBytes_AsString(t->bytes); |
422 | if (!s) { Branch (422:9): [True: 0, False: 128k]
|
423 | p->error_indicator = 1; |
424 | return NULL; |
425 | } |
426 | if (strcmp(s, keyword) != 0) { Branch (426:9): [True: 125k, False: 2.91k]
|
427 | return NULL; |
428 | } |
429 | return _PyPegen_name_token(p); |
430 | } |
431 | |
432 | Token * |
433 | _PyPegen_get_last_nonnwhitespace_token(Parser *p) |
434 | { |
435 | assert(p->mark >= 0); |
436 | Token *token = NULL; |
437 | for (int m = p->mark - 1; m >= 0; m--371k ) { Branch (437:31): [True: 4.34M, False: 0]
|
438 | token = p->tokens[m]; |
439 | if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > 2.11M DEDENT2.11M )) { Branch (439:13): [True: 4.34M, False: 0]
Branch (439:42): [True: 2.22M, False: 2.11M]
Branch (439:67): [True: 1.74M, False: 371k]
|
440 | break; |
441 | } |
442 | } |
443 | return token; |
444 | } |
445 | |
446 | PyObject * |
447 | _PyPegen_new_identifier(Parser *p, const char *n) |
448 | { |
449 | PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL); |
450 | if (!id) { Branch (450:9): [True: 0, False: 6.17M]
|
451 | goto error; |
452 | } |
453 | /* PyUnicode_DecodeUTF8 should always return a ready string. */ |
454 | assert(PyUnicode_IS_READY(id)); |
455 | /* Check whether there are non-ASCII characters in the |
456 | identifier; if so, normalize to NFKC. */ |
457 | if (!PyUnicode_IS_ASCII(id)) Branch (457:9): [True: 97, False: 6.17M]
|
458 | { |
459 | PyObject *id2; |
460 | if (!init_normalization(p)) Branch (460:13): [True: 0, False: 97]
|
461 | { |
462 | Py_DECREF(id); |
463 | goto error; |
464 | } |
465 | PyObject *form = PyUnicode_InternFromString("NFKC"); |
466 | if (form == NULL) Branch (466:13): [True: 0, False: 97]
|
467 | { |
468 | Py_DECREF(id); |
469 | goto error; |
470 | } |
471 | PyObject *args[2] = {form, id}; |
472 | id2 = _PyObject_FastCall(p->normalize, args, 2); |
473 | Py_DECREF(id); |
474 | Py_DECREF(form); |
475 | if (!id2) { Branch (475:13): [True: 0, False: 97]
|
476 | goto error; |
477 | } |
478 | if (!PyUnicode_Check(id2)) Branch (478:13): [True: 1, False: 96]
|
479 | { |
480 | PyErr_Format(PyExc_TypeError, |
481 | "unicodedata.normalize() must return a string, not " |
482 | "%.200s", |
483 | _PyType_Name(Py_TYPE(id2))); |
484 | Py_DECREF(id2); |
485 | goto error; |
486 | } |
487 | id = id2; |
488 | } |
489 | PyUnicode_InternInPlace(&id); |
490 | if (_PyArena_AddPyObject(p->arena, id) < 0) Branch (490:9): [True: 0, False: 6.17M]
|
491 | { |
492 | Py_DECREF(id); |
493 | goto error; |
494 | } |
495 | return id; |
496 | |
497 | error: |
498 | p->error_indicator = 1; |
499 | return NULL; |
500 | } |
501 | |
502 | static expr_ty |
503 | _PyPegen_name_from_token(Parser *p, Token* t) |
504 | { |
505 | if (t == NULL) { Branch (505:9): [True: 7.05M, False: 6.17M]
|
506 | return NULL; |
507 | } |
508 | const char *s = PyBytes_AsString(t->bytes); |
509 | if (!s) { Branch (509:9): [True: 0, False: 6.17M]
|
510 | p->error_indicator = 1; |
511 | return NULL; |
512 | } |
513 | PyObject *id = _PyPegen_new_identifier(p, s); |
514 | if (id == NULL) { Branch (514:9): [True: 1, False: 6.17M]
|
515 | p->error_indicator = 1; |
516 | return NULL; |
517 | } |
518 | return _PyAST_Name(id, Load, t->lineno, t->col_offset, t->end_lineno, |
519 | t->end_col_offset, p->arena); |
520 | } |
521 | |
522 | expr_ty |
523 | _PyPegen_name_token(Parser *p) |
524 | { |
525 | Token *t = _PyPegen_expect_token(p, NAME); |
526 | return _PyPegen_name_from_token(p, t); |
527 | } |
528 | |
529 | void * |
530 | _PyPegen_string_token(Parser *p) |
531 | { |
532 | return _PyPegen_expect_token(p, STRING); |
533 | } |
534 | |
535 | expr_ty _PyPegen_soft_keyword_token(Parser *p) { |
536 | Token *t = _PyPegen_expect_token(p, NAME); |
537 | if (t == NULL) { Branch (537:9): [True: 1.98k, False: 638]
|
538 | return NULL; |
539 | } |
540 | char *the_token; |
541 | Py_ssize_t size; |
542 | PyBytes_AsStringAndSize(t->bytes, &the_token, &size); |
543 | for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++1.86k ) { Branch (543:45): [True: 1.89k, False: 607]
|
544 | if (strncmp(*keyword, the_token, size) == 0) { Branch (544:13): [True: 31, False: 1.86k]
|
545 | return _PyPegen_name_from_token(p, t); |
546 | } |
547 | } |
548 | return NULL; |
549 | } |
550 | |
551 | static PyObject * |
552 | parsenumber_raw(const char *s) |
553 | { |
554 | const char *end; |
555 | long x; |
556 | double dx; |
557 | Py_complex compl; |
558 | int imflag; |
559 | |
560 | assert(s != NULL); |
561 | errno = 0; |
562 | end = s + strlen(s) - 1; |
563 | imflag = *end == 'j' || *end == 'J'1.29M ; Branch (563:14): [True: 1.19k, False: 1.29M]
Branch (563:29): [True: 0, False: 1.29M]
|
564 | if (s[0] == '0') { Branch (564:9): [True: 642k, False: 656k]
|
565 | x = (long)PyOS_strtoul(s, (char **)&end, 0); |
566 | if (x < 0 && errno == 056 ) { Branch (566:13): [True: 56, False: 642k]
Branch (566:22): [True: 38, False: 18]
|
567 | return PyLong_FromString(s, (char **)0, 0); |
568 | } |
569 | } |
570 | else { |
571 | x = PyOS_strtol(s, (char **)&end, 0); |
572 | } |
573 | if (*end == '\0') { Branch (573:9): [True: 1.29M, False: 8.66k]
|
574 | if (errno != 0) { Branch (574:13): [True: 79, False: 1.29M]
|
575 | return PyLong_FromString(s, (char **)0, 0); |
576 | } |
577 | return PyLong_FromLong(x); |
578 | } |
579 | /* XXX Huge floats may silently fail */ |
580 | if (imflag) { Branch (580:9): [True: 1.19k, False: 7.47k]
|
581 | compl.real = 0.; |
582 | compl.imag = PyOS_string_to_double(s, (char **)&end, NULL); |
583 | if (compl.imag == -1.0 && PyErr_Occurred()0 ) { Branch (583:13): [True: 0, False: 1.19k]
Branch (583:35): [True: 0, False: 0]
|
584 | return NULL; |
585 | } |
586 | return PyComplex_FromCComplex(compl); |
587 | } |
588 | dx = PyOS_string_to_double(s, NULL, NULL); |
589 | if (dx == -1.0 && PyErr_Occurred()0 ) { Branch (589:9): [True: 0, False: 7.47k]
Branch (589:23): [True: 0, False: 0]
|
590 | return NULL; |
591 | } |
592 | return PyFloat_FromDouble(dx); |
593 | } |
594 | |
595 | static PyObject * |
596 | parsenumber(const char *s) |
597 | { |
598 | char *dup; |
599 | char *end; |
600 | PyObject *res = NULL; |
601 | |
602 | assert(s != NULL); |
603 | |
604 | if (strchr(s, '_') == NULL) { Branch (604:9): [True: 1.29M, False: 271]
|
605 | return parsenumber_raw(s); |
606 | } |
607 | /* Create a duplicate without underscores. */ |
608 | dup = PyMem_Malloc(strlen(s) + 1); |
609 | if (dup == NULL) { Branch (609:9): [True: 0, False: 271]
|
610 | return PyErr_NoMemory(); |
611 | } |
612 | end = dup; |
613 | for (; *s; s++3.94k ) { Branch (613:12): [True: 3.94k, False: 271]
|
614 | if (*s != '_') { Branch (614:13): [True: 3.39k, False: 550]
|
615 | *end++ = *s; |
616 | } |
617 | } |
618 | *end = '\0'; |
619 | res = parsenumber_raw(dup); |
620 | PyMem_Free(dup); |
621 | return res; |
622 | } |
623 | |
624 | expr_ty |
625 | _PyPegen_number_token(Parser *p) |
626 | { |
627 | Token *t = _PyPegen_expect_token(p, NUMBER); |
628 | if (t == NULL) { Branch (628:9): [True: 926k, False: 1.29M]
|
629 | return NULL; |
630 | } |
631 | |
632 | const char *num_raw = PyBytes_AsString(t->bytes); |
633 | if (num_raw == NULL) { Branch (633:9): [True: 0, False: 1.29M]
|
634 | p->error_indicator = 1; |
635 | return NULL; |
636 | } |
637 | |
638 | if (p->feature_version < 6 && strchr(num_raw, '_') != NULL58 ) { Branch (638:9): [True: 58, False: 1.29M]
Branch (638:35): [True: 2, False: 56]
|
639 | p->error_indicator = 1; |
640 | return RAISE_SYNTAX_ERROR("Underscores in numeric literals are only supported " |
641 | "in Python 3.6 and greater"); |
642 | } |
643 | |
644 | PyObject *c = parsenumber(num_raw); |
645 | |
646 | if (c == NULL) { Branch (646:9): [True: 0, False: 1.29M]
|
647 | p->error_indicator = 1; |
648 | return NULL; |
649 | } |
650 | |
651 | if (_PyArena_AddPyObject(p->arena, c) < 0) { Branch (651:9): [True: 0, False: 1.29M]
|
652 | Py_DECREF(c); |
653 | p->error_indicator = 1; |
654 | return NULL; |
655 | } |
656 | |
657 | return _PyAST_Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno, |
658 | t->end_col_offset, p->arena); |
659 | } |
660 | |
661 | /* Check that the source for a single input statement really is a single |
662 | statement by looking at what is left in the buffer after parsing. |
663 | Trailing whitespace and comments are OK. */ |
664 | static int // bool |
665 | bad_single_statement(Parser *p) |
666 | { |
667 | char *cur = p->tok->cur; |
668 | char c = *cur; |
669 | |
670 | for (;;) { |
671 | while (c == ' ' || c == '\t'4.25k || c == '\n'4.25k || c == '\014'4.17k ) { Branch (671:16): [True: 41, False: 4.25k]
Branch (671:28): [True: 0, False: 4.25k]
Branch (671:41): [True: 78, False: 4.17k]
Branch (671:54): [True: 0, False: 4.17k]
|
672 | c = *++cur; |
673 | } |
674 | |
675 | if (!c) { Branch (675:13): [True: 4.10k, False: 75]
|
676 | return 0; |
677 | } |
678 | |
679 | if (c != '#') { Branch (679:13): [True: 8, False: 67]
|
680 | return 1; |
681 | } |
682 | |
683 | /* Suck up comment. */ |
684 | while (67 c && c != '\n') { Branch (684:16): [True: 1.55k, False: 0]
Branch (684:21): [True: 1.48k, False: 67]
|
685 | c = *++cur; |
686 | } |
687 | } |
688 | } |
689 | |
690 | static int |
691 | compute_parser_flags(PyCompilerFlags *flags) |
692 | { |
693 | int parser_flags = 0; |
694 | if (!flags) { Branch (694:9): [True: 175, False: 59.0k]
|
695 | return 0; |
696 | } |
697 | if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) { Branch (697:9): [True: 359, False: 58.6k]
|
698 | parser_flags |= PyPARSE_DONT_IMPLY_DEDENT; |
699 | } |
700 | if (flags->cf_flags & PyCF_IGNORE_COOKIE) { Branch (700:9): [True: 56.8k, False: 2.16k]
|
701 | parser_flags |= PyPARSE_IGNORE_COOKIE; |
702 | } |
703 | if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) { Branch (703:9): [True: 3, False: 59.0k]
|
704 | parser_flags |= PyPARSE_BARRY_AS_BDFL; |
705 | } |
706 | if (flags->cf_flags & PyCF_TYPE_COMMENTS) { Branch (706:9): [True: 250, False: 58.7k]
|
707 | parser_flags |= PyPARSE_TYPE_COMMENTS; |
708 | } |
709 | if ((flags->cf_flags & PyCF_ONLY_AST) && flags->cf_feature_version < 79.10k ) { Branch (709:9): [True: 9.10k, False: 49.9k]
Branch (709:46): [True: 69, False: 9.03k]
|
710 | parser_flags |= PyPARSE_ASYNC_HACKS; |
711 | } |
712 | if (flags->cf_flags & PyCF_ALLOW_INCOMPLETE_INPUT) { Branch (712:9): [True: 320, False: 58.7k]
|
713 | parser_flags |= PyPARSE_ALLOW_INCOMPLETE_INPUT; |
714 | } |
715 | return parser_flags; |
716 | } |
717 | |
718 | // Parser API |
719 | |
720 | Parser * |
721 | _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags, |
722 | int feature_version, int *errcode, PyArena *arena) |
723 | { |
724 | Parser *p = PyMem_Malloc(sizeof(Parser)); |
725 | if (p == NULL) { Branch (725:9): [True: 0, False: 132k]
|
726 | return (Parser *) PyErr_NoMemory(); |
727 | } |
728 | assert(tok != NULL); |
729 | tok->type_comments = (flags & PyPARSE_TYPE_COMMENTS) > 0; |
730 | tok->async_hacks = (flags & PyPARSE_ASYNC_HACKS) > 0; |
731 | p->tok = tok; |
732 | p->keywords = NULL; |
733 | p->n_keyword_lists = -1; |
734 | p->soft_keywords = NULL; |
735 | p->tokens = PyMem_Malloc(sizeof(Token *)); |
736 | if (!p->tokens) { Branch (736:9): [True: 0, False: 132k]
|
737 | PyMem_Free(p); |
738 | return (Parser *) PyErr_NoMemory(); |
739 | } |
740 | p->tokens[0] = PyMem_Calloc(1, sizeof(Token)); |
741 | if (!p->tokens) { Branch (741:9): [True: 0, False: 132k]
|
742 | PyMem_Free(p->tokens); |
743 | PyMem_Free(p); |
744 | return (Parser *) PyErr_NoMemory(); |
745 | } |
746 | if (!growable_comment_array_init(&p->type_ignore_comments, 10)) { Branch (746:9): [True: 0, False: 132k]
|
747 | PyMem_Free(p->tokens[0]); |
748 | PyMem_Free(p->tokens); |
749 | PyMem_Free(p); |
750 | return (Parser *) PyErr_NoMemory(); |
751 | } |
752 | |
753 | p->mark = 0; |
754 | p->fill = 0; |
755 | p->size = 1; |
756 | |
757 | p->errcode = errcode; |
758 | p->arena = arena; |
759 | p->start_rule = start_rule; |
760 | p->parsing_started = 0; |
761 | p->normalize = NULL; |
762 | p->error_indicator = 0; |
763 | |
764 | p->starting_lineno = 0; |
765 | p->starting_col_offset = 0; |
766 | p->flags = flags; |
767 | p->feature_version = feature_version; |
768 | p->known_err_token = NULL; |
769 | p->level = 0; |
770 | p->call_invalid_rules = 0; |
771 | #ifdef Py_DEBUG |
772 | p->debug = _Py_GetConfig()->parser_debug; |
773 | #endif |
774 | return p; |
775 | } |
776 | |
777 | void |
778 | _PyPegen_Parser_Free(Parser *p) |
779 | { |
780 | Py_XDECREF(p->normalize); |
781 | for (int i = 0; i < p->size; i++10.7M ) { Branch (781:21): [True: 10.7M, False: 132k]
|
782 | PyMem_Free(p->tokens[i]); |
783 | } |
784 | PyMem_Free(p->tokens); |
785 | growable_comment_array_deallocate(&p->type_ignore_comments); |
786 | PyMem_Free(p); |
787 | } |
788 | |
789 | static void |
790 | reset_parser_state_for_error_pass(Parser *p) |
791 | { |
792 | for (int i = 0; i < p->fill; i++7.47k ) { Branch (792:21): [True: 7.47k, False: 1.49k]
|
793 | p->tokens[i]->memo = NULL; |
794 | } |
795 | p->mark = 0; |
796 | p->call_invalid_rules = 1; |
797 | // Don't try to get extra tokens in interactive mode when trying to |
798 | // raise specialized errors in the second pass. |
799 | p->tok->interactive_underflow = IUNDERFLOW_STOP; |
800 | } |
801 | |
802 | static inline int |
803 | _is_end_of_source(Parser *p) { |
804 | int err = p->tok->done; |
805 | return err == E_EOF || err == 55 E_EOFS55 || err == 53 E_EOLS53 ; Branch (805:12): [True: 207, False: 55]
Branch (805:28): [True: 2, False: 53]
Branch (805:45): [True: 4, False: 49]
|
806 | } |
807 | |
808 | void * |
809 | _PyPegen_run_parser(Parser *p) |
810 | { |
811 | void *res = _PyPegen_parse(p); |
812 | assert(p->level == 0); |
813 | if (res == NULL) { Branch (813:9): [True: 1.71k, False: 130k]
|
814 | if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) && _is_end_of_source(p)262 ) { Branch (814:13): [True: 262, False: 1.45k]
Branch (814:61): [True: 213, False: 49]
|
815 | PyErr_Clear(); |
816 | return RAISE_SYNTAX_ERROR("incomplete input"); |
817 | } |
818 | if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)567 ) { Branch (818:13): [True: 567, False: 936]
Branch (818:33): [True: 4, False: 563]
|
819 | return NULL; |
820 | } |
821 | // Make a second parser pass. In this pass we activate heavier and slower checks |
822 | // to produce better error messages and more complete diagnostics. Extra "invalid_*" |
823 | // rules will be active during parsing. |
824 | Token *last_token = p->tokens[p->fill - 1]; |
825 | reset_parser_state_for_error_pass(p); |
826 | _PyPegen_parse(p); |
827 | |
828 | // Set SyntaxErrors accordingly depending on the parser/tokenizer status at the failure |
829 | // point. |
830 | _Pypegen_set_syntax_error(p, last_token); |
831 | return NULL; |
832 | } |
833 | |
834 | if (p->start_rule == Py_single_input && bad_single_statement(p)4.11k ) { Branch (834:9): [True: 4.11k, False: 126k]
Branch (834:45): [True: 8, False: 4.10k]
|
835 | p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future |
836 | return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement"); |
837 | } |
838 | |
839 | // test_peg_generator defines _Py_TEST_PEGEN to not call PyAST_Validate() |
840 | #if defined(Py_DEBUG) && !defined(_Py_TEST_PEGEN) |
841 | if (p->start_rule == Py_single_input || |
842 | p->start_rule == Py_file_input || |
843 | p->start_rule == Py_eval_input) |
844 | { |
845 | if (!_PyAST_Validate(res)) { |
846 | return NULL; |
847 | } |
848 | } |
849 | #endif |
850 | return res; |
851 | } |
852 | |
853 | mod_ty |
854 | _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob, |
855 | const char *enc, const char *ps1, const char *ps2, |
856 | PyCompilerFlags *flags, int *errcode, PyArena *arena) |
857 | { |
858 | struct tok_state *tok = _PyTokenizer_FromFile(fp, enc, ps1, ps2); |
859 | if (tok == NULL) { Branch (859:9): [True: 0, False: 1]
|
860 | if (PyErr_Occurred()) { Branch (860:13): [True: 0, False: 0]
|
861 | _PyPegen_raise_tokenizer_init_error(filename_ob); |
862 | return NULL; |
863 | } |
864 | return NULL; |
865 | } |
866 | if (!tok->fp || ps1 != NULL || ps2 != NULL || Branch (866:9): [True: 0, False: 1]
Branch (866:21): [True: 0, False: 1]
Branch (866:36): [True: 0, False: 1]
|
867 | PyUnicode_CompareWithASCIIString(filename_ob, "<stdin>") == 0) { Branch (867:9): [True: 0, False: 1]
|
868 | tok->fp_interactive = 1; |
869 | } |
870 | // This transfers the ownership to the tokenizer |
871 | tok->filename = filename_ob; |
872 | Py_INCREF(filename_ob); |
873 | |
874 | // From here on we need to clean up even if there's an error |
875 | mod_ty result = NULL; |
876 | |
877 | int parser_flags = compute_parser_flags(flags); |
878 | Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, PY_MINOR_VERSION, |
879 | errcode, arena); |
880 | if (p == NULL) { Branch (880:9): [True: 0, False: 1]
|
881 | goto error; |
882 | } |
883 | |
884 | result = _PyPegen_run_parser(p); |
885 | _PyPegen_Parser_Free(p); |
886 | |
887 | error: |
888 | _PyTokenizer_Free(tok); |
889 | return result; |
890 | } |
891 | |
892 | mod_ty |
893 | _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob, |
894 | PyCompilerFlags *flags, PyArena *arena) |
895 | { |
896 | int exec_input = start_rule == Py_file_input; |
897 | |
898 | struct tok_state *tok; |
899 | if (flags != NULL && flags->cf_flags & 59.0k PyCF_IGNORE_COOKIE59.0k ) { Branch (899:9): [True: 59.0k, False: 175]
Branch (899:26): [True: 56.8k, False: 2.18k]
|
900 | tok = _PyTokenizer_FromUTF8(str, exec_input); |
901 | } else { |
902 | tok = _PyTokenizer_FromString(str, exec_input); |
903 | } |
904 | if (tok == NULL) { Branch (904:9): [True: 26, False: 59.2k]
|
905 | if (PyErr_Occurred()) { Branch (905:13): [True: 26, False: 0]
|
906 | _PyPegen_raise_tokenizer_init_error(filename_ob); |
907 | } |
908 | return NULL; |
909 | } |
910 | // This transfers the ownership to the tokenizer |
911 | tok->filename = filename_ob; |
912 | Py_INCREF(filename_ob); |
913 | |
914 | // We need to clear up from here on |
915 | mod_ty result = NULL; |
916 | |
917 | int parser_flags = compute_parser_flags(flags); |
918 | int feature_version = flags && (flags->cf_flags & 59.0k PyCF_ONLY_AST59.0k ) ? Branch (918:27): [True: 59.0k, False: 175]
Branch (918:36): [True: 9.10k, False: 49.9k]
|
919 | flags->cf_feature_version9.10k : PY_MINOR_VERSION; |
920 | Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, feature_version, |
921 | NULL, arena); |
922 | if (p == NULL) { Branch (922:9): [True: 0, False: 59.2k]
|
923 | goto error; |
924 | } |
925 | |
926 | result = _PyPegen_run_parser(p); |
927 | _PyPegen_Parser_Free(p); |
928 | |
929 | error: |
930 | _PyTokenizer_Free(tok); |
931 | return result; |
932 | } |