Line data Source code
1 : /*
2 : * This file exposes PyAST_Validate interface to check the integrity
3 : * of the given abstract syntax tree (potentially constructed manually).
4 : */
5 : #include "Python.h"
6 : #include "pycore_ast.h" // asdl_stmt_seq
7 : #include "pycore_pystate.h" // _PyThreadState_GET()
8 :
9 : #include <assert.h>
10 : #include <stdbool.h>
11 :
/* Recursion bookkeeping handed to the validate_* functions.  The recursive
   validators increment recursion_depth on entry and fail with a
   RecursionError once it exceeds recursion_limit. */
struct validator {
    int recursion_depth;  /* how deep the validator currently is */
    int recursion_limit;  /* deepest nesting that is still accepted */
};
16 :
17 : static int validate_stmts(struct validator *, asdl_stmt_seq *);
18 : static int validate_exprs(struct validator *, asdl_expr_seq *, expr_context_ty, int);
19 : static int validate_patterns(struct validator *, asdl_pattern_seq *, int);
20 : static int _validate_nonempty_seq(asdl_seq *, const char *, const char *);
21 : static int validate_stmt(struct validator *, stmt_ty);
22 : static int validate_expr(struct validator *, expr_ty, expr_context_ty);
23 : static int validate_pattern(struct validator *, pattern_ty, int);
24 :
/* Sanity-check the source location stored on `node` (any pointer to a
   struct with lineno, end_lineno, col_offset and end_col_offset members).

   On the first inconsistency a ValueError is set and the *enclosing
   function* returns 0, so this may only be used inside functions that
   return int and use 0 for failure.  Checks performed, in order:
     - the start line must not be after the end line;
     - a negative start line/column must be matched by an identical end
       line/column;
     - on a single line, the start column must not be after the end column.

   The body is wrapped in do { } while (0) and every use of `node` is
   parenthesized so the macro behaves like a single statement (safe in an
   unbraced if/else) and accepts arbitrary pointer expressions. */
#define VALIDATE_POSITIONS(node) \
    do { \
        if ((node)->lineno > (node)->end_lineno) { \
            PyErr_Format(PyExc_ValueError, \
                         "AST node line range (%d, %d) is not valid", \
                         (node)->lineno, (node)->end_lineno); \
            return 0; \
        } \
        if (((node)->lineno < 0 && (node)->end_lineno != (node)->lineno) || \
            ((node)->col_offset < 0 && (node)->col_offset != (node)->end_col_offset)) { \
            PyErr_Format(PyExc_ValueError, \
                         "AST node column range (%d, %d) for line range (%d, %d) is not valid", \
                         (node)->col_offset, (node)->end_col_offset, \
                         (node)->lineno, (node)->end_lineno); \
            return 0; \
        } \
        if ((node)->lineno == (node)->end_lineno && \
            (node)->col_offset > (node)->end_col_offset) { \
            PyErr_Format(PyExc_ValueError, \
                         "line %d, column %d-%d is not a valid range", \
                         (node)->lineno, (node)->col_offset, (node)->end_col_offset); \
            return 0; \
        } \
    } while (0)
45 :
46 : static int
47 6016400 : validate_name(PyObject *name)
48 : {
49 6016400 : assert(PyUnicode_Check(name));
50 : static const char * const forbidden[] = {
51 : "None",
52 : "True",
53 : "False",
54 : NULL
55 : };
56 24065600 : for (int i = 0; forbidden[i] != NULL; i++) {
57 18049200 : if (_PyUnicode_EqualToASCIIString(name, forbidden[i])) {
58 7 : PyErr_Format(PyExc_ValueError, "identifier field can't represent '%s' constant", forbidden[i]);
59 7 : return 0;
60 : }
61 : }
62 6016390 : return 1;
63 : }
64 :
65 : static int
66 22960 : validate_comprehension(struct validator *state, asdl_comprehension_seq *gens)
67 : {
68 : Py_ssize_t i;
69 22960 : if (!asdl_seq_LEN(gens)) {
70 4 : PyErr_SetString(PyExc_ValueError, "comprehension with no generators");
71 4 : return 0;
72 : }
73 46734 : for (i = 0; i < asdl_seq_LEN(gens); i++) {
74 23794 : comprehension_ty comp = asdl_seq_GET(gens, i);
75 47584 : if (!validate_expr(state, comp->target, Store) ||
76 47576 : !validate_expr(state, comp->iter, Load) ||
77 23786 : !validate_exprs(state, comp->ifs, Load, 0))
78 16 : return 0;
79 : }
80 22940 : return 1;
81 : }
82 :
83 : static int
84 1679830 : validate_keywords(struct validator *state, asdl_keyword_seq *keywords)
85 : {
86 : Py_ssize_t i;
87 1867090 : for (i = 0; i < asdl_seq_LEN(keywords); i++)
88 187262 : if (!validate_expr(state, (asdl_seq_GET(keywords, i))->value, Load))
89 2 : return 0;
90 1679830 : return 1;
91 : }
92 :
93 : static int
94 1162320 : validate_args(struct validator *state, asdl_arg_seq *args)
95 : {
96 : Py_ssize_t i;
97 1925630 : for (i = 0; i < asdl_seq_LEN(args); i++) {
98 763314 : arg_ty arg = asdl_seq_GET(args, i);
99 763314 : VALIDATE_POSITIONS(arg);
100 763314 : if (arg->annotation && !validate_expr(state, arg->annotation, Load))
101 6 : return 0;
102 : }
103 1162310 : return 1;
104 : }
105 :
106 : static const char *
107 173 : expr_context_name(expr_context_ty ctx)
108 : {
109 173 : switch (ctx) {
110 86 : case Load:
111 86 : return "Load";
112 86 : case Store:
113 86 : return "Store";
114 1 : case Del:
115 1 : return "Del";
116 : // No default case so compiler emits warning for unhandled cases
117 : }
118 0 : Py_UNREACHABLE();
119 : }
120 :
121 : static int
122 387442 : validate_arguments(struct validator *state, arguments_ty args)
123 : {
124 387442 : if (!validate_args(state, args->posonlyargs) || !validate_args(state, args->args)) {
125 4 : return 0;
126 : }
127 387438 : if (args->vararg && args->vararg->annotation
128 839 : && !validate_expr(state, args->vararg->annotation, Load)) {
129 0 : return 0;
130 : }
131 387438 : if (!validate_args(state, args->kwonlyargs))
132 2 : return 0;
133 387436 : if (args->kwarg && args->kwarg->annotation
134 564 : && !validate_expr(state, args->kwarg->annotation, Load)) {
135 0 : return 0;
136 : }
137 387436 : if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->posonlyargs) + asdl_seq_LEN(args->args)) {
138 2 : PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments");
139 2 : return 0;
140 : }
141 387434 : if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) {
142 2 : PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as "
143 : "kw_defaults on arguments");
144 2 : return 0;
145 : }
146 387432 : return validate_exprs(state, args->defaults, Load, 0) && validate_exprs(state, args->kw_defaults, Load, 1);
147 : }
148 :
149 : static int
150 4639320 : validate_constant(struct validator *state, PyObject *value)
151 : {
152 4639320 : if (value == Py_None || value == Py_Ellipsis)
153 186288 : return 1;
154 :
155 4453040 : if (PyLong_CheckExact(value)
156 1853010 : || PyFloat_CheckExact(value)
157 1825440 : || PyComplex_CheckExact(value)
158 1823290 : || PyBool_Check(value)
159 1715180 : || PyUnicode_CheckExact(value)
160 49608 : || PyBytes_CheckExact(value))
161 4453010 : return 1;
162 :
163 23 : if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
164 16 : if (++state->recursion_depth > state->recursion_limit) {
165 0 : PyErr_SetString(PyExc_RecursionError,
166 : "maximum recursion depth exceeded during compilation");
167 0 : return 0;
168 : }
169 :
170 16 : PyObject *it = PyObject_GetIter(value);
171 16 : if (it == NULL)
172 0 : return 0;
173 :
174 40 : while (1) {
175 56 : PyObject *item = PyIter_Next(it);
176 56 : if (item == NULL) {
177 14 : if (PyErr_Occurred()) {
178 0 : Py_DECREF(it);
179 0 : return 0;
180 : }
181 14 : break;
182 : }
183 :
184 42 : if (!validate_constant(state, item)) {
185 2 : Py_DECREF(it);
186 2 : Py_DECREF(item);
187 2 : return 0;
188 : }
189 40 : Py_DECREF(item);
190 : }
191 :
192 14 : Py_DECREF(it);
193 14 : --state->recursion_depth;
194 14 : return 1;
195 : }
196 :
197 7 : if (!PyErr_Occurred()) {
198 7 : PyErr_Format(PyExc_TypeError,
199 : "got an invalid type in Constant: %s",
200 : _PyType_Name(Py_TYPE(value)));
201 : }
202 7 : return 0;
203 : }
204 :
205 : static int
206 15868100 : validate_expr(struct validator *state, expr_ty exp, expr_context_ty ctx)
207 : {
208 15868100 : VALIDATE_POSITIONS(exp);
209 15868100 : int ret = -1;
210 15868100 : if (++state->recursion_depth > state->recursion_limit) {
211 8 : PyErr_SetString(PyExc_RecursionError,
212 : "maximum recursion depth exceeded during compilation");
213 8 : return 0;
214 : }
215 15868100 : int check_ctx = 1;
216 : expr_context_ty actual_ctx;
217 :
218 : /* First check expression context. */
219 15868100 : switch (exp->kind) {
220 1862270 : case Attribute_kind:
221 1862270 : actual_ctx = exp->v.Attribute.ctx;
222 1862270 : break;
223 237463 : case Subscript_kind:
224 237463 : actual_ctx = exp->v.Subscript.ctx;
225 237463 : break;
226 13751 : case Starred_kind:
227 13751 : actual_ctx = exp->v.Starred.ctx;
228 13751 : break;
229 6014980 : case Name_kind:
230 6014980 : if (!validate_name(exp->v.Name.id)) {
231 3 : return 0;
232 : }
233 6014970 : actual_ctx = exp->v.Name.ctx;
234 6014970 : break;
235 99937 : case List_kind:
236 99937 : actual_ctx = exp->v.List.ctx;
237 99937 : break;
238 370136 : case Tuple_kind:
239 370136 : actual_ctx = exp->v.Tuple.ctx;
240 370136 : break;
241 7269570 : default:
242 7269570 : if (ctx != Load) {
243 1 : PyErr_Format(PyExc_ValueError, "expression which can't be "
244 : "assigned to in %s context", expr_context_name(ctx));
245 1 : return 0;
246 : }
247 7269570 : check_ctx = 0;
248 : /* set actual_ctx to prevent gcc warning */
249 7269570 : actual_ctx = 0;
250 : }
251 15868100 : if (check_ctx && actual_ctx != ctx) {
252 86 : PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead",
253 : expr_context_name(ctx), expr_context_name(actual_ctx));
254 86 : return 0;
255 : }
256 :
257 : /* Now validate expression. */
258 15868000 : switch (exp->kind) {
259 67625 : case BoolOp_kind:
260 67625 : if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) {
261 2 : PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values");
262 2 : return 0;
263 : }
264 67623 : ret = validate_exprs(state, exp->v.BoolOp.values, Load, 0);
265 67623 : break;
266 284531 : case BinOp_kind:
267 563256 : ret = validate_expr(state, exp->v.BinOp.left, Load) &&
268 278725 : validate_expr(state, exp->v.BinOp.right, Load);
269 284531 : break;
270 106095 : case UnaryOp_kind:
271 106095 : ret = validate_expr(state, exp->v.UnaryOp.operand, Load);
272 106095 : break;
273 37648 : case Lambda_kind:
274 75289 : ret = validate_arguments(state, exp->v.Lambda.args) &&
275 37641 : validate_expr(state, exp->v.Lambda.body, Load);
276 37648 : break;
277 11335 : case IfExp_kind:
278 22669 : ret = validate_expr(state, exp->v.IfExp.test, Load) &&
279 22669 : validate_expr(state, exp->v.IfExp.body, Load) &&
280 11333 : validate_expr(state, exp->v.IfExp.orelse, Load);
281 11335 : break;
282 42087 : case Dict_kind:
283 42087 : if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) {
284 1 : PyErr_SetString(PyExc_ValueError,
285 : "Dict doesn't have the same number of keys as values");
286 1 : return 0;
287 : }
288 : /* null_ok=1 for keys expressions to allow dict unpacking to work in
289 : dict literals, i.e. ``{**{a:b}}`` */
290 84172 : ret = validate_exprs(state, exp->v.Dict.keys, Load, /*null_ok=*/ 1) &&
291 42086 : validate_exprs(state, exp->v.Dict.values, Load, /*null_ok=*/ 0);
292 42086 : break;
293 3894 : case Set_kind:
294 3894 : ret = validate_exprs(state, exp->v.Set.elts, Load, 0);
295 3894 : break;
296 : #define COMP(NAME) \
297 : case NAME ## _kind: \
298 : ret = validate_comprehension(state, exp->v.NAME.generators) && \
299 : validate_expr(state, exp->v.NAME.elt, Load); \
300 : break;
301 11545 : COMP(ListComp)
302 640 : COMP(SetComp)
303 9329 : COMP(GeneratorExp)
304 : #undef COMP
305 1446 : case DictComp_kind:
306 2887 : ret = validate_comprehension(state, exp->v.DictComp.generators) &&
307 2887 : validate_expr(state, exp->v.DictComp.key, Load) &&
308 1440 : validate_expr(state, exp->v.DictComp.value, Load);
309 1446 : break;
310 13769 : case Yield_kind:
311 13769 : ret = !exp->v.Yield.value || validate_expr(state, exp->v.Yield.value, Load);
312 13769 : break;
313 2237 : case YieldFrom_kind:
314 2237 : ret = validate_expr(state, exp->v.YieldFrom.value, Load);
315 2237 : break;
316 2387 : case Await_kind:
317 2387 : ret = validate_expr(state, exp->v.Await.value, Load);
318 2387 : break;
319 250931 : case Compare_kind:
320 250931 : if (!asdl_seq_LEN(exp->v.Compare.comparators)) {
321 1 : PyErr_SetString(PyExc_ValueError, "Compare with no comparators");
322 1 : return 0;
323 : }
324 501860 : if (asdl_seq_LEN(exp->v.Compare.comparators) !=
325 250930 : asdl_seq_LEN(exp->v.Compare.ops)) {
326 1 : PyErr_SetString(PyExc_ValueError, "Compare has a different number "
327 : "of comparators and operands");
328 1 : return 0;
329 : }
330 501858 : ret = validate_exprs(state, exp->v.Compare.comparators, Load, 0) &&
331 250929 : validate_expr(state, exp->v.Compare.left, Load);
332 250929 : break;
333 1628030 : case Call_kind:
334 3250270 : ret = validate_expr(state, exp->v.Call.func, Load) &&
335 3250270 : validate_exprs(state, exp->v.Call.args, Load, 0) &&
336 1622230 : validate_keywords(state, exp->v.Call.keywords);
337 1628030 : break;
338 4639280 : case Constant_kind:
339 4639280 : if (!validate_constant(state, exp->v.Constant.value)) {
340 7 : return 0;
341 : }
342 4639280 : ret = 1;
343 4639280 : break;
344 19729 : case JoinedStr_kind:
345 19729 : ret = validate_exprs(state, exp->v.JoinedStr.values, Load, 0);
346 19729 : break;
347 96601 : case FormattedValue_kind:
348 96601 : if (validate_expr(state, exp->v.FormattedValue.value, Load) == 0)
349 0 : return 0;
350 96601 : if (exp->v.FormattedValue.format_spec) {
351 978 : ret = validate_expr(state, exp->v.FormattedValue.format_spec, Load);
352 978 : break;
353 : }
354 95623 : ret = 1;
355 95623 : break;
356 1862260 : case Attribute_kind:
357 1862260 : ret = validate_expr(state, exp->v.Attribute.value, Load);
358 1862260 : break;
359 237463 : case Subscript_kind:
360 474919 : ret = validate_expr(state, exp->v.Subscript.slice, Load) &&
361 237456 : validate_expr(state, exp->v.Subscript.value, Load);
362 237463 : break;
363 13751 : case Starred_kind:
364 13751 : ret = validate_expr(state, exp->v.Starred.value, ctx);
365 13751 : break;
366 39429 : case Slice_kind:
367 21796 : ret = (!exp->v.Slice.lower || validate_expr(state, exp->v.Slice.lower, Load)) &&
368 100652 : (!exp->v.Slice.upper || validate_expr(state, exp->v.Slice.upper, Load)) &&
369 39427 : (!exp->v.Slice.step || validate_expr(state, exp->v.Slice.step, Load));
370 39429 : break;
371 99937 : case List_kind:
372 99937 : ret = validate_exprs(state, exp->v.List.elts, ctx, 0);
373 99937 : break;
374 370136 : case Tuple_kind:
375 370136 : ret = validate_exprs(state, exp->v.Tuple.elts, ctx, 0);
376 370136 : break;
377 997 : case NamedExpr_kind:
378 997 : ret = validate_expr(state, exp->v.NamedExpr.value, Load);
379 997 : break;
380 : /* This last case doesn't have any checking. */
381 6014890 : case Name_kind:
382 6014890 : ret = 1;
383 6014890 : break;
384 : // No default case so compiler emits warning for unhandled cases
385 : }
386 15868000 : if (ret < 0) {
387 0 : PyErr_SetString(PyExc_SystemError, "unexpected expression");
388 0 : ret = 0;
389 : }
390 15868000 : state->recursion_depth--;
391 15868000 : return ret;
392 : }
393 :
394 :
395 : // Note: the ensure_literal_* functions are only used to validate a restricted
396 : // set of non-recursive literals that have already been checked with
397 : // validate_expr, so they don't accept the validator state
398 : static int
399 141 : ensure_literal_number(expr_ty exp, bool allow_real, bool allow_imaginary)
400 : {
401 141 : assert(exp->kind == Constant_kind);
402 141 : PyObject *value = exp->v.Constant.value;
403 141 : return (allow_real && PyFloat_CheckExact(value)) ||
404 342 : (allow_real && PyLong_CheckExact(value)) ||
405 60 : (allow_imaginary && PyComplex_CheckExact(value));
406 : }
407 :
408 : static int
409 73 : ensure_literal_negative(expr_ty exp, bool allow_real, bool allow_imaginary)
410 : {
411 73 : assert(exp->kind == UnaryOp_kind);
412 : // Must be negation ...
413 73 : if (exp->v.UnaryOp.op != USub) {
414 0 : return 0;
415 : }
416 : // ... of a constant ...
417 73 : expr_ty operand = exp->v.UnaryOp.operand;
418 73 : if (operand->kind != Constant_kind) {
419 0 : return 0;
420 : }
421 : // ... number
422 73 : return ensure_literal_number(operand, allow_real, allow_imaginary);
423 : }
424 :
425 : static int
426 44 : ensure_literal_complex(expr_ty exp)
427 : {
428 44 : assert(exp->kind == BinOp_kind);
429 44 : expr_ty left = exp->v.BinOp.left;
430 44 : expr_ty right = exp->v.BinOp.right;
431 : // Ensure op is addition or subtraction
432 44 : if (exp->v.BinOp.op != Add && exp->v.BinOp.op != Sub) {
433 0 : return 0;
434 : }
435 : // Check LHS is a real number (potentially signed)
436 44 : switch (left->kind)
437 : {
438 24 : case Constant_kind:
439 24 : if (!ensure_literal_number(left, /*real=*/true, /*imaginary=*/false)) {
440 0 : return 0;
441 : }
442 24 : break;
443 20 : case UnaryOp_kind:
444 20 : if (!ensure_literal_negative(left, /*real=*/true, /*imaginary=*/false)) {
445 0 : return 0;
446 : }
447 20 : break;
448 0 : default:
449 0 : return 0;
450 : }
451 : // Check RHS is an imaginary number (no separate sign allowed)
452 44 : switch (right->kind)
453 : {
454 44 : case Constant_kind:
455 44 : if (!ensure_literal_number(right, /*real=*/false, /*imaginary=*/true)) {
456 0 : return 0;
457 : }
458 44 : break;
459 0 : default:
460 0 : return 0;
461 : }
462 44 : return 1;
463 : }
464 :
465 : static int
466 1933 : validate_pattern_match_value(struct validator *state, expr_ty exp)
467 : {
468 1933 : if (!validate_expr(state, exp, Load)) {
469 2 : return 0;
470 : }
471 :
472 1931 : switch (exp->kind)
473 : {
474 1766 : case Constant_kind:
475 : /* Ellipsis and immutable sequences are not allowed.
476 : For True, False and None, MatchSingleton() should
477 : be used */
478 1766 : if (!validate_expr(state, exp, Load)) {
479 0 : return 0;
480 : }
481 1766 : PyObject *literal = exp->v.Constant.value;
482 1998 : if (PyLong_CheckExact(literal) || PyFloat_CheckExact(literal) ||
483 647 : PyBytes_CheckExact(literal) || PyComplex_CheckExact(literal) ||
484 207 : PyUnicode_CheckExact(literal)) {
485 1763 : return 1;
486 : }
487 3 : PyErr_SetString(PyExc_ValueError,
488 : "unexpected constant inside of a literal pattern");
489 3 : return 0;
490 64 : case Attribute_kind:
491 : // Constants and attribute lookups are always permitted
492 64 : return 1;
493 53 : case UnaryOp_kind:
494 : // Negated numbers are permitted (whether real or imaginary)
495 : // Compiler will complain if AST folding doesn't create a constant
496 53 : if (ensure_literal_negative(exp, /*real=*/true, /*imaginary=*/true)) {
497 53 : return 1;
498 : }
499 0 : break;
500 44 : case BinOp_kind:
501 : // Complex literals are permitted
502 : // Compiler will complain if AST folding doesn't create a constant
503 44 : if (ensure_literal_complex(exp)) {
504 44 : return 1;
505 : }
506 0 : break;
507 3 : case JoinedStr_kind:
508 : // Handled in the later stages
509 3 : return 1;
510 1 : default:
511 1 : break;
512 : }
513 1 : PyErr_SetString(PyExc_ValueError,
514 : "patterns may only match literals and attribute lookups");
515 1 : return 0;
516 : }
517 :
518 : static int
519 1321 : validate_capture(PyObject *name)
520 : {
521 1321 : if (_PyUnicode_EqualToASCIIString(name, "_")) {
522 3 : PyErr_Format(PyExc_ValueError, "can't capture name '_' in patterns");
523 3 : return 0;
524 : }
525 1318 : return validate_name(name);
526 : }
527 :
528 : static int
529 4901 : validate_pattern(struct validator *state, pattern_ty p, int star_ok)
530 : {
531 4901 : VALIDATE_POSITIONS(p);
532 4901 : int ret = -1;
533 4901 : if (++state->recursion_depth > state->recursion_limit) {
534 0 : PyErr_SetString(PyExc_RecursionError,
535 : "maximum recursion depth exceeded during compilation");
536 0 : return 0;
537 : }
538 4901 : switch (p->kind) {
539 1540 : case MatchValue_kind:
540 1540 : ret = validate_pattern_match_value(state, p->v.MatchValue.value);
541 1540 : break;
542 68 : case MatchSingleton_kind:
543 68 : ret = p->v.MatchSingleton.value == Py_None || PyBool_Check(p->v.MatchSingleton.value);
544 68 : if (!ret) {
545 5 : PyErr_SetString(PyExc_ValueError,
546 : "MatchSingleton can only contain True, False and None");
547 : }
548 68 : break;
549 757 : case MatchSequence_kind:
550 757 : ret = validate_patterns(state, p->v.MatchSequence.patterns, /*star_ok=*/1);
551 757 : break;
552 431 : case MatchMapping_kind:
553 431 : if (asdl_seq_LEN(p->v.MatchMapping.keys) != asdl_seq_LEN(p->v.MatchMapping.patterns)) {
554 1 : PyErr_SetString(PyExc_ValueError,
555 : "MatchMapping doesn't have the same number of keys as patterns");
556 1 : ret = 0;
557 1 : break;
558 : }
559 :
560 430 : if (p->v.MatchMapping.rest && !validate_capture(p->v.MatchMapping.rest)) {
561 2 : ret = 0;
562 2 : break;
563 : }
564 :
565 428 : asdl_expr_seq *keys = p->v.MatchMapping.keys;
566 822 : for (Py_ssize_t i = 0; i < asdl_seq_LEN(keys); i++) {
567 395 : expr_ty key = asdl_seq_GET(keys, i);
568 395 : if (key->kind == Constant_kind) {
569 384 : PyObject *literal = key->v.Constant.value;
570 384 : if (literal == Py_None || PyBool_Check(literal)) {
571 : /* validate_pattern_match_value will ensure the key
572 : doesn't contain True, False and None but it is
573 : syntactically valid, so we will pass those on in
574 : a special case. */
575 2 : continue;
576 : }
577 : }
578 393 : if (!validate_pattern_match_value(state, key)) {
579 1 : ret = 0;
580 1 : break;
581 : }
582 : }
583 :
584 428 : ret = validate_patterns(state, p->v.MatchMapping.patterns, /*star_ok=*/0);
585 428 : break;
586 296 : case MatchClass_kind:
587 296 : if (asdl_seq_LEN(p->v.MatchClass.kwd_attrs) != asdl_seq_LEN(p->v.MatchClass.kwd_patterns)) {
588 2 : PyErr_SetString(PyExc_ValueError,
589 : "MatchClass doesn't have the same number of keyword attributes as patterns");
590 2 : ret = 0;
591 2 : break;
592 : }
593 294 : if (!validate_expr(state, p->v.MatchClass.cls, Load)) {
594 0 : ret = 0;
595 0 : break;
596 : }
597 :
598 294 : expr_ty cls = p->v.MatchClass.cls;
599 : while (1) {
600 350 : if (cls->kind == Name_kind) {
601 293 : break;
602 : }
603 57 : else if (cls->kind == Attribute_kind) {
604 56 : cls = cls->v.Attribute.value;
605 56 : continue;
606 : }
607 : else {
608 1 : PyErr_SetString(PyExc_ValueError,
609 : "MatchClass cls field can only contain Name or Attribute nodes.");
610 1 : ret = 0;
611 1 : break;
612 : }
613 : }
614 :
615 395 : for (Py_ssize_t i = 0; i < asdl_seq_LEN(p->v.MatchClass.kwd_attrs); i++) {
616 102 : PyObject *identifier = asdl_seq_GET(p->v.MatchClass.kwd_attrs, i);
617 102 : if (!validate_name(identifier)) {
618 1 : ret = 0;
619 1 : break;
620 : }
621 : }
622 :
623 294 : if (!validate_patterns(state, p->v.MatchClass.patterns, /*star_ok=*/0)) {
624 2 : ret = 0;
625 2 : break;
626 : }
627 :
628 292 : ret = validate_patterns(state, p->v.MatchClass.kwd_patterns, /*star_ok=*/0);
629 292 : break;
630 178 : case MatchStar_kind:
631 178 : if (!star_ok) {
632 2 : PyErr_SetString(PyExc_ValueError, "can't use MatchStar here");
633 2 : ret = 0;
634 2 : break;
635 : }
636 176 : ret = p->v.MatchStar.name == NULL || validate_capture(p->v.MatchStar.name);
637 176 : break;
638 1417 : case MatchAs_kind:
639 1417 : if (p->v.MatchAs.name && !validate_capture(p->v.MatchAs.name)) {
640 2 : ret = 0;
641 2 : break;
642 : }
643 1415 : if (p->v.MatchAs.pattern == NULL) {
644 1232 : ret = 1;
645 : }
646 183 : else if (p->v.MatchAs.name == NULL) {
647 0 : PyErr_SetString(PyExc_ValueError,
648 : "MatchAs must specify a target name if a pattern is given");
649 0 : ret = 0;
650 : }
651 : else {
652 183 : ret = validate_pattern(state, p->v.MatchAs.pattern, /*star_ok=*/0);
653 : }
654 1415 : break;
655 214 : case MatchOr_kind:
656 214 : if (asdl_seq_LEN(p->v.MatchOr.patterns) < 2) {
657 2 : PyErr_SetString(PyExc_ValueError,
658 : "MatchOr requires at least 2 patterns");
659 2 : ret = 0;
660 2 : break;
661 : }
662 212 : ret = validate_patterns(state, p->v.MatchOr.patterns, /*star_ok=*/0);
663 212 : break;
664 : // No default case, so the compiler will emit a warning if new pattern
665 : // kinds are added without being handled here
666 : }
667 4901 : if (ret < 0) {
668 0 : PyErr_SetString(PyExc_SystemError, "unexpected pattern");
669 0 : ret = 0;
670 : }
671 4901 : state->recursion_depth--;
672 4901 : return ret;
673 : }
674 :
675 : static int
676 2129190 : _validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner)
677 : {
678 2129190 : if (asdl_seq_LEN(seq))
679 2129180 : return 1;
680 17 : PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner);
681 17 : return 0;
682 : }
683 : #define validate_nonempty_seq(seq, what, owner) _validate_nonempty_seq((asdl_seq*)seq, what, owner)
684 :
685 : static int
686 837821 : validate_assignlist(struct validator *state, asdl_expr_seq *targets, expr_context_ty ctx)
687 : {
688 1675640 : return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") &&
689 837819 : validate_exprs(state, targets, ctx, 0);
690 : }
691 :
692 : static int
693 1155320 : validate_body(struct validator *state, asdl_stmt_seq *body, const char *owner)
694 : {
695 1155320 : return validate_nonempty_seq(body, "body", owner) && validate_stmts(state, body);
696 : }
697 :
698 : static int
699 3339070 : validate_stmt(struct validator *state, stmt_ty stmt)
700 : {
701 3339070 : VALIDATE_POSITIONS(stmt);
702 3339060 : int ret = -1;
703 : Py_ssize_t i;
704 3339060 : if (++state->recursion_depth > state->recursion_limit) {
705 0 : PyErr_SetString(PyExc_RecursionError,
706 : "maximum recursion depth exceeded during compilation");
707 0 : return 0;
708 : }
709 3339060 : switch (stmt->kind) {
710 346297 : case FunctionDef_kind:
711 692593 : ret = validate_body(state, stmt->v.FunctionDef.body, "FunctionDef") &&
712 692585 : validate_arguments(state, stmt->v.FunctionDef.args) &&
713 1038880 : validate_exprs(state, stmt->v.FunctionDef.decorator_list, Load, 0) &&
714 379313 : (!stmt->v.FunctionDef.returns ||
715 33025 : validate_expr(state, stmt->v.FunctionDef.returns, Load));
716 346297 : break;
717 57600 : case ClassDef_kind:
718 115198 : ret = validate_body(state, stmt->v.ClassDef.body, "ClassDef") &&
719 115195 : validate_exprs(state, stmt->v.ClassDef.bases, Load, 0) &&
720 172795 : validate_keywords(state, stmt->v.ClassDef.keywords) &&
721 57596 : validate_exprs(state, stmt->v.ClassDef.decorator_list, Load, 0);
722 57600 : break;
723 248348 : case Return_kind:
724 248348 : ret = !stmt->v.Return.value || validate_expr(state, stmt->v.Return.value, Load);
725 248348 : break;
726 9121 : case Delete_kind:
727 9121 : ret = validate_assignlist(state, stmt->v.Delete.targets, Del);
728 9121 : break;
729 828700 : case Assign_kind:
730 1657400 : ret = validate_assignlist(state, stmt->v.Assign.targets, Store) &&
731 828695 : validate_expr(state, stmt->v.Assign.value, Load);
732 828700 : break;
733 26533 : case AugAssign_kind:
734 53065 : ret = validate_expr(state, stmt->v.AugAssign.target, Store) &&
735 26532 : validate_expr(state, stmt->v.AugAssign.value, Load);
736 26533 : break;
737 11490 : case AnnAssign_kind:
738 11490 : if (stmt->v.AnnAssign.target->kind != Name_kind &&
739 1969 : stmt->v.AnnAssign.simple) {
740 0 : PyErr_SetString(PyExc_TypeError,
741 : "AnnAssign with simple non-Name target");
742 0 : return 0;
743 : }
744 11490 : ret = validate_expr(state, stmt->v.AnnAssign.target, Store) &&
745 19109 : (!stmt->v.AnnAssign.value ||
746 30599 : validate_expr(state, stmt->v.AnnAssign.value, Load)) &&
747 11490 : validate_expr(state, stmt->v.AnnAssign.annotation, Load);
748 11490 : break;
749 62855 : case For_kind:
750 125709 : ret = validate_expr(state, stmt->v.For.target, Store) &&
751 125707 : validate_expr(state, stmt->v.For.iter, Load) &&
752 188562 : validate_body(state, stmt->v.For.body, "For") &&
753 62851 : validate_stmts(state, stmt->v.For.orelse);
754 62855 : break;
755 196 : case AsyncFor_kind:
756 392 : ret = validate_expr(state, stmt->v.AsyncFor.target, Store) &&
757 392 : validate_expr(state, stmt->v.AsyncFor.iter, Load) &&
758 588 : validate_body(state, stmt->v.AsyncFor.body, "AsyncFor") &&
759 196 : validate_stmts(state, stmt->v.AsyncFor.orelse);
760 196 : break;
761 12177 : case While_kind:
762 24353 : ret = validate_expr(state, stmt->v.While.test, Load) &&
763 24353 : validate_body(state, stmt->v.While.body, "While") &&
764 12175 : validate_stmts(state, stmt->v.While.orelse);
765 12177 : break;
766 534318 : case If_kind:
767 1068640 : ret = validate_expr(state, stmt->v.If.test, Load) &&
768 1068640 : validate_body(state, stmt->v.If.body, "If") &&
769 534315 : validate_stmts(state, stmt->v.If.orelse);
770 534318 : break;
771 38968 : case With_kind:
772 38968 : if (!validate_nonempty_seq(stmt->v.With.items, "items", "With"))
773 1 : return 0;
774 78734 : for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) {
775 39769 : withitem_ty item = asdl_seq_GET(stmt->v.With.items, i);
776 39769 : if (!validate_expr(state, item->context_expr, Load) ||
777 39768 : (item->optional_vars && !validate_expr(state, item->optional_vars, Store)))
778 2 : return 0;
779 : }
780 38965 : ret = validate_body(state, stmt->v.With.body, "With");
781 38965 : break;
782 380 : case AsyncWith_kind:
783 380 : if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith"))
784 0 : return 0;
785 781 : for (i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) {
786 401 : withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i);
787 401 : if (!validate_expr(state, item->context_expr, Load) ||
788 401 : (item->optional_vars && !validate_expr(state, item->optional_vars, Store)))
789 0 : return 0;
790 : }
791 380 : ret = validate_body(state, stmt->v.AsyncWith.body, "AsyncWith");
792 380 : break;
793 1247 : case Match_kind:
794 1247 : if (!validate_expr(state, stmt->v.Match.subject, Load)
795 1247 : || !validate_nonempty_seq(stmt->v.Match.cases, "cases", "Match")) {
796 0 : return 0;
797 : }
798 3030 : for (i = 0; i < asdl_seq_LEN(stmt->v.Match.cases); i++) {
799 1806 : match_case_ty m = asdl_seq_GET(stmt->v.Match.cases, i);
800 1806 : if (!validate_pattern(state, m->pattern, /*star_ok=*/0)
801 1783 : || (m->guard && !validate_expr(state, m->guard, Load))
802 1783 : || !validate_body(state, m->body, "match_case")) {
803 23 : return 0;
804 : }
805 : }
806 1224 : ret = 1;
807 1224 : break;
808 63697 : case Raise_kind:
809 63697 : if (stmt->v.Raise.exc) {
810 117627 : ret = validate_expr(state, stmt->v.Raise.exc, Load) &&
811 58813 : (!stmt->v.Raise.cause || validate_expr(state, stmt->v.Raise.cause, Load));
812 58814 : break;
813 : }
814 4883 : if (stmt->v.Raise.cause) {
815 1 : PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception");
816 1 : return 0;
817 : }
818 4882 : ret = 1;
819 4882 : break;
820 50772 : case Try_kind:
821 50772 : if (!validate_body(state, stmt->v.Try.body, "Try"))
822 2 : return 0;
823 50770 : if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
824 7601 : !asdl_seq_LEN(stmt->v.Try.finalbody)) {
825 1 : PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody");
826 1 : return 0;
827 : }
828 50769 : if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
829 7600 : asdl_seq_LEN(stmt->v.Try.orelse)) {
830 1 : PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers");
831 1 : return 0;
832 : }
833 96670 : for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) {
834 45904 : excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i);
835 45904 : VALIDATE_POSITIONS(handler);
836 89543 : if ((handler->v.ExceptHandler.type &&
837 89542 : !validate_expr(state, handler->v.ExceptHandler.type, Load)) ||
838 45903 : !validate_body(state, handler->v.ExceptHandler.body, "ExceptHandler"))
839 2 : return 0;
840 : }
841 17797 : ret = (!asdl_seq_LEN(stmt->v.Try.finalbody) ||
842 109783 : validate_stmts(state, stmt->v.Try.finalbody)) &&
843 57065 : (!asdl_seq_LEN(stmt->v.Try.orelse) ||
844 6300 : validate_stmts(state, stmt->v.Try.orelse));
845 50766 : break;
846 261 : case TryStar_kind:
847 261 : if (!validate_body(state, stmt->v.TryStar.body, "TryStar"))
848 2 : return 0;
849 259 : if (!asdl_seq_LEN(stmt->v.TryStar.handlers) &&
850 2 : !asdl_seq_LEN(stmt->v.TryStar.finalbody)) {
851 1 : PyErr_SetString(PyExc_ValueError, "TryStar has neither except handlers nor finalbody");
852 1 : return 0;
853 : }
854 258 : if (!asdl_seq_LEN(stmt->v.TryStar.handlers) &&
855 1 : asdl_seq_LEN(stmt->v.TryStar.orelse)) {
856 1 : PyErr_SetString(PyExc_ValueError, "TryStar has orelse but no except handlers");
857 1 : return 0;
858 : }
859 573 : for (i = 0; i < asdl_seq_LEN(stmt->v.TryStar.handlers); i++) {
860 318 : excepthandler_ty handler = asdl_seq_GET(stmt->v.TryStar.handlers, i);
861 633 : if ((handler->v.ExceptHandler.type &&
862 632 : !validate_expr(state, handler->v.ExceptHandler.type, Load)) ||
863 317 : !validate_body(state, handler->v.ExceptHandler.body, "ExceptHandler"))
864 2 : return 0;
865 : }
866 116 : ret = (!asdl_seq_LEN(stmt->v.TryStar.finalbody) ||
867 565 : validate_stmts(state, stmt->v.TryStar.finalbody)) &&
868 345 : (!asdl_seq_LEN(stmt->v.TryStar.orelse) ||
869 91 : validate_stmts(state, stmt->v.TryStar.orelse));
870 255 : break;
871 9994 : case Assert_kind:
872 19987 : ret = validate_expr(state, stmt->v.Assert.test, Load) &&
873 9993 : (!stmt->v.Assert.msg || validate_expr(state, stmt->v.Assert.msg, Load));
874 9994 : break;
875 46122 : case Import_kind:
876 46122 : ret = validate_nonempty_seq(stmt->v.Import.names, "names", "Import");
877 46122 : break;
878 46486 : case ImportFrom_kind:
879 46486 : if (stmt->v.ImportFrom.level < 0) {
880 1 : PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level");
881 1 : return 0;
882 : }
883 46485 : ret = validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom");
884 46485 : break;
885 1905 : case Global_kind:
886 1905 : ret = validate_nonempty_seq(stmt->v.Global.names, "names", "Global");
887 1905 : break;
888 946 : case Nonlocal_kind:
889 946 : ret = validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal");
890 946 : break;
891 890076 : case Expr_kind:
892 890076 : ret = validate_expr(state, stmt->v.Expr.value, Load);
893 890076 : break;
894 3498 : case AsyncFunctionDef_kind:
895 6996 : ret = validate_body(state, stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") &&
896 6996 : validate_arguments(state, stmt->v.AsyncFunctionDef.args) &&
897 10494 : validate_exprs(state, stmt->v.AsyncFunctionDef.decorator_list, Load, 0) &&
898 3914 : (!stmt->v.AsyncFunctionDef.returns ||
899 416 : validate_expr(state, stmt->v.AsyncFunctionDef.returns, Load));
900 3498 : break;
901 47077 : case Pass_kind:
902 : case Break_kind:
903 : case Continue_kind:
904 47077 : ret = 1;
905 47077 : break;
906 : // No default case so compiler emits warning for unhandled cases
907 : }
908 3339020 : if (ret < 0) {
909 0 : PyErr_SetString(PyExc_SystemError, "unexpected statement");
910 0 : ret = 0;
911 : }
912 3339020 : state->recursion_depth--;
913 3339020 : return ret;
914 : }
915 :
916 : static int
917 1832470 : validate_stmts(struct validator *state, asdl_stmt_seq *seq)
918 : {
919 : Py_ssize_t i;
920 5171360 : for (i = 0; i < asdl_seq_LEN(seq); i++) {
921 3339080 : stmt_ty stmt = asdl_seq_GET(seq, i);
922 3339080 : if (stmt) {
923 3339070 : if (!validate_stmt(state, stmt))
924 189 : return 0;
925 : }
926 : else {
927 1 : PyErr_SetString(PyExc_ValueError,
928 : "None disallowed in statement list");
929 1 : return 0;
930 : }
931 : }
932 1832280 : return 1;
933 : }
934 :
935 : static int
936 4620100 : validate_exprs(struct validator *state, asdl_expr_seq *exprs, expr_context_ty ctx, int null_ok)
937 : {
938 : Py_ssize_t i;
939 12049700 : for (i = 0; i < asdl_seq_LEN(exprs); i++) {
940 7429590 : expr_ty expr = asdl_seq_GET(exprs, i);
941 7429590 : if (expr) {
942 7424490 : if (!validate_expr(state, expr, ctx))
943 21 : return 0;
944 : }
945 5102 : else if (!null_ok) {
946 12 : PyErr_SetString(PyExc_ValueError,
947 : "None disallowed in expression list");
948 12 : return 0;
949 : }
950 :
951 : }
952 4620070 : return 1;
953 : }
954 :
955 : static int
956 1983 : validate_patterns(struct validator *state, asdl_pattern_seq *patterns, int star_ok)
957 : {
958 : Py_ssize_t i;
959 4887 : for (i = 0; i < asdl_seq_LEN(patterns); i++) {
960 2912 : pattern_ty pattern = asdl_seq_GET(patterns, i);
961 2912 : if (!validate_pattern(state, pattern, star_ok)) {
962 8 : return 0;
963 : }
964 : }
965 1975 : return 1;
966 : }
967 :
968 :
969 : /* See comments in symtable.c. */
970 : #define COMPILER_STACK_FRAME_SCALE 3
971 :
972 : int
973 126295 : _PyAST_Validate(mod_ty mod)
974 : {
975 126295 : int res = -1;
976 : struct validator state;
977 : PyThreadState *tstate;
978 126295 : int recursion_limit = Py_GetRecursionLimit();
979 : int starting_recursion_depth;
980 :
981 : /* Setup recursion depth check counters */
982 126295 : tstate = _PyThreadState_GET();
983 126295 : if (!tstate) {
984 0 : return 0;
985 : }
986 : /* Be careful here to prevent overflow. */
987 126295 : int recursion_depth = tstate->recursion_limit - tstate->recursion_remaining;
988 126295 : starting_recursion_depth = (recursion_depth< INT_MAX / COMPILER_STACK_FRAME_SCALE) ?
989 126295 : recursion_depth * COMPILER_STACK_FRAME_SCALE : recursion_depth;
990 126295 : state.recursion_depth = starting_recursion_depth;
991 126295 : state.recursion_limit = (recursion_limit < INT_MAX / COMPILER_STACK_FRAME_SCALE) ?
992 126295 : recursion_limit * COMPILER_STACK_FRAME_SCALE : recursion_limit;
993 :
994 126295 : switch (mod->kind) {
995 48772 : case Module_kind:
996 48772 : res = validate_stmts(&state, mod->v.Module.body);
997 48772 : break;
998 4157 : case Interactive_kind:
999 4157 : res = validate_stmts(&state, mod->v.Interactive.body);
1000 4157 : break;
1001 73366 : case Expression_kind:
1002 73366 : res = validate_expr(&state, mod->v.Expression.body, Load);
1003 73366 : break;
1004 0 : case FunctionType_kind:
1005 0 : res = validate_exprs(&state, mod->v.FunctionType.argtypes, Load, /*null_ok=*/0) &&
1006 0 : validate_expr(&state, mod->v.FunctionType.returns, Load);
1007 0 : break;
1008 : // No default case so compiler emits warning for unhandled cases
1009 : }
1010 :
1011 126295 : if (res < 0) {
1012 0 : PyErr_SetString(PyExc_SystemError, "impossible module node");
1013 0 : return 0;
1014 : }
1015 :
1016 : /* Check that the recursion depth counting balanced correctly */
1017 126295 : if (res && state.recursion_depth != starting_recursion_depth) {
1018 0 : PyErr_Format(PyExc_SystemError,
1019 : "AST validator recursion depth mismatch (before=%d, after=%d)",
1020 : starting_recursion_depth, state.recursion_depth);
1021 0 : return 0;
1022 : }
1023 126295 : return res;
1024 : }
1025 :
1026 : PyObject *
1027 939769 : _PyAST_GetDocString(asdl_stmt_seq *body)
1028 : {
1029 939769 : if (!asdl_seq_LEN(body)) {
1030 1358 : return NULL;
1031 : }
1032 938411 : stmt_ty st = asdl_seq_GET(body, 0);
1033 938411 : if (st->kind != Expr_kind) {
1034 647038 : return NULL;
1035 : }
1036 291373 : expr_ty e = st->v.Expr.value;
1037 291373 : if (e->kind == Constant_kind && PyUnicode_CheckExact(e->v.Constant.value)) {
1038 204592 : return e->v.Constant.value;
1039 : }
1040 86781 : return NULL;
1041 : }
|