Coverage Report

Created: 2022-07-08 09:39

/home/mdboom/Work/builds/cpython/Parser/tokenizer.h
Line
Count
Source
1
#ifndef Py_TOKENIZER_H
2
#define Py_TOKENIZER_H
3
#ifdef __cplusplus
4
extern "C" {
5
#endif
6
7
#include "object.h"
8
9
/* Tokenizer interface */
10
11
#include "pycore_token.h" /* For token types */
12
13
#define MAXINDENT 100   /* Max indentation level */
14
#define MAXLEVEL 200    /* Max parentheses level */
15
16
enum decoding_state {   /* Type of tok_state.decoding_state (declared with the PEP 0263 fields) */
17
    STATE_INIT,
18
    STATE_SEEK_CODING,
19
    STATE_NORMAL
20
};
21
22
enum interactive_underflow_t {
23
    /* Normal mode of operation: return a new token when asked in interactive mode */
24
    IUNDERFLOW_NORMAL,
25
    /* Forcefully return ENDMARKER when asked for a new token in interactive mode. This
26
     * can be used to prevent the tokenizer from prompting the user for new tokens */
27
    IUNDERFLOW_STOP,
28
};
29
30
/* Tokenizer state */
31
struct tok_state {   /* Per-tokenizer state; created/consumed by the _PyTokenizer_* functions declared in this header */
31
    /* Input state; buf <= cur <= inp <= end */
32
    /* NB an entire line is held in the buffer */
33
    char *buf;          /* Input buffer, or NULL; malloc'ed if fp != NULL */
34
    char *cur;          /* Next character in buffer */
35
    char *inp;          /* End of data in buffer */
36
    int fp_interactive; /* If the file descriptor is interactive */
37
    char *interactive_src_start; /* The start of the source parsed so far in interactive mode */
38
    char *interactive_src_end; /* The end of the source parsed so far in interactive mode */
39
    const char *end;    /* End of input buffer if buf != NULL */
40
    const char *start;  /* Start of current token if not NULL */
41
    int done;           /* E_OK normally, E_EOF at EOF, otherwise error code */
42
    /* NB If done != E_OK, cur must be == inp!!! */
43
    FILE *fp;           /* Rest of input; NULL if tokenizing a string */
44
    int tabsize;        /* Tab spacing */
45
    int indent;         /* Current indentation index */
46
    int indstack[MAXINDENT];            /* Stack of indents */
47
    int atbol;          /* Nonzero if at begin of new line */
48
    int pendin;         /* Pending indents (if > 0) or dedents (if < 0) */
49
    const char *prompt, *nextprompt;          /* For interactive prompting */
50
    int lineno;         /* Current line number */
51
    int first_lineno;   /* First line of a single line or multi line string
52
                           expression (cf. issue 16806) */
53
    int level;          /* () [] {} Parentheses nesting level */
54
            /* Used to allow free continuations inside them */
55
    char parenstack[MAXLEVEL];          /* NOTE(review): presumably the open bracket character at each nesting level -- confirm in tokenizer.c */
56
    int parenlinenostack[MAXLEVEL];     /* NOTE(review): presumably the line number of each open bracket -- confirm */
57
    int parencolstack[MAXLEVEL];        /* NOTE(review): presumably the column of each open bracket -- confirm */
58
    PyObject *filename;                 /* NOTE(review): presumably the name of the source being tokenized -- confirm */
59
    /* Stuff for checking on different tab sizes */
60
    int altindstack[MAXINDENT];         /* Stack of alternate indents */
61
    /* Stuff for PEP 0263 */
62
    enum decoding_state decoding_state; /* One of STATE_INIT, STATE_SEEK_CODING, STATE_NORMAL */
63
    int decoding_erred;         /* Whether an error occurred while decoding */
64
    char *encoding;         /* Source encoding. */
65
    int cont_line;          /* whether we are in a continuation line. */
66
    const char* line_start;     /* pointer to start of current line */
67
    const char* multi_line_start; /* pointer to start of first line of
68
                                     a single line or multi line string
69
                                     expression (cf. issue 16806) */
70
    PyObject *decoding_readline; /* open(...).readline */
71
    PyObject *decoding_buffer;   /* NOTE(review): undocumented; presumably data buffered for decoding_readline -- confirm in tokenizer.c */
72
    const char* enc;        /* Encoding for the current str. */
73
    char* str;          /* Source string being tokenized (if tokenizing from a string) */
74
    char* input;       /* Tokenizer's newline translated copy of the string. */
75
76
    int type_comments;      /* Whether to look for type comments */
77
78
    /* async/await related fields (still needed depending on feature_version) */
79
    int async_hacks;     /* =1 if async/await aren't always keywords */
80
    int async_def;        /* =1 if tokens are inside an 'async def' body. */
81
    int async_def_indent; /* Indentation level of the outermost 'async def'. */
82
    int async_def_nl;     /* =1 if the outermost 'async def' had at least one
83
                             NEWLINE token after it. */
84
    /* How to proceed when asked for a new token in interactive mode */
85
    enum interactive_underflow_t interactive_underflow; /* IUNDERFLOW_NORMAL or IUNDERFLOW_STOP */
86
#ifdef Py_DEBUG
87
    int debug;          /* Field exists only in Py_DEBUG builds; NOTE(review): presumably enables debug tracing -- confirm */
88
#endif
89
};
91
92
extern struct tok_state *_PyTokenizer_FromString(const char *, int);  /* NOTE(review): presumably creates a tokenizer over a C string; meaning of the int argument is not visible here -- confirm */
93
extern struct tok_state *_PyTokenizer_FromUTF8(const char *, int);  /* NOTE(review): presumably as above for input already in UTF-8 -- confirm */
94
extern struct tok_state *_PyTokenizer_FromFile(FILE *, const char*,
95
                                              const char *, const char *);  /* NOTE(review): presumably creates a tokenizer reading from a FILE; the three strings are unnamed here -- confirm their roles in tokenizer.c */
96
extern void _PyTokenizer_Free(struct tok_state *);  /* NOTE(review): presumably releases a tok_state returned by one of the functions above -- confirm */
97
extern int _PyTokenizer_Get(struct tok_state *, const char **, const char **);  /* NOTE(review): presumably returns the next token's type (see pycore_token.h) and reports its start/end through the two out-pointers -- confirm */
98
99
#define tok_dump _Py_tok_dump  /* Alias: uses of tok_dump expand to the _Py-prefixed name _Py_tok_dump */
100
101
#ifdef __cplusplus
102
}
103
#endif
104
#endif /* !Py_TOKENIZER_H */