/home/mdboom/Work/builds/cpython/Python/Python-tokenize.c
Line | Count | Source (jump to first uncovered line) |
1 | #include "Python.h" |
2 | #include "../Parser/tokenizer.h" |
3 | |
4 | static struct PyModuleDef _tokenizemodule; |
5 | |
6 | typedef struct { |
7 | PyTypeObject *TokenizerIter; |
8 | } tokenize_state; |
9 | |
/* Fetch this module's per-interpreter state. Returns NULL (with an
   exception set) if `module` is not a module created from this def. */
static tokenize_state *
get_tokenize_state(PyObject *module) {
    return (tokenize_state *)PyModule_GetState(module);
}
14 | |
/* Resolve the module state from a heap type: walk from `type` back to the
   module that defined it (via _tokenizemodule), then to its state.  Used by
   the Argument Clinic glue below. */
#define _tokenize_get_state_by_type(type) \
    get_tokenize_state(PyType_GetModuleByDef(type, &_tokenizemodule))
17 | |
18 | #include "clinic/Python-tokenize.c.h" |
19 | |
20 | /*[clinic input] |
21 | module _tokenizer |
22 | class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_type(type)->TokenizerIter" |
23 | [clinic start generated code]*/ |
24 | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=96d98ee2fef7a8bc]*/ |
25 | |
/* Instance layout for _tokenize.TokenizerIter: a Python object wrapping
   the C tokenizer state.  `tok` is owned by the instance and freed in
   tokenizeriter_dealloc. */
typedef struct
{
    PyObject_HEAD
    struct tok_state *tok;
} tokenizeriterobject;
30 | |
31 | /*[clinic input] |
32 | @classmethod |
33 | _tokenizer.tokenizeriter.__new__ as tokenizeriter_new |
34 | |
35 | source: str |
36 | [clinic start generated code]*/ |
37 | |
38 | static PyObject * |
39 | tokenizeriter_new_impl(PyTypeObject *type, const char *source) |
40 | /*[clinic end generated code: output=7fd9f46cf9263cbb input=4384b368407375c6]*/ |
41 | { |
42 | tokenizeriterobject *self = (tokenizeriterobject *)type->tp_alloc(type, 0); |
43 | if (self == NULL) { Branch (43:9): [True: 0, False: 114]
|
44 | return NULL; |
45 | } |
46 | PyObject *filename = PyUnicode_FromString("<string>"); |
47 | if (filename == NULL) { Branch (47:9): [True: 0, False: 114]
|
48 | return NULL; |
49 | } |
50 | self->tok = _PyTokenizer_FromUTF8(source, 1); |
51 | if (self->tok == NULL) { Branch (51:9): [True: 0, False: 114]
|
52 | Py_DECREF(filename); |
53 | return NULL; |
54 | } |
55 | self->tok->filename = filename; |
56 | return (PyObject *)self; |
57 | } |
58 | |
/* tp_iternext: return the next token as a 7-tuple
   (string, type, lineno, end_lineno, col_offset, end_col_offset, line),
   or set StopIteration when the input is exhausted.
   Returns NULL with an exception set on error. */
static PyObject *
tokenizeriter_next(tokenizeriterobject *it)
{
    const char *start;
    const char *end;
    int type = _PyTokenizer_Get(it->tok, &start, &end);
    if (type == ERRORTOKEN && PyErr_Occurred()) {
        /* The tokenizer already set an exception; just propagate it. */
        return NULL;
    }
    if (type == ERRORTOKEN || type == ENDMARKER) {
        /* End of stream (or an error token without a pending exception). */
        PyErr_SetString(PyExc_StopIteration, "EOF");
        return NULL;
    }
    PyObject *str = NULL;
    if (start == NULL || end == NULL) {
        /* Some tokens carry no source text; use an empty string. */
        str = PyUnicode_FromString("");
    }
    else {
        str = PyUnicode_FromStringAndSize(start, end - start);
    }
    if (str == NULL) {
        return NULL;
    }

    /* The full current line; decode with "replace" so malformed bytes
       cannot make token iteration itself fail. */
    Py_ssize_t size = it->tok->inp - it->tok->buf;
    PyObject *line = PyUnicode_DecodeUTF8(it->tok->buf, size, "replace");
    if (line == NULL) {
        Py_DECREF(str);
        return NULL;
    }
    /* A (multi-line) STRING token may have started on an earlier line than
       the tokenizer's current position, so use the recorded start for it. */
    const char *line_start = type == STRING ? it->tok->multi_line_start : it->tok->line_start;
    int lineno = type == STRING ? it->tok->first_lineno : it->tok->lineno;
    int end_lineno = it->tok->lineno;
    int col_offset = -1;    /* -1 signals "no column available" */
    int end_col_offset = -1;
    if (start != NULL && start >= line_start) {
        col_offset = (int)(start - line_start);
    }
    if (end != NULL && end >= it->tok->line_start) {
        end_col_offset = (int)(end - it->tok->line_start);
    }

    /* "N" steals the references to str and line. */
    return Py_BuildValue("(NiiiiiN)", str, type, lineno, end_lineno, col_offset, end_col_offset, line);
}
103 | |
/* tp_dealloc for the heap type: free the owned tokenizer state, release
   the instance, then drop the reference the instance held on its type.
   The type pointer must be captured before tp_free invalidates `it`. */
static void
tokenizeriter_dealloc(tokenizeriterobject *it)
{
    PyTypeObject *tp = Py_TYPE(it);
    _PyTokenizer_Free(it->tok);
    tp->tp_free(it);
    Py_DECREF(tp);
}
112 | |
/* Slot table for the TokenizerIter heap type: construction, destruction,
   generic attribute access, and the iterator protocol (self-iterating). */
static PyType_Slot tokenizeriter_slots[] = {
    {Py_tp_new, tokenizeriter_new},
    {Py_tp_dealloc, tokenizeriter_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_iter, PyObject_SelfIter},
    {Py_tp_iternext, tokenizeriter_next},
    {0, NULL},
};
121 | |
/* Spec used to create the immutable TokenizerIter heap type at module
   exec time (see tokenizemodule_exec). */
static PyType_Spec tokenizeriter_spec = {
    .name = "_tokenize.TokenizerIter",
    .basicsize = sizeof(tokenizeriterobject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = tokenizeriter_slots,
};
128 | |
129 | static int |
130 | tokenizemodule_exec(PyObject *m) |
131 | { |
132 | tokenize_state *state = get_tokenize_state(m); |
133 | if (state == NULL) { Branch (133:9): [True: 0, False: 1]
|
134 | return -1; |
135 | } |
136 | |
137 | state->TokenizerIter = (PyTypeObject *)PyType_FromModuleAndSpec(m, &tokenizeriter_spec, NULL); |
138 | if (state->TokenizerIter == NULL) { Branch (138:9): [True: 0, False: 1]
|
139 | return -1; |
140 | } |
141 | if (PyModule_AddType(m, state->TokenizerIter) < 0) { Branch (141:9): [True: 0, False: 1]
|
142 | return -1; |
143 | } |
144 | |
145 | return 0; |
146 | } |
147 | |
/* The module exposes no functions, only the TokenizerIter type. */
static PyMethodDef tokenize_methods[] = {
    {NULL, NULL, 0, NULL} /* Sentinel */
};
151 | |
/* Multi-phase init slots: only an exec hook is needed. */
static PyModuleDef_Slot tokenizemodule_slots[] = {
    {Py_mod_exec, tokenizemodule_exec},
    {0, NULL}
};
156 | |
/* GC traverse: visit the one PyObject the module state owns.
   Note Py_VISIT returns from this function on a non-zero visit result. */
static int
tokenizemodule_traverse(PyObject *m, visitproc visit, void *arg)
{
    tokenize_state *state = get_tokenize_state(m);
    Py_VISIT(state->TokenizerIter);
    return 0;
}
164 | |
/* GC clear: drop the strong reference to the TokenizerIter type. */
static int
tokenizemodule_clear(PyObject *m)
{
    tokenize_state *state = get_tokenize_state(m);
    Py_CLEAR(state->TokenizerIter);
    return 0;
}
172 | |
/* m_free: module deallocation just delegates to the clear hook. */
static void
tokenizemodule_free(void *m)
{
    tokenizemodule_clear((PyObject *)m);
}
178 | |
/* Module definition for _tokenize.  m_size > 0 gives each interpreter its
   own tokenize_state, making the module subinterpreter-safe. */
static struct PyModuleDef _tokenizemodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "_tokenize",
    .m_size = sizeof(tokenize_state),
    .m_slots = tokenizemodule_slots,
    .m_methods = tokenize_methods,
    .m_traverse = tokenizemodule_traverse,
    .m_clear = tokenizemodule_clear,
    .m_free = tokenizemodule_free,
};
189 | |
/* Multi-phase init entry point: return the module def; actual setup
   happens later via the Py_mod_exec slot. */
PyMODINIT_FUNC
PyInit__tokenize(void)
{
    return PyModuleDef_Init(&_tokenizemodule);
}