Line data Source code
1 : #include "Python.h"
2 : #include <ctype.h>
3 :
4 : #include "structmember.h" // PyMemberDef
5 : #include "expat.h"
6 :
7 : #include "pyexpat.h"
8 :
9 : /* Do not emit Clinic output to a file as that wreaks havoc with conditionally
10 : included methods. */
11 : /*[clinic input]
12 : module pyexpat
13 : [clinic start generated code]*/
14 : /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
15 :
/* Expat version folded into a single integer
   (major * 10000 + minor * 100 + micro) so that it can be compared in
   #if directives, e.g. 19504 stands for Expat 1.95.4. */
16 : #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
17 :
/* Allocator triple (malloc, realloc, free) built from the Python object
   allocator.  Presumably handed to Expat when a parser is created; the
   call site is not part of this excerpt. */
18 : static XML_Memory_Handling_Suite ExpatMemoryHandler = {
19 : PyObject_Malloc, PyObject_Realloc, PyObject_Free};
20 :
/* Index of each callback slot in xmlparseobject.handlers.  The enumerator
   names double as the NAME argument of the *_HANDLER macros below, which
   use them to index self->handlers. */
21 : enum HandlerTypes {
22 : StartElement,
23 : EndElement,
24 : ProcessingInstruction,
25 : CharacterData,
26 : UnparsedEntityDecl,
27 : NotationDecl,
28 : StartNamespaceDecl,
29 : EndNamespaceDecl,
30 : Comment,
31 : StartCdataSection,
32 : EndCdataSection,
33 : Default,
34 : DefaultHandlerExpand,
35 : NotStandalone,
36 : ExternalEntityRef,
37 : StartDoctypeDecl,
38 : EndDoctypeDecl,
39 : EntityDecl,
40 : XmlDecl,
41 : ElementDecl,
42 : AttlistDecl,
/* The SkippedEntity slot only exists when building against
   Expat 1.95.4 or newer. */
43 : #if XML_COMBINED_VERSION >= 19504
44 : SkippedEntity,
45 : #endif
/* Trailing enumerator; it is not referenced in this excerpt. */
46 : _DummyDecl
47 : };
48 :
/* Per-module state of the pyexpat extension module. */
49 : typedef struct {
/* Type object used to allocate xmlparseobject instances. */
50 : PyTypeObject *xml_parse_type;
/* Exception class that set_error() instantiates and raises. */
51 : PyObject *error;
/* Attribute name looked up on the file object by ParseFile();
   presumably the interned string "read". */
52 : PyObject *str_read;
53 : } pyexpat_state;
54 :
55 : static inline pyexpat_state*
56 3227 : pyexpat_get_state(PyObject *module)
57 : {
58 3227 : void *state = PyModule_GetState(module);
59 3227 : assert(state != NULL);
60 3227 : return (pyexpat_state *)state;
61 : }
62 :
63 : /* ----------------------------------------------------- */
64 :
65 : /* Declarations for objects of type xmlparser */
66 :
/* Python-level parser object wrapping one Expat XML_Parser. */
67 : typedef struct {
68 : PyObject_HEAD
69 :
/* The wrapped Expat parser. */
70 : XML_Parser itself;
71 : int ordered_attributes; /* Return attributes as a list. */
72 : int specified_attributes; /* Report only specified attributes. */
73 : int in_callback; /* Is a callback active? */
74 : int ns_prefixes; /* Namespace-triplets mode? */
75 : XML_Char *buffer; /* Buffer used when accumulating characters */
76 : /* NULL if not enabled */
77 : int buffer_size; /* Size of buffer, in XML_Char units */
78 : int buffer_used; /* Buffer units in use */
79 : PyObject *intern; /* Dictionary to intern strings */
/* Python callables indexed by enum HandlerTypes; a NULL slot means that
   no handler is installed (see have_handler()). */
80 : PyObject **handlers;
81 : } xmlparseobject;
82 :
83 : #include "clinic/pyexpat.c.h"
84 :
/* NOTE(review): presumably the default size of the character-data buffer;
   its use is not visible in this excerpt. */
85 : #define CHARACTER_DATA_BUFFER_SIZE 8192
86 :
/* Signature of the functions that install a C callback on an Expat parser,
   and the untyped pointer used to store such a callback. */
87 : typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
88 : typedef void* xmlhandler;
89 :
/* One entry per handler slot: the entry name, the function that installs
   the C callback on a parser, the C callback itself and a getset
   descriptor (presumably the one exposing the slot as a Python
   attribute). */
90 : struct HandlerInfo {
91 : const char *name;
92 : xmlhandlersetter setter;
93 : xmlhandler handler;
94 : PyGetSetDef getset;
95 : };
96 :
/* Handler table, indexed like xmlparseobject.handlers.  The loops over it
   stop at the first entry whose name is NULL.  The contents are filled in
   elsewhere in the file. */
97 : static struct HandlerInfo handler_info[64];
98 :
99 : /* Set an integer attribute on the error object; return true on success,
100 : * false on an exception.
101 : */
102 : static int
103 96 : set_error_attr(PyObject *err, const char *name, int value)
104 : {
105 96 : PyObject *v = PyLong_FromLong(value);
106 :
107 96 : if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
108 0 : Py_XDECREF(v);
109 0 : return 0;
110 : }
111 96 : Py_DECREF(v);
112 96 : return 1;
113 : }
114 :
115 : /* Build and set an Expat exception, including positioning
116 : * information. Always returns NULL.
117 : */
118 : static PyObject *
119 32 : set_error(pyexpat_state *state, xmlparseobject *self, enum XML_Error code)
120 : {
121 : PyObject *err;
122 : PyObject *buffer;
123 32 : XML_Parser parser = self->itself;
124 32 : int lineno = XML_GetErrorLineNumber(parser);
125 32 : int column = XML_GetErrorColumnNumber(parser);
126 :
127 32 : buffer = PyUnicode_FromFormat("%s: line %i, column %i",
128 : XML_ErrorString(code), lineno, column);
129 32 : if (buffer == NULL)
130 0 : return NULL;
131 32 : err = PyObject_CallOneArg(state->error, buffer);
132 32 : Py_DECREF(buffer);
133 32 : if ( err != NULL
134 32 : && set_error_attr(err, "code", code)
135 32 : && set_error_attr(err, "offset", column)
136 32 : && set_error_attr(err, "lineno", lineno)) {
137 32 : PyErr_SetObject(state->error, err);
138 : }
139 32 : Py_XDECREF(err);
140 32 : return NULL;
141 : }
142 :
143 : static int
144 27517 : have_handler(xmlparseobject *self, int type)
145 : {
146 27517 : PyObject *handler = self->handlers[type];
147 27517 : return handler != NULL;
148 : }
149 :
150 : /* Convert a string of XML_Chars into a Unicode string.
151 : Returns None if str is a null pointer. */
152 :
153 : static PyObject *
154 15954 : conv_string_to_unicode(const XML_Char *str)
155 : {
156 : /* XXX currently this code assumes that XML_Char is 8-bit,
157 : and hence in UTF-8. */
158 : /* UTF-8 from Expat, Unicode desired */
159 15954 : if (str == NULL) {
160 214 : Py_RETURN_NONE;
161 : }
162 15740 : return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
163 : }
164 :
165 : static PyObject *
166 13037 : conv_string_len_to_unicode(const XML_Char *str, int len)
167 : {
168 : /* XXX currently this code assumes that XML_Char is 8-bit,
169 : and hence in UTF-8. */
170 : /* UTF-8 from Expat, Unicode desired */
171 13037 : if (str == NULL) {
172 7 : Py_RETURN_NONE;
173 : }
174 13030 : return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
175 : }
176 :
177 : /* Callback routines */
178 :
/* Forward declaration; the definition is not part of this excerpt.
   Within this excerpt it is called with initial=1 on a freshly allocated
   handlers array and with initial=0 from flag_error(). */
179 : static void clear_handlers(xmlparseobject *self, int initial);
180 :
181 : /* This handler is used when an error has been detected, in the hope
182 : that actual parsing can be terminated early. This will only help
183 : if an external entity reference is encountered. */
184 : static int
185 0 : error_external_entity_ref_handler(XML_Parser parser,
186 : const XML_Char *context,
187 : const XML_Char *base,
188 : const XML_Char *systemId,
189 : const XML_Char *publicId)
190 : {
191 0 : return 0;
192 : }
193 :
194 : /* Dummy character data handler used when an error (exception) has
195 : been detected, and the actual parsing can be terminated early.
196 : This is needed since character data handler can't be safely removed
197 : from within the character data handler, but can be replaced. It is
198 : used only from the character data handler trampoline, and must be
199 : used right after `flag_error()` is called. */
200 : static void
201 0 : noop_character_data_handler(void *userData, const XML_Char *data, int len)
202 : {
203 : /* Do nothing. */
204 0 : }
205 :
206 : static void
207 30 : flag_error(xmlparseobject *self)
208 : {
209 30 : clear_handlers(self, 0);
210 30 : XML_SetExternalEntityRefHandler(self->itself,
211 : error_external_entity_ref_handler);
212 30 : }
213 :
214 : static PyObject*
215 27514 : call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
216 : xmlparseobject *self)
217 : {
218 : PyObject *res;
219 :
220 27514 : res = PyObject_Call(func, args, NULL);
221 27514 : if (res == NULL) {
222 30 : _PyTraceback_Add(funcname, __FILE__, lineno);
223 30 : XML_StopParser(self->itself, XML_FALSE);
224 : }
225 27514 : return res;
226 : }
227 :
228 : static PyObject*
229 15232 : string_intern(xmlparseobject *self, const char* str)
230 : {
231 15232 : PyObject *result = conv_string_to_unicode(str);
232 : PyObject *value;
233 : /* result can be NULL if the unicode conversion failed. */
234 15232 : if (!result)
235 0 : return result;
236 15232 : if (!self->intern)
237 1624 : return result;
238 13608 : value = PyDict_GetItemWithError(self->intern, result);
239 13608 : if (!value) {
240 9006 : if (!PyErr_Occurred() &&
241 4503 : PyDict_SetItem(self->intern, result, result) == 0)
242 : {
243 4503 : return result;
244 : }
245 : else {
246 0 : Py_DECREF(result);
247 0 : return NULL;
248 : }
249 : }
250 9105 : Py_INCREF(value);
251 9105 : Py_DECREF(result);
252 9105 : return value;
253 : }
254 :
255 : /* Return 0 on success, -1 on exception.
256 : * flag_error() will be called before return if needed.
257 : */
258 : static int
259 11774 : call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
260 : {
261 : PyObject *args;
262 : PyObject *temp;
263 :
264 11774 : if (!have_handler(self, CharacterData))
265 0 : return -1;
266 :
267 11774 : args = PyTuple_New(1);
268 11774 : if (args == NULL)
269 0 : return -1;
270 11774 : temp = (conv_string_len_to_unicode(buffer, len));
271 11774 : if (temp == NULL) {
272 0 : Py_DECREF(args);
273 0 : flag_error(self);
274 0 : XML_SetCharacterDataHandler(self->itself,
275 : noop_character_data_handler);
276 0 : return -1;
277 : }
278 11774 : PyTuple_SET_ITEM(args, 0, temp);
279 : /* temp is now a borrowed reference; consider it unused. */
280 11774 : self->in_callback = 1;
281 11774 : temp = call_with_frame("CharacterData", __LINE__,
282 11774 : self->handlers[CharacterData], args, self);
283 : /* temp is an owned reference again, or NULL */
284 11774 : self->in_callback = 0;
285 11774 : Py_DECREF(args);
286 11774 : if (temp == NULL) {
287 1 : flag_error(self);
288 1 : XML_SetCharacterDataHandler(self->itself,
289 : noop_character_data_handler);
290 1 : return -1;
291 : }
292 11773 : Py_DECREF(temp);
293 11773 : return 0;
294 : }
295 :
296 : static int
297 19380 : flush_character_buffer(xmlparseobject *self)
298 : {
299 : int rc;
300 19380 : if (self->buffer == NULL || self->buffer_used == 0)
301 17642 : return 0;
302 1738 : rc = call_character_handler(self, self->buffer, self->buffer_used);
303 1738 : self->buffer_used = 0;
304 1738 : return rc;
305 : }
306 :
307 : static void
308 13595 : my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
309 : {
310 13595 : xmlparseobject *self = (xmlparseobject *) userData;
311 :
312 13595 : if (PyErr_Occurred())
313 1 : return;
314 :
315 13594 : if (self->buffer == NULL)
316 10036 : call_character_handler(self, data, len);
317 : else {
318 3558 : if ((self->buffer_used + len) > self->buffer_size) {
319 3 : if (flush_character_buffer(self) < 0)
320 0 : return;
321 : /* handler might have changed; drop the rest on the floor
322 : * if there isn't a handler anymore
323 : */
324 3 : if (!have_handler(self, CharacterData))
325 0 : return;
326 : }
327 3558 : if (len > self->buffer_size) {
328 0 : call_character_handler(self, data, len);
329 0 : self->buffer_used = 0;
330 : }
331 : else {
332 3558 : memcpy(self->buffer + self->buffer_used,
333 : data, len * sizeof(XML_Char));
334 3558 : self->buffer_used += len;
335 : }
336 : }
337 : }
338 :
339 : static void
340 7064 : my_StartElementHandler(void *userData,
341 : const XML_Char *name, const XML_Char *atts[])
342 : {
343 7064 : xmlparseobject *self = (xmlparseobject *)userData;
344 :
345 7064 : if (have_handler(self, StartElement)) {
346 : PyObject *container, *rv, *args;
347 : int i, max;
348 :
349 7064 : if (PyErr_Occurred())
350 0 : return;
351 :
352 7064 : if (flush_character_buffer(self) < 0)
353 0 : return;
354 : /* Set max to the number of slots filled in atts[]; max/2 is
355 : * the number of attributes we need to process.
356 : */
357 7064 : if (self->specified_attributes) {
358 487 : max = XML_GetSpecifiedAttributeCount(self->itself);
359 : }
360 : else {
361 6577 : max = 0;
362 7078 : while (atts[max] != NULL)
363 501 : max += 2;
364 : }
365 : /* Build the container. */
366 7064 : if (self->ordered_attributes)
367 1428 : container = PyList_New(max);
368 : else
369 5636 : container = PyDict_New();
370 7064 : if (container == NULL) {
371 0 : flag_error(self);
372 0 : return;
373 : }
374 7600 : for (i = 0; i < max; i += 2) {
375 536 : PyObject *n = string_intern(self, (XML_Char *) atts[i]);
376 : PyObject *v;
377 536 : if (n == NULL) {
378 0 : flag_error(self);
379 0 : Py_DECREF(container);
380 0 : return;
381 : }
382 536 : v = conv_string_to_unicode((XML_Char *) atts[i+1]);
383 536 : if (v == NULL) {
384 0 : flag_error(self);
385 0 : Py_DECREF(container);
386 0 : Py_DECREF(n);
387 0 : return;
388 : }
389 536 : if (self->ordered_attributes) {
390 278 : PyList_SET_ITEM(container, i, n);
391 278 : PyList_SET_ITEM(container, i+1, v);
392 : }
393 258 : else if (PyDict_SetItem(container, n, v)) {
394 0 : flag_error(self);
395 0 : Py_DECREF(n);
396 0 : Py_DECREF(v);
397 0 : Py_DECREF(container);
398 0 : return;
399 : }
400 : else {
401 258 : Py_DECREF(n);
402 258 : Py_DECREF(v);
403 : }
404 : }
405 7064 : args = string_intern(self, name);
406 7064 : if (args == NULL) {
407 0 : Py_DECREF(container);
408 0 : return;
409 : }
410 7064 : args = Py_BuildValue("(NN)", args, container);
411 7064 : if (args == NULL) {
412 0 : return;
413 : }
414 : /* Container is now a borrowed reference; ignore it. */
415 7064 : self->in_callback = 1;
416 7064 : rv = call_with_frame("StartElement", __LINE__,
417 7064 : self->handlers[StartElement], args, self);
418 7064 : self->in_callback = 0;
419 7064 : Py_DECREF(args);
420 7064 : if (rv == NULL) {
421 4 : flag_error(self);
422 4 : return;
423 : }
424 7060 : Py_DECREF(rv);
425 : }
426 : }
427 :
/* RC_HANDLER defines the C callback my_<NAME>Handler that forwards an
 * Expat event to the Python callable stored in self->handlers[NAME].
 *
 *   RC            return type of the generated function
 *   NAME          enum HandlerTypes member; also used for the traceback
 *   PARAMS        parenthesised parameter list of the callback
 *   INIT          extra declarations placed at the top of the body
 *   PARAM_FORMAT  parenthesised argument list for Py_BuildValue
 *   CONVERSION    statement(s) run on success, while `rv` is still alive
 *   RETURN        expression returned on every path (empty for void)
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing when no handler is installed or a
 * Python exception is pending.  Buffered character data is flushed first;
 * since that runs Python code which may remove this very handler, the
 * slot is checked again before it is called.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
        RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) \
            return RETURN; \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        if (!have_handler(self, NAME)) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(#NAME,__LINE__, \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
458 :
/* VOID_HANDLER: RC_HANDLER specialisation for callbacks that return
   nothing and receive the parser object as `userData`. */
459 : #define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
460 : RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
461 : (xmlparseobject *)userData)
462 :
/* INT_HANDLER: RC_HANDLER specialisation for callbacks that return an int.
   The result of the Python handler is converted with PyLong_AsLong(); the
   generated function returns 0 when the handler is absent or fails.
   NOTE(review): the PyLong_AsLong() result is not checked for an error. */
463 : #define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
464 : RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
465 : rc = PyLong_AsLong(rv);, rc, \
466 : (xmlparseobject *)userData)
467 :
/* my_EndElementHandler: Python handler is called with (name). */
468 7004 : VOID_HANDLER(EndElement,
469 : (void *userData, const XML_Char *name),
470 : ("(N)", string_intern(self, name)))
471 :
/* my_ProcessingInstructionHandler: called with (target, data). */
472 28 : VOID_HANDLER(ProcessingInstruction,
473 : (void *userData,
474 : const XML_Char *target,
475 : const XML_Char *data),
476 : ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
477 :
/* my_UnparsedEntityDeclHandler: called with
   (entityName, base, systemId, publicId, notationName). */
478 4 : VOID_HANDLER(UnparsedEntityDecl,
479 : (void *userData,
480 : const XML_Char *entityName,
481 : const XML_Char *base,
482 : const XML_Char *systemId,
483 : const XML_Char *publicId,
484 : const XML_Char *notationName),
485 : ("(NNNNN)",
486 : string_intern(self, entityName), string_intern(self, base),
487 : string_intern(self, systemId), string_intern(self, publicId),
488 : string_intern(self, notationName)))
489 :
/* my_EntityDeclHandler: called with (entityName, is_parameter_entity,
   value, base, systemId, publicId, notationName).  The value is converted
   with its explicit length; a null pointer becomes None. */
490 13 : VOID_HANDLER(EntityDecl,
491 : (void *userData,
492 : const XML_Char *entityName,
493 : int is_parameter_entity,
494 : const XML_Char *value,
495 : int value_length,
496 : const XML_Char *base,
497 : const XML_Char *systemId,
498 : const XML_Char *publicId,
499 : const XML_Char *notationName),
500 : ("NiNNNNN",
501 : string_intern(self, entityName), is_parameter_entity,
502 : (conv_string_len_to_unicode(value, value_length)),
503 : string_intern(self, base), string_intern(self, systemId),
504 : string_intern(self, publicId),
505 : string_intern(self, notationName)))
506 :
/* my_XmlDeclHandler: called with (version, encoding, standalone). */
507 29 : VOID_HANDLER(XmlDecl,
508 : (void *userData,
509 : const XML_Char *version,
510 : const XML_Char *encoding,
511 : int standalone),
512 : ("(O&O&i)",
513 : conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
514 : standalone))
515 :
516 : static PyObject *
517 5 : conv_content_model(XML_Content * const model,
518 : PyObject *(*conv_string)(const XML_Char *))
519 : {
520 5 : PyObject *result = NULL;
521 5 : PyObject *children = PyTuple_New(model->numchildren);
522 : int i;
523 :
524 5 : if (children != NULL) {
525 5 : assert(model->numchildren < INT_MAX);
526 5 : for (i = 0; i < (int)model->numchildren; ++i) {
527 0 : PyObject *child = conv_content_model(&model->children[i],
528 : conv_string);
529 0 : if (child == NULL) {
530 0 : Py_XDECREF(children);
531 0 : return NULL;
532 : }
533 0 : PyTuple_SET_ITEM(children, i, child);
534 : }
535 5 : result = Py_BuildValue("(iiO&N)",
536 5 : model->type, model->quant,
537 : conv_string,model->name, children);
538 : }
539 5 : return result;
540 : }
541 :
542 : static void
543 5 : my_ElementDeclHandler(void *userData,
544 : const XML_Char *name,
545 : XML_Content *model)
546 : {
547 5 : xmlparseobject *self = (xmlparseobject *)userData;
548 5 : PyObject *args = NULL;
549 :
550 5 : if (have_handler(self, ElementDecl)) {
551 5 : PyObject *rv = NULL;
552 : PyObject *modelobj, *nameobj;
553 :
554 5 : if (PyErr_Occurred())
555 0 : return;
556 :
557 5 : if (flush_character_buffer(self) < 0)
558 0 : goto finally;
559 5 : modelobj = conv_content_model(model, (conv_string_to_unicode));
560 5 : if (modelobj == NULL) {
561 0 : flag_error(self);
562 0 : goto finally;
563 : }
564 5 : nameobj = string_intern(self, name);
565 5 : if (nameobj == NULL) {
566 0 : Py_DECREF(modelobj);
567 0 : flag_error(self);
568 0 : goto finally;
569 : }
570 5 : args = Py_BuildValue("NN", nameobj, modelobj);
571 5 : if (args == NULL) {
572 0 : flag_error(self);
573 0 : goto finally;
574 : }
575 5 : self->in_callback = 1;
576 5 : rv = call_with_frame("ElementDecl", __LINE__,
577 5 : self->handlers[ElementDecl], args, self);
578 5 : self->in_callback = 0;
579 5 : if (rv == NULL) {
580 0 : flag_error(self);
581 0 : goto finally;
582 : }
583 5 : Py_DECREF(rv);
584 : }
585 0 : finally:
586 5 : Py_XDECREF(args);
587 5 : XML_FreeContentModel(self->itself, model);
588 5 : return;
589 : }
590 :
/* my_AttlistDeclHandler: Python handler is called with
   (elname, attname, att_type, dflt, isrequired). */
591 15 : VOID_HANDLER(AttlistDecl,
592 : (void *userData,
593 : const XML_Char *elname,
594 : const XML_Char *attname,
595 : const XML_Char *att_type,
596 : const XML_Char *dflt,
597 : int isrequired),
598 : ("(NNO&O&i)",
599 : string_intern(self, elname), string_intern(self, attname),
600 : conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
601 : isrequired))
602 :
/* my_SkippedEntityHandler: called with (entityName, is_parameter_entity);
   only built against Expat 1.95.4 or newer. */
603 : #if XML_COMBINED_VERSION >= 19504
604 3 : VOID_HANDLER(SkippedEntity,
605 : (void *userData,
606 : const XML_Char *entityName,
607 : int is_parameter_entity),
608 : ("Ni",
609 : string_intern(self, entityName), is_parameter_entity))
610 : #endif
611 :
/* my_NotationDeclHandler: called with
   (notationName, base, systemId, publicId). */
612 6 : VOID_HANDLER(NotationDecl,
613 : (void *userData,
614 : const XML_Char *notationName,
615 : const XML_Char *base,
616 : const XML_Char *systemId,
617 : const XML_Char *publicId),
618 : ("(NNNN)",
619 : string_intern(self, notationName), string_intern(self, base),
620 : string_intern(self, systemId), string_intern(self, publicId)))
621 :
/* my_StartNamespaceDeclHandler: called with (prefix, uri). */
622 168 : VOID_HANDLER(StartNamespaceDecl,
623 : (void *userData,
624 : const XML_Char *prefix,
625 : const XML_Char *uri),
626 : ("(NN)",
627 : string_intern(self, prefix), string_intern(self, uri)))
628 :
/* my_EndNamespaceDeclHandler: called with (prefix). */
629 30 : VOID_HANDLER(EndNamespaceDecl,
630 : (void *userData,
631 : const XML_Char *prefix),
632 : ("(N)", string_intern(self, prefix)))
633 :
/* my_CommentHandler: called with (data). */
634 54 : VOID_HANDLER(Comment,
635 : (void *userData, const XML_Char *data),
636 : ("(O&)", conv_string_to_unicode ,data))
637 :
/* my_StartCdataSectionHandler / my_EndCdataSectionHandler: called
   without arguments. */
638 7 : VOID_HANDLER(StartCdataSection,
639 : (void *userData),
640 : ("()"))
641 :
642 7 : VOID_HANDLER(EndCdataSection,
643 : (void *userData),
644 : ("()"))
645 :
/* my_DefaultHandler: called with (data), converted using the explicit
   length `len`. */
646 153 : VOID_HANDLER(Default,
647 : (void *userData, const XML_Char *s, int len),
648 : ("(N)", (conv_string_len_to_unicode(s,len))))
649 :
/* my_DefaultHandlerExpandHandler: same arguments as the default handler;
   the #define below adds the shorter name my_DefaultHandlerExpand as an
   alias. */
650 1097 : VOID_HANDLER(DefaultHandlerExpand,
651 : (void *userData, const XML_Char *s, int len),
652 : ("(N)", (conv_string_len_to_unicode(s,len))))
653 : #define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler
654 :
/* my_NotStandaloneHandler: called without arguments; the integer returned
   by the Python handler is passed back to Expat. */
655 6 : INT_HANDLER(NotStandalone,
656 : (void *userData),
657 : ("()"))
658 :
/* my_ExternalEntityRefHandler: called with
   (context, base, systemId, publicId); the integer returned by the Python
   handler is passed back to Expat.  Unlike the other callbacks it receives
   the XML_Parser itself, so the parser object is fetched with
   XML_GetUserData(). */
659 11 : RC_HANDLER(int, ExternalEntityRef,
660 : (XML_Parser parser,
661 : const XML_Char *context,
662 : const XML_Char *base,
663 : const XML_Char *systemId,
664 : const XML_Char *publicId),
665 : int rc=0;,
666 : ("(O&NNN)",
667 : conv_string_to_unicode ,context, string_intern(self, base),
668 : string_intern(self, systemId), string_intern(self, publicId)),
669 : rc = PyLong_AsLong(rv);, rc,
670 : XML_GetUserData(parser))
671 :
672 : /* XXX UnknownEncodingHandler */
673 :
/* my_StartDoctypeDeclHandler: called with
   (doctypeName, sysid, pubid, has_internal_subset). */
674 18 : VOID_HANDLER(StartDoctypeDecl,
675 : (void *userData, const XML_Char *doctypeName,
676 : const XML_Char *sysid, const XML_Char *pubid,
677 : int has_internal_subset),
678 : ("(NNNi)", string_intern(self, doctypeName),
679 : string_intern(self, sysid), string_intern(self, pubid),
680 : has_internal_subset))
681 :
/* my_EndDoctypeDeclHandler: called without arguments. */
682 18 : VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
683 :
684 : /* ---------------------------------------------------------------- */
685 : /*[clinic input]
686 : class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
687 : [clinic start generated code]*/
688 : /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
689 :
690 :
691 : static PyObject *
692 2353 : get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv)
693 : {
694 2353 : if (PyErr_Occurred()) {
695 36 : return NULL;
696 : }
697 2317 : if (rv == 0) {
698 32 : return set_error(state, self, XML_GetErrorCode(self->itself));
699 : }
700 2285 : if (flush_character_buffer(self) < 0) {
701 0 : return NULL;
702 : }
703 2285 : return PyLong_FromLong(rv);
704 : }
705 :
/* Largest number of bytes that Parse() passes to a single XML_Parse()
   call (1 MiB); longer input is fed to Expat in chunks of this size. */
706 : #define MAX_CHUNK_SIZE (1 << 20)
707 :
708 : /*[clinic input]
709 : pyexpat.xmlparser.Parse
710 :
711 : cls: defining_class
712 : data: object
713 : isfinal: bool(accept={int}) = False
714 : /
715 :
716 : Parse XML data.
717 :
718 : `isfinal' should be true at end of input.
719 : [clinic start generated code]*/
720 :
721 : static PyObject *
722 2211 : pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls,
723 : PyObject *data, int isfinal)
724 : /*[clinic end generated code: output=8faffe07fe1f862a input=fc97f833558ca715]*/
725 : {
726 : const char *s;
727 : Py_ssize_t slen;
728 : Py_buffer view;
729 : int rc;
730 2211 : pyexpat_state *state = PyType_GetModuleState(cls);
731 :
732 2211 : if (PyUnicode_Check(data)) {
733 904 : view.buf = NULL;
734 904 : s = PyUnicode_AsUTF8AndSize(data, &slen);
735 904 : if (s == NULL)
736 0 : return NULL;
737 : /* Explicitly set UTF-8 encoding. Return code ignored. */
738 904 : (void)XML_SetEncoding(self->itself, "utf-8");
739 : }
740 : else {
741 1307 : if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
742 0 : return NULL;
743 1307 : s = view.buf;
744 1307 : slen = view.len;
745 : }
746 :
747 : static_assert(MAX_CHUNK_SIZE <= INT_MAX,
748 : "MAX_CHUNK_SIZE is larger than INT_MAX");
749 2211 : while (slen > MAX_CHUNK_SIZE) {
750 0 : rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
751 0 : if (!rc)
752 0 : goto done;
753 0 : s += MAX_CHUNK_SIZE;
754 0 : slen -= MAX_CHUNK_SIZE;
755 : }
756 :
757 2211 : assert(slen <= INT_MAX);
758 2211 : rc = XML_Parse(self->itself, s, (int)slen, isfinal);
759 :
760 2211 : done:
761 2211 : if (view.buf != NULL) {
762 1307 : PyBuffer_Release(&view);
763 : }
764 2211 : return get_parse_result(state, self, rc);
765 : }
766 :
767 : /* File reading copied from cPickle */
768 :
769 : #define BUF_SIZE 2048
770 :
771 : static int
772 308 : readinst(char *buf, int buf_size, PyObject *meth)
773 : {
774 : PyObject *str;
775 : Py_ssize_t len;
776 : const char *ptr;
777 :
778 308 : str = PyObject_CallFunction(meth, "n", buf_size);
779 308 : if (str == NULL)
780 0 : goto error;
781 :
782 308 : if (PyBytes_Check(str))
783 308 : ptr = PyBytes_AS_STRING(str);
784 0 : else if (PyByteArray_Check(str))
785 0 : ptr = PyByteArray_AS_STRING(str);
786 : else {
787 0 : PyErr_Format(PyExc_TypeError,
788 : "read() did not return a bytes object (type=%.400s)",
789 0 : Py_TYPE(str)->tp_name);
790 0 : goto error;
791 : }
792 308 : len = Py_SIZE(str);
793 308 : if (len > buf_size) {
794 0 : PyErr_Format(PyExc_ValueError,
795 : "read() returned too much data: "
796 : "%i bytes requested, %zd returned",
797 : buf_size, len);
798 0 : goto error;
799 : }
800 308 : memcpy(buf, ptr, len);
801 308 : Py_DECREF(str);
802 : /* len <= buf_size <= INT_MAX */
803 308 : return (int)len;
804 :
805 0 : error:
806 0 : Py_XDECREF(str);
807 0 : return -1;
808 : }
809 :
810 : /*[clinic input]
811 : pyexpat.xmlparser.ParseFile
812 :
813 : cls: defining_class
814 : file: object
815 : /
816 :
817 : Parse XML data from file-like object.
818 : [clinic start generated code]*/
819 :
820 : static PyObject *
821 156 : pyexpat_xmlparser_ParseFile_impl(xmlparseobject *self, PyTypeObject *cls,
822 : PyObject *file)
823 : /*[clinic end generated code: output=34780a094c8ca3ae input=ba4bc9c541684793]*/
824 : {
825 156 : int rv = 1;
826 156 : PyObject *readmethod = NULL;
827 :
828 156 : pyexpat_state *state = PyType_GetModuleState(cls);
829 :
830 156 : if (_PyObject_LookupAttr(file, state->str_read, &readmethod) < 0) {
831 0 : return NULL;
832 : }
833 156 : if (readmethod == NULL) {
834 0 : PyErr_SetString(PyExc_TypeError,
835 : "argument must have 'read' attribute");
836 0 : return NULL;
837 : }
838 153 : for (;;) {
839 : int bytes_read;
840 309 : void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
841 309 : if (buf == NULL) {
842 1 : Py_XDECREF(readmethod);
843 1 : return get_parse_result(state, self, 0);
844 : }
845 :
846 308 : bytes_read = readinst(buf, BUF_SIZE, readmethod);
847 308 : if (bytes_read < 0) {
848 0 : Py_DECREF(readmethod);
849 0 : return NULL;
850 : }
851 308 : rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
852 308 : if (PyErr_Occurred()) {
853 14 : Py_XDECREF(readmethod);
854 14 : return NULL;
855 : }
856 :
857 294 : if (!rv || bytes_read == 0)
858 : break;
859 : }
860 141 : Py_XDECREF(readmethod);
861 141 : return get_parse_result(state, self, rv);
862 : }
863 :
864 : /*[clinic input]
865 : pyexpat.xmlparser.SetBase
866 :
867 : base: str
868 : /
869 :
870 : Set the base URL for the parser.
871 : [clinic start generated code]*/
872 :
873 : static PyObject *
874 51 : pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
875 : /*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
876 : {
877 51 : if (!XML_SetBase(self->itself, base)) {
878 0 : return PyErr_NoMemory();
879 : }
880 51 : Py_RETURN_NONE;
881 : }
882 :
883 : /*[clinic input]
884 : pyexpat.xmlparser.GetBase
885 :
886 : Return base URL string for the parser.
887 : [clinic start generated code]*/
888 :
889 : static PyObject *
890 0 : pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
891 : /*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
892 : {
893 0 : return Py_BuildValue("z", XML_GetBase(self->itself));
894 : }
895 :
896 : /*[clinic input]
897 : pyexpat.xmlparser.GetInputContext
898 :
899 : Return the untranslated text of the input that caused the current event.
900 :
901 : If the event was generated by a large amount of text (such as a start tag
902 : for an element with many attributes), not all of the text may be available.
903 : [clinic start generated code]*/
904 :
905 : static PyObject *
906 0 : pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
907 : /*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
908 : {
909 0 : if (self->in_callback) {
910 : int offset, size;
911 : const char *buffer
912 0 : = XML_GetInputContext(self->itself, &offset, &size);
913 :
914 0 : if (buffer != NULL)
915 0 : return PyBytes_FromStringAndSize(buffer + offset,
916 0 : size - offset);
917 : else
918 0 : Py_RETURN_NONE;
919 : }
920 : else
921 0 : Py_RETURN_NONE;
922 : }
923 :
924 : /*[clinic input]
925 : pyexpat.xmlparser.ExternalEntityParserCreate
926 :
927 : cls: defining_class
928 : context: str(accept={str, NoneType})
929 : encoding: str = NULL
930 : /
931 :
932 : Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
933 : [clinic start generated code]*/
934 :
935 : static PyObject *
936 2 : pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
937 : PyTypeObject *cls,
938 : const char *context,
939 : const char *encoding)
940 : /*[clinic end generated code: output=01d4472b49cb3f92 input=ec70c6b9e6e9619a]*/
941 : {
942 : xmlparseobject *new_parser;
943 : int i;
944 :
945 2 : pyexpat_state *state = PyType_GetModuleState(cls);
946 :
947 2 : new_parser = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
948 2 : if (new_parser == NULL) {
949 0 : return NULL;
950 : }
951 :
952 2 : new_parser->buffer_size = self->buffer_size;
953 2 : new_parser->buffer_used = 0;
954 2 : new_parser->buffer = NULL;
955 2 : new_parser->ordered_attributes = self->ordered_attributes;
956 2 : new_parser->specified_attributes = self->specified_attributes;
957 2 : new_parser->in_callback = 0;
958 2 : new_parser->ns_prefixes = self->ns_prefixes;
959 2 : new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
960 : encoding);
961 2 : new_parser->handlers = 0;
962 2 : new_parser->intern = self->intern;
963 2 : Py_XINCREF(new_parser->intern);
964 :
965 2 : if (self->buffer != NULL) {
966 1 : new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
967 1 : if (new_parser->buffer == NULL) {
968 0 : Py_DECREF(new_parser);
969 0 : return PyErr_NoMemory();
970 : }
971 : }
972 2 : if (!new_parser->itself) {
973 0 : Py_DECREF(new_parser);
974 0 : return PyErr_NoMemory();
975 : }
976 :
977 2 : XML_SetUserData(new_parser->itself, (void *)new_parser);
978 :
979 : /* allocate and clear handlers first */
980 46 : for (i = 0; handler_info[i].name != NULL; i++)
981 : /* do nothing */;
982 :
983 2 : new_parser->handlers = PyMem_New(PyObject *, i);
984 2 : if (!new_parser->handlers) {
985 0 : Py_DECREF(new_parser);
986 0 : return PyErr_NoMemory();
987 : }
988 2 : clear_handlers(new_parser, 1);
989 :
990 : /* then copy handlers from self */
991 46 : for (i = 0; handler_info[i].name != NULL; i++) {
992 44 : PyObject *handler = self->handlers[i];
993 44 : if (handler != NULL) {
994 11 : Py_INCREF(handler);
995 11 : new_parser->handlers[i] = handler;
996 11 : handler_info[i].setter(new_parser->itself,
997 : handler_info[i].handler);
998 : }
999 : }
1000 :
1001 2 : PyObject_GC_Track(new_parser);
1002 2 : return (PyObject *)new_parser;
1003 : }
1004 :
1005 : /*[clinic input]
1006 : pyexpat.xmlparser.SetParamEntityParsing
1007 :
1008 : flag: int
1009 : /
1010 :
1011 : Controls parsing of parameter entities (including the external DTD subset).
1012 :
1013 : Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
1014 : XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
1015 : XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
1016 : was successful.
1017 : [clinic start generated code]*/
1018 :
1019 : static PyObject *
1020 246 : pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
1021 : /*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
1022 : {
1023 246 : flag = XML_SetParamEntityParsing(self->itself, flag);
1024 246 : return PyLong_FromLong(flag);
1025 : }
1026 :
1027 :
1028 : #if XML_COMBINED_VERSION >= 19505
1029 : /*[clinic input]
1030 : pyexpat.xmlparser.UseForeignDTD
1031 :
1032 : cls: defining_class
1033 : flag: bool = True
1034 : /
1035 :
1036 : Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
1037 :
1038 : This readily allows the use of a 'default' document type controlled by the
1039 : application, while still getting the advantage of providing document type
1040 : information to the parser. 'flag' defaults to True if not provided.
1041 : [clinic start generated code]*/
1042 :
1043 : static PyObject *
1044 3 : pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls,
1045 : int flag)
1046 : /*[clinic end generated code: output=d7d98252bd25a20f input=23440ecb0573fb29]*/
1047 : {
1048 3 : pyexpat_state *state = PyType_GetModuleState(cls);
1049 : enum XML_Error rc;
1050 :
1051 3 : rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
1052 3 : if (rc != XML_ERROR_NONE) {
1053 0 : return set_error(state, self, rc);
1054 : }
1055 3 : Py_RETURN_NONE;
1056 : }
1057 : #endif
1058 :
1059 : static struct PyMethodDef xmlparse_methods[] = {
1060 : PYEXPAT_XMLPARSER_PARSE_METHODDEF
1061 : PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
1062 : PYEXPAT_XMLPARSER_SETBASE_METHODDEF
1063 : PYEXPAT_XMLPARSER_GETBASE_METHODDEF
1064 : PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
1065 : PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
1066 : PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
1067 : #if XML_COMBINED_VERSION >= 19505
1068 : PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
1069 : #endif
1070 : {NULL, NULL} /* sentinel */
1071 : };
1072 :
1073 : /* ---------- */
1074 :
1075 :
1076 :
1077 : /* pyexpat international encoding support.
1078 : Make it as simple as possible.
1079 : */
1080 :
1081 : static int
1082 188 : PyUnknownEncodingHandler(void *encodingHandlerData,
1083 : const XML_Char *name,
1084 : XML_Encoding *info)
1085 : {
1086 : static unsigned char template_buffer[256] = {0};
1087 : PyObject* u;
1088 : int i;
1089 : const void *data;
1090 : int kind;
1091 :
1092 188 : if (PyErr_Occurred())
1093 0 : return XML_STATUS_ERROR;
1094 :
1095 188 : if (template_buffer[1] == 0) {
1096 1028 : for (i = 0; i < 256; i++)
1097 1024 : template_buffer[i] = i;
1098 : }
1099 :
1100 188 : u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
1101 188 : if (u == NULL || PyUnicode_READY(u)) {
1102 4 : Py_XDECREF(u);
1103 4 : return XML_STATUS_ERROR;
1104 : }
1105 :
1106 184 : if (PyUnicode_GET_LENGTH(u) != 256) {
1107 36 : Py_DECREF(u);
1108 36 : PyErr_SetString(PyExc_ValueError,
1109 : "multi-byte encodings are not supported");
1110 36 : return XML_STATUS_ERROR;
1111 : }
1112 :
1113 148 : kind = PyUnicode_KIND(u);
1114 148 : data = PyUnicode_DATA(u);
1115 38036 : for (i = 0; i < 256; i++) {
1116 37888 : Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1117 37888 : if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1118 34638 : info->map[i] = ch;
1119 : else
1120 3250 : info->map[i] = -1;
1121 : }
1122 :
1123 148 : info->data = NULL;
1124 148 : info->convert = NULL;
1125 148 : info->release = NULL;
1126 148 : Py_DECREF(u);
1127 :
1128 148 : return XML_STATUS_OK;
1129 : }
1130 :
1131 :
1132 : static PyObject *
1133 1386 : newxmlparseobject(pyexpat_state *state, const char *encoding,
1134 : const char *namespace_separator, PyObject *intern)
1135 : {
1136 : int i;
1137 : xmlparseobject *self;
1138 :
1139 1386 : self = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
1140 1386 : if (self == NULL)
1141 0 : return NULL;
1142 :
1143 1386 : self->buffer = NULL;
1144 1386 : self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1145 1386 : self->buffer_used = 0;
1146 1386 : self->ordered_attributes = 0;
1147 1386 : self->specified_attributes = 0;
1148 1386 : self->in_callback = 0;
1149 1386 : self->ns_prefixes = 0;
1150 1386 : self->handlers = NULL;
1151 1386 : self->intern = intern;
1152 1386 : Py_XINCREF(self->intern);
1153 :
1154 : /* namespace_separator is either NULL or contains one char + \0 */
1155 1386 : self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1156 : namespace_separator);
1157 1386 : if (self->itself == NULL) {
1158 0 : PyErr_SetString(PyExc_RuntimeError,
1159 : "XML_ParserCreate failed");
1160 0 : Py_DECREF(self);
1161 0 : return NULL;
1162 : }
1163 : #if XML_COMBINED_VERSION >= 20100
1164 : /* This feature was added upstream in libexpat 2.1.0. */
1165 1386 : XML_SetHashSalt(self->itself,
1166 1386 : (unsigned long)_Py_HashSecret.expat.hashsalt);
1167 : #endif
1168 1386 : XML_SetUserData(self->itself, (void *)self);
1169 1386 : XML_SetUnknownEncodingHandler(self->itself,
1170 : (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
1171 :
1172 31878 : for (i = 0; handler_info[i].name != NULL; i++)
1173 : /* do nothing */;
1174 :
1175 1386 : self->handlers = PyMem_New(PyObject *, i);
1176 1386 : if (!self->handlers) {
1177 0 : Py_DECREF(self);
1178 0 : return PyErr_NoMemory();
1179 : }
1180 1386 : clear_handlers(self, 1);
1181 :
1182 1386 : PyObject_GC_Track(self);
1183 1386 : return (PyObject*)self;
1184 : }
1185 :
1186 : static int
1187 1864 : xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1188 : {
1189 42872 : for (int i = 0; handler_info[i].name != NULL; i++) {
1190 41008 : Py_VISIT(op->handlers[i]);
1191 : }
1192 1864 : Py_VISIT(Py_TYPE(op));
1193 1864 : return 0;
1194 : }
1195 :
1196 : static int
1197 1389 : xmlparse_clear(xmlparseobject *op)
1198 : {
1199 1389 : clear_handlers(op, 0);
1200 1389 : Py_CLEAR(op->intern);
1201 1389 : return 0;
1202 : }
1203 :
1204 : static void
1205 1388 : xmlparse_dealloc(xmlparseobject *self)
1206 : {
1207 1388 : PyObject_GC_UnTrack(self);
1208 1388 : (void)xmlparse_clear(self);
1209 1388 : if (self->itself != NULL)
1210 1388 : XML_ParserFree(self->itself);
1211 1388 : self->itself = NULL;
1212 :
1213 1388 : if (self->handlers != NULL) {
1214 1388 : PyMem_Free(self->handlers);
1215 1388 : self->handlers = NULL;
1216 : }
1217 1388 : if (self->buffer != NULL) {
1218 611 : PyMem_Free(self->buffer);
1219 611 : self->buffer = NULL;
1220 : }
1221 1388 : PyTypeObject *tp = Py_TYPE(self);
1222 1388 : PyObject_GC_Del(self);
1223 1388 : Py_DECREF(tp);
1224 1388 : }
1225 :
1226 :
1227 : static PyObject *
1228 0 : xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
1229 : {
1230 0 : assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1231 0 : int handlernum = (int)(hi - handler_info);
1232 0 : PyObject *result = self->handlers[handlernum];
1233 0 : if (result == NULL)
1234 0 : result = Py_None;
1235 0 : Py_INCREF(result);
1236 0 : return result;
1237 : }
1238 :
1239 : static int
1240 8844 : xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
1241 : {
1242 8844 : assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1243 8844 : int handlernum = (int)(hi - handler_info);
1244 8844 : if (v == NULL) {
1245 0 : PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1246 0 : return -1;
1247 : }
1248 8844 : if (handlernum == CharacterData) {
1249 : /* If we're changing the character data handler, flush all
1250 : * cached data with the old handler. Not sure there's a
1251 : * "right" thing to do, though, but this probably won't
1252 : * happen.
1253 : */
1254 1349 : if (flush_character_buffer(self) < 0)
1255 0 : return -1;
1256 : }
1257 :
1258 8844 : xmlhandler c_handler = NULL;
1259 8844 : if (v == Py_None) {
1260 : /* If this is the character data handler, and a character
1261 : data handler is already active, we need to be more
1262 : careful. What we can safely do is replace the existing
1263 : character data handler callback function with a no-op
1264 : function that will refuse to call Python. The downside
1265 : is that this doesn't completely remove the character
1266 : data handler from the C layer if there's any callback
1267 : active, so Expat does a little more work than it
1268 : otherwise would, but that's really an odd case. A more
1269 : elaborate system of handlers and state could remove the
1270 : C handler more effectively. */
1271 15 : if (handlernum == CharacterData && self->in_callback)
1272 0 : c_handler = noop_character_data_handler;
1273 15 : v = NULL;
1274 : }
1275 8829 : else if (v != NULL) {
1276 8829 : Py_INCREF(v);
1277 8829 : c_handler = handler_info[handlernum].handler;
1278 : }
1279 8844 : Py_XSETREF(self->handlers[handlernum], v);
1280 8844 : handler_info[handlernum].setter(self->itself, c_handler);
1281 8844 : return 0;
1282 : }
1283 :
1284 : #define INT_GETTER(name) \
1285 : static PyObject * \
1286 : xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
1287 : { \
1288 : return PyLong_FromLong((long) XML_Get##name(self->itself)); \
1289 : }
1290 0 : INT_GETTER(ErrorCode)
1291 14 : INT_GETTER(ErrorLineNumber)
1292 14 : INT_GETTER(ErrorColumnNumber)
1293 0 : INT_GETTER(ErrorByteIndex)
1294 17 : INT_GETTER(CurrentLineNumber)
1295 6 : INT_GETTER(CurrentColumnNumber)
1296 6 : INT_GETTER(CurrentByteIndex)
1297 :
1298 : #undef INT_GETTER
1299 :
1300 : static PyObject *
1301 9 : xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
1302 : {
1303 9 : return PyBool_FromLong(self->buffer != NULL);
1304 : }
1305 :
1306 : static int
1307 618 : xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
1308 : {
1309 618 : if (v == NULL) {
1310 0 : PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1311 0 : return -1;
1312 : }
1313 618 : int b = PyObject_IsTrue(v);
1314 618 : if (b < 0)
1315 0 : return -1;
1316 618 : if (b) {
1317 614 : if (self->buffer == NULL) {
1318 610 : self->buffer = PyMem_Malloc(self->buffer_size);
1319 610 : if (self->buffer == NULL) {
1320 0 : PyErr_NoMemory();
1321 0 : return -1;
1322 : }
1323 610 : self->buffer_used = 0;
1324 : }
1325 : }
1326 4 : else if (self->buffer != NULL) {
1327 3 : if (flush_character_buffer(self) < 0)
1328 0 : return -1;
1329 3 : PyMem_Free(self->buffer);
1330 3 : self->buffer = NULL;
1331 : }
1332 618 : return 0;
1333 : }
1334 :
1335 : static PyObject *
1336 11 : xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
1337 : {
1338 11 : return PyLong_FromLong((long) self->buffer_size);
1339 : }
1340 :
1341 : static int
1342 13 : xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
1343 : {
1344 13 : if (v == NULL) {
1345 0 : PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1346 0 : return -1;
1347 : }
1348 : long new_buffer_size;
1349 13 : if (!PyLong_Check(v)) {
1350 1 : PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1351 1 : return -1;
1352 : }
1353 :
1354 12 : new_buffer_size = PyLong_AsLong(v);
1355 12 : if (new_buffer_size <= 0) {
1356 3 : if (!PyErr_Occurred())
1357 2 : PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1358 3 : return -1;
1359 : }
1360 :
1361 : /* trivial case -- no change */
1362 9 : if (new_buffer_size == self->buffer_size) {
1363 1 : return 0;
1364 : }
1365 :
1366 : /* check maximum */
1367 8 : if (new_buffer_size > INT_MAX) {
1368 : char errmsg[100];
1369 0 : sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1370 0 : PyErr_SetString(PyExc_ValueError, errmsg);
1371 0 : return -1;
1372 : }
1373 :
1374 8 : if (self->buffer != NULL) {
1375 : /* there is already a buffer */
1376 5 : if (self->buffer_used != 0) {
1377 0 : if (flush_character_buffer(self) < 0) {
1378 0 : return -1;
1379 : }
1380 : }
1381 : /* free existing buffer */
1382 5 : PyMem_Free(self->buffer);
1383 : }
1384 8 : self->buffer = PyMem_Malloc(new_buffer_size);
1385 8 : if (self->buffer == NULL) {
1386 0 : PyErr_NoMemory();
1387 0 : return -1;
1388 : }
1389 8 : self->buffer_size = new_buffer_size;
1390 8 : return 0;
1391 : }
1392 :
1393 : static PyObject *
1394 0 : xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
1395 : {
1396 0 : return PyLong_FromLong((long) self->buffer_used);
1397 : }
1398 :
1399 : static PyObject *
1400 5 : xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
1401 : {
1402 5 : return PyBool_FromLong(self->ns_prefixes);
1403 : }
1404 :
1405 : static int
1406 115 : xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
1407 : {
1408 115 : if (v == NULL) {
1409 0 : PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1410 0 : return -1;
1411 : }
1412 115 : int b = PyObject_IsTrue(v);
1413 115 : if (b < 0)
1414 0 : return -1;
1415 115 : self->ns_prefixes = b;
1416 115 : XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1417 115 : return 0;
1418 : }
1419 :
1420 : static PyObject *
1421 5 : xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
1422 : {
1423 5 : return PyBool_FromLong(self->ordered_attributes);
1424 : }
1425 :
1426 : static int
1427 611 : xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1428 : {
1429 611 : if (v == NULL) {
1430 0 : PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1431 0 : return -1;
1432 : }
1433 611 : int b = PyObject_IsTrue(v);
1434 611 : if (b < 0)
1435 0 : return -1;
1436 611 : self->ordered_attributes = b;
1437 611 : return 0;
1438 : }
1439 :
1440 : static PyObject *
1441 5 : xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
1442 : {
1443 5 : return PyBool_FromLong((long) self->specified_attributes);
1444 : }
1445 :
1446 : static int
1447 103 : xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1448 : {
1449 103 : if (v == NULL) {
1450 0 : PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1451 0 : return -1;
1452 : }
1453 103 : int b = PyObject_IsTrue(v);
1454 103 : if (b < 0)
1455 0 : return -1;
1456 103 : self->specified_attributes = b;
1457 103 : return 0;
1458 : }
1459 :
1460 : static PyMemberDef xmlparse_members[] = {
1461 : {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
1462 : {NULL}
1463 : };
1464 :
1465 : #define XMLPARSE_GETTER_DEF(name) \
1466 : {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
1467 : #define XMLPARSE_GETTER_SETTER_DEF(name) \
1468 : {#name, (getter)xmlparse_##name##_getter, \
1469 : (setter)xmlparse_##name##_setter, NULL},
1470 :
1471 : static PyGetSetDef xmlparse_getsetlist[] = {
1472 : XMLPARSE_GETTER_DEF(ErrorCode)
1473 : XMLPARSE_GETTER_DEF(ErrorLineNumber)
1474 : XMLPARSE_GETTER_DEF(ErrorColumnNumber)
1475 : XMLPARSE_GETTER_DEF(ErrorByteIndex)
1476 : XMLPARSE_GETTER_DEF(CurrentLineNumber)
1477 : XMLPARSE_GETTER_DEF(CurrentColumnNumber)
1478 : XMLPARSE_GETTER_DEF(CurrentByteIndex)
1479 : XMLPARSE_GETTER_SETTER_DEF(buffer_size)
1480 : XMLPARSE_GETTER_SETTER_DEF(buffer_text)
1481 : XMLPARSE_GETTER_DEF(buffer_used)
1482 : XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
1483 : XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
1484 : XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
1485 : {NULL},
1486 : };
1487 :
1488 : #undef XMLPARSE_GETTER_DEF
1489 : #undef XMLPARSE_GETTER_SETTER_DEF
1490 :
1491 : PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
1492 :
1493 : static PyType_Slot _xml_parse_type_spec_slots[] = {
1494 : {Py_tp_dealloc, xmlparse_dealloc},
1495 : {Py_tp_doc, (void *)Xmlparsetype__doc__},
1496 : {Py_tp_traverse, xmlparse_traverse},
1497 : {Py_tp_clear, xmlparse_clear},
1498 : {Py_tp_methods, xmlparse_methods},
1499 : {Py_tp_members, xmlparse_members},
1500 : {Py_tp_getset, xmlparse_getsetlist},
1501 : {0, 0}
1502 : };
1503 :
1504 : static PyType_Spec _xml_parse_type_spec = {
1505 : .name = "pyexpat.xmlparser",
1506 : .basicsize = sizeof(xmlparseobject),
1507 : .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
1508 : Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
1509 : .slots = _xml_parse_type_spec_slots,
1510 : };
1511 :
1512 : /* End of code for xmlparser objects */
1513 : /* -------------------------------------------------------- */
1514 :
1515 : /*[clinic input]
1516 : pyexpat.ParserCreate
1517 :
1518 : encoding: str(accept={str, NoneType}) = None
1519 : namespace_separator: str(accept={str, NoneType}) = None
1520 : intern: object = NULL
1521 :
1522 : Return a new XML parser object.
1523 : [clinic start generated code]*/
1524 :
1525 : static PyObject *
1526 1387 : pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
1527 : const char *namespace_separator, PyObject *intern)
1528 : /*[clinic end generated code: output=295c0cf01ab1146c input=e8da8e8d7122cb5d]*/
1529 : {
1530 : PyObject *result;
1531 1387 : int intern_decref = 0;
1532 :
1533 1387 : if (namespace_separator != NULL
1534 622 : && strlen(namespace_separator) > 1) {
1535 1 : PyErr_SetString(PyExc_ValueError,
1536 : "namespace_separator must be at most one"
1537 : " character, omitted, or None");
1538 1 : return NULL;
1539 : }
1540 : /* Explicitly passing None means no interning is desired.
1541 : Not passing anything means that a new dictionary is used. */
1542 1386 : if (intern == Py_None)
1543 243 : intern = NULL;
1544 1143 : else if (intern == NULL) {
1545 1143 : intern = PyDict_New();
1546 1143 : if (!intern)
1547 0 : return NULL;
1548 1143 : intern_decref = 1;
1549 : }
1550 0 : else if (!PyDict_Check(intern)) {
1551 0 : PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1552 0 : return NULL;
1553 : }
1554 :
1555 1386 : pyexpat_state *state = pyexpat_get_state(module);
1556 1386 : result = newxmlparseobject(state, encoding, namespace_separator, intern);
1557 1386 : if (intern_decref) {
1558 1143 : Py_DECREF(intern);
1559 : }
1560 1386 : return result;
1561 : }
1562 :
1563 : /*[clinic input]
1564 : pyexpat.ErrorString
1565 :
1566 : code: long
1567 : /
1568 :
1569 : Returns string error for given number.
1570 : [clinic start generated code]*/
1571 :
1572 : static PyObject *
1573 8 : pyexpat_ErrorString_impl(PyObject *module, long code)
1574 : /*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
1575 : {
1576 8 : return Py_BuildValue("z", XML_ErrorString((int)code));
1577 : }
1578 :
1579 : /* List of methods defined in the module */
1580 :
1581 : static struct PyMethodDef pyexpat_methods[] = {
1582 : PYEXPAT_PARSERCREATE_METHODDEF
1583 : PYEXPAT_ERRORSTRING_METHODDEF
1584 : {NULL, NULL} /* sentinel */
1585 : };
1586 :
1587 : /* Module docstring */
1588 :
1589 : PyDoc_STRVAR(pyexpat_module_documentation,
1590 : "Python wrapper for Expat parser.");
1591 :
1592 : /* Initialization function for the module */
1593 :
1594 : #ifndef MODULE_NAME
1595 : #define MODULE_NAME "pyexpat"
1596 : #endif
1597 :
1598 38 : static int init_handler_descrs(pyexpat_state *state)
1599 : {
1600 : int i;
1601 38 : assert(!PyType_HasFeature(state->xml_parse_type, Py_TPFLAGS_VALID_VERSION_TAG));
1602 874 : for (i = 0; handler_info[i].name != NULL; i++) {
1603 836 : struct HandlerInfo *hi = &handler_info[i];
1604 836 : hi->getset.name = hi->name;
1605 836 : hi->getset.get = (getter)xmlparse_handler_getter;
1606 836 : hi->getset.set = (setter)xmlparse_handler_setter;
1607 836 : hi->getset.closure = &handler_info[i];
1608 :
1609 836 : PyObject *descr = PyDescr_NewGetSet(state->xml_parse_type, &hi->getset);
1610 836 : if (descr == NULL)
1611 0 : return -1;
1612 :
1613 836 : if (PyDict_SetDefault(state->xml_parse_type->tp_dict, PyDescr_NAME(descr), descr) == NULL) {
1614 0 : Py_DECREF(descr);
1615 0 : return -1;
1616 : }
1617 836 : Py_DECREF(descr);
1618 : }
1619 38 : return 0;
1620 : }
1621 :
1622 : static PyObject *
1623 76 : add_submodule(PyObject *mod, const char *fullname)
1624 : {
1625 76 : const char *name = strrchr(fullname, '.') + 1;
1626 :
1627 76 : PyObject *submodule = PyModule_New(fullname);
1628 76 : if (submodule == NULL) {
1629 0 : return NULL;
1630 : }
1631 :
1632 76 : PyObject *mod_name = PyUnicode_FromString(fullname);
1633 76 : if (mod_name == NULL) {
1634 0 : Py_DECREF(submodule);
1635 0 : return NULL;
1636 : }
1637 :
1638 76 : if (_PyImport_SetModule(mod_name, submodule) < 0) {
1639 0 : Py_DECREF(submodule);
1640 0 : Py_DECREF(mod_name);
1641 0 : return NULL;
1642 : }
1643 76 : Py_DECREF(mod_name);
1644 :
1645 : /* gives away the reference to the submodule */
1646 76 : if (PyModule_AddObject(mod, name, submodule) < 0) {
1647 0 : Py_DECREF(submodule);
1648 0 : return NULL;
1649 : }
1650 :
1651 76 : return submodule;
1652 : }
1653 :
/* One Expat error: the name of its constant and a copy of its message. */
struct ErrorInfo {
    /* Error constant name, e.g. "XML_ERROR_NO_MEMORY" */
    const char * name;
    /* Error description as returned by XML_ErrorString(<int>) */
    const char * description;
};

/* Table indexed by numeric error code.  Index 0 is a placeholder so that
   the remaining positions line up with their codes. */
static
struct ErrorInfo error_info_of[] = {
    /* XML_ERROR_NONE (value 0) is not exposed */
    {NULL, NULL},

    {"XML_ERROR_NO_MEMORY",
     "out of memory"},
    {"XML_ERROR_SYNTAX",
     "syntax error"},
    {"XML_ERROR_NO_ELEMENTS",
     "no element found"},
    {"XML_ERROR_INVALID_TOKEN",
     "not well-formed (invalid token)"},
    {"XML_ERROR_UNCLOSED_TOKEN",
     "unclosed token"},
    {"XML_ERROR_PARTIAL_CHAR",
     "partial character"},
    {"XML_ERROR_TAG_MISMATCH",
     "mismatched tag"},
    {"XML_ERROR_DUPLICATE_ATTRIBUTE",
     "duplicate attribute"},
    {"XML_ERROR_JUNK_AFTER_DOC_ELEMENT",
     "junk after document element"},
    {"XML_ERROR_PARAM_ENTITY_REF",
     "illegal parameter entity reference"},
    {"XML_ERROR_UNDEFINED_ENTITY",
     "undefined entity"},
    {"XML_ERROR_RECURSIVE_ENTITY_REF",
     "recursive entity reference"},
    {"XML_ERROR_ASYNC_ENTITY",
     "asynchronous entity"},
    {"XML_ERROR_BAD_CHAR_REF",
     "reference to invalid character number"},
    {"XML_ERROR_BINARY_ENTITY_REF",
     "reference to binary entity"},
    {"XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF",
     "reference to external entity in attribute"},
    {"XML_ERROR_MISPLACED_XML_PI",
     "XML or text declaration not at start of entity"},
    {"XML_ERROR_UNKNOWN_ENCODING",
     "unknown encoding"},
    {"XML_ERROR_INCORRECT_ENCODING",
     "encoding specified in XML declaration is incorrect"},
    {"XML_ERROR_UNCLOSED_CDATA_SECTION",
     "unclosed CDATA section"},
    {"XML_ERROR_EXTERNAL_ENTITY_HANDLING",
     "error in processing external entity reference"},
    {"XML_ERROR_NOT_STANDALONE",
     "document is not standalone"},
    {"XML_ERROR_UNEXPECTED_STATE",
     "unexpected parser state - please send a bug report"},
    {"XML_ERROR_ENTITY_DECLARED_IN_PE",
     "entity declared in parameter entity"},
    {"XML_ERROR_FEATURE_REQUIRES_XML_DTD",
     "requested feature requires XML_DTD support in Expat"},
    {"XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING",
     "cannot change setting once parsing has begun"},

    /* Added in Expat 1.95.7. */
    {"XML_ERROR_UNBOUND_PREFIX",
     "unbound prefix"},

    /* Added in Expat 1.95.8. */
    {"XML_ERROR_UNDECLARING_PREFIX",
     "must not undeclare prefix"},
    {"XML_ERROR_INCOMPLETE_PE",
     "incomplete markup in parameter entity"},
    {"XML_ERROR_XML_DECL",
     "XML declaration not well-formed"},
    {"XML_ERROR_TEXT_DECL",
     "text declaration not well-formed"},
    {"XML_ERROR_PUBLICID",
     "illegal character(s) in public id"},
    {"XML_ERROR_SUSPENDED",
     "parser suspended"},
    {"XML_ERROR_NOT_SUSPENDED",
     "parser not suspended"},
    {"XML_ERROR_ABORTED",
     "parsing aborted"},
    {"XML_ERROR_FINISHED",
     "parsing finished"},
    {"XML_ERROR_SUSPEND_PE",
     "cannot suspend in external parameter entity"},

    /* Added in 2.0.0. */
    {"XML_ERROR_RESERVED_PREFIX_XML",
     "reserved prefix (xml) must not be undeclared or bound to another namespace name"},
    {"XML_ERROR_RESERVED_PREFIX_XMLNS",
     "reserved prefix (xmlns) must not be declared or undeclared"},
    {"XML_ERROR_RESERVED_NAMESPACE_URI",
     "prefix must not be bound to one of the reserved namespace names"},

    /* Added in 2.2.1. */
    {"XML_ERROR_INVALID_ARGUMENT",
     "invalid argument"},

    /* Added in 2.3.0. */
    {"XML_ERROR_NO_BUFFER",
     "a successful prior call to function XML_GetBuffer is required"},

    /* Added in 2.4.0. */
    {"XML_ERROR_AMPLIFICATION_LIMIT_BREACH",
     "limit on input amplification factor (from DTD and entities) breached"}
};
1719 :
1720 : static int
1721 1634 : add_error(PyObject *errors_module, PyObject *codes_dict,
1722 : PyObject *rev_codes_dict, size_t error_index)
1723 : {
1724 1634 : const char * const name = error_info_of[error_index].name;
1725 1634 : const int error_code = (int)error_index;
1726 :
1727 : /* NOTE: This keeps the source of truth regarding error
1728 : * messages with libexpat and (by definiton) in bulletproof sync
1729 : * with the other uses of the XML_ErrorString function
1730 : * elsewhere within this file. pyexpat's copy of the messages
1731 : * only acts as a fallback in case of outdated runtime libexpat,
1732 : * where it returns NULL. */
1733 1634 : const char *error_string = XML_ErrorString(error_code);
1734 1634 : if (error_string == NULL) {
1735 0 : error_string = error_info_of[error_index].description;
1736 : }
1737 :
1738 1634 : if (PyModule_AddStringConstant(errors_module, name, error_string) < 0) {
1739 0 : return -1;
1740 : }
1741 :
1742 1634 : PyObject *num = PyLong_FromLong(error_code);
1743 1634 : if (num == NULL) {
1744 0 : return -1;
1745 : }
1746 :
1747 1634 : if (PyDict_SetItemString(codes_dict, error_string, num) < 0) {
1748 0 : Py_DECREF(num);
1749 0 : return -1;
1750 : }
1751 :
1752 1634 : PyObject *str = PyUnicode_FromString(error_string);
1753 1634 : if (str == NULL) {
1754 0 : Py_DECREF(num);
1755 0 : return -1;
1756 : }
1757 :
1758 1634 : int res = PyDict_SetItem(rev_codes_dict, num, str);
1759 1634 : Py_DECREF(str);
1760 1634 : Py_DECREF(num);
1761 1634 : if (res < 0) {
1762 0 : return -1;
1763 : }
1764 :
1765 1634 : return 0;
1766 : }
1767 :
1768 : static int
1769 38 : add_errors_module(PyObject *mod)
1770 : {
1771 38 : PyObject *errors_module = add_submodule(mod, MODULE_NAME ".errors");
1772 38 : if (errors_module == NULL) {
1773 0 : return -1;
1774 : }
1775 :
1776 38 : PyObject *codes_dict = PyDict_New();
1777 38 : PyObject *rev_codes_dict = PyDict_New();
1778 38 : if (codes_dict == NULL || rev_codes_dict == NULL) {
1779 0 : goto error;
1780 : }
1781 :
1782 38 : size_t error_index = 0;
1783 1710 : for (; error_index < sizeof(error_info_of) / sizeof(struct ErrorInfo); error_index++) {
1784 1672 : if (error_info_of[error_index].name == NULL) {
1785 38 : continue;
1786 : }
1787 :
1788 1634 : if (add_error(errors_module, codes_dict, rev_codes_dict, error_index) < 0) {
1789 0 : goto error;
1790 : }
1791 : }
1792 :
1793 38 : if (PyModule_AddStringConstant(errors_module, "__doc__",
1794 : "Constants used to describe "
1795 : "error conditions.") < 0) {
1796 0 : goto error;
1797 : }
1798 :
1799 38 : Py_INCREF(codes_dict);
1800 38 : if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0) {
1801 0 : Py_DECREF(codes_dict);
1802 0 : goto error;
1803 : }
1804 38 : Py_CLEAR(codes_dict);
1805 :
1806 38 : Py_INCREF(rev_codes_dict);
1807 38 : if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0) {
1808 0 : Py_DECREF(rev_codes_dict);
1809 0 : goto error;
1810 : }
1811 38 : Py_CLEAR(rev_codes_dict);
1812 :
1813 38 : return 0;
1814 :
1815 0 : error:
1816 0 : Py_XDECREF(codes_dict);
1817 0 : Py_XDECREF(rev_codes_dict);
1818 0 : return -1;
1819 : }
1820 :
1821 : static int
1822 38 : add_model_module(PyObject *mod)
1823 : {
1824 38 : PyObject *model_module = add_submodule(mod, MODULE_NAME ".model");
1825 38 : if (model_module == NULL) {
1826 0 : return -1;
1827 : }
1828 :
1829 : #define MYCONST(c) do { \
1830 : if (PyModule_AddIntConstant(model_module, #c, c) < 0) { \
1831 : return -1; \
1832 : } \
1833 : } while(0)
1834 :
1835 38 : if (PyModule_AddStringConstant(
1836 : model_module, "__doc__",
1837 : "Constants used to interpret content model information.") < 0) {
1838 0 : return -1;
1839 : }
1840 :
1841 38 : MYCONST(XML_CTYPE_EMPTY);
1842 38 : MYCONST(XML_CTYPE_ANY);
1843 38 : MYCONST(XML_CTYPE_MIXED);
1844 38 : MYCONST(XML_CTYPE_NAME);
1845 38 : MYCONST(XML_CTYPE_CHOICE);
1846 38 : MYCONST(XML_CTYPE_SEQ);
1847 :
1848 38 : MYCONST(XML_CQUANT_NONE);
1849 38 : MYCONST(XML_CQUANT_OPT);
1850 38 : MYCONST(XML_CQUANT_REP);
1851 38 : MYCONST(XML_CQUANT_PLUS);
1852 : #undef MYCONST
1853 38 : return 0;
1854 : }
1855 :
1856 : #if XML_COMBINED_VERSION > 19505
1857 : static int
1858 38 : add_features(PyObject *mod)
1859 : {
1860 38 : PyObject *list = PyList_New(0);
1861 38 : if (list == NULL) {
1862 0 : return -1;
1863 : }
1864 :
1865 38 : const XML_Feature *features = XML_GetFeatureList();
1866 304 : for (size_t i = 0; features[i].feature != XML_FEATURE_END; ++i) {
1867 266 : PyObject *item = Py_BuildValue("si", features[i].name,
1868 266 : features[i].value);
1869 266 : if (item == NULL) {
1870 0 : goto error;
1871 : }
1872 266 : int ok = PyList_Append(list, item);
1873 266 : Py_DECREF(item);
1874 266 : if (ok < 0) {
1875 0 : goto error;
1876 : }
1877 : }
1878 38 : if (PyModule_AddObject(mod, "features", list) < 0) {
1879 0 : goto error;
1880 : }
1881 38 : return 0;
1882 :
1883 0 : error:
1884 0 : Py_DECREF(list);
1885 0 : return -1;
1886 : }
1887 : #endif
1888 :
1889 : static void
1890 38 : pyexpat_destructor(PyObject *op)
1891 : {
1892 38 : void *p = PyCapsule_GetPointer(op, PyExpat_CAPSULE_NAME);
1893 38 : PyMem_Free(p);
1894 38 : }
1895 :
1896 : static int
1897 38 : pyexpat_exec(PyObject *mod)
1898 : {
1899 38 : pyexpat_state *state = pyexpat_get_state(mod);
1900 38 : state->str_read = PyUnicode_InternFromString("read");
1901 38 : if (state->str_read == NULL) {
1902 0 : return -1;
1903 : }
1904 38 : state->xml_parse_type = (PyTypeObject *)PyType_FromModuleAndSpec(
1905 : mod, &_xml_parse_type_spec, NULL);
1906 :
1907 38 : if (state->xml_parse_type == NULL) {
1908 0 : return -1;
1909 : }
1910 :
1911 38 : if (init_handler_descrs(state) < 0) {
1912 0 : return -1;
1913 : }
1914 38 : state->error = PyErr_NewException("xml.parsers.expat.ExpatError",
1915 : NULL, NULL);
1916 38 : if (state->error == NULL) {
1917 0 : return -1;
1918 : }
1919 :
1920 : /* Add some symbolic constants to the module */
1921 :
1922 38 : if (PyModule_AddObjectRef(mod, "error", state->error) < 0) {
1923 0 : return -1;
1924 : }
1925 :
1926 38 : if (PyModule_AddObjectRef(mod, "ExpatError", state->error) < 0) {
1927 0 : return -1;
1928 : }
1929 :
1930 38 : if (PyModule_AddObjectRef(mod, "XMLParserType",
1931 38 : (PyObject *) state->xml_parse_type) < 0) {
1932 0 : return -1;
1933 : }
1934 :
1935 38 : if (PyModule_AddStringConstant(mod, "EXPAT_VERSION",
1936 38 : XML_ExpatVersion()) < 0) {
1937 0 : return -1;
1938 : }
1939 : {
1940 38 : XML_Expat_Version info = XML_ExpatVersionInfo();
1941 38 : PyObject *versionInfo = Py_BuildValue("(iii)",
1942 : info.major,
1943 : info.minor,
1944 : info.micro);
1945 38 : if (PyModule_AddObject(mod, "version_info", versionInfo) < 0) {
1946 0 : Py_DECREF(versionInfo);
1947 0 : return -1;
1948 : }
1949 : }
1950 : /* XXX When Expat supports some way of figuring out how it was
1951 : compiled, this should check and set native_encoding
1952 : appropriately.
1953 : */
1954 38 : if (PyModule_AddStringConstant(mod, "native_encoding", "UTF-8") < 0) {
1955 0 : return -1;
1956 : }
1957 :
1958 38 : if (add_errors_module(mod) < 0) {
1959 0 : return -1;
1960 : }
1961 :
1962 38 : if (add_model_module(mod) < 0) {
1963 0 : return -1;
1964 : }
1965 :
1966 : #if XML_COMBINED_VERSION > 19505
1967 38 : if (add_features(mod) < 0) {
1968 0 : return -1;
1969 : }
1970 : #endif
1971 :
1972 : #define MYCONST(c) do { \
1973 : if (PyModule_AddIntConstant(mod, #c, c) < 0) { \
1974 : return -1; \
1975 : } \
1976 : } while(0)
1977 :
1978 38 : MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1979 38 : MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1980 38 : MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
1981 : #undef MYCONST
1982 :
1983 38 : struct PyExpat_CAPI *capi = PyMem_Calloc(1, sizeof(struct PyExpat_CAPI));
1984 38 : if (capi == NULL) {
1985 0 : PyErr_NoMemory();
1986 0 : return -1;
1987 : }
1988 : /* initialize pyexpat dispatch table */
1989 38 : capi->size = sizeof(*capi);
1990 38 : capi->magic = PyExpat_CAPI_MAGIC;
1991 38 : capi->MAJOR_VERSION = XML_MAJOR_VERSION;
1992 38 : capi->MINOR_VERSION = XML_MINOR_VERSION;
1993 38 : capi->MICRO_VERSION = XML_MICRO_VERSION;
1994 38 : capi->ErrorString = XML_ErrorString;
1995 38 : capi->GetErrorCode = XML_GetErrorCode;
1996 38 : capi->GetErrorColumnNumber = XML_GetErrorColumnNumber;
1997 38 : capi->GetErrorLineNumber = XML_GetErrorLineNumber;
1998 38 : capi->Parse = XML_Parse;
1999 38 : capi->ParserCreate_MM = XML_ParserCreate_MM;
2000 38 : capi->ParserFree = XML_ParserFree;
2001 38 : capi->SetCharacterDataHandler = XML_SetCharacterDataHandler;
2002 38 : capi->SetCommentHandler = XML_SetCommentHandler;
2003 38 : capi->SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
2004 38 : capi->SetElementHandler = XML_SetElementHandler;
2005 38 : capi->SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
2006 38 : capi->SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
2007 38 : capi->SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
2008 38 : capi->SetUserData = XML_SetUserData;
2009 38 : capi->SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
2010 38 : capi->SetEncoding = XML_SetEncoding;
2011 38 : capi->DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
2012 : #if XML_COMBINED_VERSION >= 20100
2013 38 : capi->SetHashSalt = XML_SetHashSalt;
2014 : #else
2015 : capi->SetHashSalt = NULL;
2016 : #endif
2017 :
2018 : /* export using capsule */
2019 38 : PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME,
2020 : pyexpat_destructor);
2021 38 : if (capi_object == NULL) {
2022 0 : PyMem_Free(capi);
2023 0 : return -1;
2024 : }
2025 :
2026 38 : if (PyModule_AddObject(mod, "expat_CAPI", capi_object) < 0) {
2027 0 : Py_DECREF(capi_object);
2028 0 : return -1;
2029 : }
2030 :
2031 38 : return 0;
2032 : }
2033 :
2034 : static int
2035 1740 : pyexpat_traverse(PyObject *module, visitproc visit, void *arg)
2036 : {
2037 1740 : pyexpat_state *state = pyexpat_get_state(module);
2038 1740 : Py_VISIT(state->xml_parse_type);
2039 1740 : Py_VISIT(state->error);
2040 1740 : Py_VISIT(state->str_read);
2041 1740 : return 0;
2042 : }
2043 :
2044 : static int
2045 63 : pyexpat_clear(PyObject *module)
2046 : {
2047 63 : pyexpat_state *state = pyexpat_get_state(module);
2048 63 : Py_CLEAR(state->xml_parse_type);
2049 63 : Py_CLEAR(state->error);
2050 63 : Py_CLEAR(state->str_read);
2051 63 : return 0;
2052 : }
2053 :
2054 : static void
2055 38 : pyexpat_free(void *module)
2056 : {
2057 38 : pyexpat_clear((PyObject *)module);
2058 38 : }
2059 :
2060 : static PyModuleDef_Slot pyexpat_slots[] = {
2061 : {Py_mod_exec, pyexpat_exec},
2062 : {0, NULL}
2063 : };
2064 :
2065 : static struct PyModuleDef pyexpatmodule = {
2066 : PyModuleDef_HEAD_INIT,
2067 : .m_name = MODULE_NAME,
2068 : .m_doc = pyexpat_module_documentation,
2069 : .m_size = sizeof(pyexpat_state),
2070 : .m_methods = pyexpat_methods,
2071 : .m_slots = pyexpat_slots,
2072 : .m_traverse = pyexpat_traverse,
2073 : .m_clear = pyexpat_clear,
2074 : .m_free = pyexpat_free
2075 : };
2076 :
2077 : PyMODINIT_FUNC
2078 38 : PyInit_pyexpat(void)
2079 : {
2080 38 : return PyModuleDef_Init(&pyexpatmodule);
2081 : }
2082 :
2083 : static void
2084 2807 : clear_handlers(xmlparseobject *self, int initial)
2085 : {
2086 2807 : int i = 0;
2087 :
2088 64561 : for (; handler_info[i].name != NULL; i++) {
2089 61754 : if (initial)
2090 30536 : self->handlers[i] = NULL;
2091 : else {
2092 31218 : Py_CLEAR(self->handlers[i]);
2093 31218 : handler_info[i].setter(self->itself, NULL);
2094 : }
2095 : }
2096 2807 : }
2097 :
2098 : static struct HandlerInfo handler_info[] = {
2099 :
2100 : #define HANDLER_INFO(name) \
2101 : {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},
2102 :
2103 : HANDLER_INFO(StartElementHandler)
2104 : HANDLER_INFO(EndElementHandler)
2105 : HANDLER_INFO(ProcessingInstructionHandler)
2106 : HANDLER_INFO(CharacterDataHandler)
2107 : HANDLER_INFO(UnparsedEntityDeclHandler)
2108 : HANDLER_INFO(NotationDeclHandler)
2109 : HANDLER_INFO(StartNamespaceDeclHandler)
2110 : HANDLER_INFO(EndNamespaceDeclHandler)
2111 : HANDLER_INFO(CommentHandler)
2112 : HANDLER_INFO(StartCdataSectionHandler)
2113 : HANDLER_INFO(EndCdataSectionHandler)
2114 : HANDLER_INFO(DefaultHandler)
2115 : HANDLER_INFO(DefaultHandlerExpand)
2116 : HANDLER_INFO(NotStandaloneHandler)
2117 : HANDLER_INFO(ExternalEntityRefHandler)
2118 : HANDLER_INFO(StartDoctypeDeclHandler)
2119 : HANDLER_INFO(EndDoctypeDeclHandler)
2120 : HANDLER_INFO(EntityDeclHandler)
2121 : HANDLER_INFO(XmlDeclHandler)
2122 : HANDLER_INFO(ElementDeclHandler)
2123 : HANDLER_INFO(AttlistDeclHandler)
2124 : #if XML_COMBINED_VERSION >= 19504
2125 : HANDLER_INFO(SkippedEntityHandler)
2126 : #endif
2127 :
2128 : #undef HANDLER_INFO
2129 :
2130 : {NULL, NULL, NULL} /* sentinel */
2131 : };
|