LCOV - code coverage report
Current view: top level - Modules - pyexpat.c (source / functions) Hit Total Coverage
Test: CPython lcov report Lines: 627 837 74.9 %
Date: 2022-07-07 18:19:46 Functions: 78 86 90.7 %

          Line data    Source code
       1             : #include "Python.h"
       2             : #include <ctype.h>
       3             : 
       4             : #include "structmember.h"         // PyMemberDef
       5             : #include "expat.h"
       6             : 
       7             : #include "pyexpat.h"
       8             : 
       9             : /* Do not emit Clinic output to a file as that wreaks havoc with conditionally
      10             :    included methods. */
      11             : /*[clinic input]
      12             : module pyexpat
      13             : [clinic start generated code]*/
      14             : /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
      15             : 
/* Expat's version folded into one comparable integer:
   major*10000 + minor*100 + micro (so version 1.95.4 becomes 19504). */
#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)

/* Expat memory-handling suite populated with Python's PyObject_* allocator
   functions (malloc, realloc, free, in that order). */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
      20             : 
/* Identifiers for the kinds of callback a parser object can carry.
   The values are used as indices into xmlparseobject.handlers
   (see have_handler()). */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
    /* Only present when XML_COMBINED_VERSION is at least 19504. */
#if XML_COMBINED_VERSION >= 19504
    SkippedEntity,
#endif
    /* Always the last enumerator.
       NOTE(review): looks like an end marker rather than a real handler
       kind -- confirm against its users. */
    _DummyDecl
};
      48             : 
/* Per-module state; fetched from a module object by pyexpat_get_state(). */
typedef struct {
    /* NOTE(review): presumably the type object of xmlparser instances --
       confirm where it is assigned. */
    PyTypeObject *xml_parse_type;
    /* Exception class that set_error() instantiates and raises. */
    PyObject *error;
    /* NOTE(review): presumably a cached "read" string object -- confirm. */
    PyObject *str_read;
} pyexpat_state;
      54             : 
      55             : static inline pyexpat_state*
      56        3227 : pyexpat_get_state(PyObject *module)
      57             : {
      58        3227 :     void *state = PyModule_GetState(module);
      59        3227 :     assert(state != NULL);
      60        3227 :     return (pyexpat_state *)state;
      61             : }
      62             : 
      63             : /* ----------------------------------------------------- */
      64             : 
      65             : /* Declarations for objects of type xmlparser */
      66             : 
/* Instance layout of an xmlparser object. */
typedef struct {
    PyObject_HEAD

    XML_Parser itself;          /* The wrapped Expat parser handle */
    int ordered_attributes;     /* Return attributes as a list. */
    int specified_attributes;   /* Report only specified attributes. */
    int in_callback;            /* Is a callback active? */
    int ns_prefixes;            /* Namespace-triplets mode? */
    XML_Char *buffer;           /* Buffer used when accumulating characters */
                                /* NULL if not enabled */
    int buffer_size;            /* Size of buffer, in XML_Char units */
    int buffer_used;            /* Buffer units in use */
    PyObject *intern;           /* Dictionary to intern strings */
    PyObject **handlers;        /* Indexed by enum HandlerTypes; a NULL
                                   entry means no handler is set */
} xmlparseobject;
      82             : 
      83             : #include "clinic/pyexpat.c.h"
      84             : 
/* NOTE(review): presumably the default size of xmlparseobject.buffer in
   XML_Char units -- confirm where it is used. */
#define CHARACTER_DATA_BUFFER_SIZE 8192

/* A function that installs the callback `meth` on an Expat parser. */
typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
/* Untyped pointer to a C-level handler function. */
typedef void* xmlhandler;

/* One entry describing a handler kind: a name, the function that installs
   it on the Expat parser, the C-level handler itself, and a getset
   descriptor.
   NOTE(review): presumably `getset` exposes the handler as a Python
   attribute of the parser object -- confirm. */
struct HandlerInfo {
    const char *name;
    xmlhandlersetter setter;
    xmlhandler handler;
    PyGetSetDef getset;
};

/* Table of handler descriptions with room for 64 entries; it is not
   initialized at this point in the file. */
static struct HandlerInfo handler_info[64];
      98             : 
      99             : /* Set an integer attribute on the error object; return true on success,
     100             :  * false on an exception.
     101             :  */
     102             : static int
     103          96 : set_error_attr(PyObject *err, const char *name, int value)
     104             : {
     105          96 :     PyObject *v = PyLong_FromLong(value);
     106             : 
     107          96 :     if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
     108           0 :         Py_XDECREF(v);
     109           0 :         return 0;
     110             :     }
     111          96 :     Py_DECREF(v);
     112          96 :     return 1;
     113             : }
     114             : 
     115             : /* Build and set an Expat exception, including positioning
     116             :  * information.  Always returns NULL.
     117             :  */
     118             : static PyObject *
     119          32 : set_error(pyexpat_state *state, xmlparseobject *self, enum XML_Error code)
     120             : {
     121             :     PyObject *err;
     122             :     PyObject *buffer;
     123          32 :     XML_Parser parser = self->itself;
     124          32 :     int lineno = XML_GetErrorLineNumber(parser);
     125          32 :     int column = XML_GetErrorColumnNumber(parser);
     126             : 
     127          32 :     buffer = PyUnicode_FromFormat("%s: line %i, column %i",
     128             :                                   XML_ErrorString(code), lineno, column);
     129          32 :     if (buffer == NULL)
     130           0 :         return NULL;
     131          32 :     err = PyObject_CallOneArg(state->error, buffer);
     132          32 :     Py_DECREF(buffer);
     133          32 :     if (  err != NULL
     134          32 :           && set_error_attr(err, "code", code)
     135          32 :           && set_error_attr(err, "offset", column)
     136          32 :           && set_error_attr(err, "lineno", lineno)) {
     137          32 :         PyErr_SetObject(state->error, err);
     138             :     }
     139          32 :     Py_XDECREF(err);
     140          32 :     return NULL;
     141             : }
     142             : 
     143             : static int
     144       27517 : have_handler(xmlparseobject *self, int type)
     145             : {
     146       27517 :     PyObject *handler = self->handlers[type];
     147       27517 :     return handler != NULL;
     148             : }
     149             : 
     150             : /* Convert a string of XML_Chars into a Unicode string.
     151             :    Returns None if str is a null pointer. */
     152             : 
     153             : static PyObject *
     154       15954 : conv_string_to_unicode(const XML_Char *str)
     155             : {
     156             :     /* XXX currently this code assumes that XML_Char is 8-bit,
     157             :        and hence in UTF-8.  */
     158             :     /* UTF-8 from Expat, Unicode desired */
     159       15954 :     if (str == NULL) {
     160         214 :         Py_RETURN_NONE;
     161             :     }
     162       15740 :     return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
     163             : }
     164             : 
     165             : static PyObject *
     166       13037 : conv_string_len_to_unicode(const XML_Char *str, int len)
     167             : {
     168             :     /* XXX currently this code assumes that XML_Char is 8-bit,
     169             :        and hence in UTF-8.  */
     170             :     /* UTF-8 from Expat, Unicode desired */
     171       13037 :     if (str == NULL) {
     172           7 :         Py_RETURN_NONE;
     173             :     }
     174       13030 :     return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
     175             : }
     176             : 
     177             : /* Callback routines */
     178             : 
     179             : static void clear_handlers(xmlparseobject *self, int initial);
     180             : 
/* This handler is used when an error has been detected, in the hope
   that actual parsing can be terminated early.  This will only help
   if an external entity reference is encountered.

   It ignores all of its arguments and always returns 0.
   NOTE(review): presumably a 0 return makes Expat treat the reference as
   an error -- confirm against the Expat documentation. */
static int
error_external_entity_ref_handler(XML_Parser parser,
                                  const XML_Char *context,
                                  const XML_Char *base,
                                  const XML_Char *systemId,
                                  const XML_Char *publicId)
{
    return 0;
}
     193             : 
/* Dummy character data handler used when an error (exception) has
   been detected, and the actual parsing can be terminated early.
   This is needed since character data handler can't be safely removed
   from within the character data handler, but can be replaced.  It is
   used only from the character data handler trampoline, and must be
   used right after `flag_error()` is called.

   All three arguments are ignored. */
static void
noop_character_data_handler(void *userData, const XML_Char *data, int len)
{
    /* Do nothing. */
}
     205             : 
/* Switch the parser into its "an error was seen" configuration:
   call clear_handlers(self, 0), then install
   error_external_entity_ref_handler (which always returns 0) as Expat's
   external-entity-reference handler. */
static void
flag_error(xmlparseobject *self)
{
    clear_handlers(self, 0);
    XML_SetExternalEntityRefHandler(self->itself,
                                    error_external_entity_ref_handler);
}
     213             : 
     214             : static PyObject*
     215       27514 : call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
     216             :                 xmlparseobject *self)
     217             : {
     218             :     PyObject *res;
     219             : 
     220       27514 :     res = PyObject_Call(func, args, NULL);
     221       27514 :     if (res == NULL) {
     222          30 :         _PyTraceback_Add(funcname, __FILE__, lineno);
     223          30 :         XML_StopParser(self->itself, XML_FALSE);
     224             :     }
     225       27514 :     return res;
     226             : }
     227             : 
     228             : static PyObject*
     229       15232 : string_intern(xmlparseobject *self, const char* str)
     230             : {
     231       15232 :     PyObject *result = conv_string_to_unicode(str);
     232             :     PyObject *value;
     233             :     /* result can be NULL if the unicode conversion failed. */
     234       15232 :     if (!result)
     235           0 :         return result;
     236       15232 :     if (!self->intern)
     237        1624 :         return result;
     238       13608 :     value = PyDict_GetItemWithError(self->intern, result);
     239       13608 :     if (!value) {
     240        9006 :         if (!PyErr_Occurred() &&
     241        4503 :             PyDict_SetItem(self->intern, result, result) == 0)
     242             :         {
     243        4503 :             return result;
     244             :         }
     245             :         else {
     246           0 :             Py_DECREF(result);
     247           0 :             return NULL;
     248             :         }
     249             :     }
     250        9105 :     Py_INCREF(value);
     251        9105 :     Py_DECREF(result);
     252        9105 :     return value;
     253             : }
     254             : 
     255             : /* Return 0 on success, -1 on exception.
     256             :  * flag_error() will be called before return if needed.
     257             :  */
     258             : static int
     259       11774 : call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
     260             : {
     261             :     PyObject *args;
     262             :     PyObject *temp;
     263             : 
     264       11774 :     if (!have_handler(self, CharacterData))
     265           0 :         return -1;
     266             : 
     267       11774 :     args = PyTuple_New(1);
     268       11774 :     if (args == NULL)
     269           0 :         return -1;
     270       11774 :     temp = (conv_string_len_to_unicode(buffer, len));
     271       11774 :     if (temp == NULL) {
     272           0 :         Py_DECREF(args);
     273           0 :         flag_error(self);
     274           0 :         XML_SetCharacterDataHandler(self->itself,
     275             :                                     noop_character_data_handler);
     276           0 :         return -1;
     277             :     }
     278       11774 :     PyTuple_SET_ITEM(args, 0, temp);
     279             :     /* temp is now a borrowed reference; consider it unused. */
     280       11774 :     self->in_callback = 1;
     281       11774 :     temp = call_with_frame("CharacterData", __LINE__,
     282       11774 :                            self->handlers[CharacterData], args, self);
     283             :     /* temp is an owned reference again, or NULL */
     284       11774 :     self->in_callback = 0;
     285       11774 :     Py_DECREF(args);
     286       11774 :     if (temp == NULL) {
     287           1 :         flag_error(self);
     288           1 :         XML_SetCharacterDataHandler(self->itself,
     289             :                                     noop_character_data_handler);
     290           1 :         return -1;
     291             :     }
     292       11773 :     Py_DECREF(temp);
     293       11773 :     return 0;
     294             : }
     295             : 
     296             : static int
     297       19380 : flush_character_buffer(xmlparseobject *self)
     298             : {
     299             :     int rc;
     300       19380 :     if (self->buffer == NULL || self->buffer_used == 0)
     301       17642 :         return 0;
     302        1738 :     rc = call_character_handler(self, self->buffer, self->buffer_used);
     303        1738 :     self->buffer_used = 0;
     304        1738 :     return rc;
     305             : }
     306             : 
     307             : static void
     308       13595 : my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
     309             : {
     310       13595 :     xmlparseobject *self = (xmlparseobject *) userData;
     311             : 
     312       13595 :     if (PyErr_Occurred())
     313           1 :         return;
     314             : 
     315       13594 :     if (self->buffer == NULL)
     316       10036 :         call_character_handler(self, data, len);
     317             :     else {
     318        3558 :         if ((self->buffer_used + len) > self->buffer_size) {
     319           3 :             if (flush_character_buffer(self) < 0)
     320           0 :                 return;
     321             :             /* handler might have changed; drop the rest on the floor
     322             :              * if there isn't a handler anymore
     323             :              */
     324           3 :             if (!have_handler(self, CharacterData))
     325           0 :                 return;
     326             :         }
     327        3558 :         if (len > self->buffer_size) {
     328           0 :             call_character_handler(self, data, len);
     329           0 :             self->buffer_used = 0;
     330             :         }
     331             :         else {
     332        3558 :             memcpy(self->buffer + self->buffer_used,
     333             :                    data, len * sizeof(XML_Char));
     334        3558 :             self->buffer_used += len;
     335             :         }
     336             :     }
     337             : }
     338             : 
     339             : static void
     340        7064 : my_StartElementHandler(void *userData,
     341             :                        const XML_Char *name, const XML_Char *atts[])
     342             : {
     343        7064 :     xmlparseobject *self = (xmlparseobject *)userData;
     344             : 
     345        7064 :     if (have_handler(self, StartElement)) {
     346             :         PyObject *container, *rv, *args;
     347             :         int i, max;
     348             : 
     349        7064 :         if (PyErr_Occurred())
     350           0 :             return;
     351             : 
     352        7064 :         if (flush_character_buffer(self) < 0)
     353           0 :             return;
     354             :         /* Set max to the number of slots filled in atts[]; max/2 is
     355             :          * the number of attributes we need to process.
     356             :          */
     357        7064 :         if (self->specified_attributes) {
     358         487 :             max = XML_GetSpecifiedAttributeCount(self->itself);
     359             :         }
     360             :         else {
     361        6577 :             max = 0;
     362        7078 :             while (atts[max] != NULL)
     363         501 :                 max += 2;
     364             :         }
     365             :         /* Build the container. */
     366        7064 :         if (self->ordered_attributes)
     367        1428 :             container = PyList_New(max);
     368             :         else
     369        5636 :             container = PyDict_New();
     370        7064 :         if (container == NULL) {
     371           0 :             flag_error(self);
     372           0 :             return;
     373             :         }
     374        7600 :         for (i = 0; i < max; i += 2) {
     375         536 :             PyObject *n = string_intern(self, (XML_Char *) atts[i]);
     376             :             PyObject *v;
     377         536 :             if (n == NULL) {
     378           0 :                 flag_error(self);
     379           0 :                 Py_DECREF(container);
     380           0 :                 return;
     381             :             }
     382         536 :             v = conv_string_to_unicode((XML_Char *) atts[i+1]);
     383         536 :             if (v == NULL) {
     384           0 :                 flag_error(self);
     385           0 :                 Py_DECREF(container);
     386           0 :                 Py_DECREF(n);
     387           0 :                 return;
     388             :             }
     389         536 :             if (self->ordered_attributes) {
     390         278 :                 PyList_SET_ITEM(container, i, n);
     391         278 :                 PyList_SET_ITEM(container, i+1, v);
     392             :             }
     393         258 :             else if (PyDict_SetItem(container, n, v)) {
     394           0 :                 flag_error(self);
     395           0 :                 Py_DECREF(n);
     396           0 :                 Py_DECREF(v);
     397           0 :                 Py_DECREF(container);
     398           0 :                 return;
     399             :             }
     400             :             else {
     401         258 :                 Py_DECREF(n);
     402         258 :                 Py_DECREF(v);
     403             :             }
     404             :         }
     405        7064 :         args = string_intern(self, name);
     406        7064 :         if (args == NULL) {
     407           0 :             Py_DECREF(container);
     408           0 :             return;
     409             :         }
     410        7064 :         args = Py_BuildValue("(NN)", args, container);
     411        7064 :         if (args == NULL) {
     412           0 :             return;
     413             :         }
     414             :         /* Container is now a borrowed reference; ignore it. */
     415        7064 :         self->in_callback = 1;
     416        7064 :         rv = call_with_frame("StartElement", __LINE__,
     417        7064 :                              self->handlers[StartElement], args, self);
     418        7064 :         self->in_callback = 0;
     419        7064 :         Py_DECREF(args);
     420        7064 :         if (rv == NULL) {
     421           4 :             flag_error(self);
     422           4 :             return;
     423             :         }
     424        7060 :         Py_DECREF(rv);
     425             :     }
     426             : }
     427             : 
/* Template that defines a C-level Expat callback named my_<NAME>Handler.
 *
 *   RC            return type of the generated function
 *   NAME          enum HandlerTypes member; also the name passed to
 *                 call_with_frame()
 *   PARAMS        parenthesized parameter list of the generated function
 *   INIT          extra declarations/statements placed after the locals
 *   PARAM_FORMAT  parenthesized argument list for Py_BuildValue
 *   CONVERSION    statements run on the handler result `rv` before it is
 *                 released
 *   RETURN        expression returned on every exit path
 *   GETUSERDATA   expression yielding the xmlparseobject
 *
 * The generated function does nothing but `return RETURN` when no handler
 * is set for NAME.  Otherwise it bails out if a Python exception is
 * pending or flushing the character buffer fails, builds the argument
 * tuple, and calls the handler with in_callback set.  flag_error() is
 * called if building the arguments or the handler call fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) \
            return RETURN; \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(#NAME,__LINE__, \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
     458             : 
/* RC_HANDLER specialization for callbacks returning void: no extra
   locals, no result conversion, and the xmlparseobject is taken from the
   `userData` parameter. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* RC_HANDLER specialization for callbacks returning int: the result of
   the Python handler is converted with PyLong_AsLong() into `rc`, which
   starts at 0 and is returned on every path.
   NOTE(review): the PyLong_AsLong() result is not checked for failure
   here -- confirm that callers cope with a pending exception. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyLong_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
     467             : 
/* my_EndElementHandler: calls the EndElement handler with the interned
   element name as its only argument. */
VOID_HANDLER(EndElement,
             (void *userData, const XML_Char *name),
             ("(N)", string_intern(self, name)))
     471             : 
/* my_ProcessingInstructionHandler: calls the ProcessingInstruction handler
   with (target, data); the target is interned, the data is converted
   without interning. */
VOID_HANDLER(ProcessingInstruction,
             (void *userData,
              const XML_Char *target,
              const XML_Char *data),
             ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
     477             : 
/* my_UnparsedEntityDeclHandler: calls the UnparsedEntityDecl handler with
   (entityName, base, systemId, publicId, notationName), all interned. */
VOID_HANDLER(UnparsedEntityDecl,
             (void *userData,
              const XML_Char *entityName,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("(NNNNN)",
              string_intern(self, entityName), string_intern(self, base),
              string_intern(self, systemId), string_intern(self, publicId),
              string_intern(self, notationName)))
     489             : 
/* my_EntityDeclHandler: calls the EntityDecl handler with
   (entityName, is_parameter_entity, value, base, systemId, publicId,
   notationName).  The value is converted using its explicit length
   (value_length); the other strings are interned. */
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              (conv_string_len_to_unicode(value, value_length)),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))
     506             : 
/* my_XmlDeclHandler: calls the XmlDecl handler with
   (version, encoding, standalone); the two strings are converted without
   interning and become None when the pointer is NULL. */
VOID_HANDLER(XmlDecl,
             (void *userData,
              const XML_Char *version,
              const XML_Char *encoding,
              int standalone),
             ("(O&O&i)",
              conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
              standalone))
     515             : 
     516             : static PyObject *
     517           5 : conv_content_model(XML_Content * const model,
     518             :                    PyObject *(*conv_string)(const XML_Char *))
     519             : {
     520           5 :     PyObject *result = NULL;
     521           5 :     PyObject *children = PyTuple_New(model->numchildren);
     522             :     int i;
     523             : 
     524           5 :     if (children != NULL) {
     525           5 :         assert(model->numchildren < INT_MAX);
     526           5 :         for (i = 0; i < (int)model->numchildren; ++i) {
     527           0 :             PyObject *child = conv_content_model(&model->children[i],
     528             :                                                  conv_string);
     529           0 :             if (child == NULL) {
     530           0 :                 Py_XDECREF(children);
     531           0 :                 return NULL;
     532             :             }
     533           0 :             PyTuple_SET_ITEM(children, i, child);
     534             :         }
     535           5 :         result = Py_BuildValue("(iiO&N)",
     536           5 :                                model->type, model->quant,
     537             :                                conv_string,model->name, children);
     538             :     }
     539           5 :     return result;
     540             : }
     541             : 
     542             : static void
     543           5 : my_ElementDeclHandler(void *userData,
     544             :                       const XML_Char *name,
     545             :                       XML_Content *model)
     546             : {
     547           5 :     xmlparseobject *self = (xmlparseobject *)userData;
     548           5 :     PyObject *args = NULL;
     549             : 
     550           5 :     if (have_handler(self, ElementDecl)) {
     551           5 :         PyObject *rv = NULL;
     552             :         PyObject *modelobj, *nameobj;
     553             : 
     554           5 :         if (PyErr_Occurred())
     555           0 :             return;
     556             : 
     557           5 :         if (flush_character_buffer(self) < 0)
     558           0 :             goto finally;
     559           5 :         modelobj = conv_content_model(model, (conv_string_to_unicode));
     560           5 :         if (modelobj == NULL) {
     561           0 :             flag_error(self);
     562           0 :             goto finally;
     563             :         }
     564           5 :         nameobj = string_intern(self, name);
     565           5 :         if (nameobj == NULL) {
     566           0 :             Py_DECREF(modelobj);
     567           0 :             flag_error(self);
     568           0 :             goto finally;
     569             :         }
     570           5 :         args = Py_BuildValue("NN", nameobj, modelobj);
     571           5 :         if (args == NULL) {
     572           0 :             flag_error(self);
     573           0 :             goto finally;
     574             :         }
     575           5 :         self->in_callback = 1;
     576           5 :         rv = call_with_frame("ElementDecl", __LINE__,
     577           5 :                              self->handlers[ElementDecl], args, self);
     578           5 :         self->in_callback = 0;
     579           5 :         if (rv == NULL) {
     580           0 :             flag_error(self);
     581           0 :             goto finally;
     582             :         }
     583           5 :         Py_DECREF(rv);
     584             :     }
     585           0 :  finally:
     586           5 :     Py_XDECREF(args);
     587           5 :     XML_FreeContentModel(self->itself, model);
     588           5 :     return;
     589             : }
     590             : 
/* my_AttlistDeclHandler: calls the AttlistDecl handler with
   (elname, attname, att_type, dflt, isrequired).  The element and
   attribute names are interned; att_type and dflt are converted without
   interning and become None when the pointer is NULL. */
VOID_HANDLER(AttlistDecl,
             (void *userData,
              const XML_Char *elname,
              const XML_Char *attname,
              const XML_Char *att_type,
              const XML_Char *dflt,
              int isrequired),
             ("(NNO&O&i)",
              string_intern(self, elname), string_intern(self, attname),
              conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
              isrequired))
     602             : 
/* my_SkippedEntityHandler: calls the SkippedEntity handler with
   (entityName, is_parameter_entity); the name is interned.  Only compiled
   when XML_COMBINED_VERSION is at least 19504, matching the enum entry. */
#if XML_COMBINED_VERSION >= 19504
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName), is_parameter_entity))
#endif
     611             : 
     612           6 : VOID_HANDLER(NotationDecl,
     613             :                 (void *userData,
     614             :                         const XML_Char *notationName,
     615             :                         const XML_Char *base,
     616             :                         const XML_Char *systemId,
     617             :                         const XML_Char *publicId),
     618             :                 ("(NNNN)",
     619             :                  string_intern(self, notationName), string_intern(self, base),
     620             :                  string_intern(self, systemId), string_intern(self, publicId)))
     621             : 
     622         168 : VOID_HANDLER(StartNamespaceDecl,
     623             :                 (void *userData,
     624             :                       const XML_Char *prefix,
     625             :                       const XML_Char *uri),
     626             :                 ("(NN)",
     627             :                  string_intern(self, prefix), string_intern(self, uri)))
     628             : 
     629          30 : VOID_HANDLER(EndNamespaceDecl,
     630             :                 (void *userData,
     631             :                     const XML_Char *prefix),
     632             :                 ("(N)", string_intern(self, prefix)))
     633             : 
     634          54 : VOID_HANDLER(Comment,
     635             :                (void *userData, const XML_Char *data),
     636             :                 ("(O&)", conv_string_to_unicode ,data))
     637             : 
     638           7 : VOID_HANDLER(StartCdataSection,
     639             :                (void *userData),
     640             :                 ("()"))
     641             : 
     642           7 : VOID_HANDLER(EndCdataSection,
     643             :                (void *userData),
     644             :                 ("()"))
     645             : 
     646         153 : VOID_HANDLER(Default,
     647             :               (void *userData, const XML_Char *s, int len),
     648             :               ("(N)", (conv_string_len_to_unicode(s,len))))
     649             : 
     650        1097 : VOID_HANDLER(DefaultHandlerExpand,
     651             :               (void *userData, const XML_Char *s, int len),
     652             :               ("(N)", (conv_string_len_to_unicode(s,len))))
     653             : #define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler
     654             : 
     655           6 : INT_HANDLER(NotStandalone,
     656             :                 (void *userData),
     657             :                 ("()"))
     658             : 
     659          11 : RC_HANDLER(int, ExternalEntityRef,
     660             :                 (XML_Parser parser,
     661             :                     const XML_Char *context,
     662             :                     const XML_Char *base,
     663             :                     const XML_Char *systemId,
     664             :                     const XML_Char *publicId),
     665             :                 int rc=0;,
     666             :                 ("(O&NNN)",
     667             :                  conv_string_to_unicode ,context, string_intern(self, base),
     668             :                  string_intern(self, systemId), string_intern(self, publicId)),
     669             :                 rc = PyLong_AsLong(rv);, rc,
     670             :                 XML_GetUserData(parser))
     671             : 
     672             : /* XXX UnknownEncodingHandler */
     673             : 
     674          18 : VOID_HANDLER(StartDoctypeDecl,
     675             :              (void *userData, const XML_Char *doctypeName,
     676             :               const XML_Char *sysid, const XML_Char *pubid,
     677             :               int has_internal_subset),
     678             :              ("(NNNi)", string_intern(self, doctypeName),
     679             :               string_intern(self, sysid), string_intern(self, pubid),
     680             :               has_internal_subset))
     681             : 
     682          18 : VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
     683             : 
     684             : /* ---------------------------------------------------------------- */
     685             : /*[clinic input]
     686             : class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
     687             : [clinic start generated code]*/
     688             : /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
     689             : 
     690             : 
     691             : static PyObject *
     692        2353 : get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv)
     693             : {
     694        2353 :     if (PyErr_Occurred()) {
     695          36 :         return NULL;
     696             :     }
     697        2317 :     if (rv == 0) {
     698          32 :         return set_error(state, self, XML_GetErrorCode(self->itself));
     699             :     }
     700        2285 :     if (flush_character_buffer(self) < 0) {
     701           0 :         return NULL;
     702             :     }
     703        2285 :     return PyLong_FromLong(rv);
     704             : }
     705             : 
     706             : #define MAX_CHUNK_SIZE (1 << 20)
     707             : 
     708             : /*[clinic input]
     709             : pyexpat.xmlparser.Parse
     710             : 
     711             :     cls: defining_class
     712             :     data: object
     713             :     isfinal: bool(accept={int}) = False
     714             :     /
     715             : 
     716             : Parse XML data.
     717             : 
     718             : `isfinal' should be true at end of input.
     719             : [clinic start generated code]*/
     720             : 
     721             : static PyObject *
     722        2211 : pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls,
     723             :                              PyObject *data, int isfinal)
     724             : /*[clinic end generated code: output=8faffe07fe1f862a input=fc97f833558ca715]*/
     725             : {
     726             :     const char *s;
     727             :     Py_ssize_t slen;
     728             :     Py_buffer view;
     729             :     int rc;
     730        2211 :     pyexpat_state *state = PyType_GetModuleState(cls);
     731             : 
     732        2211 :     if (PyUnicode_Check(data)) {
     733         904 :         view.buf = NULL;
     734         904 :         s = PyUnicode_AsUTF8AndSize(data, &slen);
     735         904 :         if (s == NULL)
     736           0 :             return NULL;
     737             :         /* Explicitly set UTF-8 encoding. Return code ignored. */
     738         904 :         (void)XML_SetEncoding(self->itself, "utf-8");
     739             :     }
     740             :     else {
     741        1307 :         if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
     742           0 :             return NULL;
     743        1307 :         s = view.buf;
     744        1307 :         slen = view.len;
     745             :     }
     746             : 
     747             :     static_assert(MAX_CHUNK_SIZE <= INT_MAX,
     748             :                   "MAX_CHUNK_SIZE is larger than INT_MAX");
     749        2211 :     while (slen > MAX_CHUNK_SIZE) {
     750           0 :         rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
     751           0 :         if (!rc)
     752           0 :             goto done;
     753           0 :         s += MAX_CHUNK_SIZE;
     754           0 :         slen -= MAX_CHUNK_SIZE;
     755             :     }
     756             : 
     757        2211 :     assert(slen <= INT_MAX);
     758        2211 :     rc = XML_Parse(self->itself, s, (int)slen, isfinal);
     759             : 
     760        2211 : done:
     761        2211 :     if (view.buf != NULL) {
     762        1307 :         PyBuffer_Release(&view);
     763             :     }
     764        2211 :     return get_parse_result(state, self, rc);
     765             : }
     766             : 
     767             : /* File reading copied from cPickle */
     768             : 
     769             : #define BUF_SIZE 2048
     770             : 
     771             : static int
     772         308 : readinst(char *buf, int buf_size, PyObject *meth)
     773             : {
     774             :     PyObject *str;
     775             :     Py_ssize_t len;
     776             :     const char *ptr;
     777             : 
     778         308 :     str = PyObject_CallFunction(meth, "n", buf_size);
     779         308 :     if (str == NULL)
     780           0 :         goto error;
     781             : 
     782         308 :     if (PyBytes_Check(str))
     783         308 :         ptr = PyBytes_AS_STRING(str);
     784           0 :     else if (PyByteArray_Check(str))
     785           0 :         ptr = PyByteArray_AS_STRING(str);
     786             :     else {
     787           0 :         PyErr_Format(PyExc_TypeError,
     788             :                      "read() did not return a bytes object (type=%.400s)",
     789           0 :                      Py_TYPE(str)->tp_name);
     790           0 :         goto error;
     791             :     }
     792         308 :     len = Py_SIZE(str);
     793         308 :     if (len > buf_size) {
     794           0 :         PyErr_Format(PyExc_ValueError,
     795             :                      "read() returned too much data: "
     796             :                      "%i bytes requested, %zd returned",
     797             :                      buf_size, len);
     798           0 :         goto error;
     799             :     }
     800         308 :     memcpy(buf, ptr, len);
     801         308 :     Py_DECREF(str);
     802             :     /* len <= buf_size <= INT_MAX */
     803         308 :     return (int)len;
     804             : 
     805           0 : error:
     806           0 :     Py_XDECREF(str);
     807           0 :     return -1;
     808             : }
     809             : 
     810             : /*[clinic input]
     811             : pyexpat.xmlparser.ParseFile
     812             : 
     813             :     cls: defining_class
     814             :     file: object
     815             :     /
     816             : 
     817             : Parse XML data from file-like object.
     818             : [clinic start generated code]*/
     819             : 
     820             : static PyObject *
     821         156 : pyexpat_xmlparser_ParseFile_impl(xmlparseobject *self, PyTypeObject *cls,
     822             :                                  PyObject *file)
     823             : /*[clinic end generated code: output=34780a094c8ca3ae input=ba4bc9c541684793]*/
     824             : {
     825         156 :     int rv = 1;
     826         156 :     PyObject *readmethod = NULL;
     827             : 
     828         156 :     pyexpat_state *state = PyType_GetModuleState(cls);
     829             : 
     830         156 :     if (_PyObject_LookupAttr(file, state->str_read, &readmethod) < 0) {
     831           0 :         return NULL;
     832             :     }
     833         156 :     if (readmethod == NULL) {
     834           0 :         PyErr_SetString(PyExc_TypeError,
     835             :                         "argument must have 'read' attribute");
     836           0 :         return NULL;
     837             :     }
     838         153 :     for (;;) {
     839             :         int bytes_read;
     840         309 :         void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
     841         309 :         if (buf == NULL) {
     842           1 :             Py_XDECREF(readmethod);
     843           1 :             return get_parse_result(state, self, 0);
     844             :         }
     845             : 
     846         308 :         bytes_read = readinst(buf, BUF_SIZE, readmethod);
     847         308 :         if (bytes_read < 0) {
     848           0 :             Py_DECREF(readmethod);
     849           0 :             return NULL;
     850             :         }
     851         308 :         rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
     852         308 :         if (PyErr_Occurred()) {
     853          14 :             Py_XDECREF(readmethod);
     854          14 :             return NULL;
     855             :         }
     856             : 
     857         294 :         if (!rv || bytes_read == 0)
     858             :             break;
     859             :     }
     860         141 :     Py_XDECREF(readmethod);
     861         141 :     return get_parse_result(state, self, rv);
     862             : }
     863             : 
     864             : /*[clinic input]
     865             : pyexpat.xmlparser.SetBase
     866             : 
     867             :     base: str
     868             :     /
     869             : 
     870             : Set the base URL for the parser.
     871             : [clinic start generated code]*/
     872             : 
     873             : static PyObject *
     874          51 : pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
     875             : /*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
     876             : {
     877          51 :     if (!XML_SetBase(self->itself, base)) {
     878           0 :         return PyErr_NoMemory();
     879             :     }
     880          51 :     Py_RETURN_NONE;
     881             : }
     882             : 
     883             : /*[clinic input]
     884             : pyexpat.xmlparser.GetBase
     885             : 
     886             : Return base URL string for the parser.
     887             : [clinic start generated code]*/
     888             : 
     889             : static PyObject *
     890           0 : pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
     891             : /*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
     892             : {
     893           0 :     return Py_BuildValue("z", XML_GetBase(self->itself));
     894             : }
     895             : 
     896             : /*[clinic input]
     897             : pyexpat.xmlparser.GetInputContext
     898             : 
     899             : Return the untranslated text of the input that caused the current event.
     900             : 
     901             : If the event was generated by a large amount of text (such as a start tag
     902             : for an element with many attributes), not all of the text may be available.
     903             : [clinic start generated code]*/
     904             : 
     905             : static PyObject *
     906           0 : pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
     907             : /*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
     908             : {
     909           0 :     if (self->in_callback) {
     910             :         int offset, size;
     911             :         const char *buffer
     912           0 :             = XML_GetInputContext(self->itself, &offset, &size);
     913             : 
     914           0 :         if (buffer != NULL)
     915           0 :             return PyBytes_FromStringAndSize(buffer + offset,
     916           0 :                                               size - offset);
     917             :         else
     918           0 :             Py_RETURN_NONE;
     919             :     }
     920             :     else
     921           0 :         Py_RETURN_NONE;
     922             : }
     923             : 
     924             : /*[clinic input]
     925             : pyexpat.xmlparser.ExternalEntityParserCreate
     926             : 
     927             :     cls: defining_class
     928             :     context: str(accept={str, NoneType})
     929             :     encoding: str = NULL
     930             :     /
     931             : 
     932             : Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
     933             : [clinic start generated code]*/
     934             : 
     935             : static PyObject *
     936           2 : pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
     937             :                                                   PyTypeObject *cls,
     938             :                                                   const char *context,
     939             :                                                   const char *encoding)
     940             : /*[clinic end generated code: output=01d4472b49cb3f92 input=ec70c6b9e6e9619a]*/
     941             : {
     942             :     xmlparseobject *new_parser;
     943             :     int i;
     944             : 
     945           2 :     pyexpat_state *state = PyType_GetModuleState(cls);
     946             : 
     947           2 :     new_parser = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
     948           2 :     if (new_parser == NULL) {
     949           0 :         return NULL;
     950             :     }
     951             : 
     952           2 :     new_parser->buffer_size = self->buffer_size;
     953           2 :     new_parser->buffer_used = 0;
     954           2 :     new_parser->buffer = NULL;
     955           2 :     new_parser->ordered_attributes = self->ordered_attributes;
     956           2 :     new_parser->specified_attributes = self->specified_attributes;
     957           2 :     new_parser->in_callback = 0;
     958           2 :     new_parser->ns_prefixes = self->ns_prefixes;
     959           2 :     new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
     960             :                                                         encoding);
     961           2 :     new_parser->handlers = 0;
     962           2 :     new_parser->intern = self->intern;
     963           2 :     Py_XINCREF(new_parser->intern);
     964             : 
     965           2 :     if (self->buffer != NULL) {
     966           1 :         new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
     967           1 :         if (new_parser->buffer == NULL) {
     968           0 :             Py_DECREF(new_parser);
     969           0 :             return PyErr_NoMemory();
     970             :         }
     971             :     }
     972           2 :     if (!new_parser->itself) {
     973           0 :         Py_DECREF(new_parser);
     974           0 :         return PyErr_NoMemory();
     975             :     }
     976             : 
     977           2 :     XML_SetUserData(new_parser->itself, (void *)new_parser);
     978             : 
     979             :     /* allocate and clear handlers first */
     980          46 :     for (i = 0; handler_info[i].name != NULL; i++)
     981             :         /* do nothing */;
     982             : 
     983           2 :     new_parser->handlers = PyMem_New(PyObject *, i);
     984           2 :     if (!new_parser->handlers) {
     985           0 :         Py_DECREF(new_parser);
     986           0 :         return PyErr_NoMemory();
     987             :     }
     988           2 :     clear_handlers(new_parser, 1);
     989             : 
     990             :     /* then copy handlers from self */
     991          46 :     for (i = 0; handler_info[i].name != NULL; i++) {
     992          44 :         PyObject *handler = self->handlers[i];
     993          44 :         if (handler != NULL) {
     994          11 :             Py_INCREF(handler);
     995          11 :             new_parser->handlers[i] = handler;
     996          11 :             handler_info[i].setter(new_parser->itself,
     997             :                                    handler_info[i].handler);
     998             :         }
     999             :     }
    1000             : 
    1001           2 :     PyObject_GC_Track(new_parser);
    1002           2 :     return (PyObject *)new_parser;
    1003             : }
    1004             : 
    1005             : /*[clinic input]
    1006             : pyexpat.xmlparser.SetParamEntityParsing
    1007             : 
    1008             :     flag: int
    1009             :     /
    1010             : 
    1011             : Controls parsing of parameter entities (including the external DTD subset).
    1012             : 
    1013             : Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
    1014             : XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
    1015             : XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
    1016             : was successful.
    1017             : [clinic start generated code]*/
    1018             : 
    1019             : static PyObject *
    1020         246 : pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
    1021             : /*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
    1022             : {
    1023         246 :     flag = XML_SetParamEntityParsing(self->itself, flag);
    1024         246 :     return PyLong_FromLong(flag);
    1025             : }
    1026             : 
    1027             : 
    1028             : #if XML_COMBINED_VERSION >= 19505
    1029             : /*[clinic input]
    1030             : pyexpat.xmlparser.UseForeignDTD
    1031             : 
    1032             :     cls: defining_class
    1033             :     flag: bool = True
    1034             :     /
    1035             : 
    1036             : Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
    1037             : 
    1038             : This readily allows the use of a 'default' document type controlled by the
    1039             : application, while still getting the advantage of providing document type
    1040             : information to the parser. 'flag' defaults to True if not provided.
    1041             : [clinic start generated code]*/
    1042             : 
    1043             : static PyObject *
    1044           3 : pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls,
    1045             :                                      int flag)
    1046             : /*[clinic end generated code: output=d7d98252bd25a20f input=23440ecb0573fb29]*/
    1047             : {
    1048           3 :     pyexpat_state *state = PyType_GetModuleState(cls);
    1049             :     enum XML_Error rc;
    1050             : 
    1051           3 :     rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
    1052           3 :     if (rc != XML_ERROR_NONE) {
    1053           0 :         return set_error(state, self, rc);
    1054             :     }
    1055           3 :     Py_RETURN_NONE;
    1056             : }
    1057             : #endif
    1058             : 
    1059             : static struct PyMethodDef xmlparse_methods[] = {
    1060             :     PYEXPAT_XMLPARSER_PARSE_METHODDEF
    1061             :     PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
    1062             :     PYEXPAT_XMLPARSER_SETBASE_METHODDEF
    1063             :     PYEXPAT_XMLPARSER_GETBASE_METHODDEF
    1064             :     PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
    1065             :     PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
    1066             :     PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
    1067             : #if XML_COMBINED_VERSION >= 19505
    1068             :     PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
    1069             : #endif
    1070             :     {NULL, NULL}  /* sentinel */
    1071             : };
    1072             : 
    1073             : /* ---------- */
    1074             : 
    1075             : 
    1076             : 
    1077             : /* pyexpat international encoding support.
    1078             :    Make it as simple as possible.
    1079             : */
    1080             : 
    1081             : static int
    1082         188 : PyUnknownEncodingHandler(void *encodingHandlerData,
    1083             :                          const XML_Char *name,
    1084             :                          XML_Encoding *info)
    1085             : {
    1086             :     static unsigned char template_buffer[256] = {0};
    1087             :     PyObject* u;
    1088             :     int i;
    1089             :     const void *data;
    1090             :     int kind;
    1091             : 
    1092         188 :     if (PyErr_Occurred())
    1093           0 :         return XML_STATUS_ERROR;
    1094             : 
    1095         188 :     if (template_buffer[1] == 0) {
    1096        1028 :         for (i = 0; i < 256; i++)
    1097        1024 :             template_buffer[i] = i;
    1098             :     }
    1099             : 
    1100         188 :     u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
    1101         188 :     if (u == NULL || PyUnicode_READY(u)) {
    1102           4 :         Py_XDECREF(u);
    1103           4 :         return XML_STATUS_ERROR;
    1104             :     }
    1105             : 
    1106         184 :     if (PyUnicode_GET_LENGTH(u) != 256) {
    1107          36 :         Py_DECREF(u);
    1108          36 :         PyErr_SetString(PyExc_ValueError,
    1109             :                         "multi-byte encodings are not supported");
    1110          36 :         return XML_STATUS_ERROR;
    1111             :     }
    1112             : 
    1113         148 :     kind = PyUnicode_KIND(u);
    1114         148 :     data = PyUnicode_DATA(u);
    1115       38036 :     for (i = 0; i < 256; i++) {
    1116       37888 :         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
    1117       37888 :         if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
    1118       34638 :             info->map[i] = ch;
    1119             :         else
    1120        3250 :             info->map[i] = -1;
    1121             :     }
    1122             : 
    1123         148 :     info->data = NULL;
    1124         148 :     info->convert = NULL;
    1125         148 :     info->release = NULL;
    1126         148 :     Py_DECREF(u);
    1127             : 
    1128         148 :     return XML_STATUS_OK;
    1129             : }
    1130             : 
    1131             : 
    1132             : static PyObject *
    1133        1386 : newxmlparseobject(pyexpat_state *state, const char *encoding,
    1134             :                   const char *namespace_separator, PyObject *intern)
    1135             : {
    1136             :     int i;
    1137             :     xmlparseobject *self;
    1138             : 
    1139        1386 :     self = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
    1140        1386 :     if (self == NULL)
    1141           0 :         return NULL;
    1142             : 
    1143        1386 :     self->buffer = NULL;
    1144        1386 :     self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
    1145        1386 :     self->buffer_used = 0;
    1146        1386 :     self->ordered_attributes = 0;
    1147        1386 :     self->specified_attributes = 0;
    1148        1386 :     self->in_callback = 0;
    1149        1386 :     self->ns_prefixes = 0;
    1150        1386 :     self->handlers = NULL;
    1151        1386 :     self->intern = intern;
    1152        1386 :     Py_XINCREF(self->intern);
    1153             : 
    1154             :     /* namespace_separator is either NULL or contains one char + \0 */
    1155        1386 :     self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
    1156             :                                        namespace_separator);
    1157        1386 :     if (self->itself == NULL) {
    1158           0 :         PyErr_SetString(PyExc_RuntimeError,
    1159             :                         "XML_ParserCreate failed");
    1160           0 :         Py_DECREF(self);
    1161           0 :         return NULL;
    1162             :     }
    1163             : #if XML_COMBINED_VERSION >= 20100
    1164             :     /* This feature was added upstream in libexpat 2.1.0. */
    1165        1386 :     XML_SetHashSalt(self->itself,
    1166        1386 :                     (unsigned long)_Py_HashSecret.expat.hashsalt);
    1167             : #endif
    1168        1386 :     XML_SetUserData(self->itself, (void *)self);
    1169        1386 :     XML_SetUnknownEncodingHandler(self->itself,
    1170             :                   (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
    1171             : 
    1172       31878 :     for (i = 0; handler_info[i].name != NULL; i++)
    1173             :         /* do nothing */;
    1174             : 
    1175        1386 :     self->handlers = PyMem_New(PyObject *, i);
    1176        1386 :     if (!self->handlers) {
    1177           0 :         Py_DECREF(self);
    1178           0 :         return PyErr_NoMemory();
    1179             :     }
    1180        1386 :     clear_handlers(self, 1);
    1181             : 
    1182        1386 :     PyObject_GC_Track(self);
    1183        1386 :     return (PyObject*)self;
    1184             : }
    1185             : 
    1186             : static int
    1187        1864 : xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
    1188             : {
    1189       42872 :     for (int i = 0; handler_info[i].name != NULL; i++) {
    1190       41008 :         Py_VISIT(op->handlers[i]);
    1191             :     }
    1192        1864 :     Py_VISIT(Py_TYPE(op));
    1193        1864 :     return 0;
    1194             : }
    1195             : 
    1196             : static int
    1197        1389 : xmlparse_clear(xmlparseobject *op)
    1198             : {
    1199        1389 :     clear_handlers(op, 0);
    1200        1389 :     Py_CLEAR(op->intern);
    1201        1389 :     return 0;
    1202             : }
    1203             : 
    1204             : static void
    1205        1388 : xmlparse_dealloc(xmlparseobject *self)
    1206             : {
    1207        1388 :     PyObject_GC_UnTrack(self);
    1208        1388 :     (void)xmlparse_clear(self);
    1209        1388 :     if (self->itself != NULL)
    1210        1388 :         XML_ParserFree(self->itself);
    1211        1388 :     self->itself = NULL;
    1212             : 
    1213        1388 :     if (self->handlers != NULL) {
    1214        1388 :         PyMem_Free(self->handlers);
    1215        1388 :         self->handlers = NULL;
    1216             :     }
    1217        1388 :     if (self->buffer != NULL) {
    1218         611 :         PyMem_Free(self->buffer);
    1219         611 :         self->buffer = NULL;
    1220             :     }
    1221        1388 :     PyTypeObject *tp = Py_TYPE(self);
    1222        1388 :     PyObject_GC_Del(self);
    1223        1388 :     Py_DECREF(tp);
    1224        1388 : }
    1225             : 
    1226             : 
    1227             : static PyObject *
    1228           0 : xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
    1229             : {
    1230           0 :     assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
    1231           0 :     int handlernum = (int)(hi - handler_info);
    1232           0 :     PyObject *result = self->handlers[handlernum];
    1233           0 :     if (result == NULL)
    1234           0 :         result = Py_None;
    1235           0 :     Py_INCREF(result);
    1236           0 :     return result;
    1237             : }
    1238             : 
    1239             : static int
    1240        8844 : xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
    1241             : {
    1242        8844 :     assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
    1243        8844 :     int handlernum = (int)(hi - handler_info);
    1244        8844 :     if (v == NULL) {
    1245           0 :         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
    1246           0 :         return -1;
    1247             :     }
    1248        8844 :     if (handlernum == CharacterData) {
    1249             :         /* If we're changing the character data handler, flush all
    1250             :          * cached data with the old handler.  Not sure there's a
    1251             :          * "right" thing to do, though, but this probably won't
    1252             :          * happen.
    1253             :          */
    1254        1349 :         if (flush_character_buffer(self) < 0)
    1255           0 :             return -1;
    1256             :     }
    1257             : 
    1258        8844 :     xmlhandler c_handler = NULL;
    1259        8844 :     if (v == Py_None) {
    1260             :         /* If this is the character data handler, and a character
    1261             :            data handler is already active, we need to be more
    1262             :            careful.  What we can safely do is replace the existing
    1263             :            character data handler callback function with a no-op
    1264             :            function that will refuse to call Python.  The downside
    1265             :            is that this doesn't completely remove the character
    1266             :            data handler from the C layer if there's any callback
    1267             :            active, so Expat does a little more work than it
    1268             :            otherwise would, but that's really an odd case.  A more
    1269             :            elaborate system of handlers and state could remove the
    1270             :            C handler more effectively. */
    1271          15 :         if (handlernum == CharacterData && self->in_callback)
    1272           0 :             c_handler = noop_character_data_handler;
    1273          15 :         v = NULL;
    1274             :     }
    1275        8829 :     else if (v != NULL) {
    1276        8829 :         Py_INCREF(v);
    1277        8829 :         c_handler = handler_info[handlernum].handler;
    1278             :     }
    1279        8844 :     Py_XSETREF(self->handlers[handlernum], v);
    1280        8844 :     handler_info[handlernum].setter(self->itself, c_handler);
    1281        8844 :     return 0;
    1282             : }
    1283             : 
    1284             : #define INT_GETTER(name) \
    1285             :     static PyObject * \
    1286             :     xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
    1287             :     { \
    1288             :         return PyLong_FromLong((long) XML_Get##name(self->itself)); \
    1289             :     }
    1290           0 : INT_GETTER(ErrorCode)
    1291          14 : INT_GETTER(ErrorLineNumber)
    1292          14 : INT_GETTER(ErrorColumnNumber)
    1293           0 : INT_GETTER(ErrorByteIndex)
    1294          17 : INT_GETTER(CurrentLineNumber)
    1295           6 : INT_GETTER(CurrentColumnNumber)
    1296           6 : INT_GETTER(CurrentByteIndex)
    1297             : 
    1298             : #undef INT_GETTER
    1299             : 
    1300             : static PyObject *
    1301           9 : xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
    1302             : {
    1303           9 :     return PyBool_FromLong(self->buffer != NULL);
    1304             : }
    1305             : 
    1306             : static int
    1307         618 : xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
    1308             : {
    1309         618 :     if (v == NULL) {
    1310           0 :         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
    1311           0 :         return -1;
    1312             :     }
    1313         618 :     int b = PyObject_IsTrue(v);
    1314         618 :     if (b < 0)
    1315           0 :         return -1;
    1316         618 :     if (b) {
    1317         614 :         if (self->buffer == NULL) {
    1318         610 :             self->buffer = PyMem_Malloc(self->buffer_size);
    1319         610 :             if (self->buffer == NULL) {
    1320           0 :                 PyErr_NoMemory();
    1321           0 :                 return -1;
    1322             :             }
    1323         610 :             self->buffer_used = 0;
    1324             :         }
    1325             :     }
    1326           4 :     else if (self->buffer != NULL) {
    1327           3 :         if (flush_character_buffer(self) < 0)
    1328           0 :             return -1;
    1329           3 :         PyMem_Free(self->buffer);
    1330           3 :         self->buffer = NULL;
    1331             :     }
    1332         618 :     return 0;
    1333             : }
    1334             : 
    1335             : static PyObject *
    1336          11 : xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
    1337             : {
    1338          11 :     return PyLong_FromLong((long) self->buffer_size);
    1339             : }
    1340             : 
    1341             : static int
    1342          13 : xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
    1343             : {
    1344          13 :     if (v == NULL) {
    1345           0 :         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
    1346           0 :         return -1;
    1347             :     }
    1348             :     long new_buffer_size;
    1349          13 :     if (!PyLong_Check(v)) {
    1350           1 :         PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
    1351           1 :         return -1;
    1352             :     }
    1353             : 
    1354          12 :     new_buffer_size = PyLong_AsLong(v);
    1355          12 :     if (new_buffer_size <= 0) {
    1356           3 :         if (!PyErr_Occurred())
    1357           2 :             PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
    1358           3 :         return -1;
    1359             :     }
    1360             : 
    1361             :     /* trivial case -- no change */
    1362           9 :     if (new_buffer_size == self->buffer_size) {
    1363           1 :         return 0;
    1364             :     }
    1365             : 
    1366             :     /* check maximum */
    1367           8 :     if (new_buffer_size > INT_MAX) {
    1368             :         char errmsg[100];
    1369           0 :         sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
    1370           0 :         PyErr_SetString(PyExc_ValueError, errmsg);
    1371           0 :         return -1;
    1372             :     }
    1373             : 
    1374           8 :     if (self->buffer != NULL) {
    1375             :         /* there is already a buffer */
    1376           5 :         if (self->buffer_used != 0) {
    1377           0 :             if (flush_character_buffer(self) < 0) {
    1378           0 :                 return -1;
    1379             :             }
    1380             :         }
    1381             :         /* free existing buffer */
    1382           5 :         PyMem_Free(self->buffer);
    1383             :     }
    1384           8 :     self->buffer = PyMem_Malloc(new_buffer_size);
    1385           8 :     if (self->buffer == NULL) {
    1386           0 :         PyErr_NoMemory();
    1387           0 :         return -1;
    1388             :     }
    1389           8 :     self->buffer_size = new_buffer_size;
    1390           8 :     return 0;
    1391             : }
    1392             : 
    1393             : static PyObject *
    1394           0 : xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
    1395             : {
    1396           0 :     return PyLong_FromLong((long) self->buffer_used);
    1397             : }
    1398             : 
    1399             : static PyObject *
    1400           5 : xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
    1401             : {
    1402           5 :     return PyBool_FromLong(self->ns_prefixes);
    1403             : }
    1404             : 
    1405             : static int
    1406         115 : xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
    1407             : {
    1408         115 :     if (v == NULL) {
    1409           0 :         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
    1410           0 :         return -1;
    1411             :     }
    1412         115 :     int b = PyObject_IsTrue(v);
    1413         115 :     if (b < 0)
    1414           0 :         return -1;
    1415         115 :     self->ns_prefixes = b;
    1416         115 :     XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
    1417         115 :     return 0;
    1418             : }
    1419             : 
    1420             : static PyObject *
    1421           5 : xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
    1422             : {
    1423           5 :     return PyBool_FromLong(self->ordered_attributes);
    1424             : }
    1425             : 
    1426             : static int
    1427         611 : xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
    1428             : {
    1429         611 :     if (v == NULL) {
    1430           0 :         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
    1431           0 :         return -1;
    1432             :     }
    1433         611 :     int b = PyObject_IsTrue(v);
    1434         611 :     if (b < 0)
    1435           0 :         return -1;
    1436         611 :     self->ordered_attributes = b;
    1437         611 :     return 0;
    1438             : }
    1439             : 
    1440             : static PyObject *
    1441           5 : xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
    1442             : {
    1443           5 :     return PyBool_FromLong((long) self->specified_attributes);
    1444             : }
    1445             : 
    1446             : static int
    1447         103 : xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
    1448             : {
    1449         103 :     if (v == NULL) {
    1450           0 :         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
    1451           0 :         return -1;
    1452             :     }
    1453         103 :     int b = PyObject_IsTrue(v);
    1454         103 :     if (b < 0)
    1455           0 :         return -1;
    1456         103 :     self->specified_attributes = b;
    1457         103 :     return 0;
    1458             : }
    1459             : 
    1460             : static PyMemberDef xmlparse_members[] = {
    1461             :     {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
    1462             :     {NULL}
    1463             : };
    1464             : 
    1465             : #define XMLPARSE_GETTER_DEF(name) \
    1466             :     {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
    1467             : #define XMLPARSE_GETTER_SETTER_DEF(name) \
    1468             :     {#name, (getter)xmlparse_##name##_getter, \
    1469             :             (setter)xmlparse_##name##_setter, NULL},
    1470             : 
    1471             : static PyGetSetDef xmlparse_getsetlist[] = {
    1472             :     XMLPARSE_GETTER_DEF(ErrorCode)
    1473             :     XMLPARSE_GETTER_DEF(ErrorLineNumber)
    1474             :     XMLPARSE_GETTER_DEF(ErrorColumnNumber)
    1475             :     XMLPARSE_GETTER_DEF(ErrorByteIndex)
    1476             :     XMLPARSE_GETTER_DEF(CurrentLineNumber)
    1477             :     XMLPARSE_GETTER_DEF(CurrentColumnNumber)
    1478             :     XMLPARSE_GETTER_DEF(CurrentByteIndex)
    1479             :     XMLPARSE_GETTER_SETTER_DEF(buffer_size)
    1480             :     XMLPARSE_GETTER_SETTER_DEF(buffer_text)
    1481             :     XMLPARSE_GETTER_DEF(buffer_used)
    1482             :     XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
    1483             :     XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
    1484             :     XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
    1485             :     {NULL},
    1486             : };
    1487             : 
    1488             : #undef XMLPARSE_GETTER_DEF
    1489             : #undef XMLPARSE_GETTER_SETTER_DEF
    1490             : 
    1491             : PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
    1492             : 
    1493             : static PyType_Slot _xml_parse_type_spec_slots[] = {
    1494             :     {Py_tp_dealloc, xmlparse_dealloc},
    1495             :     {Py_tp_doc, (void *)Xmlparsetype__doc__},
    1496             :     {Py_tp_traverse, xmlparse_traverse},
    1497             :     {Py_tp_clear, xmlparse_clear},
    1498             :     {Py_tp_methods, xmlparse_methods},
    1499             :     {Py_tp_members, xmlparse_members},
    1500             :     {Py_tp_getset, xmlparse_getsetlist},
    1501             :     {0, 0}
    1502             : };
    1503             : 
    1504             : static PyType_Spec _xml_parse_type_spec = {
    1505             :     .name = "pyexpat.xmlparser",
    1506             :     .basicsize = sizeof(xmlparseobject),
    1507             :     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
    1508             :               Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
    1509             :     .slots = _xml_parse_type_spec_slots,
    1510             : };
    1511             : 
    1512             : /* End of code for xmlparser objects */
    1513             : /* -------------------------------------------------------- */
    1514             : 
    1515             : /*[clinic input]
    1516             : pyexpat.ParserCreate
    1517             : 
    1518             :     encoding: str(accept={str, NoneType}) = None
    1519             :     namespace_separator: str(accept={str, NoneType}) = None
    1520             :     intern: object = NULL
    1521             : 
    1522             : Return a new XML parser object.
    1523             : [clinic start generated code]*/
    1524             : 
    1525             : static PyObject *
    1526        1387 : pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
    1527             :                           const char *namespace_separator, PyObject *intern)
    1528             : /*[clinic end generated code: output=295c0cf01ab1146c input=e8da8e8d7122cb5d]*/
    1529             : {
    1530             :     PyObject *result;
    1531        1387 :     int intern_decref = 0;
    1532             : 
    1533        1387 :     if (namespace_separator != NULL
    1534         622 :         && strlen(namespace_separator) > 1) {
    1535           1 :         PyErr_SetString(PyExc_ValueError,
    1536             :                         "namespace_separator must be at most one"
    1537             :                         " character, omitted, or None");
    1538           1 :         return NULL;
    1539             :     }
    1540             :     /* Explicitly passing None means no interning is desired.
    1541             :        Not passing anything means that a new dictionary is used. */
    1542        1386 :     if (intern == Py_None)
    1543         243 :         intern = NULL;
    1544        1143 :     else if (intern == NULL) {
    1545        1143 :         intern = PyDict_New();
    1546        1143 :         if (!intern)
    1547           0 :             return NULL;
    1548        1143 :         intern_decref = 1;
    1549             :     }
    1550           0 :     else if (!PyDict_Check(intern)) {
    1551           0 :         PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
    1552           0 :         return NULL;
    1553             :     }
    1554             : 
    1555        1386 :     pyexpat_state *state = pyexpat_get_state(module);
    1556        1386 :     result = newxmlparseobject(state, encoding, namespace_separator, intern);
    1557        1386 :     if (intern_decref) {
    1558        1143 :         Py_DECREF(intern);
    1559             :     }
    1560        1386 :     return result;
    1561             : }
    1562             : 
    1563             : /*[clinic input]
    1564             : pyexpat.ErrorString
    1565             : 
    1566             :     code: long
    1567             :     /
    1568             : 
    1569             : Returns string error for given number.
    1570             : [clinic start generated code]*/
    1571             : 
    1572             : static PyObject *
    1573           8 : pyexpat_ErrorString_impl(PyObject *module, long code)
    1574             : /*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
    1575             : {
    1576           8 :     return Py_BuildValue("z", XML_ErrorString((int)code));
    1577             : }
    1578             : 
    1579             : /* List of methods defined in the module */
    1580             : 
    1581             : static struct PyMethodDef pyexpat_methods[] = {
    1582             :     PYEXPAT_PARSERCREATE_METHODDEF
    1583             :     PYEXPAT_ERRORSTRING_METHODDEF
    1584             :     {NULL, NULL}  /* sentinel */
    1585             : };
    1586             : 
    1587             : /* Module docstring */
    1588             : 
    1589             : PyDoc_STRVAR(pyexpat_module_documentation,
    1590             : "Python wrapper for Expat parser.");
    1591             : 
    1592             : /* Initialization function for the module */
    1593             : 
    1594             : #ifndef MODULE_NAME
    1595             : #define MODULE_NAME "pyexpat"
    1596             : #endif
    1597             : 
    1598          38 : static int init_handler_descrs(pyexpat_state *state)
    1599             : {
    1600             :     int i;
    1601          38 :     assert(!PyType_HasFeature(state->xml_parse_type, Py_TPFLAGS_VALID_VERSION_TAG));
    1602         874 :     for (i = 0; handler_info[i].name != NULL; i++) {
    1603         836 :         struct HandlerInfo *hi = &handler_info[i];
    1604         836 :         hi->getset.name = hi->name;
    1605         836 :         hi->getset.get = (getter)xmlparse_handler_getter;
    1606         836 :         hi->getset.set = (setter)xmlparse_handler_setter;
    1607         836 :         hi->getset.closure = &handler_info[i];
    1608             : 
    1609         836 :         PyObject *descr = PyDescr_NewGetSet(state->xml_parse_type, &hi->getset);
    1610         836 :         if (descr == NULL)
    1611           0 :             return -1;
    1612             : 
    1613         836 :         if (PyDict_SetDefault(state->xml_parse_type->tp_dict, PyDescr_NAME(descr), descr) == NULL) {
    1614           0 :             Py_DECREF(descr);
    1615           0 :             return -1;
    1616             :         }
    1617         836 :         Py_DECREF(descr);
    1618             :     }
    1619          38 :     return 0;
    1620             : }
    1621             : 
    1622             : static PyObject *
    1623          76 : add_submodule(PyObject *mod, const char *fullname)
    1624             : {
    1625          76 :     const char *name = strrchr(fullname, '.') + 1;
    1626             : 
    1627          76 :     PyObject *submodule = PyModule_New(fullname);
    1628          76 :     if (submodule == NULL) {
    1629           0 :         return NULL;
    1630             :     }
    1631             : 
    1632          76 :     PyObject *mod_name = PyUnicode_FromString(fullname);
    1633          76 :     if (mod_name == NULL) {
    1634           0 :         Py_DECREF(submodule);
    1635           0 :         return NULL;
    1636             :     }
    1637             : 
    1638          76 :     if (_PyImport_SetModule(mod_name, submodule) < 0) {
    1639           0 :         Py_DECREF(submodule);
    1640           0 :         Py_DECREF(mod_name);
    1641           0 :         return NULL;
    1642             :     }
    1643          76 :     Py_DECREF(mod_name);
    1644             : 
    1645             :     /* gives away the reference to the submodule */
    1646          76 :     if (PyModule_AddObject(mod, name, submodule) < 0) {
    1647           0 :         Py_DECREF(submodule);
    1648           0 :         return NULL;
    1649             :     }
    1650             : 
    1651          76 :     return submodule;
    1652             : }
    1653             : 
    1654             : struct ErrorInfo {
    1655             :     const char * name;  /* Error constant name, e.g. "XML_ERROR_NO_MEMORY" */
    1656             :     const char * description;  /* Error description as returned by XML_ErrorString(<int>) */
    1657             : };
    1658             : 
    1659             : static
    1660             : struct ErrorInfo error_info_of[] = {
    1661             :     {NULL, NULL},  /* XML_ERROR_NONE (value 0) is not exposed */
    1662             : 
    1663             :     {"XML_ERROR_NO_MEMORY", "out of memory"},
    1664             :     {"XML_ERROR_SYNTAX", "syntax error"},
    1665             :     {"XML_ERROR_NO_ELEMENTS", "no element found"},
    1666             :     {"XML_ERROR_INVALID_TOKEN", "not well-formed (invalid token)"},
    1667             :     {"XML_ERROR_UNCLOSED_TOKEN", "unclosed token"},
    1668             :     {"XML_ERROR_PARTIAL_CHAR", "partial character"},
    1669             :     {"XML_ERROR_TAG_MISMATCH", "mismatched tag"},
    1670             :     {"XML_ERROR_DUPLICATE_ATTRIBUTE", "duplicate attribute"},
    1671             :     {"XML_ERROR_JUNK_AFTER_DOC_ELEMENT", "junk after document element"},
    1672             :     {"XML_ERROR_PARAM_ENTITY_REF", "illegal parameter entity reference"},
    1673             :     {"XML_ERROR_UNDEFINED_ENTITY", "undefined entity"},
    1674             :     {"XML_ERROR_RECURSIVE_ENTITY_REF", "recursive entity reference"},
    1675             :     {"XML_ERROR_ASYNC_ENTITY", "asynchronous entity"},
    1676             :     {"XML_ERROR_BAD_CHAR_REF", "reference to invalid character number"},
    1677             :     {"XML_ERROR_BINARY_ENTITY_REF", "reference to binary entity"},
    1678             :     {"XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF", "reference to external entity in attribute"},
    1679             :     {"XML_ERROR_MISPLACED_XML_PI", "XML or text declaration not at start of entity"},
    1680             :     {"XML_ERROR_UNKNOWN_ENCODING", "unknown encoding"},
    1681             :     {"XML_ERROR_INCORRECT_ENCODING", "encoding specified in XML declaration is incorrect"},
    1682             :     {"XML_ERROR_UNCLOSED_CDATA_SECTION", "unclosed CDATA section"},
    1683             :     {"XML_ERROR_EXTERNAL_ENTITY_HANDLING", "error in processing external entity reference"},
    1684             :     {"XML_ERROR_NOT_STANDALONE", "document is not standalone"},
    1685             :     {"XML_ERROR_UNEXPECTED_STATE", "unexpected parser state - please send a bug report"},
    1686             :     {"XML_ERROR_ENTITY_DECLARED_IN_PE", "entity declared in parameter entity"},
    1687             :     {"XML_ERROR_FEATURE_REQUIRES_XML_DTD", "requested feature requires XML_DTD support in Expat"},
    1688             :     {"XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING", "cannot change setting once parsing has begun"},
    1689             : 
    1690             :     /* Added in Expat 1.95.7. */
    1691             :     {"XML_ERROR_UNBOUND_PREFIX", "unbound prefix"},
    1692             : 
    1693             :     /* Added in Expat 1.95.8. */
    1694             :     {"XML_ERROR_UNDECLARING_PREFIX", "must not undeclare prefix"},
    1695             :     {"XML_ERROR_INCOMPLETE_PE", "incomplete markup in parameter entity"},
    1696             :     {"XML_ERROR_XML_DECL", "XML declaration not well-formed"},
    1697             :     {"XML_ERROR_TEXT_DECL", "text declaration not well-formed"},
    1698             :     {"XML_ERROR_PUBLICID", "illegal character(s) in public id"},
    1699             :     {"XML_ERROR_SUSPENDED", "parser suspended"},
    1700             :     {"XML_ERROR_NOT_SUSPENDED", "parser not suspended"},
    1701             :     {"XML_ERROR_ABORTED", "parsing aborted"},
    1702             :     {"XML_ERROR_FINISHED", "parsing finished"},
    1703             :     {"XML_ERROR_SUSPEND_PE", "cannot suspend in external parameter entity"},
    1704             : 
    1705             :     /* Added in 2.0.0. */
    1706             :     {"XML_ERROR_RESERVED_PREFIX_XML", "reserved prefix (xml) must not be undeclared or bound to another namespace name"},
    1707             :     {"XML_ERROR_RESERVED_PREFIX_XMLNS", "reserved prefix (xmlns) must not be declared or undeclared"},
    1708             :     {"XML_ERROR_RESERVED_NAMESPACE_URI", "prefix must not be bound to one of the reserved namespace names"},
    1709             : 
    1710             :     /* Added in 2.2.1. */
    1711             :     {"XML_ERROR_INVALID_ARGUMENT", "invalid argument"},
    1712             : 
    1713             :     /* Added in 2.3.0. */
    1714             :     {"XML_ERROR_NO_BUFFER", "a successful prior call to function XML_GetBuffer is required"},
    1715             : 
    1716             :     /* Added in 2.4.0. */
    1717             :     {"XML_ERROR_AMPLIFICATION_LIMIT_BREACH", "limit on input amplification factor (from DTD and entities) breached"}
    1718             : };
    1719             : 
    1720             : static int
    1721        1634 : add_error(PyObject *errors_module, PyObject *codes_dict,
    1722             :           PyObject *rev_codes_dict, size_t error_index)
    1723             : {
    1724        1634 :     const char * const name = error_info_of[error_index].name;
    1725        1634 :     const int error_code = (int)error_index;
    1726             : 
    1727             :     /* NOTE: This keeps the source of truth regarding error
    1728             :      *       messages with libexpat and (by definiton) in bulletproof sync
    1729             :      *       with the other uses of the XML_ErrorString function
    1730             :      *       elsewhere within this file.  pyexpat's copy of the messages
    1731             :      *       only acts as a fallback in case of outdated runtime libexpat,
    1732             :      *       where it returns NULL. */
    1733        1634 :     const char *error_string = XML_ErrorString(error_code);
    1734        1634 :     if (error_string == NULL) {
    1735           0 :         error_string = error_info_of[error_index].description;
    1736             :     }
    1737             : 
    1738        1634 :     if (PyModule_AddStringConstant(errors_module, name, error_string) < 0) {
    1739           0 :         return -1;
    1740             :     }
    1741             : 
    1742        1634 :     PyObject *num = PyLong_FromLong(error_code);
    1743        1634 :     if (num == NULL) {
    1744           0 :         return -1;
    1745             :     }
    1746             : 
    1747        1634 :     if (PyDict_SetItemString(codes_dict, error_string, num) < 0) {
    1748           0 :         Py_DECREF(num);
    1749           0 :         return -1;
    1750             :     }
    1751             : 
    1752        1634 :     PyObject *str = PyUnicode_FromString(error_string);
    1753        1634 :     if (str == NULL) {
    1754           0 :         Py_DECREF(num);
    1755           0 :         return -1;
    1756             :     }
    1757             : 
    1758        1634 :     int res = PyDict_SetItem(rev_codes_dict, num, str);
    1759        1634 :     Py_DECREF(str);
    1760        1634 :     Py_DECREF(num);
    1761        1634 :     if (res < 0) {
    1762           0 :         return -1;
    1763             :     }
    1764             : 
    1765        1634 :     return 0;
    1766             : }
    1767             : 
    1768             : static int
    1769          38 : add_errors_module(PyObject *mod)
    1770             : {
    1771          38 :     PyObject *errors_module = add_submodule(mod, MODULE_NAME ".errors");
    1772          38 :     if (errors_module == NULL) {
    1773           0 :         return -1;
    1774             :     }
    1775             : 
    1776          38 :     PyObject *codes_dict = PyDict_New();
    1777          38 :     PyObject *rev_codes_dict = PyDict_New();
    1778          38 :     if (codes_dict == NULL || rev_codes_dict == NULL) {
    1779           0 :         goto error;
    1780             :     }
    1781             : 
    1782          38 :     size_t error_index = 0;
    1783        1710 :     for (; error_index < sizeof(error_info_of) / sizeof(struct ErrorInfo); error_index++) {
    1784        1672 :         if (error_info_of[error_index].name == NULL) {
    1785          38 :             continue;
    1786             :         }
    1787             : 
    1788        1634 :         if (add_error(errors_module, codes_dict, rev_codes_dict, error_index) < 0) {
    1789           0 :             goto error;
    1790             :         }
    1791             :     }
    1792             : 
    1793          38 :     if (PyModule_AddStringConstant(errors_module, "__doc__",
    1794             :                                    "Constants used to describe "
    1795             :                                    "error conditions.") < 0) {
    1796           0 :         goto error;
    1797             :     }
    1798             : 
    1799          38 :     Py_INCREF(codes_dict);
    1800          38 :     if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0) {
    1801           0 :         Py_DECREF(codes_dict);
    1802           0 :         goto error;
    1803             :     }
    1804          38 :     Py_CLEAR(codes_dict);
    1805             : 
    1806          38 :     Py_INCREF(rev_codes_dict);
    1807          38 :     if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0) {
    1808           0 :         Py_DECREF(rev_codes_dict);
    1809           0 :         goto error;
    1810             :     }
    1811          38 :     Py_CLEAR(rev_codes_dict);
    1812             : 
    1813          38 :     return 0;
    1814             : 
    1815           0 : error:
    1816           0 :     Py_XDECREF(codes_dict);
    1817           0 :     Py_XDECREF(rev_codes_dict);
    1818           0 :     return -1;
    1819             : }
    1820             : 
    1821             : static int
    1822          38 : add_model_module(PyObject *mod)
    1823             : {
    1824          38 :     PyObject *model_module = add_submodule(mod, MODULE_NAME ".model");
    1825          38 :     if (model_module == NULL) {
    1826           0 :         return -1;
    1827             :     }
    1828             : 
    1829             : #define MYCONST(c)  do {                                        \
    1830             :         if (PyModule_AddIntConstant(model_module, #c, c) < 0) { \
    1831             :             return -1;                                          \
    1832             :         }                                                       \
    1833             :     } while(0)
    1834             : 
    1835          38 :     if (PyModule_AddStringConstant(
    1836             :         model_module, "__doc__",
    1837             :         "Constants used to interpret content model information.") < 0) {
    1838           0 :         return -1;
    1839             :     }
    1840             : 
    1841          38 :     MYCONST(XML_CTYPE_EMPTY);
    1842          38 :     MYCONST(XML_CTYPE_ANY);
    1843          38 :     MYCONST(XML_CTYPE_MIXED);
    1844          38 :     MYCONST(XML_CTYPE_NAME);
    1845          38 :     MYCONST(XML_CTYPE_CHOICE);
    1846          38 :     MYCONST(XML_CTYPE_SEQ);
    1847             : 
    1848          38 :     MYCONST(XML_CQUANT_NONE);
    1849          38 :     MYCONST(XML_CQUANT_OPT);
    1850          38 :     MYCONST(XML_CQUANT_REP);
    1851          38 :     MYCONST(XML_CQUANT_PLUS);
    1852             : #undef MYCONST
    1853          38 :     return 0;
    1854             : }
    1855             : 
    1856             : #if XML_COMBINED_VERSION > 19505
    1857             : static int
    1858          38 : add_features(PyObject *mod)
    1859             : {
    1860          38 :     PyObject *list = PyList_New(0);
    1861          38 :     if (list == NULL) {
    1862           0 :         return -1;
    1863             :     }
    1864             : 
    1865          38 :     const XML_Feature *features = XML_GetFeatureList();
    1866         304 :     for (size_t i = 0; features[i].feature != XML_FEATURE_END; ++i) {
    1867         266 :         PyObject *item = Py_BuildValue("si", features[i].name,
    1868         266 :                                        features[i].value);
    1869         266 :         if (item == NULL) {
    1870           0 :             goto error;
    1871             :         }
    1872         266 :         int ok = PyList_Append(list, item);
    1873         266 :         Py_DECREF(item);
    1874         266 :         if (ok < 0) {
    1875           0 :             goto error;
    1876             :         }
    1877             :     }
    1878          38 :     if (PyModule_AddObject(mod, "features", list) < 0) {
    1879           0 :         goto error;
    1880             :     }
    1881          38 :     return 0;
    1882             : 
    1883           0 : error:
    1884           0 :     Py_DECREF(list);
    1885           0 :     return -1;
    1886             : }
    1887             : #endif
    1888             : 
    1889             : static void
    1890          38 : pyexpat_destructor(PyObject *op)
    1891             : {
    1892          38 :     void *p = PyCapsule_GetPointer(op, PyExpat_CAPSULE_NAME);
    1893          38 :     PyMem_Free(p);
    1894          38 : }
    1895             : 
    1896             : static int
    1897          38 : pyexpat_exec(PyObject *mod)
    1898             : {
    1899          38 :     pyexpat_state *state = pyexpat_get_state(mod);
    1900          38 :     state->str_read = PyUnicode_InternFromString("read");
    1901          38 :     if (state->str_read == NULL) {
    1902           0 :         return -1;
    1903             :     }
    1904          38 :     state->xml_parse_type = (PyTypeObject *)PyType_FromModuleAndSpec(
    1905             :         mod, &_xml_parse_type_spec, NULL);
    1906             : 
    1907          38 :     if (state->xml_parse_type == NULL) {
    1908           0 :         return -1;
    1909             :     }
    1910             : 
    1911          38 :     if (init_handler_descrs(state) < 0) {
    1912           0 :         return -1;
    1913             :     }
    1914          38 :     state->error = PyErr_NewException("xml.parsers.expat.ExpatError",
    1915             :                                       NULL, NULL);
    1916          38 :     if (state->error == NULL) {
    1917           0 :         return -1;
    1918             :     }
    1919             : 
    1920             :     /* Add some symbolic constants to the module */
    1921             : 
    1922          38 :     if (PyModule_AddObjectRef(mod, "error", state->error) < 0) {
    1923           0 :         return -1;
    1924             :     }
    1925             : 
    1926          38 :     if (PyModule_AddObjectRef(mod, "ExpatError", state->error) < 0) {
    1927           0 :         return -1;
    1928             :     }
    1929             : 
    1930          38 :     if (PyModule_AddObjectRef(mod, "XMLParserType",
    1931          38 :                            (PyObject *) state->xml_parse_type) < 0) {
    1932           0 :         return -1;
    1933             :     }
    1934             : 
    1935          38 :     if (PyModule_AddStringConstant(mod, "EXPAT_VERSION",
    1936          38 :                                    XML_ExpatVersion()) < 0) {
    1937           0 :         return -1;
    1938             :     }
    1939             :     {
    1940          38 :         XML_Expat_Version info = XML_ExpatVersionInfo();
    1941          38 :         PyObject *versionInfo = Py_BuildValue("(iii)",
    1942             :                                               info.major,
    1943             :                                               info.minor,
    1944             :                                               info.micro);
    1945          38 :         if (PyModule_AddObject(mod, "version_info", versionInfo) < 0) {
    1946           0 :             Py_DECREF(versionInfo);
    1947           0 :             return -1;
    1948             :         }
    1949             :     }
    1950             :     /* XXX When Expat supports some way of figuring out how it was
    1951             :        compiled, this should check and set native_encoding
    1952             :        appropriately.
    1953             :     */
    1954          38 :     if (PyModule_AddStringConstant(mod, "native_encoding", "UTF-8") < 0) {
    1955           0 :         return -1;
    1956             :     }
    1957             : 
    1958          38 :     if (add_errors_module(mod) < 0) {
    1959           0 :         return -1;
    1960             :     }
    1961             : 
    1962          38 :     if (add_model_module(mod) < 0) {
    1963           0 :         return -1;
    1964             :     }
    1965             : 
    1966             : #if XML_COMBINED_VERSION > 19505
    1967          38 :     if (add_features(mod) < 0) {
    1968           0 :         return -1;
    1969             :     }
    1970             : #endif
    1971             : 
    1972             : #define MYCONST(c) do {                                 \
    1973             :         if (PyModule_AddIntConstant(mod, #c, c) < 0) {  \
    1974             :             return -1;                                  \
    1975             :         }                                               \
    1976             :     } while(0)
    1977             : 
    1978          38 :     MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
    1979          38 :     MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
    1980          38 :     MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
    1981             : #undef MYCONST
    1982             : 
    1983          38 :     struct PyExpat_CAPI *capi = PyMem_Calloc(1, sizeof(struct PyExpat_CAPI));
    1984          38 :     if (capi == NULL) {
    1985           0 :         PyErr_NoMemory();
    1986           0 :         return -1;
    1987             :     }
    1988             :     /* initialize pyexpat dispatch table */
    1989          38 :     capi->size = sizeof(*capi);
    1990          38 :     capi->magic = PyExpat_CAPI_MAGIC;
    1991          38 :     capi->MAJOR_VERSION = XML_MAJOR_VERSION;
    1992          38 :     capi->MINOR_VERSION = XML_MINOR_VERSION;
    1993          38 :     capi->MICRO_VERSION = XML_MICRO_VERSION;
    1994          38 :     capi->ErrorString = XML_ErrorString;
    1995          38 :     capi->GetErrorCode = XML_GetErrorCode;
    1996          38 :     capi->GetErrorColumnNumber = XML_GetErrorColumnNumber;
    1997          38 :     capi->GetErrorLineNumber = XML_GetErrorLineNumber;
    1998          38 :     capi->Parse = XML_Parse;
    1999          38 :     capi->ParserCreate_MM = XML_ParserCreate_MM;
    2000          38 :     capi->ParserFree = XML_ParserFree;
    2001          38 :     capi->SetCharacterDataHandler = XML_SetCharacterDataHandler;
    2002          38 :     capi->SetCommentHandler = XML_SetCommentHandler;
    2003          38 :     capi->SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
    2004          38 :     capi->SetElementHandler = XML_SetElementHandler;
    2005          38 :     capi->SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
    2006          38 :     capi->SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
    2007          38 :     capi->SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
    2008          38 :     capi->SetUserData = XML_SetUserData;
    2009          38 :     capi->SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
    2010          38 :     capi->SetEncoding = XML_SetEncoding;
    2011          38 :     capi->DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
    2012             : #if XML_COMBINED_VERSION >= 20100
    2013          38 :     capi->SetHashSalt = XML_SetHashSalt;
    2014             : #else
    2015             :     capi->SetHashSalt = NULL;
    2016             : #endif
    2017             : 
    2018             :     /* export using capsule */
    2019          38 :     PyObject *capi_object = PyCapsule_New(capi, PyExpat_CAPSULE_NAME,
    2020             :                                           pyexpat_destructor);
    2021          38 :     if (capi_object == NULL) {
    2022           0 :         PyMem_Free(capi);
    2023           0 :         return -1;
    2024             :     }
    2025             : 
    2026          38 :     if (PyModule_AddObject(mod, "expat_CAPI", capi_object) < 0) {
    2027           0 :         Py_DECREF(capi_object);
    2028           0 :         return -1;
    2029             :     }
    2030             : 
    2031          38 :     return 0;
    2032             : }
    2033             : 
    2034             : static int
    2035        1740 : pyexpat_traverse(PyObject *module, visitproc visit, void *arg)
    2036             : {
    2037        1740 :     pyexpat_state *state = pyexpat_get_state(module);
    2038        1740 :     Py_VISIT(state->xml_parse_type);
    2039        1740 :     Py_VISIT(state->error);
    2040        1740 :     Py_VISIT(state->str_read);
    2041        1740 :     return 0;
    2042             : }
    2043             : 
    2044             : static int
    2045          63 : pyexpat_clear(PyObject *module)
    2046             : {
    2047          63 :     pyexpat_state *state = pyexpat_get_state(module);
    2048          63 :     Py_CLEAR(state->xml_parse_type);
    2049          63 :     Py_CLEAR(state->error);
    2050          63 :     Py_CLEAR(state->str_read);
    2051          63 :     return 0;
    2052             : }
    2053             : 
    2054             : static void
    2055          38 : pyexpat_free(void *module)
    2056             : {
    2057          38 :     pyexpat_clear((PyObject *)module);
    2058          38 : }
    2059             : 
    2060             : static PyModuleDef_Slot pyexpat_slots[] = {
    2061             :     {Py_mod_exec, pyexpat_exec},
    2062             :     {0, NULL}
    2063             : };
    2064             : 
    2065             : static struct PyModuleDef pyexpatmodule = {
    2066             :     PyModuleDef_HEAD_INIT,
    2067             :     .m_name = MODULE_NAME,
    2068             :     .m_doc = pyexpat_module_documentation,
    2069             :     .m_size = sizeof(pyexpat_state),
    2070             :     .m_methods = pyexpat_methods,
    2071             :     .m_slots = pyexpat_slots,
    2072             :     .m_traverse = pyexpat_traverse,
    2073             :     .m_clear = pyexpat_clear,
    2074             :     .m_free = pyexpat_free
    2075             : };
    2076             : 
    2077             : PyMODINIT_FUNC
    2078          38 : PyInit_pyexpat(void)
    2079             : {
    2080          38 :     return PyModuleDef_Init(&pyexpatmodule);
    2081             : }
    2082             : 
    2083             : static void
    2084        2807 : clear_handlers(xmlparseobject *self, int initial)
    2085             : {
    2086        2807 :     int i = 0;
    2087             : 
    2088       64561 :     for (; handler_info[i].name != NULL; i++) {
    2089       61754 :         if (initial)
    2090       30536 :             self->handlers[i] = NULL;
    2091             :         else {
    2092       31218 :             Py_CLEAR(self->handlers[i]);
    2093       31218 :             handler_info[i].setter(self->itself, NULL);
    2094             :         }
    2095             :     }
    2096        2807 : }
    2097             : 
    2098             : static struct HandlerInfo handler_info[] = {
    2099             : 
    2100             : #define HANDLER_INFO(name) \
    2101             :     {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},
    2102             : 
    2103             :     HANDLER_INFO(StartElementHandler)
    2104             :     HANDLER_INFO(EndElementHandler)
    2105             :     HANDLER_INFO(ProcessingInstructionHandler)
    2106             :     HANDLER_INFO(CharacterDataHandler)
    2107             :     HANDLER_INFO(UnparsedEntityDeclHandler)
    2108             :     HANDLER_INFO(NotationDeclHandler)
    2109             :     HANDLER_INFO(StartNamespaceDeclHandler)
    2110             :     HANDLER_INFO(EndNamespaceDeclHandler)
    2111             :     HANDLER_INFO(CommentHandler)
    2112             :     HANDLER_INFO(StartCdataSectionHandler)
    2113             :     HANDLER_INFO(EndCdataSectionHandler)
    2114             :     HANDLER_INFO(DefaultHandler)
    2115             :     HANDLER_INFO(DefaultHandlerExpand)
    2116             :     HANDLER_INFO(NotStandaloneHandler)
    2117             :     HANDLER_INFO(ExternalEntityRefHandler)
    2118             :     HANDLER_INFO(StartDoctypeDeclHandler)
    2119             :     HANDLER_INFO(EndDoctypeDeclHandler)
    2120             :     HANDLER_INFO(EntityDeclHandler)
    2121             :     HANDLER_INFO(XmlDeclHandler)
    2122             :     HANDLER_INFO(ElementDeclHandler)
    2123             :     HANDLER_INFO(AttlistDeclHandler)
    2124             : #if XML_COMBINED_VERSION >= 19504
    2125             :     HANDLER_INFO(SkippedEntityHandler)
    2126             : #endif
    2127             : 
    2128             : #undef HANDLER_INFO
    2129             : 
    2130             :     {NULL, NULL, NULL} /* sentinel */
    2131             : };

Generated by: LCOV version 1.14