LCOV - code coverage report
Current view: top level - Modules/cjkcodecs - multibytecodec.c (source / functions) Hit Total Coverage
Test: CPython lcov report Lines: 695 936 74.3 %
Date: 2022-07-07 18:19:46 Functions: 54 58 93.1 %

          Line data    Source code
       1             : /*
       2             :  * multibytecodec.c: Common Multibyte Codec Implementation
       3             :  *
       4             :  * Written by Hye-Shik Chang <perky@FreeBSD.org>
       5             :  */
       6             : 
       7             : #define PY_SSIZE_T_CLEAN
       8             : #include "Python.h"
       9             : #include "structmember.h"         // PyMemberDef
      10             : #include "multibytecodec.h"
      11             : #include "clinic/multibytecodec.c.h"
      12             : 
      13             : #define MODULE_NAME "_multibytecodec"
      14             : 
      15             : typedef struct {
      16             :     PyTypeObject *encoder_type;
      17             :     PyTypeObject *decoder_type;
      18             :     PyTypeObject *reader_type;
      19             :     PyTypeObject *writer_type;
      20             :     PyTypeObject *multibytecodec_type;
      21             :     PyObject *str_write;
      22             : } _multibytecodec_state;
      23             : 
      24             : static _multibytecodec_state *
      25        8025 : _multibytecodec_get_state(PyObject *module)
      26             : {
      27        8025 :     _multibytecodec_state *state = PyModule_GetState(module);
      28        8025 :     assert(state != NULL);
      29        8025 :     return state;
      30             : }
      31             : 
      32             : static struct PyModuleDef _multibytecodecmodule;
      33             : static _multibytecodec_state *
      34        7010 : _multibyte_codec_find_state_by_type(PyTypeObject *type)
      35             : {
      36        7010 :     PyObject *module = PyType_GetModuleByDef(type, &_multibytecodecmodule);
      37        7010 :     assert(module != NULL);
      38        7010 :     return _multibytecodec_get_state(module);
      39             : }
      40             : 
      41             : #define clinic_get_state() _multibyte_codec_find_state_by_type(type)
      42             : /*[clinic input]
      43             : module _multibytecodec
      44             : class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "clinic_get_state()->multibytecodec_type"
      45             : class _multibytecodec.MultibyteIncrementalEncoder "MultibyteIncrementalEncoderObject *" "clinic_get_state()->encoder_type"
      46             : class _multibytecodec.MultibyteIncrementalDecoder "MultibyteIncrementalDecoderObject *" "clinic_get_state()->decoder_type"
      47             : class _multibytecodec.MultibyteStreamReader "MultibyteStreamReaderObject *" "clinic_get_state()->reader_type"
      48             : class _multibytecodec.MultibyteStreamWriter "MultibyteStreamWriterObject *" "clinic_get_state()->writer_type"
      49             : [clinic start generated code]*/
      50             : /*[clinic end generated code: output=da39a3ee5e6b4b0d input=305a76dfdd24b99c]*/
      51             : #undef clinic_get_state
      52             : 
      53             : typedef struct {
      54             :     PyObject            *inobj;
      55             :     Py_ssize_t          inpos, inlen;
      56             :     unsigned char       *outbuf, *outbuf_end;
      57             :     PyObject            *excobj, *outobj;
      58             : } MultibyteEncodeBuffer;
      59             : 
      60             : typedef struct {
      61             :     const unsigned char *inbuf, *inbuf_top, *inbuf_end;
      62             :     PyObject            *excobj;
      63             :     _PyUnicodeWriter    writer;
      64             : } MultibyteDecodeBuffer;
      65             : 
      66             : static char *incnewkwarglist[] = {"errors", NULL};
      67             : static char *streamkwarglist[] = {"stream", "errors", NULL};
      68             : 
      69             : static PyObject *multibytecodec_encode(MultibyteCodec *,
      70             :                 MultibyteCodec_State *, PyObject *, Py_ssize_t *,
      71             :                 PyObject *, int);
      72             : 
      73             : #define MBENC_RESET     MBENC_MAX<<1 /* reset after an encoding session */
      74             : 
      75             : static PyObject *
      76     1420770 : make_tuple(PyObject *object, Py_ssize_t len)
      77             : {
      78             :     PyObject *v, *w;
      79             : 
      80     1420770 :     if (object == NULL)
      81           0 :         return NULL;
      82             : 
      83     1420770 :     v = PyTuple_New(2);
      84     1420770 :     if (v == NULL) {
      85           0 :         Py_DECREF(object);
      86           0 :         return NULL;
      87             :     }
      88     1420770 :     PyTuple_SET_ITEM(v, 0, object);
      89             : 
      90     1420770 :     w = PyLong_FromSsize_t(len);
      91     1420770 :     if (w == NULL) {
      92           0 :         Py_DECREF(v);
      93           0 :         return NULL;
      94             :     }
      95     1420770 :     PyTuple_SET_ITEM(v, 1, w);
      96             : 
      97     1420770 :     return v;
      98             : }
      99             : 
     100             : static PyObject *
     101     1428020 : internal_error_callback(const char *errors)
     102             : {
     103     1428020 :     if (errors == NULL || strcmp(errors, "strict") == 0)
     104      378875 :         return ERROR_STRICT;
     105     1049150 :     else if (strcmp(errors, "ignore") == 0)
     106     1048730 :         return ERROR_IGNORE;
     107         422 :     else if (strcmp(errors, "replace") == 0)
     108         132 :         return ERROR_REPLACE;
     109             :     else
     110         290 :         return PyUnicode_FromString(errors);
     111             : }
     112             : 
     113             : static PyObject *
     114        2841 : call_error_callback(PyObject *errors, PyObject *exc)
     115             : {
     116             :     PyObject *cb, *r;
     117             :     const char *str;
     118             : 
     119        2841 :     assert(PyUnicode_Check(errors));
     120        2841 :     str = PyUnicode_AsUTF8(errors);
     121        2841 :     if (str == NULL)
     122           0 :         return NULL;
     123        2841 :     cb = PyCodec_LookupError(str);
     124        2841 :     if (cb == NULL)
     125           0 :         return NULL;
     126             : 
     127        2841 :     r = PyObject_CallOneArg(cb, exc);
     128        2841 :     Py_DECREF(cb);
     129        2841 :     return r;
     130             : }
     131             : 
     132             : static PyObject *
     133           0 : codecctx_errors_get(MultibyteStatefulCodecContext *self, void *Py_UNUSED(ignored))
     134             : {
     135             :     const char *errors;
     136             : 
     137           0 :     if (self->errors == ERROR_STRICT)
     138           0 :         errors = "strict";
     139           0 :     else if (self->errors == ERROR_IGNORE)
     140           0 :         errors = "ignore";
     141           0 :     else if (self->errors == ERROR_REPLACE)
     142           0 :         errors = "replace";
     143             :     else {
     144           0 :         Py_INCREF(self->errors);
     145           0 :         return self->errors;
     146             :     }
     147             : 
     148           0 :     return PyUnicode_FromString(errors);
     149             : }
     150             : 
     151             : static int
     152          76 : codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
     153             :                     void *closure)
     154             : {
     155             :     PyObject *cb;
     156             :     const char *str;
     157             : 
     158          76 :     if (value == NULL) {
     159          19 :         PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
     160          19 :         return -1;
     161             :     }
     162          57 :     if (!PyUnicode_Check(value)) {
     163           0 :         PyErr_SetString(PyExc_TypeError, "errors must be a string");
     164           0 :         return -1;
     165             :     }
     166             : 
     167          57 :     str = PyUnicode_AsUTF8(value);
     168          57 :     if (str == NULL)
     169           0 :         return -1;
     170             : 
     171          57 :     cb = internal_error_callback(str);
     172          57 :     if (cb == NULL)
     173           0 :         return -1;
     174             : 
     175          57 :     ERROR_DECREF(self->errors);
     176          57 :     self->errors = cb;
     177          57 :     return 0;
     178             : }
     179             : 
     180             : /* This getset handlers list is used by all the stateful codec objects */
     181             : static PyGetSetDef codecctx_getsets[] = {
     182             :     {"errors",          (getter)codecctx_errors_get,
     183             :                     (setter)codecctx_errors_set,
     184             :                     PyDoc_STR("how to treat errors")},
     185             :     {NULL,}
     186             : };
     187             : 
     188             : static int
     189         279 : expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
     190             : {
     191             :     Py_ssize_t orgpos, orgsize, incsize;
     192             : 
     193         558 :     orgpos = (Py_ssize_t)((char *)buf->outbuf -
     194         279 :                             PyBytes_AS_STRING(buf->outobj));
     195         279 :     orgsize = PyBytes_GET_SIZE(buf->outobj);
     196         279 :     incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
     197             : 
     198         279 :     if (orgsize > PY_SSIZE_T_MAX - incsize) {
     199           0 :         PyErr_NoMemory();
     200           0 :         return -1;
     201             :     }
     202             : 
     203         279 :     if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
     204           0 :         return -1;
     205             : 
     206         279 :     buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
     207         279 :     buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
     208         279 :         + PyBytes_GET_SIZE(buf->outobj);
     209             : 
     210         279 :     return 0;
     211             : }
     212             : #define REQUIRE_ENCODEBUFFER(buf, s) do {                               \
     213             :     if ((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf)             \
     214             :         if (expand_encodebuffer(buf, s) == -1)                          \
     215             :             goto errorexit;                                             \
     216             : } while(0)
     217             : 
     218             : 
     219             : /**
     220             :  * MultibyteCodec object
     221             :  */
     222             : 
     223             : static int
     224     1051720 : multibytecodec_encerror(MultibyteCodec *codec,
     225             :                         MultibyteCodec_State *state,
     226             :                         MultibyteEncodeBuffer *buf,
     227             :                         PyObject *errors, Py_ssize_t e)
     228             : {
     229     1051720 :     PyObject *retobj = NULL, *retstr = NULL, *tobj;
     230             :     Py_ssize_t retstrsize, newpos;
     231             :     Py_ssize_t esize, start, end;
     232             :     const char *reason;
     233             : 
     234     1051720 :     if (e > 0) {
     235     1051480 :         reason = "illegal multibyte sequence";
     236     1051480 :         esize = e;
     237             :     }
     238             :     else {
     239         246 :         switch (e) {
     240         246 :         case MBERR_TOOSMALL:
     241         246 :             REQUIRE_ENCODEBUFFER(buf, -1);
     242         246 :             return 0; /* retry it */
     243           0 :         case MBERR_TOOFEW:
     244           0 :             reason = "incomplete multibyte sequence";
     245           0 :             esize = (Py_ssize_t)buf->inpos;
     246           0 :             break;
     247           0 :         case MBERR_INTERNAL:
     248           0 :             PyErr_SetString(PyExc_RuntimeError,
     249             :                             "internal codec error");
     250           0 :             return -1;
     251           0 :         default:
     252           0 :             PyErr_SetString(PyExc_RuntimeError,
     253             :                             "unknown runtime error");
     254           0 :             return -1;
     255             :         }
     256             :     }
     257             : 
     258     1051480 :     if (errors == ERROR_REPLACE) {
     259             :         PyObject *replchar;
     260             :         Py_ssize_t r;
     261             :         Py_ssize_t inpos;
     262             :         int kind;
     263             :         const void *data;
     264             : 
     265           0 :         replchar = PyUnicode_FromOrdinal('?');
     266           0 :         if (replchar == NULL)
     267           0 :             goto errorexit;
     268           0 :         kind = PyUnicode_KIND(replchar);
     269           0 :         data = PyUnicode_DATA(replchar);
     270             : 
     271           0 :         inpos = 0;
     272           0 :         for (;;) {
     273           0 :             Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
     274             : 
     275           0 :             r = codec->encode(state, codec->config,
     276             :                               kind, data, &inpos, 1,
     277             :                               &buf->outbuf, outleft, 0);
     278           0 :             if (r == MBERR_TOOSMALL) {
     279           0 :                 REQUIRE_ENCODEBUFFER(buf, -1);
     280           0 :                 continue;
     281             :             }
     282             :             else
     283           0 :                 break;
     284             :         }
     285             : 
     286           0 :         Py_DECREF(replchar);
     287             : 
     288           0 :         if (r != 0) {
     289           0 :             REQUIRE_ENCODEBUFFER(buf, 1);
     290           0 :             *buf->outbuf++ = '?';
     291             :         }
     292             :     }
     293     1051480 :     if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
     294     1048610 :         buf->inpos += esize;
     295     1048610 :         return 0;
     296             :     }
     297             : 
     298        2863 :     start = (Py_ssize_t)buf->inpos;
     299        2863 :     end = start + esize;
     300             : 
     301             :     /* use cached exception object if available */
     302        2863 :     if (buf->excobj == NULL) {
     303         312 :         buf->excobj =  PyObject_CallFunction(PyExc_UnicodeEncodeError,
     304             :                                              "sOnns",
     305             :                                              codec->encoding, buf->inobj,
     306             :                                              start, end, reason);
     307         312 :         if (buf->excobj == NULL)
     308           0 :             goto errorexit;
     309             :     }
     310             :     else
     311        5102 :         if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
     312        5102 :             PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
     313        2551 :             PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
     314           0 :             goto errorexit;
     315             : 
     316        2863 :     if (errors == ERROR_STRICT) {
     317          23 :         PyCodec_StrictErrors(buf->excobj);
     318          23 :         goto errorexit;
     319             :     }
     320             : 
     321        2840 :     retobj = call_error_callback(errors, buf->excobj);
     322        2840 :     if (retobj == NULL)
     323           0 :         goto errorexit;
     324             : 
     325        5680 :     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
     326        5699 :         (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
     327        2764 :         !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
     328          95 :         PyErr_SetString(PyExc_TypeError,
     329             :                         "encoding error handler must return "
     330             :                         "(str, int) tuple");
     331          95 :         goto errorexit;
     332             :     }
     333             : 
     334        2745 :     if (PyUnicode_Check(tobj)) {
     335             :         Py_ssize_t inpos;
     336             : 
     337        2726 :         retstr = multibytecodec_encode(codec, state, tobj,
     338             :                         &inpos, ERROR_STRICT,
     339             :                         MBENC_FLUSH);
     340        2726 :         if (retstr == NULL)
     341           0 :             goto errorexit;
     342             :     }
     343             :     else {
     344          19 :         Py_INCREF(tobj);
     345          19 :         retstr = tobj;
     346             :     }
     347             : 
     348        2745 :     assert(PyBytes_Check(retstr));
     349        2745 :     retstrsize = PyBytes_GET_SIZE(retstr);
     350        2745 :     if (retstrsize > 0) {
     351         345 :         REQUIRE_ENCODEBUFFER(buf, retstrsize);
     352         345 :         memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
     353         345 :         buf->outbuf += retstrsize;
     354             :     }
     355             : 
     356        2745 :     newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
     357        2745 :     if (newpos < 0 && !PyErr_Occurred())
     358           0 :         newpos += (Py_ssize_t)buf->inlen;
     359        2745 :     if (newpos < 0 || newpos > buf->inlen) {
     360          38 :         PyErr_Clear();
     361          38 :         PyErr_Format(PyExc_IndexError,
     362             :                      "position %zd from error handler out of bounds",
     363             :                      newpos);
     364          38 :         goto errorexit;
     365             :     }
     366        2707 :     buf->inpos = newpos;
     367             : 
     368        2707 :     Py_DECREF(retobj);
     369        2707 :     Py_DECREF(retstr);
     370        2707 :     return 0;
     371             : 
     372         156 : errorexit:
     373         156 :     Py_XDECREF(retobj);
     374         156 :     Py_XDECREF(retstr);
     375         156 :     return -1;
     376             : }
     377             : 
     378             : static int
     379        3087 : multibytecodec_decerror(MultibyteCodec *codec,
     380             :                         MultibyteCodec_State *state,
     381             :                         MultibyteDecodeBuffer *buf,
     382             :                         PyObject *errors, Py_ssize_t e)
     383             : {
     384        3087 :     PyObject *retobj = NULL, *retuni = NULL;
     385             :     Py_ssize_t newpos;
     386             :     const char *reason;
     387             :     Py_ssize_t esize, start, end;
     388             : 
     389        3087 :     if (e > 0) {
     390        3015 :         reason = "illegal multibyte sequence";
     391        3015 :         esize = e;
     392             :     }
     393             :     else {
     394          72 :         switch (e) {
     395           0 :         case MBERR_TOOSMALL:
     396           0 :             return 0; /* retry it */
     397          72 :         case MBERR_TOOFEW:
     398          72 :             reason = "incomplete multibyte sequence";
     399          72 :             esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
     400          72 :             break;
     401           0 :         case MBERR_INTERNAL:
     402           0 :             PyErr_SetString(PyExc_RuntimeError,
     403             :                             "internal codec error");
     404           0 :             return -1;
     405           0 :         case MBERR_EXCEPTION:
     406           0 :             return -1;
     407           0 :         default:
     408           0 :             PyErr_SetString(PyExc_RuntimeError,
     409             :                             "unknown runtime error");
     410           0 :             return -1;
     411             :         }
     412             :     }
     413             : 
     414        3087 :     if (errors == ERROR_REPLACE) {
     415        3003 :         if (_PyUnicodeWriter_WriteChar(&buf->writer,
     416             :                                        Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
     417           0 :             goto errorexit;
     418             :     }
     419        3087 :     if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
     420        3034 :         buf->inbuf += esize;
     421        3034 :         return 0;
     422             :     }
     423             : 
     424          53 :     start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
     425          53 :     end = start + esize;
     426             : 
     427             :     /* use cached exception object if available */
     428          53 :     if (buf->excobj == NULL) {
     429         106 :         buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
     430          53 :                         (const char *)buf->inbuf_top,
     431          53 :                         (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
     432             :                         start, end, reason);
     433          53 :         if (buf->excobj == NULL)
     434           0 :             goto errorexit;
     435             :     }
     436             :     else
     437           0 :         if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
     438           0 :             PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
     439           0 :             PyUnicodeDecodeError_SetReason(buf->excobj, reason))
     440           0 :             goto errorexit;
     441             : 
     442          53 :     if (errors == ERROR_STRICT) {
     443          52 :         PyCodec_StrictErrors(buf->excobj);
     444          52 :         goto errorexit;
     445             :     }
     446             : 
     447           1 :     retobj = call_error_callback(errors, buf->excobj);
     448           1 :     if (retobj == NULL)
     449           0 :         goto errorexit;
     450             : 
     451           2 :     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
     452           2 :         !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
     453           1 :         !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
     454           0 :         PyErr_SetString(PyExc_TypeError,
     455             :                         "decoding error handler must return "
     456             :                         "(str, int) tuple");
     457           0 :         goto errorexit;
     458             :     }
     459             : 
     460           1 :     if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
     461           0 :         goto errorexit;
     462             : 
     463           1 :     newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
     464           1 :     if (newpos < 0 && !PyErr_Occurred())
     465           0 :         newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
     466           1 :     if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
     467           1 :         PyErr_Clear();
     468           1 :         PyErr_Format(PyExc_IndexError,
     469             :                      "position %zd from error handler out of bounds",
     470             :                      newpos);
     471           1 :         goto errorexit;
     472             :     }
     473           0 :     buf->inbuf = buf->inbuf_top + newpos;
     474           0 :     Py_DECREF(retobj);
     475           0 :     return 0;
     476             : 
     477          53 : errorexit:
     478          53 :     Py_XDECREF(retobj);
     479          53 :     return -1;
     480             : }
     481             : 
     482             : static PyObject *
     483     1325780 : multibytecodec_encode(MultibyteCodec *codec,
     484             :                       MultibyteCodec_State *state,
     485             :                       PyObject *text, Py_ssize_t *inpos_t,
     486             :                       PyObject *errors, int flags)
     487             : {
     488             :     MultibyteEncodeBuffer buf;
     489     1325780 :     Py_ssize_t finalsize, r = 0;
     490             :     Py_ssize_t datalen;
     491             :     int kind;
     492             :     const void *data;
     493             : 
     494     1325780 :     if (PyUnicode_READY(text) < 0)
     495           0 :         return NULL;
     496     1325780 :     datalen = PyUnicode_GET_LENGTH(text);
     497             : 
     498     1325780 :     if (datalen == 0 && !(flags & MBENC_RESET))
     499        2425 :         return PyBytes_FromStringAndSize(NULL, 0);
     500             : 
     501     1323360 :     buf.excobj = NULL;
     502     1323360 :     buf.outobj = NULL;
     503     1323360 :     buf.inobj = text;   /* borrowed reference */
     504     1323360 :     buf.inpos = 0;
     505     1323360 :     buf.inlen = datalen;
     506     1323360 :     kind = PyUnicode_KIND(buf.inobj);
     507     1323360 :     data = PyUnicode_DATA(buf.inobj);
     508             : 
     509     1323360 :     if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
     510           0 :         PyErr_NoMemory();
     511           0 :         goto errorexit;
     512             :     }
     513             : 
     514     1323360 :     buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
     515     1323360 :     if (buf.outobj == NULL)
     516           0 :         goto errorexit;
     517     1323360 :     buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
     518     1323360 :     buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
     519             : 
     520     2374920 :     while (buf.inpos < buf.inlen) {
     521             :         /* we don't reuse inleft and outleft here.
     522             :          * error callbacks can relocate the cursor anywhere on buffer*/
     523     1326100 :         Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
     524             : 
     525     1326100 :         r = codec->encode(state, codec->config,
     526             :                           kind, data,
     527             :                           &buf.inpos, buf.inlen,
     528             :                           &buf.outbuf, outleft, flags);
     529     1326100 :         if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
     530             :             break;
     531     1051720 :         else if (multibytecodec_encerror(codec, state, &buf, errors,r))
     532         156 :             goto errorexit;
     533     1051570 :         else if (r == MBERR_TOOFEW)
     534           0 :             break;
     535             :     }
     536             : 
     537     1323200 :     if (codec->encreset != NULL && (flags & MBENC_RESET))
     538           0 :         for (;;) {
     539             :             Py_ssize_t outleft;
     540             : 
     541     1048900 :             outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
     542     1048900 :             r = codec->encreset(state, codec->config, &buf.outbuf,
     543             :                                 outleft);
     544     1048900 :             if (r == 0)
     545     1048900 :                 break;
     546           0 :             else if (multibytecodec_encerror(codec, state,
     547             :                                              &buf, errors, r))
     548           0 :                 goto errorexit;
     549             :         }
     550             : 
     551     2646400 :     finalsize = (Py_ssize_t)((char *)buf.outbuf -
     552     1323200 :                              PyBytes_AS_STRING(buf.outobj));
     553             : 
     554     1323200 :     if (finalsize != PyBytes_GET_SIZE(buf.outobj))
     555     1323170 :         if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
     556           0 :             goto errorexit;
     557             : 
     558     1323200 :     if (inpos_t)
     559       88404 :         *inpos_t = buf.inpos;
     560     1323200 :     Py_XDECREF(buf.excobj);
     561     1323200 :     return buf.outobj;
     562             : 
     563         156 : errorexit:
     564         156 :     Py_XDECREF(buf.excobj);
     565         156 :     Py_XDECREF(buf.outobj);
     566         156 :     return NULL;
     567             : }
     568             : 
     569             : /*[clinic input]
     570             : _multibytecodec.MultibyteCodec.encode
     571             : 
     572             :   input: object
     573             :   errors: str(accept={str, NoneType}) = None
     574             : 
     575             : Return an encoded string version of `input'.
     576             : 
     577             : 'errors' may be given to set a different error handling scheme. Default is
     578             : 'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible
     579             : values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name
     580             : registered with codecs.register_error that can handle UnicodeEncodeErrors.
     581             : [clinic start generated code]*/
     582             : 
     583             : static PyObject *
     584     1234930 : _multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self,
     585             :                                            PyObject *input,
     586             :                                            const char *errors)
     587             : /*[clinic end generated code: output=7b26652045ba56a9 input=606d0e128a577bae]*/
     588             : {
     589             :     MultibyteCodec_State state;
     590             :     PyObject *errorcb, *r, *ucvt;
     591             :     Py_ssize_t datalen;
     592             :     /* Stateless, one-shot encode of `input` with self->codec.
                     :      *
                     :      * A non-str `input` is first converted with PyObject_Str(); `ucvt`
                     :      * owns that temporary (it stays NULL when `input` was already a
                     :      * str) and is released on every exit path.  `state` is a codec
                     :      * state local to this call, and the encode runs with
                     :      * MBENC_FLUSH | MBENC_RESET.
                     :      *
                     :      * Returns make_tuple(r, datalen), where `datalen` is the length of
                     :      * the str that was encoded, or NULL on failure.
                     :      */
     593     1234930 :     if (PyUnicode_Check(input))
     594     1234930 :         ucvt = NULL;  /* already a str: no temporary to release */
     595             :     else {
     596           0 :         input = ucvt = PyObject_Str(input);
     597           0 :         if (input == NULL)
     598           0 :             return NULL;
     599           0 :         else if (!PyUnicode_Check(input)) {
     600           0 :             PyErr_SetString(PyExc_TypeError,
     601             :                 "couldn't convert the object to unicode.");
     602           0 :             Py_DECREF(ucvt);
     603           0 :             return NULL;
     604             :         }
     605             :     }
     606             : 
     607     1234930 :     if (PyUnicode_READY(input) < 0) {
     608           0 :         Py_XDECREF(ucvt);
     609           0 :         return NULL;
     610             :     }
     611     1234930 :     datalen = PyUnicode_GET_LENGTH(input);
     612             : 
     613     1234930 :     errorcb = internal_error_callback(errors);  /* released with ERROR_DECREF() below */
     614     1234930 :     if (errorcb == NULL) {
     615           0 :         Py_XDECREF(ucvt);
     616           0 :         return NULL;
     617             :     }
     618             :     /* encinit is optional: it is only called when the codec provides one */
     619     2283760 :     if (self->codec->encinit != NULL &&
     620     1048820 :         self->codec->encinit(&state, self->codec->config) != 0)
     621           0 :         goto errorexit;
     622     1234930 :     r = multibytecodec_encode(self->codec, &state,
     623             :                     input, NULL, errorcb,
     624             :                     MBENC_FLUSH | MBENC_RESET);
     625     1234930 :     if (r == NULL)
     626         134 :         goto errorexit;
     627             : 
     628     1234800 :     ERROR_DECREF(errorcb);
     629     1234800 :     Py_XDECREF(ucvt);
     630     1234800 :     return make_tuple(r, datalen);
     631             : 
     632         134 : errorexit:
     633         134 :     ERROR_DECREF(errorcb);
     634         134 :     Py_XDECREF(ucvt);
     635         134 :     return NULL;
     636             : }
     637             : 
     638             : /*[clinic input]
     639             : _multibytecodec.MultibyteCodec.decode
     640             : 
     641             :   input: Py_buffer
     642             :   errors: str(accept={str, NoneType}) = None
     643             : 
     644             : Decodes 'input'.
     645             : 
     646             : 'errors' may be given to set a different error handling scheme. Default is
     647             : 'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible
     648             : values are 'ignore' and 'replace' as well as any other name registered with
     649             : codecs.register_error that is able to handle UnicodeDecodeErrors."
     650             : [clinic start generated code]*/
     651             : 
     652             : static PyObject *
     653      186024 : _multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self,
     654             :                                            Py_buffer *input,
     655             :                                            const char *errors)
     656             : /*[clinic end generated code: output=ff419f65bad6cc77 input=e0c78fc7ab190def]*/
     657             : {
     658             :     MultibyteCodec_State state;
     659             :     MultibyteDecodeBuffer buf;
     660             :     PyObject *errorcb, *res;
     661             :     const char *data;
     662             :     Py_ssize_t datalen;
     663             :     /* Stateless, one-shot decode of the bytes in `input` with self->codec.
                     :      *
                     :      * Output is accumulated in buf.writer.  The codec's decode function
                     :      * is called in a loop; a zero result ends the loop and any other
                     :      * result is handed to multibytecodec_decerror() together with the
                     :      * error callback.
                     :      *
                     :      * Returns make_tuple(res, datalen) on success (an empty str and 0
                     :      * for empty input), or NULL on failure.
                     :      */
     664      186024 :     data = input->buf;
     665      186024 :     datalen = input->len;
     666             : 
     667      186024 :     errorcb = internal_error_callback(errors);
     668      186024 :     if (errorcb == NULL) {
     669           0 :         return NULL;
     670             :     }
     671             : 
     672      186024 :     if (datalen == 0) {  /* nothing to decode: return before the writer is set up */
     673           0 :         ERROR_DECREF(errorcb);
     674           0 :         return make_tuple(PyUnicode_New(0, 0), 0);
     675             :     }
     676             : 
     677      186024 :     _PyUnicodeWriter_Init(&buf.writer);
     678      186024 :     buf.writer.min_length = datalen;
     679      186024 :     buf.excobj = NULL;
     680      186024 :     buf.inbuf = buf.inbuf_top = (unsigned char *)data;
     681      186024 :     buf.inbuf_end = buf.inbuf_top + datalen;
     682             :     /* decinit is optional: it is only called when the codec provides one */
     683      186087 :     if (self->codec->decinit != NULL &&
     684          63 :         self->codec->decinit(&state, self->codec->config) != 0)
     685           0 :         goto errorexit;
     686             : 
     687      189058 :     while (buf.inbuf < buf.inbuf_end) {
     688             :         Py_ssize_t inleft, r;
     689             : 
     690      188987 :         inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
     691             : 
     692      188987 :         r = self->codec->decode(&state, self->codec->config,
     693             :                         &buf.inbuf, inleft, &buf.writer);
     694      188987 :         if (r == 0)  /* zero result: stop looping */
     695      185904 :             break;
     696        3083 :         else if (multibytecodec_decerror(self->codec, &state,
     697             :                                          &buf, errorcb, r))
     698          49 :             goto errorexit;  /* the error handler itself reported failure */
     699             :     }
     700             : 
     701      185975 :     res = _PyUnicodeWriter_Finish(&buf.writer);
     702      185975 :     if (res == NULL)
     703           0 :         goto errorexit;
     704             : 
     705      185975 :     Py_XDECREF(buf.excobj);
     706      185975 :     ERROR_DECREF(errorcb);
     707      185975 :     return make_tuple(res, datalen);
     708             : 
     709          49 : errorexit:
     710          49 :     ERROR_DECREF(errorcb);
     711          49 :     Py_XDECREF(buf.excobj);
     712          49 :     _PyUnicodeWriter_Dealloc(&buf.writer);  /* discard any partial output */
     713             : 
     714          49 :     return NULL;
     715             : }
     716             : /* Method table for MultibyteCodec objects.  The two entries are METHODDEF
                     :  * macros; presumably they come from clinic/multibytecodec.c.h and bind
                     :  * encode() and decode() -- confirm against that header. */
     717             : static struct PyMethodDef multibytecodec_methods[] = {
     718             :     _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF
     719             :     _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF
     720             :     {NULL, NULL},
     721             : };
     722             : /* GC traversal for MultibyteCodec objects: the only reference reported is
                     :  * the instance's type.  Always returns 0 unless the visit callback
                     :  * makes Py_VISIT return early. */
     723             : static int
     724        5186 : multibytecodec_traverse(PyObject *self, visitproc visit, void *arg)
     725             : {
     726        5186 :     Py_VISIT(Py_TYPE(self));
     727        5186 :     return 0;
     728             : }
     729             : /* Deallocator for MultibyteCodec objects: untrack from the GC, free the
                     :  * instance through tp_free, then drop the reference to the type. */
     730             : static void
     731         142 : multibytecodec_dealloc(MultibyteCodecObject *self)
     732             : {
     733         142 :     PyObject_GC_UnTrack(self);
     734         142 :     PyTypeObject *tp = Py_TYPE(self);  /* fetched before the instance memory is freed */
     735         142 :     tp->tp_free(self);
     736         142 :     Py_DECREF(tp);
     737         142 : }
     738             : /* Type slots for MultibyteCodec: dealloc, generic attribute lookup, the
                     :  * method table and the GC traverse function. */
     739             : static PyType_Slot multibytecodec_slots[] = {
     740             :     {Py_tp_dealloc, multibytecodec_dealloc},
     741             :     {Py_tp_getattro, PyObject_GenericGetAttr},
     742             :     {Py_tp_methods, multibytecodec_methods},
     743             :     {Py_tp_traverse, multibytecodec_traverse},
     744             :     {0, NULL},
     745             : };
     746             : /* Type spec for MODULE_NAME ".MultibyteCodec".  The flags make the type
                     :  * GC-aware, immutable, and not instantiable from Python code. */
     747             : static PyType_Spec multibytecodec_spec = {
     748             :     .name = MODULE_NAME ".MultibyteCodec",
     749             :     .basicsize = sizeof(MultibyteCodecObject),
     750             :     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
     751             :               Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
     752             :     .slots = multibytecodec_slots,
     753             : };
     754             : 
     755             : 
     756             : /**
     757             :  * Utility functions for stateful codec mechanism
     758             :  */
     759             : /* Cast an object pointer to the stateful decoder / encoder context type. */
     760             : #define STATEFUL_DCTX(o)        ((MultibyteStatefulDecoderContext *)(o))
     761             : #define STATEFUL_ECTX(o)        ((MultibyteStatefulEncoderContext *)(o))
     762             : /* Encode `unistr` through the stateful encoder context `ctx`.
                     :  *
                     :  * A non-str `unistr` is first converted with PyObject_Str().  Text left
                     :  * over from a previous call (ctx->pending) is prepended to the new input.
                     :  * Whatever multibytecodec_encode() does not consume (inpos < datalen) is
                     :  * stored back in ctx->pending for the next call; more than MAXENCPENDING
                     :  * leftover characters is reported as a UnicodeError.  A true `final`
                     :  * adds MBENC_FLUSH | MBENC_RESET to the encode flags.
                     :  *
                     :  * If multibytecodec_encode() fails, the pending buffer that was in place
                     :  * on entry is restored.
                     :  *
                     :  * Returns the result of multibytecodec_encode(), or NULL on error.
                     :  */
     763             : static PyObject *
     764       88125 : encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
     765             :                         PyObject *unistr, int final)
     766             : {
     767       88125 :     PyObject *ucvt, *r = NULL;
     768       88125 :     PyObject *inbuf = NULL;
     769             :     Py_ssize_t inpos, datalen;
     770       88125 :     PyObject *origpending = NULL;
     771             : 
     772       88125 :     if (PyUnicode_Check(unistr))
     773       88125 :         ucvt = NULL;  /* already a str: no temporary to release */
     774             :     else {
     775           0 :         unistr = ucvt = PyObject_Str(unistr);
     776           0 :         if (unistr == NULL)
     777           0 :             return NULL;
     778           0 :         else if (!PyUnicode_Check(unistr)) {
     779           0 :             PyErr_SetString(PyExc_TypeError,
     780             :                 "couldn't convert the object to str.");
     781           0 :             Py_DECREF(ucvt);
     782           0 :             return NULL;
     783             :         }
     784             :     }
     785             : 
     786       88125 :     if (ctx->pending) {
     787             :         PyObject *inbuf_tmp;
     788             :         /* first reference: kept so the buffer can be put back on failure */
     789         836 :         Py_INCREF(ctx->pending);
     790         836 :         origpending = ctx->pending;
     791             :         /* second reference: handed to PyUnicode_Append() via inbuf_tmp */
     792         836 :         Py_INCREF(ctx->pending);
     793         836 :         inbuf_tmp = ctx->pending;
     794         836 :         PyUnicode_Append(&inbuf_tmp, unistr);
     795         836 :         if (inbuf_tmp == NULL)
     796           0 :             goto errorexit;
     797         836 :         Py_CLEAR(ctx->pending);
     798         836 :         inbuf = inbuf_tmp;
     799             :     }
     800             :     else {
     801       87289 :         origpending = NULL;
     802             : 
     803       87289 :         Py_INCREF(unistr);
     804       87289 :         inbuf = unistr;
     805             :     }
     806       88125 :     if (PyUnicode_READY(inbuf) < 0)
     807           0 :         goto errorexit;
     808       88125 :     inpos = 0;
     809       88125 :     datalen = PyUnicode_GET_LENGTH(inbuf);
     810             : 
     811       88125 :     r = multibytecodec_encode(ctx->codec, &ctx->state,
     812             :                               inbuf, &inpos,
     813             :                               ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
     814       88125 :     if (r == NULL) {
     815             :         /* recover the original pending buffer */
     816          22 :         Py_XSETREF(ctx->pending, origpending);
     817          22 :         origpending = NULL;  /* ownership moved to ctx->pending */
     818          22 :         goto errorexit;
     819             :     }
     820       88103 :     Py_CLEAR(origpending);  /* must end up NULL: the errorexit path reached
                     :                                from the checks below releases origpending
                     :                                again, so a plain Py_XDECREF here would
                     :                                drop the same reference twice */
     821             : 
     822       88103 :     if (inpos < datalen) {
     823         662 :         if (datalen - inpos > MAXENCPENDING) {
     824             :             /* normal codecs can't reach here */
     825           0 :             PyErr_SetString(PyExc_UnicodeError,
     826             :                             "pending buffer overflow");
     827           0 :             goto errorexit;
     828             :         }
     829         662 :         ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen);
     830         662 :         if (ctx->pending == NULL) {
     831             :             /* normal codecs can't reach here */
     832           0 :             goto errorexit;
     833             :         }
     834             :     }
     835             : 
     836       88103 :     Py_DECREF(inbuf);
     837       88103 :     Py_XDECREF(ucvt);
     838       88103 :     return r;
     839             : 
     840          22 : errorexit:
     841          22 :     Py_XDECREF(r);
     842          22 :     Py_XDECREF(ucvt);
     843          22 :     Py_XDECREF(origpending);
     844          22 :     Py_XDECREF(inbuf);
     845          22 :     return NULL;
     846             : }
     847             : /* Append the unconsumed bytes of `buf` (inbuf .. inbuf_end) to
                     :  * ctx->pending.
                     :  *
                     :  * Returns 0 on success.  Returns -1 with a UnicodeError set when the
                     :  * combined size would exceed MAXDECPENDING.
                     :  */
     848             : static int
     849       72561 : decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
     850             :                        MultibyteDecodeBuffer *buf)
     851             : {
     852             :     Py_ssize_t npendings;
     853             : 
     854       72561 :     npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
                     :     /* The PY_SSIZE_T_MAX guard is tested first so that the addition in
                     :        the second condition is only evaluated when it cannot overflow. */
     855       72561 :     if (npendings > PY_SSIZE_T_MAX - ctx->pendingsize ||
     856       72561 :         npendings + ctx->pendingsize > MAXDECPENDING) {
     857           0 :             PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
     858           0 :             return -1;
     859             :     }
     860       72561 :     memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
     861       72561 :     ctx->pendingsize += npendings;
     862       72561 :     return 0;
     863             : }
     864             : /* Point the input cursor of `buf` at data[0 .. size) and add `size` to
                     :  * the writer's min_length.  Always returns 0. */
     865             : static int
     866      193233 : decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
     867             :                        Py_ssize_t size)
     868             : {
     869      193233 :     buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
     870      193233 :     buf->inbuf_end = buf->inbuf_top + size;
     871      193233 :     buf->writer.min_length += size;
     872      193233 :     return 0;
     873             : }
     874             : /* Run the codec's decode function over the remaining input in `buf`,
                     :  * using the state held in `ctx`.
                     :  *
                     :  * The loop stops when decode returns 0 or MBERR_TOOFEW; in the latter
                     :  * case bytes may remain between buf->inbuf and buf->inbuf_end.  Any other
                     :  * result is passed to multibytecodec_decerror().
                     :  *
                     :  * Returns 0, or -1 when multibytecodec_decerror() returns non-zero.
                     :  */
     875             : static int
     876      191010 : decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
     877             :                     MultibyteDecodeBuffer *buf)
     878             : {
     879      191010 :     while (buf->inbuf < buf->inbuf_end) {
     880             :         Py_ssize_t inleft;
     881             :         Py_ssize_t r;
     882             : 
     883      190738 :         inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
     884             : 
     885      190738 :         r = ctx->codec->decode(&ctx->state, ctx->codec->config,
     886             :             &buf->inbuf, inleft, &buf->writer);
     887      190738 :         if (r == 0 || r == MBERR_TOOFEW)  /* MBERR_TOOFEW is not an error here */
     888             :             break;
     889           0 :         else if (multibytecodec_decerror(ctx->codec, &ctx->state,
     890             :                                          buf, ctx->errors, r))
     891           0 :             return -1;
     892             :     }
     893      191010 :     return 0;
     894             : }
     895             : 
     896             : 
     897             : /*[clinic input]
     898             : _multibytecodec.MultibyteIncrementalEncoder.encode
     899             : 
     900             :     input: object
     901             :     final: bool(accept={int}) = False
     902             : [clinic start generated code]*/
     903             : 
     904             : static PyObject *
     905       27427 : _multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self,
     906             :                                                         PyObject *input,
     907             :                                                         int final)
     908             : /*[clinic end generated code: output=123361b6c505e2c1 input=093a1ddbb2fc6721]*/
     909             : {   /* Thin wrapper: views `self` as a stateful encoder context and
                     :        forwards `input` and `final` to encoder_encode_stateful(). */
     910       27427 :     return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
     911             : }
     912             : 
     913             : /*[clinic input]
     914             : _multibytecodec.MultibyteIncrementalEncoder.getstate
     915             : [clinic start generated code]*/
     916             : 
     917             : static PyObject *
     918         177 : _multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self)
     919             : /*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/
     920             : {
     921             :     /* state made up of 1 byte for buffer size, up to MAXENCPENDING*4 bytes
     922             :        for UTF-8 encoded buffer (each character can use up to 4
     923             :        bytes), and required bytes for MultibyteCodec_State.c. A byte
     924             :        array is used to avoid different compilers generating different
     925             :        values for the same state, e.g. as a result of struct padding.
     926             :     */
     927             :     unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
     928             :     Py_ssize_t statesize;
     929         177 :     const char *pendingbuffer = NULL;
     930             :     Py_ssize_t pendingsize;
     931             :     /* Pack the encoder state into statebytes and return it as an int
                     :        built with _PyLong_FromByteArray() (little-endian, unsigned).
                     :        Returns NULL if the pending text cannot be UTF-8 encoded or
                     :        its encoding is longer than MAXENCPENDING*4 bytes. */
     932         177 :     if (self->pending != NULL) {
     933           2 :         pendingbuffer = PyUnicode_AsUTF8AndSize(self->pending, &pendingsize);
     934           2 :         if (pendingbuffer == NULL) {
     935           0 :             return NULL;
     936             :         }
     937           2 :         if (pendingsize > MAXENCPENDING*4) {
     938           0 :             PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
     939           0 :             return NULL;
     940             :         }
     941           2 :         statebytes[0] = (unsigned char)pendingsize;  /* length prefix */
     942           2 :         memcpy(statebytes + 1, pendingbuffer, pendingsize);
     943           2 :         statesize = 1 + pendingsize;
     944             :     } else {
     945         175 :         statebytes[0] = 0;  /* no pending text: zero length prefix */
     946         175 :         statesize = 1;
     947             :     }
     948         177 :     memcpy(statebytes+statesize, self->state.c,  /* codec state follows the pending text */
     949             :            sizeof(self->state.c));
     950         177 :     statesize += sizeof(self->state.c);
     951             : 
     952         177 :     return (PyObject *)_PyLong_FromByteArray(statebytes, statesize,
     953             :                                              1 /* little-endian */ ,
     954             :                                              0 /* unsigned */ );
     955             : }
     956             : 
     957             : /*[clinic input]
     958             : _multibytecodec.MultibyteIncrementalEncoder.setstate
     959             :     state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type')
     960             :     /
     961             : [clinic start generated code]*/
     962             : 
     963             : static PyObject *
     964         177 : _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
     965             :                                                           PyLongObject *statelong)
     966             : /*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/
     967             : {
     968         177 :     PyObject *pending = NULL;
     969             :     unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
     970             :     /* Restore encoder state from the int `statelong`.
                     :        The int is unpacked (little-endian, unsigned) into statebytes:
                     :        byte 0 is the length of the UTF-8 encoded pending text, the text
                     :        itself follows, and the codec state bytes come after it.
                     :        self is only modified once the pending text has decoded
                     :        successfully.  Returns None, or NULL on error. */
     971         177 :     if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
     972             :                             1 /* little-endian */ ,
     973             :                             0 /* unsigned */ ) < 0) {
     974           0 :         goto errorexit;
     975             :     }
     976             : 
     977         177 :     if (statebytes[0] > MAXENCPENDING*4) {
     978           1 :         PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
     979           1 :         return NULL;  /* `pending` is still NULL here, so there is nothing to release */
     980             :     }
     981             : 
     982         176 :     pending = PyUnicode_DecodeUTF8((const char *)statebytes+1,
     983         176 :                                    statebytes[0], "strict");
     984         176 :     if (pending == NULL) {
     985           1 :         goto errorexit;
     986             :     }
     987             : 
     988         175 :     Py_CLEAR(self->pending);
     989         175 :     self->pending = pending;  /* self->pending takes over the reference */
     990         175 :     memcpy(self->state.c, statebytes+1+statebytes[0],
     991             :            sizeof(self->state.c));
     992             : 
     993         175 :     Py_RETURN_NONE;
     994             : 
     995           1 : errorexit:
     996           1 :     Py_XDECREF(pending);
     997           1 :     return NULL;
     998             : }
     999             : 
    1000             : /*[clinic input]
    1001             : _multibytecodec.MultibyteIncrementalEncoder.reset
    1002             : [clinic start generated code]*/
    1003             : 
    1004             : static PyObject *
    1005          24 : _multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self)
    1006             : /*[clinic end generated code: output=b4125d8f537a253f input=930f06760707b6ea]*/
    1007             : {
    1008             :     /* Reset the encoder: call the codec's optional encreset hook, then
                     :        drop any pending text.  The bytes encreset writes into `buffer`
                     :        are not read back in this function.
                     :
                     :        Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */
    1009             :     unsigned char buffer[4], *outbuf;
    1010             :     Py_ssize_t r;
    1011          24 :     if (self->codec->encreset != NULL) {
    1012           4 :         outbuf = buffer;
    1013           4 :         r = self->codec->encreset(&self->state, self->codec->config,
    1014             :                                   &outbuf, sizeof(buffer));
    1015           4 :         if (r != 0)  /* NOTE(review): no exception is set on this path in the visible code -- confirm encreset sets one */
    1016           0 :             return NULL;
    1017             :     }
    1018          24 :     Py_CLEAR(self->pending);
    1019          24 :     Py_RETURN_NONE;
    1020             : }
    1021             : /* Method table for the incremental encoder type.  The entries are
                     :  * METHODDEF macros; presumably they come from clinic/multibytecodec.c.h
                     :  * and bind encode(), getstate(), setstate() and reset() -- confirm
                     :  * against that header. */
    1022             : static struct PyMethodDef mbiencoder_methods[] = {
    1023             :     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
    1024             :     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF
    1025             :     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF
    1026             :     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
    1027             :     {NULL, NULL},
    1028             : };
    1029             : /* tp_new for the incremental encoder type.
                     :  *
                     :  * Parses the optional `errors` string, allocates the instance, reads the
                     :  * "codec" attribute from `type` and checks it with MultibyteCodec_Check(),
                     :  * then copies its codec pointer, builds the error callback and runs the
                     :  * codec's optional encinit hook.
                     :  *
                     :  * Returns the new instance, or NULL on error (the partially built
                     :  * instance and the codec reference are released on that path).
                     :  */
    1030             : static PyObject *
    1031        1263 : mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
    1032             : {
    1033             :     MultibyteIncrementalEncoderObject *self;
    1034        1263 :     PyObject *codec = NULL;
    1035        1263 :     char *errors = NULL;
    1036             : 
    1037        1263 :     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
    1038             :                                      incnewkwarglist, &errors))
    1039           0 :         return NULL;
    1040             : 
    1041        1263 :     self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
    1042        1263 :     if (self == NULL)
    1043           0 :         return NULL;
    1044             : 
    1045        1263 :     codec = PyObject_GetAttrString((PyObject *)type, "codec");
    1046        1263 :     if (codec == NULL)
    1047           0 :         goto errorexit;
    1048             : 
    1049        1263 :     _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
    1050        1263 :     if (!MultibyteCodec_Check(state, codec)) {
    1051           0 :         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
    1052           0 :         goto errorexit;
    1053             :     }
    1054             : 
    1055        1263 :     self->codec = ((MultibyteCodecObject *)codec)->codec;
    1056        1263 :     self->pending = NULL;
    1057        1263 :     self->errors = internal_error_callback(errors);
    1058        1263 :     if (self->errors == NULL)
    1059           0 :         goto errorexit;
    1060        1589 :     if (self->codec->encinit != NULL &&
    1061         326 :         self->codec->encinit(&self->state, self->codec->config) != 0)
    1062           0 :         goto errorexit;
    1063             : 
    1064        1263 :     Py_DECREF(codec);  /* only the inner codec pointer is kept, not the object */
    1065        1263 :     return (PyObject *)self;
    1066             : 
    1067           0 : errorexit:
    1068           0 :     Py_XDECREF(self);
    1069           0 :     Py_XDECREF(codec);
    1070           0 :     return NULL;
    1071             : }
    1072             : /* tp_init for the incremental encoder type: does nothing and always
                     :  * returns 0; the arguments are ignored here. */
    1073             : static int
    1074        1263 : mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
    1075             : {
    1076        1263 :     return 0;
    1077             : }
    1078             : /* GC traversal for incremental encoder objects.
                     :  *
                     :  * Reports the instance's type (each instance owns a reference to it,
                     :  * which mbiencoder_dealloc() releases) and the error callback when
                     :  * ERROR_ISCUSTOM() holds for it.
                     :  */
    1079             : static int
    1080           0 : mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
    1081             :                     visitproc visit, void *arg)
    1082             : {
                     :     Py_VISIT(Py_TYPE(self));
    1083           0 :     if (ERROR_ISCUSTOM(self->errors))
    1084           0 :         Py_VISIT(self->errors);
    1085           0 :     return 0;
    1086             : }
    1087             : /* Deallocator for incremental encoder objects: untrack from the GC,
                     :  * release the error callback and any pending text, free the instance
                     :  * through tp_free, then drop the reference to the type. */
    1088             : static void
    1089        1263 : mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
    1090             : {
    1091        1263 :     PyTypeObject *tp = Py_TYPE(self);  /* fetched before the instance memory is freed */
    1092        1263 :     PyObject_GC_UnTrack(self);
    1093        1263 :     ERROR_DECREF(self->errors);
    1094        1263 :     Py_CLEAR(self->pending);
    1095        1263 :     tp->tp_free(self);
    1096        1263 :     Py_DECREF(tp);
    1097        1263 : }
    1098             : /* Type slots for the incremental encoder: dealloc, generic attribute
                     :  * lookup, GC traverse, methods, getset descriptors, init and new. */
    1099             : static PyType_Slot encoder_slots[] = {
    1100             :     {Py_tp_dealloc, mbiencoder_dealloc},
    1101             :     {Py_tp_getattro, PyObject_GenericGetAttr},
    1102             :     {Py_tp_traverse, mbiencoder_traverse},
    1103             :     {Py_tp_methods, mbiencoder_methods},
    1104             :     {Py_tp_getset, codecctx_getsets},
    1105             :     {Py_tp_init, mbiencoder_init},
    1106             :     {Py_tp_new, mbiencoder_new},
    1107             :     {0, NULL},
    1108             : };
    1109             : /* Type spec for MODULE_NAME ".MultibyteIncrementalEncoder".  The flags
                     :  * make the type GC-aware, subclassable (Py_TPFLAGS_BASETYPE) and
                     :  * immutable. */
    1110             : static PyType_Spec encoder_spec = {
    1111             :     .name = MODULE_NAME ".MultibyteIncrementalEncoder",
    1112             :     .basicsize = sizeof(MultibyteIncrementalEncoderObject),
    1113             :     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
    1114             :               Py_TPFLAGS_IMMUTABLETYPE),
    1115             :     .slots = encoder_slots,
    1116             : };
    1117             : 
    1118             : 
    1119             : /*[clinic input]
    1120             : _multibytecodec.MultibyteIncrementalDecoder.decode
    1121             : 
    1122             :     input: Py_buffer
    1123             :     final: bool(accept={int}) = False
    1124             : [clinic start generated code]*/
    1125             : 
    1126             : static PyObject *
    1127       47789 : _multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self,
    1128             :                                                         Py_buffer *input,
    1129             :                                                         int final)
    1130             : /*[clinic end generated code: output=b9b9090e8a9ce2ba input=c9132b24d503eb1d]*/
    1131             : {
    1132             :     MultibyteDecodeBuffer buf;
    1133       47789 :     char *data, *wdata = NULL;
    1134             :     Py_ssize_t wsize, size, origpending;
    1135             :     PyObject *res;
    1136             :     /* Incremental decode of the bytes in `input`.
                     :      *
                     :      * Bytes left over from the previous call (self->pending) are joined
                     :      * with the new input in a temporary buffer before decoding.  When
                     :      * `final` is true and bytes remain unconsumed, they are first passed
                     :      * to multibytecodec_decerror() as MBERR_TOOFEW; if that fails, the
                     :      * pending buffer that was in place on entry is restored.  Bytes
                     :      * still unconsumed after that are saved with
                     :      * decoder_append_pending() for the next call.
                     :      *
                     :      * Returns the decoded str, or NULL on error.
                     :      */
    1137       47789 :     data = input->buf;
    1138       47789 :     size = input->len;
    1139             : 
    1140       47789 :     _PyUnicodeWriter_Init(&buf.writer);
    1141       47789 :     buf.excobj = NULL;
    1142       47789 :     origpending = self->pendingsize;  /* remembered for the recovery path below */
    1143             : 
    1144       47789 :     if (self->pendingsize == 0) {  /* no leftovers: decode straight from the caller's buffer */
    1145       28726 :         wsize = size;
    1146       28726 :         wdata = data;
    1147             :     }
    1148             :     else {
    1149       19063 :         if (size > PY_SSIZE_T_MAX - self->pendingsize) {
    1150           0 :             PyErr_NoMemory();
    1151           0 :             goto errorexit;
    1152             :         }
    1153       19063 :         wsize = size + self->pendingsize;
    1154       19063 :         wdata = PyMem_Malloc(wsize);  /* scratch buffer: pending bytes followed by the new input */
    1155       19063 :         if (wdata == NULL) {
    1156           0 :             PyErr_NoMemory();
    1157           0 :             goto errorexit;
    1158             :         }
    1159       19063 :         memcpy(wdata, self->pending, self->pendingsize);
    1160       19063 :         memcpy(wdata + self->pendingsize, data, size);
    1161       19063 :         self->pendingsize = 0;
    1162             :     }
    1163             : 
    1164       47789 :     if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
    1165           0 :         goto errorexit;
    1166             : 
    1167       47789 :     if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
    1168           0 :         goto errorexit;
    1169             : 
    1170       47789 :     if (final && buf.inbuf < buf.inbuf_end) {  /* final call with unconsumed bytes */
    1171           3 :         if (multibytecodec_decerror(self->codec, &self->state,
    1172             :                         &buf, self->errors, MBERR_TOOFEW)) {
    1173             :             /* recover the original pending buffer */
    1174           3 :             memcpy(self->pending, wdata, origpending);
    1175           3 :             self->pendingsize = origpending;
    1176           3 :             goto errorexit;
    1177             :         }
    1178             :     }
    1179             : 
    1180       47786 :     if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
    1181       19061 :         if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
    1182           0 :             goto errorexit;
    1183             :     }
    1184             : 
    1185       47786 :     res = _PyUnicodeWriter_Finish(&buf.writer);
    1186       47786 :     if (res == NULL)
    1187           0 :         goto errorexit;
    1188             : 
    1189       47786 :     if (wdata != data)  /* free only the scratch buffer, never the caller's data */
    1190       19060 :         PyMem_Free(wdata);
    1191       47786 :     Py_XDECREF(buf.excobj);
    1192       47786 :     return res;
    1193             : 
    1194           3 : errorexit:
    1195           3 :     if (wdata != NULL && wdata != data)
    1196           3 :         PyMem_Free(wdata);
    1197           3 :     Py_XDECREF(buf.excobj);
    1198           3 :     _PyUnicodeWriter_Dealloc(&buf.writer);  /* discard any partial output */
    1199           3 :     return NULL;
    1200             : }
    1201             : 
    1202             : /*[clinic input]
    1203             : _multibytecodec.MultibyteIncrementalDecoder.getstate
    1204             : [clinic start generated code]*/
    1205             : 
    1206             : static PyObject *
    1207         329 : _multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self)
    1208             : /*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/
    1209             : {
    1210             :     PyObject *buffer;
    1211             :     PyObject *statelong;
    1212             :     /* Return the decoder state as a 2-tuple: a bytes object holding the
                     :        pending input bytes, and an int built from self->state.c
                     :        (little-endian, unsigned).  Returns NULL on failure. */
    1213         329 :     buffer = PyBytes_FromStringAndSize((const char *)self->pending,
    1214             :                                        self->pendingsize);
    1215         329 :     if (buffer == NULL) {
    1216           0 :         return NULL;
    1217             :     }
    1218             : 
    1219         329 :     statelong = (PyObject *)_PyLong_FromByteArray(self->state.c,
    1220             :                                                   sizeof(self->state.c),
    1221             :                                                   1 /* little-endian */ ,
    1222             :                                                   0 /* unsigned */ );
    1223         329 :     if (statelong == NULL) {
    1224           0 :         Py_DECREF(buffer);
    1225           0 :         return NULL;
    1226             :     }
    1227             : 
    1228         329 :     return Py_BuildValue("NN", buffer, statelong);  /* "N" hands both references to the tuple */
    1229             : }
    1230             : 
    1231             : /*[clinic input]
    1232             : _multibytecodec.MultibyteIncrementalDecoder.setstate
    1233             :     state: object(subclass_of='&PyTuple_Type')
    1234             :     /
    1235             : [clinic start generated code]*/
    1236             : 
    1237             : static PyObject *
    1238         305 : _multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
    1239             :                                                           PyObject *state)
    1240             : /*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/
    1241             : {
    1242             :     PyObject *buffer;
    1243             :     PyLongObject *statelong;
    1244             :     Py_ssize_t buffersize;
    1245             :     const char *bufferstr;
    1246             :     unsigned char statebytes[8];
    1247             : 
    1248         305 :     if (!PyArg_ParseTuple(state, "SO!;setstate(): illegal state argument",
    1249             :                           &buffer, &PyLong_Type, &statelong))
    1250             :     {
    1251           2 :         return NULL;
    1252             :     }
    1253             : 
    1254         303 :     if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
    1255             :                             1 /* little-endian */ ,
    1256             :                             0 /* unsigned */ ) < 0) {
    1257           0 :         return NULL;
    1258             :     }
    1259             : 
    1260         303 :     buffersize = PyBytes_Size(buffer);
    1261         303 :     if (buffersize == -1) {
    1262           0 :         return NULL;
    1263             :     }
    1264             : 
    1265         303 :     if (buffersize > MAXDECPENDING) {
    1266           1 :         PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
    1267           1 :         return NULL;
    1268             :     }
    1269             : 
    1270         302 :     bufferstr = PyBytes_AsString(buffer);
    1271         302 :     if (bufferstr == NULL) {
    1272           0 :         return NULL;
    1273             :     }
    1274         302 :     self->pendingsize = buffersize;
    1275         302 :     memcpy(self->pending, bufferstr, self->pendingsize);
    1276         302 :     memcpy(self->state.c, statebytes, sizeof(statebytes));
    1277             : 
    1278         302 :     Py_RETURN_NONE;
    1279             : }
    1280             : 
    1281             : /*[clinic input]
    1282             : _multibytecodec.MultibyteIncrementalDecoder.reset
    1283             : [clinic start generated code]*/
    1284             : 
    1285             : static PyObject *
    1286           4 : _multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self)
    1287             : /*[clinic end generated code: output=da423b1782c23ed1 input=3b63b3be85b2fb45]*/
    1288             : {
    1289           5 :     if (self->codec->decreset != NULL &&
    1290           1 :         self->codec->decreset(&self->state, self->codec->config) != 0)
    1291           0 :         return NULL;
    1292           4 :     self->pendingsize = 0;
    1293             : 
    1294           4 :     Py_RETURN_NONE;
    1295             : }
    1296             : 
    1297             : static struct PyMethodDef mbidecoder_methods[] = {
    1298             :     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
    1299             :     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF
    1300             :     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF
    1301             :     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
    1302             :     {NULL, NULL},
    1303             : };
    1304             : 
    1305             : static PyObject *
    1306        1263 : mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
    1307             : {
    1308             :     MultibyteIncrementalDecoderObject *self;
    1309        1263 :     PyObject *codec = NULL;
    1310        1263 :     char *errors = NULL;
    1311             : 
    1312        1263 :     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
    1313             :                                      incnewkwarglist, &errors))
    1314           0 :         return NULL;
    1315             : 
    1316        1263 :     self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
    1317        1263 :     if (self == NULL)
    1318           0 :         return NULL;
    1319             : 
    1320        1263 :     codec = PyObject_GetAttrString((PyObject *)type, "codec");
    1321        1263 :     if (codec == NULL)
    1322           0 :         goto errorexit;
    1323             : 
    1324        1263 :     _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
    1325        1263 :     if (!MultibyteCodec_Check(state, codec)) {
    1326           0 :         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
    1327           0 :         goto errorexit;
    1328             :     }
    1329             : 
    1330        1263 :     self->codec = ((MultibyteCodecObject *)codec)->codec;
    1331        1263 :     self->pendingsize = 0;
    1332        1263 :     self->errors = internal_error_callback(errors);
    1333        1263 :     if (self->errors == NULL)
    1334           0 :         goto errorexit;
    1335        1588 :     if (self->codec->decinit != NULL &&
    1336         325 :         self->codec->decinit(&self->state, self->codec->config) != 0)
    1337           0 :         goto errorexit;
    1338             : 
    1339        1263 :     Py_DECREF(codec);
    1340        1263 :     return (PyObject *)self;
    1341             : 
    1342           0 : errorexit:
    1343           0 :     Py_XDECREF(self);
    1344           0 :     Py_XDECREF(codec);
    1345           0 :     return NULL;
    1346             : }
    1347             : 
    1348             : static int
    1349        1263 : mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
    1350             : {
    1351        1263 :     return 0;
    1352             : }
    1353             : 
    1354             : static int
    1355           2 : mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
    1356             :                     visitproc visit, void *arg)
    1357             : {
    1358           2 :     if (ERROR_ISCUSTOM(self->errors))
    1359           0 :         Py_VISIT(self->errors);
    1360           2 :     return 0;
    1361             : }
    1362             : 
    1363             : static void
    1364        1263 : mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
    1365             : {
    1366        1263 :     PyTypeObject *tp = Py_TYPE(self);
    1367        1263 :     PyObject_GC_UnTrack(self);
    1368        1263 :     ERROR_DECREF(self->errors);
    1369        1263 :     tp->tp_free(self);
    1370        1263 :     Py_DECREF(tp);
    1371        1263 : }
    1372             : 
    1373             : static PyType_Slot decoder_slots[] = {
    1374             :     {Py_tp_dealloc, mbidecoder_dealloc},
    1375             :     {Py_tp_getattro, PyObject_GenericGetAttr},
    1376             :     {Py_tp_traverse, mbidecoder_traverse},
    1377             :     {Py_tp_methods, mbidecoder_methods},
    1378             :     {Py_tp_getset, codecctx_getsets},
    1379             :     {Py_tp_init, mbidecoder_init},
    1380             :     {Py_tp_new, mbidecoder_new},
    1381             :     {0, NULL},
    1382             : };
    1383             : 
    1384             : static PyType_Spec decoder_spec = {
    1385             :     .name = MODULE_NAME ".MultibyteIncrementalDecoder",
    1386             :     .basicsize = sizeof(MultibyteIncrementalDecoderObject),
    1387             :     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
    1388             :               Py_TPFLAGS_IMMUTABLETYPE),
    1389             :     .slots = decoder_slots,
    1390             : };
    1391             : 
    1392             : static PyObject *
    1393      129158 : mbstreamreader_iread(MultibyteStreamReaderObject *self,
    1394             :                      const char *method, Py_ssize_t sizehint)
    1395             : {
    1396             :     MultibyteDecodeBuffer buf;
    1397             :     PyObject *cres, *res;
    1398             :     Py_ssize_t rsize;
    1399             : 
    1400      129158 :     if (sizehint == 0)
    1401           0 :         return PyUnicode_New(0, 0);
    1402             : 
    1403      129158 :     _PyUnicodeWriter_Init(&buf.writer);
    1404      129158 :     buf.excobj = NULL;
    1405      129158 :     cres = NULL;
    1406             : 
    1407       16286 :     for (;;) {
    1408             :         int endoffile;
    1409             : 
    1410      145444 :         if (sizehint < 0)
    1411         746 :             cres = PyObject_CallMethod(self->stream,
    1412             :                             method, NULL);
    1413             :         else
    1414      144698 :             cres = PyObject_CallMethod(self->stream,
    1415             :                             method, "i", sizehint);
    1416      145444 :         if (cres == NULL)
    1417           0 :             goto errorexit;
    1418             : 
    1419      145444 :         if (!PyBytes_Check(cres)) {
    1420           0 :             PyErr_Format(PyExc_TypeError,
    1421             :                          "stream function returned a "
    1422             :                          "non-bytes object (%.100s)",
    1423           0 :                          Py_TYPE(cres)->tp_name);
    1424           0 :             goto errorexit;
    1425             :         }
    1426             : 
    1427      145444 :         endoffile = (PyBytes_GET_SIZE(cres) == 0);
    1428             : 
    1429      145444 :         if (self->pendingsize > 0) {
    1430             :             PyObject *ctr;
    1431             :             char *ctrdata;
    1432             : 
    1433       53500 :             if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
    1434           0 :                 PyErr_NoMemory();
    1435           0 :                 goto errorexit;
    1436             :             }
    1437       53500 :             rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
    1438       53500 :             ctr = PyBytes_FromStringAndSize(NULL, rsize);
    1439       53500 :             if (ctr == NULL)
    1440           0 :                 goto errorexit;
    1441       53500 :             ctrdata = PyBytes_AS_STRING(ctr);
    1442       53500 :             memcpy(ctrdata, self->pending, self->pendingsize);
    1443      107000 :             memcpy(ctrdata + self->pendingsize,
    1444       53500 :                     PyBytes_AS_STRING(cres),
    1445       53500 :                     PyBytes_GET_SIZE(cres));
    1446       53500 :             Py_DECREF(cres);
    1447       53500 :             cres = ctr;
    1448       53500 :             self->pendingsize = 0;
    1449             :         }
    1450             : 
    1451      145444 :         rsize = PyBytes_GET_SIZE(cres);
    1452      145444 :         if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
    1453             :                                    rsize) != 0)
    1454           0 :             goto errorexit;
    1455             : 
    1456      145444 :         if (rsize > 0 && decoder_feed_buffer(
    1457             :                         (MultibyteStatefulDecoderContext *)self, &buf))
    1458           0 :             goto errorexit;
    1459             : 
    1460      145444 :         if (endoffile || sizehint < 0) {
    1461        2857 :             if (buf.inbuf < buf.inbuf_end &&
    1462           1 :                 multibytecodec_decerror(self->codec, &self->state,
    1463             :                             &buf, self->errors, MBERR_TOOFEW))
    1464           1 :                 goto errorexit;
    1465             :         }
    1466             : 
    1467      145443 :         if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
    1468       53500 :             if (decoder_append_pending(STATEFUL_DCTX(self),
    1469             :                                        &buf) != 0)
    1470           0 :                 goto errorexit;
    1471             :         }
    1472             : 
    1473      145443 :         Py_DECREF(cres);
    1474      145443 :         cres = NULL;
    1475             : 
    1476      145443 :         if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
    1477             :             break;
    1478             : 
    1479       16286 :         sizehint = 1; /* read 1 more byte and retry */
    1480             :     }
    1481             : 
    1482      129157 :     res = _PyUnicodeWriter_Finish(&buf.writer);
    1483      129157 :     if (res == NULL)
    1484           0 :         goto errorexit;
    1485             : 
    1486      129157 :     Py_XDECREF(cres);
    1487      129157 :     Py_XDECREF(buf.excobj);
    1488      129157 :     return res;
    1489             : 
    1490           1 : errorexit:
    1491           1 :     Py_XDECREF(cres);
    1492           1 :     Py_XDECREF(buf.excobj);
    1493           1 :     _PyUnicodeWriter_Dealloc(&buf.writer);
    1494           1 :     return NULL;
    1495             : }
    1496             : 
    1497             : /*[clinic input]
    1498             :  _multibytecodec.MultibyteStreamReader.read
    1499             : 
    1500             :     sizeobj: object = None
    1501             :     /
    1502             : [clinic start generated code]*/
    1503             : 
    1504             : static PyObject *
    1505       42296 : _multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self,
    1506             :                                                 PyObject *sizeobj)
    1507             : /*[clinic end generated code: output=35621eb75355d5b8 input=015b0d3ff2fca485]*/
    1508             : {
    1509             :     Py_ssize_t size;
    1510             : 
    1511       42296 :     if (sizeobj == Py_None)
    1512         302 :         size = -1;
    1513       41994 :     else if (PyLong_Check(sizeobj))
    1514       41994 :         size = PyLong_AsSsize_t(sizeobj);
    1515             :     else {
    1516           0 :         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
    1517           0 :         return NULL;
    1518             :     }
    1519             : 
    1520       42296 :     if (size == -1 && PyErr_Occurred())
    1521           0 :         return NULL;
    1522             : 
    1523       42296 :     return mbstreamreader_iread(self, "read", size);
    1524             : }
    1525             : 
    1526             : /*[clinic input]
    1527             :  _multibytecodec.MultibyteStreamReader.readline
    1528             : 
    1529             :     sizeobj: object = None
    1530             :     /
    1531             : [clinic start generated code]*/
    1532             : 
    1533             : static PyObject *
    1534       44831 : _multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self,
    1535             :                                                     PyObject *sizeobj)
    1536             : /*[clinic end generated code: output=4fbfaae1ed457a11 input=41ccc64f9bb0cec3]*/
    1537             : {
    1538             :     Py_ssize_t size;
    1539             : 
    1540       44831 :     if (sizeobj == Py_None)
    1541         165 :         size = -1;
    1542       44666 :     else if (PyLong_Check(sizeobj))
    1543       44666 :         size = PyLong_AsSsize_t(sizeobj);
    1544             :     else {
    1545           0 :         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
    1546           0 :         return NULL;
    1547             :     }
    1548             : 
    1549       44831 :     if (size == -1 && PyErr_Occurred())
    1550           0 :         return NULL;
    1551             : 
    1552       44831 :     return mbstreamreader_iread(self, "readline", size);
    1553             : }
    1554             : 
    1555             : /*[clinic input]
    1556             :  _multibytecodec.MultibyteStreamReader.readlines
    1557             : 
    1558             :     sizehintobj: object = None
    1559             :     /
    1560             : [clinic start generated code]*/
    1561             : 
    1562             : static PyObject *
    1563       42031 : _multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self,
    1564             :                                                      PyObject *sizehintobj)
    1565             : /*[clinic end generated code: output=e7c4310768ed2ad4 input=54932f5d4d88e880]*/
    1566             : {
    1567             :     PyObject *r, *sr;
    1568             :     Py_ssize_t sizehint;
    1569             : 
    1570       42031 :     if (sizehintobj == Py_None)
    1571          38 :         sizehint = -1;
    1572       41993 :     else if (PyLong_Check(sizehintobj))
    1573       41993 :         sizehint = PyLong_AsSsize_t(sizehintobj);
    1574             :     else {
    1575           0 :         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
    1576           0 :         return NULL;
    1577             :     }
    1578             : 
    1579       42031 :     if (sizehint == -1 && PyErr_Occurred())
    1580           0 :         return NULL;
    1581             : 
    1582       42031 :     r = mbstreamreader_iread(self, "read", sizehint);
    1583       42031 :     if (r == NULL)
    1584           0 :         return NULL;
    1585             : 
    1586       42031 :     sr = PyUnicode_Splitlines(r, 1);
    1587       42031 :     Py_DECREF(r);
    1588       42031 :     return sr;
    1589             : }
    1590             : 
    1591             : /*[clinic input]
    1592             :  _multibytecodec.MultibyteStreamReader.reset
    1593             : [clinic start generated code]*/
    1594             : 
    1595             : static PyObject *
    1596         120 : _multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self)
    1597             : /*[clinic end generated code: output=138490370a680abc input=5d4140db84b5e1e2]*/
    1598             : {
    1599         160 :     if (self->codec->decreset != NULL &&
    1600          40 :         self->codec->decreset(&self->state, self->codec->config) != 0)
    1601           0 :         return NULL;
    1602         120 :     self->pendingsize = 0;
    1603             : 
    1604         120 :     Py_RETURN_NONE;
    1605             : }
    1606             : 
    1607             : static struct PyMethodDef mbstreamreader_methods[] = {
    1608             :     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF
    1609             :     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF
    1610             :     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF
    1611             :     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF
    1612             :     {NULL,              NULL},
    1613             : };
    1614             : 
    1615             : static PyMemberDef mbstreamreader_members[] = {
    1616             :     {"stream",          T_OBJECT,
    1617             :                     offsetof(MultibyteStreamReaderObject, stream),
    1618             :                     READONLY, NULL},
    1619             :     {NULL,}
    1620             : };
    1621             : 
    1622             : static PyObject *
    1623        2273 : mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
    1624             : {
    1625             :     MultibyteStreamReaderObject *self;
    1626        2273 :     PyObject *stream, *codec = NULL;
    1627        2273 :     char *errors = NULL;
    1628             : 
    1629        2273 :     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
    1630             :                             streamkwarglist, &stream, &errors))
    1631           0 :         return NULL;
    1632             : 
    1633        2273 :     self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
    1634        2273 :     if (self == NULL)
    1635           0 :         return NULL;
    1636             : 
    1637        2273 :     codec = PyObject_GetAttrString((PyObject *)type, "codec");
    1638        2273 :     if (codec == NULL)
    1639           1 :         goto errorexit;
    1640             : 
    1641        2272 :     _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
    1642        2272 :     if (!MultibyteCodec_Check(state, codec)) {
    1643           0 :         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
    1644           0 :         goto errorexit;
    1645             :     }
    1646             : 
    1647        2272 :     self->codec = ((MultibyteCodecObject *)codec)->codec;
    1648        2272 :     self->stream = stream;
    1649        2272 :     Py_INCREF(stream);
    1650        2272 :     self->pendingsize = 0;
    1651        2272 :     self->errors = internal_error_callback(errors);
    1652        2272 :     if (self->errors == NULL)
    1653           0 :         goto errorexit;
    1654        2756 :     if (self->codec->decinit != NULL &&
    1655         484 :         self->codec->decinit(&self->state, self->codec->config) != 0)
    1656           0 :         goto errorexit;
    1657             : 
    1658        2272 :     Py_DECREF(codec);
    1659        2272 :     return (PyObject *)self;
    1660             : 
    1661           1 : errorexit:
    1662           1 :     Py_XDECREF(self);
    1663           1 :     Py_XDECREF(codec);
    1664           1 :     return NULL;
    1665             : }
    1666             : 
    1667             : static int
    1668        2272 : mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
    1669             : {
    1670        2272 :     return 0;
    1671             : }
    1672             : 
    1673             : static int
    1674           0 : mbstreamreader_traverse(MultibyteStreamReaderObject *self,
    1675             :                         visitproc visit, void *arg)
    1676             : {
    1677           0 :     if (ERROR_ISCUSTOM(self->errors))
    1678           0 :         Py_VISIT(self->errors);
    1679           0 :     Py_VISIT(self->stream);
    1680           0 :     return 0;
    1681             : }
    1682             : 
    1683             : static void
    1684        2273 : mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
    1685             : {
    1686        2273 :     PyTypeObject *tp = Py_TYPE(self);
    1687        2273 :     PyObject_GC_UnTrack(self);
    1688        2273 :     ERROR_DECREF(self->errors);
    1689        2273 :     Py_XDECREF(self->stream);
    1690        2273 :     tp->tp_free(self);
    1691        2273 :     Py_DECREF(tp);
    1692        2273 : }
    1693             : 
    1694             : static PyType_Slot reader_slots[] = {
    1695             :     {Py_tp_dealloc, mbstreamreader_dealloc},
    1696             :     {Py_tp_getattro, PyObject_GenericGetAttr},
    1697             :     {Py_tp_traverse, mbstreamreader_traverse},
    1698             :     {Py_tp_methods, mbstreamreader_methods},
    1699             :     {Py_tp_members, mbstreamreader_members},
    1700             :     {Py_tp_getset, codecctx_getsets},
    1701             :     {Py_tp_init, mbstreamreader_init},
    1702             :     {Py_tp_new, mbstreamreader_new},
    1703             :     {0, NULL},
    1704             : };
    1705             : 
    1706             : static PyType_Spec reader_spec = {
    1707             :     .name = MODULE_NAME ".MultibyteStreamReader",
    1708             :     .basicsize = sizeof(MultibyteStreamReaderObject),
    1709             :     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
    1710             :               Py_TPFLAGS_IMMUTABLETYPE),
    1711             :     .slots = reader_slots,
    1712             : };
    1713             : 
    1714             : static int
    1715       60698 : mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
    1716             :                       PyObject *unistr, PyObject *str_write)
    1717             : {
    1718             :     PyObject *str, *wr;
    1719             : 
    1720       60698 :     str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
    1721       60698 :     if (str == NULL)
    1722           0 :         return -1;
    1723             : 
    1724       60698 :     wr = _PyObject_CallMethodOneArg(self->stream, str_write, str);
    1725       60698 :     Py_DECREF(str);
    1726       60698 :     if (wr == NULL)
    1727           0 :         return -1;
    1728             : 
    1729       60698 :     Py_DECREF(wr);
    1730       60698 :     return 0;
    1731             : }
    1732             : 
    1733             : /*[clinic input]
    1734             :  _multibytecodec.MultibyteStreamWriter.write
    1735             : 
    1736             :     cls: defining_class
    1737             :     strobj: object
    1738             :     /
    1739             : [clinic start generated code]*/
    1740             : 
    1741             : static PyObject *
    1742       55150 : _multibytecodec_MultibyteStreamWriter_write_impl(MultibyteStreamWriterObject *self,
    1743             :                                                  PyTypeObject *cls,
    1744             :                                                  PyObject *strobj)
    1745             : /*[clinic end generated code: output=68ade3aea26410ac input=199f26f68bd8425a]*/
    1746             : {
    1747       55150 :     _multibytecodec_state *state = PyType_GetModuleState(cls);
    1748       55150 :     assert(state != NULL);
    1749       55150 :     if (mbstreamwriter_iwrite(self, strobj, state->str_write)) {
    1750           0 :         return NULL;
    1751             :     }
    1752       55150 :     Py_RETURN_NONE;
    1753             : }
    1754             : 
    1755             : /*[clinic input]
    1756             :  _multibytecodec.MultibyteStreamWriter.writelines
    1757             : 
    1758             :     cls: defining_class
    1759             :     lines: object
    1760             :     /
    1761             : [clinic start generated code]*/
    1762             : 
    1763             : static PyObject *
    1764         722 : _multibytecodec_MultibyteStreamWriter_writelines_impl(MultibyteStreamWriterObject *self,
    1765             :                                                       PyTypeObject *cls,
    1766             :                                                       PyObject *lines)
    1767             : /*[clinic end generated code: output=b4c99d2cf23ffb88 input=a6d5fe7c74972a34]*/
    1768             : {
    1769             :     PyObject *strobj;
    1770             :     int i, r;
    1771             : 
    1772         722 :     if (!PySequence_Check(lines)) {
    1773           0 :         PyErr_SetString(PyExc_TypeError,
    1774             :                         "arg must be a sequence object");
    1775           0 :         return NULL;
    1776             :     }
    1777             : 
    1778         722 :     _multibytecodec_state *state = PyType_GetModuleState(cls);
    1779         722 :     assert(state != NULL);
    1780        6270 :     for (i = 0; i < PySequence_Length(lines); i++) {
    1781             :         /* length can be changed even within this loop */
    1782        5548 :         strobj = PySequence_GetItem(lines, i);
    1783        5548 :         if (strobj == NULL)
    1784           0 :             return NULL;
    1785             : 
    1786        5548 :         r = mbstreamwriter_iwrite(self, strobj, state->str_write);
    1787        5548 :         Py_DECREF(strobj);
    1788        5548 :         if (r == -1)
    1789           0 :             return NULL;
    1790             :     }
    1791             :     /* PySequence_Length() can fail */
    1792         722 :     if (PyErr_Occurred())
    1793           0 :         return NULL;
    1794             : 
    1795         722 :     Py_RETURN_NONE;
    1796             : }
    1797             : 
    1798             : /*[clinic input]
    1799             :  _multibytecodec.MultibyteStreamWriter.reset
    1800             : 
    1801             :     cls: defining_class
    1802             :     /
    1803             : 
    1804             : [clinic start generated code]*/
    1805             : 
    1806             : static PyObject *
    1807          19 : _multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self,
    1808             :                                                  PyTypeObject *cls)
    1809             : /*[clinic end generated code: output=32ef224c2a38aa3d input=28af6a9cd38d1979]*/
    1810             : {
    1811             :     PyObject *pwrt;
    1812             : 
    1813          19 :     if (!self->pending)
    1814          19 :         Py_RETURN_NONE;
    1815             : 
    1816           0 :     pwrt = multibytecodec_encode(self->codec, &self->state,
    1817             :                     self->pending, NULL, self->errors,
    1818             :                     MBENC_FLUSH | MBENC_RESET);
    1819             :     /* some pending buffer can be truncated when UnicodeEncodeError is
    1820             :      * raised on 'strict' mode. but, 'reset' method is designed to
    1821             :      * reset the pending buffer or states so failed string sequence
    1822             :      * ought to be missed */
    1823           0 :     Py_CLEAR(self->pending);
    1824           0 :     if (pwrt == NULL)
    1825           0 :         return NULL;
    1826             : 
    1827           0 :     assert(PyBytes_Check(pwrt));
    1828             : 
    1829           0 :     _multibytecodec_state *state = PyType_GetModuleState(cls);
    1830           0 :     assert(state != NULL);
    1831             : 
    1832           0 :     if (PyBytes_Size(pwrt) > 0) {
    1833             :         PyObject *wr;
    1834             : 
    1835           0 :         wr = _PyObject_CallMethodOneArg(self->stream, state->str_write, pwrt);
    1836           0 :         if (wr == NULL) {
    1837           0 :             Py_DECREF(pwrt);
    1838           0 :             return NULL;
    1839             :         }
    1840             :     }
    1841           0 :     Py_DECREF(pwrt);
    1842             : 
    1843           0 :     Py_RETURN_NONE;
    1844             : }
    1845             : 
    1846             : static PyObject *
    1847        2213 : mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
    1848             : {
    1849             :     MultibyteStreamWriterObject *self;
    1850        2213 :     PyObject *stream, *codec = NULL;
    1851        2213 :     char *errors = NULL;
    1852             : 
    1853        2213 :     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
    1854             :                             streamkwarglist, &stream, &errors))
    1855           0 :         return NULL;
    1856             : 
    1857        2213 :     self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
    1858        2213 :     if (self == NULL)
    1859           0 :         return NULL;
    1860             : 
    1861        2213 :     codec = PyObject_GetAttrString((PyObject *)type, "codec");
    1862        2213 :     if (codec == NULL)
    1863           1 :         goto errorexit;
    1864             : 
    1865        2212 :     _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
    1866        2212 :     if (!MultibyteCodec_Check(state, codec)) {
    1867           0 :         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
    1868           0 :         goto errorexit;
    1869             :     }
    1870             : 
    1871        2212 :     self->codec = ((MultibyteCodecObject *)codec)->codec;
    1872        2212 :     self->stream = stream;
    1873        2212 :     Py_INCREF(stream);
    1874        2212 :     self->pending = NULL;
    1875        2212 :     self->errors = internal_error_callback(errors);
    1876        2212 :     if (self->errors == NULL)
    1877           0 :         goto errorexit;
    1878        2680 :     if (self->codec->encinit != NULL &&
    1879         468 :         self->codec->encinit(&self->state, self->codec->config) != 0)
    1880           0 :         goto errorexit;
    1881             : 
    1882        2212 :     Py_DECREF(codec);
    1883        2212 :     return (PyObject *)self;
    1884             : 
    1885           1 : errorexit:
    1886           1 :     Py_XDECREF(self);
    1887           1 :     Py_XDECREF(codec);
    1888           1 :     return NULL;
    1889             : }
    1890             : 
    1891             : static int
    1892        2212 : mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
    1893             : {
    1894        2212 :     return 0;
    1895             : }
    1896             : 
    1897             : static int
    1898           0 : mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
    1899             :                         visitproc visit, void *arg)
    1900             : {
    1901           0 :     if (ERROR_ISCUSTOM(self->errors))
    1902           0 :         Py_VISIT(self->errors);
    1903           0 :     Py_VISIT(self->stream);
    1904           0 :     return 0;
    1905             : }
    1906             : 
    1907             : static void
    1908        2213 : mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
    1909             : {
    1910        2213 :     PyTypeObject *tp = Py_TYPE(self);
    1911        2213 :     PyObject_GC_UnTrack(self);
    1912        2213 :     ERROR_DECREF(self->errors);
    1913        2213 :     Py_XDECREF(self->stream);
    1914        2213 :     tp->tp_free(self);
    1915        2213 :     Py_DECREF(tp);
    1916        2213 : }
    1917             : 
    1918             : static struct PyMethodDef mbstreamwriter_methods[] = {
    1919             :     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF
    1920             :     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF
    1921             :     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF
    1922             :     {NULL, NULL},
    1923             : };
    1924             : 
    1925             : static PyMemberDef mbstreamwriter_members[] = {
    1926             :     {"stream",          T_OBJECT,
    1927             :                     offsetof(MultibyteStreamWriterObject, stream),
    1928             :                     READONLY, NULL},
    1929             :     {NULL,}
    1930             : };
    1931             : 
    1932             : static PyType_Slot writer_slots[] = {
    1933             :     {Py_tp_dealloc, mbstreamwriter_dealloc},
    1934             :     {Py_tp_getattro, PyObject_GenericGetAttr},
    1935             :     {Py_tp_traverse, mbstreamwriter_traverse},
    1936             :     {Py_tp_methods, mbstreamwriter_methods},
    1937             :     {Py_tp_members, mbstreamwriter_members},
    1938             :     {Py_tp_getset, codecctx_getsets},
    1939             :     {Py_tp_init, mbstreamwriter_init},
    1940             :     {Py_tp_new, mbstreamwriter_new},
    1941             :     {0, NULL},
    1942             : };
    1943             : 
    1944             : static PyType_Spec writer_spec = {
    1945             :     .name = MODULE_NAME ".MultibyteStreamWriter",
    1946             :     .basicsize = sizeof(MultibyteStreamWriterObject),
    1947             :     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
    1948             :               Py_TPFLAGS_IMMUTABLETYPE),
    1949             :     .slots = writer_slots,
    1950             : };
    1951             : 
    1952             : 
    1953             : /*[clinic input]
    1954             : _multibytecodec.__create_codec
    1955             : 
    1956             :     arg: object
    1957             :     /
    1958             : [clinic start generated code]*/
    1959             : 
    1960             : static PyObject *
    1961         142 : _multibytecodec___create_codec(PyObject *module, PyObject *arg)
    1962             : /*[clinic end generated code: output=cfa3dce8260e809d input=6840b2a6b183fcfa]*/
    1963             : {
    1964             :     MultibyteCodecObject *self;
    1965             :     MultibyteCodec *codec;
    1966             : 
    1967         142 :     if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) {
    1968           0 :         PyErr_SetString(PyExc_ValueError, "argument type invalid");
    1969           0 :         return NULL;
    1970             :     }
    1971             : 
    1972         142 :     codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME);
    1973         142 :     if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
    1974           0 :         return NULL;
    1975             : 
    1976         142 :     _multibytecodec_state *state = _multibytecodec_get_state(module);
    1977         142 :     self = PyObject_GC_New(MultibyteCodecObject, state->multibytecodec_type);
    1978         142 :     if (self == NULL)
    1979           0 :         return NULL;
    1980         142 :     self->codec = codec;
    1981             : 
    1982         142 :     PyObject_GC_Track(self);
    1983         142 :     return (PyObject *)self;
    1984             : }
    1985             : 
    1986             : static int
    1987         816 : _multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
    1988             : {
    1989         816 :     _multibytecodec_state *state = _multibytecodec_get_state(mod);
    1990         816 :     Py_VISIT(state->multibytecodec_type);
    1991         816 :     Py_VISIT(state->encoder_type);
    1992         816 :     Py_VISIT(state->decoder_type);
    1993         816 :     Py_VISIT(state->reader_type);
    1994         816 :     Py_VISIT(state->writer_type);
    1995         816 :     return 0;
    1996             : }
    1997             : 
    1998             : static int
    1999          32 : _multibytecodec_clear(PyObject *mod)
    2000             : {
    2001          32 :     _multibytecodec_state *state = _multibytecodec_get_state(mod);
    2002          32 :     Py_CLEAR(state->multibytecodec_type);
    2003          32 :     Py_CLEAR(state->encoder_type);
    2004          32 :     Py_CLEAR(state->decoder_type);
    2005          32 :     Py_CLEAR(state->reader_type);
    2006          32 :     Py_CLEAR(state->writer_type);
    2007          32 :     Py_CLEAR(state->str_write);
    2008          32 :     return 0;
    2009             : }
    2010             : 
    2011             : static void
    2012          25 : _multibytecodec_free(void *mod)
    2013             : {
    2014          25 :     _multibytecodec_clear((PyObject *)mod);
    2015          25 : }
    2016             : 
    2017             : #define CREATE_TYPE(module, type, spec)                                      \
    2018             :     do {                                                                     \
    2019             :         type = (PyTypeObject *)PyType_FromModuleAndSpec(module, spec, NULL); \
    2020             :         if (!type) {                                                         \
    2021             :             return -1;                                                       \
    2022             :         }                                                                    \
    2023             :     } while (0)
    2024             : 
    2025             : #define ADD_TYPE(module, type)                    \
    2026             :     do {                                          \
    2027             :         if (PyModule_AddType(module, type) < 0) { \
    2028             :             return -1;                            \
    2029             :         }                                         \
    2030             :     } while (0)
    2031             : 
    2032             : static int
    2033          25 : _multibytecodec_exec(PyObject *mod)
    2034             : {
    2035          25 :     _multibytecodec_state *state = _multibytecodec_get_state(mod);
    2036          25 :     state->str_write = PyUnicode_InternFromString("write");
    2037          25 :     if (state->str_write == NULL) {
    2038           0 :         return -1;
    2039             :     }
    2040          25 :     CREATE_TYPE(mod, state->multibytecodec_type, &multibytecodec_spec);
    2041          25 :     CREATE_TYPE(mod, state->encoder_type, &encoder_spec);
    2042          25 :     CREATE_TYPE(mod, state->decoder_type, &decoder_spec);
    2043          25 :     CREATE_TYPE(mod, state->reader_type, &reader_spec);
    2044          25 :     CREATE_TYPE(mod, state->writer_type, &writer_spec);
    2045             : 
    2046          25 :     ADD_TYPE(mod, state->encoder_type);
    2047          25 :     ADD_TYPE(mod, state->decoder_type);
    2048          25 :     ADD_TYPE(mod, state->reader_type);
    2049          25 :     ADD_TYPE(mod, state->writer_type);
    2050          25 :     return 0;
    2051             : }
    2052             : 
    2053             : #undef CREATE_TYPE
    2054             : #undef ADD_TYPE
    2055             : 
    2056             : static struct PyMethodDef _multibytecodec_methods[] = {
    2057             :     _MULTIBYTECODEC___CREATE_CODEC_METHODDEF
    2058             :     {NULL, NULL},
    2059             : };
    2060             : 
    2061             : static PyModuleDef_Slot _multibytecodec_slots[] = {
    2062             :     {Py_mod_exec, _multibytecodec_exec},
    2063             :     {0, NULL}
    2064             : };
    2065             : 
    2066             : static struct PyModuleDef _multibytecodecmodule = {
    2067             :     .m_base = PyModuleDef_HEAD_INIT,
    2068             :     .m_name = "_multibytecodec",
    2069             :     .m_size = sizeof(_multibytecodec_state),
    2070             :     .m_methods = _multibytecodec_methods,
    2071             :     .m_slots = _multibytecodec_slots,
    2072             :     .m_traverse = _multibytecodec_traverse,
    2073             :     .m_clear = _multibytecodec_clear,
    2074             :     .m_free = _multibytecodec_free,
    2075             : };
    2076             : 
    2077             : PyMODINIT_FUNC
    2078          25 : PyInit__multibytecodec(void)
    2079             : {
    2080          25 :     return PyModuleDef_Init(&_multibytecodecmodule);
    2081             : }

Generated by: LCOV version 1.14