Line data Source code
1 : /*
2 : * multibytecodec.c: Common Multibyte Codec Implementation
3 : *
4 : * Written by Hye-Shik Chang <perky@FreeBSD.org>
5 : */
6 :
7 : #define PY_SSIZE_T_CLEAN
8 : #include "Python.h"
9 : #include "structmember.h" // PyMemberDef
10 : #include "multibytecodec.h"
11 : #include "clinic/multibytecodec.c.h"
12 :
13 : #define MODULE_NAME "_multibytecodec"
14 :
15 : typedef struct {
16 : PyTypeObject *encoder_type;
17 : PyTypeObject *decoder_type;
18 : PyTypeObject *reader_type;
19 : PyTypeObject *writer_type;
20 : PyTypeObject *multibytecodec_type;
21 : PyObject *str_write;
22 : } _multibytecodec_state;
23 :
24 : static _multibytecodec_state *
25 8025 : _multibytecodec_get_state(PyObject *module)
26 : {
27 8025 : _multibytecodec_state *state = PyModule_GetState(module);
28 8025 : assert(state != NULL);
29 8025 : return state;
30 : }
31 :
32 : static struct PyModuleDef _multibytecodecmodule;
33 : static _multibytecodec_state *
34 7010 : _multibyte_codec_find_state_by_type(PyTypeObject *type)
35 : {
36 7010 : PyObject *module = PyType_GetModuleByDef(type, &_multibytecodecmodule);
37 7010 : assert(module != NULL);
38 7010 : return _multibytecodec_get_state(module);
39 : }
40 :
41 : #define clinic_get_state() _multibyte_codec_find_state_by_type(type)
42 : /*[clinic input]
43 : module _multibytecodec
44 : class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "clinic_get_state()->multibytecodec_type"
45 : class _multibytecodec.MultibyteIncrementalEncoder "MultibyteIncrementalEncoderObject *" "clinic_get_state()->encoder_type"
46 : class _multibytecodec.MultibyteIncrementalDecoder "MultibyteIncrementalDecoderObject *" "clinic_get_state()->decoder_type"
47 : class _multibytecodec.MultibyteStreamReader "MultibyteStreamReaderObject *" "clinic_get_state()->reader_type"
48 : class _multibytecodec.MultibyteStreamWriter "MultibyteStreamWriterObject *" "clinic_get_state()->writer_type"
49 : [clinic start generated code]*/
50 : /*[clinic end generated code: output=da39a3ee5e6b4b0d input=305a76dfdd24b99c]*/
51 : #undef clinic_get_state
52 :
53 : typedef struct {
54 : PyObject *inobj;
55 : Py_ssize_t inpos, inlen;
56 : unsigned char *outbuf, *outbuf_end;
57 : PyObject *excobj, *outobj;
58 : } MultibyteEncodeBuffer;
59 :
60 : typedef struct {
61 : const unsigned char *inbuf, *inbuf_top, *inbuf_end;
62 : PyObject *excobj;
63 : _PyUnicodeWriter writer;
64 : } MultibyteDecodeBuffer;
65 :
66 : static char *incnewkwarglist[] = {"errors", NULL};
67 : static char *streamkwarglist[] = {"stream", "errors", NULL};
68 :
69 : static PyObject *multibytecodec_encode(MultibyteCodec *,
70 : MultibyteCodec_State *, PyObject *, Py_ssize_t *,
71 : PyObject *, int);
72 :
73 : #define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */
74 :
75 : static PyObject *
76 1420770 : make_tuple(PyObject *object, Py_ssize_t len)
77 : {
78 : PyObject *v, *w;
79 :
80 1420770 : if (object == NULL)
81 0 : return NULL;
82 :
83 1420770 : v = PyTuple_New(2);
84 1420770 : if (v == NULL) {
85 0 : Py_DECREF(object);
86 0 : return NULL;
87 : }
88 1420770 : PyTuple_SET_ITEM(v, 0, object);
89 :
90 1420770 : w = PyLong_FromSsize_t(len);
91 1420770 : if (w == NULL) {
92 0 : Py_DECREF(v);
93 0 : return NULL;
94 : }
95 1420770 : PyTuple_SET_ITEM(v, 1, w);
96 :
97 1420770 : return v;
98 : }
99 :
100 : static PyObject *
101 1428020 : internal_error_callback(const char *errors)
102 : {
103 1428020 : if (errors == NULL || strcmp(errors, "strict") == 0)
104 378875 : return ERROR_STRICT;
105 1049150 : else if (strcmp(errors, "ignore") == 0)
106 1048730 : return ERROR_IGNORE;
107 422 : else if (strcmp(errors, "replace") == 0)
108 132 : return ERROR_REPLACE;
109 : else
110 290 : return PyUnicode_FromString(errors);
111 : }
112 :
113 : static PyObject *
114 2841 : call_error_callback(PyObject *errors, PyObject *exc)
115 : {
116 : PyObject *cb, *r;
117 : const char *str;
118 :
119 2841 : assert(PyUnicode_Check(errors));
120 2841 : str = PyUnicode_AsUTF8(errors);
121 2841 : if (str == NULL)
122 0 : return NULL;
123 2841 : cb = PyCodec_LookupError(str);
124 2841 : if (cb == NULL)
125 0 : return NULL;
126 :
127 2841 : r = PyObject_CallOneArg(cb, exc);
128 2841 : Py_DECREF(cb);
129 2841 : return r;
130 : }
131 :
132 : static PyObject *
133 0 : codecctx_errors_get(MultibyteStatefulCodecContext *self, void *Py_UNUSED(ignored))
134 : {
135 : const char *errors;
136 :
137 0 : if (self->errors == ERROR_STRICT)
138 0 : errors = "strict";
139 0 : else if (self->errors == ERROR_IGNORE)
140 0 : errors = "ignore";
141 0 : else if (self->errors == ERROR_REPLACE)
142 0 : errors = "replace";
143 : else {
144 0 : Py_INCREF(self->errors);
145 0 : return self->errors;
146 : }
147 :
148 0 : return PyUnicode_FromString(errors);
149 : }
150 :
151 : static int
152 76 : codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
153 : void *closure)
154 : {
155 : PyObject *cb;
156 : const char *str;
157 :
158 76 : if (value == NULL) {
159 19 : PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
160 19 : return -1;
161 : }
162 57 : if (!PyUnicode_Check(value)) {
163 0 : PyErr_SetString(PyExc_TypeError, "errors must be a string");
164 0 : return -1;
165 : }
166 :
167 57 : str = PyUnicode_AsUTF8(value);
168 57 : if (str == NULL)
169 0 : return -1;
170 :
171 57 : cb = internal_error_callback(str);
172 57 : if (cb == NULL)
173 0 : return -1;
174 :
175 57 : ERROR_DECREF(self->errors);
176 57 : self->errors = cb;
177 57 : return 0;
178 : }
179 :
180 : /* This getset handlers list is used by all the stateful codec objects */
181 : static PyGetSetDef codecctx_getsets[] = {
182 : {"errors", (getter)codecctx_errors_get,
183 : (setter)codecctx_errors_set,
184 : PyDoc_STR("how to treat errors")},
185 : {NULL,}
186 : };
187 :
188 : static int
189 279 : expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
190 : {
191 : Py_ssize_t orgpos, orgsize, incsize;
192 :
193 558 : orgpos = (Py_ssize_t)((char *)buf->outbuf -
194 279 : PyBytes_AS_STRING(buf->outobj));
195 279 : orgsize = PyBytes_GET_SIZE(buf->outobj);
196 279 : incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
197 :
198 279 : if (orgsize > PY_SSIZE_T_MAX - incsize) {
199 0 : PyErr_NoMemory();
200 0 : return -1;
201 : }
202 :
203 279 : if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
204 0 : return -1;
205 :
206 279 : buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
207 279 : buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
208 279 : + PyBytes_GET_SIZE(buf->outobj);
209 :
210 279 : return 0;
211 : }
212 : #define REQUIRE_ENCODEBUFFER(buf, s) do { \
213 : if ((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) \
214 : if (expand_encodebuffer(buf, s) == -1) \
215 : goto errorexit; \
216 : } while(0)
217 :
218 :
219 : /**
220 : * MultibyteCodec object
221 : */
222 :
223 : static int
224 1051720 : multibytecodec_encerror(MultibyteCodec *codec,
225 : MultibyteCodec_State *state,
226 : MultibyteEncodeBuffer *buf,
227 : PyObject *errors, Py_ssize_t e)
228 : {
229 1051720 : PyObject *retobj = NULL, *retstr = NULL, *tobj;
230 : Py_ssize_t retstrsize, newpos;
231 : Py_ssize_t esize, start, end;
232 : const char *reason;
233 :
234 1051720 : if (e > 0) {
235 1051480 : reason = "illegal multibyte sequence";
236 1051480 : esize = e;
237 : }
238 : else {
239 246 : switch (e) {
240 246 : case MBERR_TOOSMALL:
241 246 : REQUIRE_ENCODEBUFFER(buf, -1);
242 246 : return 0; /* retry it */
243 0 : case MBERR_TOOFEW:
244 0 : reason = "incomplete multibyte sequence";
245 0 : esize = (Py_ssize_t)buf->inpos;
246 0 : break;
247 0 : case MBERR_INTERNAL:
248 0 : PyErr_SetString(PyExc_RuntimeError,
249 : "internal codec error");
250 0 : return -1;
251 0 : default:
252 0 : PyErr_SetString(PyExc_RuntimeError,
253 : "unknown runtime error");
254 0 : return -1;
255 : }
256 : }
257 :
258 1051480 : if (errors == ERROR_REPLACE) {
259 : PyObject *replchar;
260 : Py_ssize_t r;
261 : Py_ssize_t inpos;
262 : int kind;
263 : const void *data;
264 :
265 0 : replchar = PyUnicode_FromOrdinal('?');
266 0 : if (replchar == NULL)
267 0 : goto errorexit;
268 0 : kind = PyUnicode_KIND(replchar);
269 0 : data = PyUnicode_DATA(replchar);
270 :
271 0 : inpos = 0;
272 0 : for (;;) {
273 0 : Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
274 :
275 0 : r = codec->encode(state, codec->config,
276 : kind, data, &inpos, 1,
277 : &buf->outbuf, outleft, 0);
278 0 : if (r == MBERR_TOOSMALL) {
279 0 : REQUIRE_ENCODEBUFFER(buf, -1);
280 0 : continue;
281 : }
282 : else
283 0 : break;
284 : }
285 :
286 0 : Py_DECREF(replchar);
287 :
288 0 : if (r != 0) {
289 0 : REQUIRE_ENCODEBUFFER(buf, 1);
290 0 : *buf->outbuf++ = '?';
291 : }
292 : }
293 1051480 : if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
294 1048610 : buf->inpos += esize;
295 1048610 : return 0;
296 : }
297 :
298 2863 : start = (Py_ssize_t)buf->inpos;
299 2863 : end = start + esize;
300 :
301 : /* use cached exception object if available */
302 2863 : if (buf->excobj == NULL) {
303 312 : buf->excobj = PyObject_CallFunction(PyExc_UnicodeEncodeError,
304 : "sOnns",
305 : codec->encoding, buf->inobj,
306 : start, end, reason);
307 312 : if (buf->excobj == NULL)
308 0 : goto errorexit;
309 : }
310 : else
311 5102 : if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
312 5102 : PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
313 2551 : PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
314 0 : goto errorexit;
315 :
316 2863 : if (errors == ERROR_STRICT) {
317 23 : PyCodec_StrictErrors(buf->excobj);
318 23 : goto errorexit;
319 : }
320 :
321 2840 : retobj = call_error_callback(errors, buf->excobj);
322 2840 : if (retobj == NULL)
323 0 : goto errorexit;
324 :
325 5680 : if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
326 5699 : (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
327 2764 : !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
328 95 : PyErr_SetString(PyExc_TypeError,
329 : "encoding error handler must return "
330 : "(str, int) tuple");
331 95 : goto errorexit;
332 : }
333 :
334 2745 : if (PyUnicode_Check(tobj)) {
335 : Py_ssize_t inpos;
336 :
337 2726 : retstr = multibytecodec_encode(codec, state, tobj,
338 : &inpos, ERROR_STRICT,
339 : MBENC_FLUSH);
340 2726 : if (retstr == NULL)
341 0 : goto errorexit;
342 : }
343 : else {
344 19 : Py_INCREF(tobj);
345 19 : retstr = tobj;
346 : }
347 :
348 2745 : assert(PyBytes_Check(retstr));
349 2745 : retstrsize = PyBytes_GET_SIZE(retstr);
350 2745 : if (retstrsize > 0) {
351 345 : REQUIRE_ENCODEBUFFER(buf, retstrsize);
352 345 : memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
353 345 : buf->outbuf += retstrsize;
354 : }
355 :
356 2745 : newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
357 2745 : if (newpos < 0 && !PyErr_Occurred())
358 0 : newpos += (Py_ssize_t)buf->inlen;
359 2745 : if (newpos < 0 || newpos > buf->inlen) {
360 38 : PyErr_Clear();
361 38 : PyErr_Format(PyExc_IndexError,
362 : "position %zd from error handler out of bounds",
363 : newpos);
364 38 : goto errorexit;
365 : }
366 2707 : buf->inpos = newpos;
367 :
368 2707 : Py_DECREF(retobj);
369 2707 : Py_DECREF(retstr);
370 2707 : return 0;
371 :
372 156 : errorexit:
373 156 : Py_XDECREF(retobj);
374 156 : Py_XDECREF(retstr);
375 156 : return -1;
376 : }
377 :
378 : static int
379 3087 : multibytecodec_decerror(MultibyteCodec *codec,
380 : MultibyteCodec_State *state,
381 : MultibyteDecodeBuffer *buf,
382 : PyObject *errors, Py_ssize_t e)
383 : {
384 3087 : PyObject *retobj = NULL, *retuni = NULL;
385 : Py_ssize_t newpos;
386 : const char *reason;
387 : Py_ssize_t esize, start, end;
388 :
389 3087 : if (e > 0) {
390 3015 : reason = "illegal multibyte sequence";
391 3015 : esize = e;
392 : }
393 : else {
394 72 : switch (e) {
395 0 : case MBERR_TOOSMALL:
396 0 : return 0; /* retry it */
397 72 : case MBERR_TOOFEW:
398 72 : reason = "incomplete multibyte sequence";
399 72 : esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
400 72 : break;
401 0 : case MBERR_INTERNAL:
402 0 : PyErr_SetString(PyExc_RuntimeError,
403 : "internal codec error");
404 0 : return -1;
405 0 : case MBERR_EXCEPTION:
406 0 : return -1;
407 0 : default:
408 0 : PyErr_SetString(PyExc_RuntimeError,
409 : "unknown runtime error");
410 0 : return -1;
411 : }
412 : }
413 :
414 3087 : if (errors == ERROR_REPLACE) {
415 3003 : if (_PyUnicodeWriter_WriteChar(&buf->writer,
416 : Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
417 0 : goto errorexit;
418 : }
419 3087 : if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
420 3034 : buf->inbuf += esize;
421 3034 : return 0;
422 : }
423 :
424 53 : start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
425 53 : end = start + esize;
426 :
427 : /* use cached exception object if available */
428 53 : if (buf->excobj == NULL) {
429 106 : buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
430 53 : (const char *)buf->inbuf_top,
431 53 : (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
432 : start, end, reason);
433 53 : if (buf->excobj == NULL)
434 0 : goto errorexit;
435 : }
436 : else
437 0 : if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
438 0 : PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
439 0 : PyUnicodeDecodeError_SetReason(buf->excobj, reason))
440 0 : goto errorexit;
441 :
442 53 : if (errors == ERROR_STRICT) {
443 52 : PyCodec_StrictErrors(buf->excobj);
444 52 : goto errorexit;
445 : }
446 :
447 1 : retobj = call_error_callback(errors, buf->excobj);
448 1 : if (retobj == NULL)
449 0 : goto errorexit;
450 :
451 2 : if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
452 2 : !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
453 1 : !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
454 0 : PyErr_SetString(PyExc_TypeError,
455 : "decoding error handler must return "
456 : "(str, int) tuple");
457 0 : goto errorexit;
458 : }
459 :
460 1 : if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
461 0 : goto errorexit;
462 :
463 1 : newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
464 1 : if (newpos < 0 && !PyErr_Occurred())
465 0 : newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
466 1 : if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
467 1 : PyErr_Clear();
468 1 : PyErr_Format(PyExc_IndexError,
469 : "position %zd from error handler out of bounds",
470 : newpos);
471 1 : goto errorexit;
472 : }
473 0 : buf->inbuf = buf->inbuf_top + newpos;
474 0 : Py_DECREF(retobj);
475 0 : return 0;
476 :
477 53 : errorexit:
478 53 : Py_XDECREF(retobj);
479 53 : return -1;
480 : }
481 :
482 : static PyObject *
483 1325780 : multibytecodec_encode(MultibyteCodec *codec,
484 : MultibyteCodec_State *state,
485 : PyObject *text, Py_ssize_t *inpos_t,
486 : PyObject *errors, int flags)
487 : {
488 : MultibyteEncodeBuffer buf;
489 1325780 : Py_ssize_t finalsize, r = 0;
490 : Py_ssize_t datalen;
491 : int kind;
492 : const void *data;
493 :
494 1325780 : if (PyUnicode_READY(text) < 0)
495 0 : return NULL;
496 1325780 : datalen = PyUnicode_GET_LENGTH(text);
497 :
498 1325780 : if (datalen == 0 && !(flags & MBENC_RESET))
499 2425 : return PyBytes_FromStringAndSize(NULL, 0);
500 :
501 1323360 : buf.excobj = NULL;
502 1323360 : buf.outobj = NULL;
503 1323360 : buf.inobj = text; /* borrowed reference */
504 1323360 : buf.inpos = 0;
505 1323360 : buf.inlen = datalen;
506 1323360 : kind = PyUnicode_KIND(buf.inobj);
507 1323360 : data = PyUnicode_DATA(buf.inobj);
508 :
509 1323360 : if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
510 0 : PyErr_NoMemory();
511 0 : goto errorexit;
512 : }
513 :
514 1323360 : buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
515 1323360 : if (buf.outobj == NULL)
516 0 : goto errorexit;
517 1323360 : buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
518 1323360 : buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
519 :
520 2374920 : while (buf.inpos < buf.inlen) {
521 : /* we don't reuse inleft and outleft here.
522 : * error callbacks can relocate the cursor anywhere on buffer*/
523 1326100 : Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
524 :
525 1326100 : r = codec->encode(state, codec->config,
526 : kind, data,
527 : &buf.inpos, buf.inlen,
528 : &buf.outbuf, outleft, flags);
529 1326100 : if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
530 : break;
531 1051720 : else if (multibytecodec_encerror(codec, state, &buf, errors,r))
532 156 : goto errorexit;
533 1051570 : else if (r == MBERR_TOOFEW)
534 0 : break;
535 : }
536 :
537 1323200 : if (codec->encreset != NULL && (flags & MBENC_RESET))
538 0 : for (;;) {
539 : Py_ssize_t outleft;
540 :
541 1048900 : outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
542 1048900 : r = codec->encreset(state, codec->config, &buf.outbuf,
543 : outleft);
544 1048900 : if (r == 0)
545 1048900 : break;
546 0 : else if (multibytecodec_encerror(codec, state,
547 : &buf, errors, r))
548 0 : goto errorexit;
549 : }
550 :
551 2646400 : finalsize = (Py_ssize_t)((char *)buf.outbuf -
552 1323200 : PyBytes_AS_STRING(buf.outobj));
553 :
554 1323200 : if (finalsize != PyBytes_GET_SIZE(buf.outobj))
555 1323170 : if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
556 0 : goto errorexit;
557 :
558 1323200 : if (inpos_t)
559 88404 : *inpos_t = buf.inpos;
560 1323200 : Py_XDECREF(buf.excobj);
561 1323200 : return buf.outobj;
562 :
563 156 : errorexit:
564 156 : Py_XDECREF(buf.excobj);
565 156 : Py_XDECREF(buf.outobj);
566 156 : return NULL;
567 : }
568 :
569 : /*[clinic input]
570 : _multibytecodec.MultibyteCodec.encode
571 :
572 : input: object
573 : errors: str(accept={str, NoneType}) = None
574 :
575 : Return an encoded string version of `input'.
576 :
577 : 'errors' may be given to set a different error handling scheme. Default is
578 : 'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible
579 : values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name
580 : registered with codecs.register_error that can handle UnicodeEncodeErrors.
581 : [clinic start generated code]*/
582 :
583 : static PyObject *
584 1234930 : _multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self,
585 : PyObject *input,
586 : const char *errors)
587 : /*[clinic end generated code: output=7b26652045ba56a9 input=606d0e128a577bae]*/
588 : {
589 : MultibyteCodec_State state;
590 : PyObject *errorcb, *r, *ucvt;
591 : Py_ssize_t datalen;
592 :
593 1234930 : if (PyUnicode_Check(input))
594 1234930 : ucvt = NULL;
595 : else {
596 0 : input = ucvt = PyObject_Str(input);
597 0 : if (input == NULL)
598 0 : return NULL;
599 0 : else if (!PyUnicode_Check(input)) {
600 0 : PyErr_SetString(PyExc_TypeError,
601 : "couldn't convert the object to unicode.");
602 0 : Py_DECREF(ucvt);
603 0 : return NULL;
604 : }
605 : }
606 :
607 1234930 : if (PyUnicode_READY(input) < 0) {
608 0 : Py_XDECREF(ucvt);
609 0 : return NULL;
610 : }
611 1234930 : datalen = PyUnicode_GET_LENGTH(input);
612 :
613 1234930 : errorcb = internal_error_callback(errors);
614 1234930 : if (errorcb == NULL) {
615 0 : Py_XDECREF(ucvt);
616 0 : return NULL;
617 : }
618 :
619 2283760 : if (self->codec->encinit != NULL &&
620 1048820 : self->codec->encinit(&state, self->codec->config) != 0)
621 0 : goto errorexit;
622 1234930 : r = multibytecodec_encode(self->codec, &state,
623 : input, NULL, errorcb,
624 : MBENC_FLUSH | MBENC_RESET);
625 1234930 : if (r == NULL)
626 134 : goto errorexit;
627 :
628 1234800 : ERROR_DECREF(errorcb);
629 1234800 : Py_XDECREF(ucvt);
630 1234800 : return make_tuple(r, datalen);
631 :
632 134 : errorexit:
633 134 : ERROR_DECREF(errorcb);
634 134 : Py_XDECREF(ucvt);
635 134 : return NULL;
636 : }
637 :
638 : /*[clinic input]
639 : _multibytecodec.MultibyteCodec.decode
640 :
641 : input: Py_buffer
642 : errors: str(accept={str, NoneType}) = None
643 :
644 : Decodes 'input'.
645 :
646 : 'errors' may be given to set a different error handling scheme. Default is
647 : 'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible
648 : values are 'ignore' and 'replace' as well as any other name registered with
649 : codecs.register_error that is able to handle UnicodeDecodeErrors."
650 : [clinic start generated code]*/
651 :
652 : static PyObject *
653 186024 : _multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self,
654 : Py_buffer *input,
655 : const char *errors)
656 : /*[clinic end generated code: output=ff419f65bad6cc77 input=e0c78fc7ab190def]*/
657 : {
658 : MultibyteCodec_State state;
659 : MultibyteDecodeBuffer buf;
660 : PyObject *errorcb, *res;
661 : const char *data;
662 : Py_ssize_t datalen;
663 :
664 186024 : data = input->buf;
665 186024 : datalen = input->len;
666 :
667 186024 : errorcb = internal_error_callback(errors);
668 186024 : if (errorcb == NULL) {
669 0 : return NULL;
670 : }
671 :
672 186024 : if (datalen == 0) {
673 0 : ERROR_DECREF(errorcb);
674 0 : return make_tuple(PyUnicode_New(0, 0), 0);
675 : }
676 :
677 186024 : _PyUnicodeWriter_Init(&buf.writer);
678 186024 : buf.writer.min_length = datalen;
679 186024 : buf.excobj = NULL;
680 186024 : buf.inbuf = buf.inbuf_top = (unsigned char *)data;
681 186024 : buf.inbuf_end = buf.inbuf_top + datalen;
682 :
683 186087 : if (self->codec->decinit != NULL &&
684 63 : self->codec->decinit(&state, self->codec->config) != 0)
685 0 : goto errorexit;
686 :
687 189058 : while (buf.inbuf < buf.inbuf_end) {
688 : Py_ssize_t inleft, r;
689 :
690 188987 : inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
691 :
692 188987 : r = self->codec->decode(&state, self->codec->config,
693 : &buf.inbuf, inleft, &buf.writer);
694 188987 : if (r == 0)
695 185904 : break;
696 3083 : else if (multibytecodec_decerror(self->codec, &state,
697 : &buf, errorcb, r))
698 49 : goto errorexit;
699 : }
700 :
701 185975 : res = _PyUnicodeWriter_Finish(&buf.writer);
702 185975 : if (res == NULL)
703 0 : goto errorexit;
704 :
705 185975 : Py_XDECREF(buf.excobj);
706 185975 : ERROR_DECREF(errorcb);
707 185975 : return make_tuple(res, datalen);
708 :
709 49 : errorexit:
710 49 : ERROR_DECREF(errorcb);
711 49 : Py_XDECREF(buf.excobj);
712 49 : _PyUnicodeWriter_Dealloc(&buf.writer);
713 :
714 49 : return NULL;
715 : }
716 :
717 : static struct PyMethodDef multibytecodec_methods[] = {
718 : _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF
719 : _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF
720 : {NULL, NULL},
721 : };
722 :
723 : static int
724 5186 : multibytecodec_traverse(PyObject *self, visitproc visit, void *arg)
725 : {
726 5186 : Py_VISIT(Py_TYPE(self));
727 5186 : return 0;
728 : }
729 :
730 : static void
731 142 : multibytecodec_dealloc(MultibyteCodecObject *self)
732 : {
733 142 : PyObject_GC_UnTrack(self);
734 142 : PyTypeObject *tp = Py_TYPE(self);
735 142 : tp->tp_free(self);
736 142 : Py_DECREF(tp);
737 142 : }
738 :
739 : static PyType_Slot multibytecodec_slots[] = {
740 : {Py_tp_dealloc, multibytecodec_dealloc},
741 : {Py_tp_getattro, PyObject_GenericGetAttr},
742 : {Py_tp_methods, multibytecodec_methods},
743 : {Py_tp_traverse, multibytecodec_traverse},
744 : {0, NULL},
745 : };
746 :
747 : static PyType_Spec multibytecodec_spec = {
748 : .name = MODULE_NAME ".MultibyteCodec",
749 : .basicsize = sizeof(MultibyteCodecObject),
750 : .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
751 : Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
752 : .slots = multibytecodec_slots,
753 : };
754 :
755 :
756 : /**
757 : * Utility functions for stateful codec mechanism
758 : */
759 :
/* View an object pointer as the stateful decoder / encoder context. */
#define STATEFUL_DCTX(o)        ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o)        ((MultibyteStatefulEncoderContext *)(o))
762 :
763 : static PyObject *
764 88125 : encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
765 : PyObject *unistr, int final)
766 : {
767 88125 : PyObject *ucvt, *r = NULL;
768 88125 : PyObject *inbuf = NULL;
769 : Py_ssize_t inpos, datalen;
770 88125 : PyObject *origpending = NULL;
771 :
772 88125 : if (PyUnicode_Check(unistr))
773 88125 : ucvt = NULL;
774 : else {
775 0 : unistr = ucvt = PyObject_Str(unistr);
776 0 : if (unistr == NULL)
777 0 : return NULL;
778 0 : else if (!PyUnicode_Check(unistr)) {
779 0 : PyErr_SetString(PyExc_TypeError,
780 : "couldn't convert the object to str.");
781 0 : Py_DECREF(ucvt);
782 0 : return NULL;
783 : }
784 : }
785 :
786 88125 : if (ctx->pending) {
787 : PyObject *inbuf_tmp;
788 :
789 836 : Py_INCREF(ctx->pending);
790 836 : origpending = ctx->pending;
791 :
792 836 : Py_INCREF(ctx->pending);
793 836 : inbuf_tmp = ctx->pending;
794 836 : PyUnicode_Append(&inbuf_tmp, unistr);
795 836 : if (inbuf_tmp == NULL)
796 0 : goto errorexit;
797 836 : Py_CLEAR(ctx->pending);
798 836 : inbuf = inbuf_tmp;
799 : }
800 : else {
801 87289 : origpending = NULL;
802 :
803 87289 : Py_INCREF(unistr);
804 87289 : inbuf = unistr;
805 : }
806 88125 : if (PyUnicode_READY(inbuf) < 0)
807 0 : goto errorexit;
808 88125 : inpos = 0;
809 88125 : datalen = PyUnicode_GET_LENGTH(inbuf);
810 :
811 88125 : r = multibytecodec_encode(ctx->codec, &ctx->state,
812 : inbuf, &inpos,
813 : ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
814 88125 : if (r == NULL) {
815 : /* recover the original pending buffer */
816 22 : Py_XSETREF(ctx->pending, origpending);
817 22 : origpending = NULL;
818 22 : goto errorexit;
819 : }
820 88103 : Py_XDECREF(origpending);
821 :
822 88103 : if (inpos < datalen) {
823 662 : if (datalen - inpos > MAXENCPENDING) {
824 : /* normal codecs can't reach here */
825 0 : PyErr_SetString(PyExc_UnicodeError,
826 : "pending buffer overflow");
827 0 : goto errorexit;
828 : }
829 662 : ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen);
830 662 : if (ctx->pending == NULL) {
831 : /* normal codecs can't reach here */
832 0 : goto errorexit;
833 : }
834 : }
835 :
836 88103 : Py_DECREF(inbuf);
837 88103 : Py_XDECREF(ucvt);
838 88103 : return r;
839 :
840 22 : errorexit:
841 22 : Py_XDECREF(r);
842 22 : Py_XDECREF(ucvt);
843 22 : Py_XDECREF(origpending);
844 22 : Py_XDECREF(inbuf);
845 22 : return NULL;
846 : }
847 :
848 : static int
849 72561 : decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
850 : MultibyteDecodeBuffer *buf)
851 : {
852 : Py_ssize_t npendings;
853 :
854 72561 : npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
855 72561 : if (npendings + ctx->pendingsize > MAXDECPENDING ||
856 72561 : npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
857 0 : PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
858 0 : return -1;
859 : }
860 72561 : memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
861 72561 : ctx->pendingsize += npendings;
862 72561 : return 0;
863 : }
864 :
865 : static int
866 193233 : decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
867 : Py_ssize_t size)
868 : {
869 193233 : buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
870 193233 : buf->inbuf_end = buf->inbuf_top + size;
871 193233 : buf->writer.min_length += size;
872 193233 : return 0;
873 : }
874 :
875 : static int
876 191010 : decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
877 : MultibyteDecodeBuffer *buf)
878 : {
879 191010 : while (buf->inbuf < buf->inbuf_end) {
880 : Py_ssize_t inleft;
881 : Py_ssize_t r;
882 :
883 190738 : inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
884 :
885 190738 : r = ctx->codec->decode(&ctx->state, ctx->codec->config,
886 : &buf->inbuf, inleft, &buf->writer);
887 190738 : if (r == 0 || r == MBERR_TOOFEW)
888 : break;
889 0 : else if (multibytecodec_decerror(ctx->codec, &ctx->state,
890 : buf, ctx->errors, r))
891 0 : return -1;
892 : }
893 191010 : return 0;
894 : }
895 :
896 :
897 : /*[clinic input]
898 : _multibytecodec.MultibyteIncrementalEncoder.encode
899 :
900 : input: object
901 : final: bool(accept={int}) = False
902 : [clinic start generated code]*/
903 :
904 : static PyObject *
905 27427 : _multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self,
906 : PyObject *input,
907 : int final)
908 : /*[clinic end generated code: output=123361b6c505e2c1 input=093a1ddbb2fc6721]*/
909 : {
910 27427 : return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
911 : }
912 :
913 : /*[clinic input]
914 : _multibytecodec.MultibyteIncrementalEncoder.getstate
915 : [clinic start generated code]*/
916 :
917 : static PyObject *
918 177 : _multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self)
919 : /*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/
920 : {
921 : /* state made up of 1 byte for buffer size, up to MAXENCPENDING*4 bytes
922 : for UTF-8 encoded buffer (each character can use up to 4
923 : bytes), and required bytes for MultibyteCodec_State.c. A byte
924 : array is used to avoid different compilers generating different
925 : values for the same state, e.g. as a result of struct padding.
926 : */
927 : unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
928 : Py_ssize_t statesize;
929 177 : const char *pendingbuffer = NULL;
930 : Py_ssize_t pendingsize;
931 :
932 177 : if (self->pending != NULL) {
933 2 : pendingbuffer = PyUnicode_AsUTF8AndSize(self->pending, &pendingsize);
934 2 : if (pendingbuffer == NULL) {
935 0 : return NULL;
936 : }
937 2 : if (pendingsize > MAXENCPENDING*4) {
938 0 : PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
939 0 : return NULL;
940 : }
941 2 : statebytes[0] = (unsigned char)pendingsize;
942 2 : memcpy(statebytes + 1, pendingbuffer, pendingsize);
943 2 : statesize = 1 + pendingsize;
944 : } else {
945 175 : statebytes[0] = 0;
946 175 : statesize = 1;
947 : }
948 177 : memcpy(statebytes+statesize, self->state.c,
949 : sizeof(self->state.c));
950 177 : statesize += sizeof(self->state.c);
951 :
952 177 : return (PyObject *)_PyLong_FromByteArray(statebytes, statesize,
953 : 1 /* little-endian */ ,
954 : 0 /* unsigned */ );
955 : }
956 :
957 : /*[clinic input]
958 : _multibytecodec.MultibyteIncrementalEncoder.setstate
959 : state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type')
960 : /
961 : [clinic start generated code]*/
962 :
963 : static PyObject *
964 177 : _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
965 : PyLongObject *statelong)
966 : /*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/
967 : {
968 177 : PyObject *pending = NULL;
969 : unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
970 :
971 177 : if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
972 : 1 /* little-endian */ ,
973 : 0 /* unsigned */ ) < 0) {
974 0 : goto errorexit;
975 : }
976 :
977 177 : if (statebytes[0] > MAXENCPENDING*4) {
978 1 : PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
979 1 : return NULL;
980 : }
981 :
982 176 : pending = PyUnicode_DecodeUTF8((const char *)statebytes+1,
983 176 : statebytes[0], "strict");
984 176 : if (pending == NULL) {
985 1 : goto errorexit;
986 : }
987 :
988 175 : Py_CLEAR(self->pending);
989 175 : self->pending = pending;
990 175 : memcpy(self->state.c, statebytes+1+statebytes[0],
991 : sizeof(self->state.c));
992 :
993 175 : Py_RETURN_NONE;
994 :
995 1 : errorexit:
996 1 : Py_XDECREF(pending);
997 1 : return NULL;
998 : }
999 :
1000 : /*[clinic input]
1001 : _multibytecodec.MultibyteIncrementalEncoder.reset
1002 : [clinic start generated code]*/
1003 :
1004 : static PyObject *
1005 24 : _multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self)
1006 : /*[clinic end generated code: output=b4125d8f537a253f input=930f06760707b6ea]*/
1007 : {
1008 : /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */
1009 : unsigned char buffer[4], *outbuf;
1010 : Py_ssize_t r;
1011 24 : if (self->codec->encreset != NULL) {
1012 4 : outbuf = buffer;
1013 4 : r = self->codec->encreset(&self->state, self->codec->config,
1014 : &outbuf, sizeof(buffer));
1015 4 : if (r != 0)
1016 0 : return NULL;
1017 : }
1018 24 : Py_CLEAR(self->pending);
1019 24 : Py_RETURN_NONE;
1020 : }
1021 :
1022 : static struct PyMethodDef mbiencoder_methods[] = {
1023 : _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
1024 : _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF
1025 : _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF
1026 : _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
1027 : {NULL, NULL},
1028 : };
1029 :
1030 : static PyObject *
1031 1263 : mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1032 : {
1033 : MultibyteIncrementalEncoderObject *self;
1034 1263 : PyObject *codec = NULL;
1035 1263 : char *errors = NULL;
1036 :
1037 1263 : if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
1038 : incnewkwarglist, &errors))
1039 0 : return NULL;
1040 :
1041 1263 : self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
1042 1263 : if (self == NULL)
1043 0 : return NULL;
1044 :
1045 1263 : codec = PyObject_GetAttrString((PyObject *)type, "codec");
1046 1263 : if (codec == NULL)
1047 0 : goto errorexit;
1048 :
1049 1263 : _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1050 1263 : if (!MultibyteCodec_Check(state, codec)) {
1051 0 : PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1052 0 : goto errorexit;
1053 : }
1054 :
1055 1263 : self->codec = ((MultibyteCodecObject *)codec)->codec;
1056 1263 : self->pending = NULL;
1057 1263 : self->errors = internal_error_callback(errors);
1058 1263 : if (self->errors == NULL)
1059 0 : goto errorexit;
1060 1589 : if (self->codec->encinit != NULL &&
1061 326 : self->codec->encinit(&self->state, self->codec->config) != 0)
1062 0 : goto errorexit;
1063 :
1064 1263 : Py_DECREF(codec);
1065 1263 : return (PyObject *)self;
1066 :
1067 0 : errorexit:
1068 0 : Py_XDECREF(self);
1069 0 : Py_XDECREF(codec);
1070 0 : return NULL;
1071 : }
1072 :
1073 : static int
1074 1263 : mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1075 : {
1076 1263 : return 0;
1077 : }
1078 :
1079 : static int
1080 0 : mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
1081 : visitproc visit, void *arg)
1082 : {
1083 0 : if (ERROR_ISCUSTOM(self->errors))
1084 0 : Py_VISIT(self->errors);
1085 0 : return 0;
1086 : }
1087 :
1088 : static void
1089 1263 : mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
1090 : {
1091 1263 : PyTypeObject *tp = Py_TYPE(self);
1092 1263 : PyObject_GC_UnTrack(self);
1093 1263 : ERROR_DECREF(self->errors);
1094 1263 : Py_CLEAR(self->pending);
1095 1263 : tp->tp_free(self);
1096 1263 : Py_DECREF(tp);
1097 1263 : }
1098 :
1099 : static PyType_Slot encoder_slots[] = {
1100 : {Py_tp_dealloc, mbiencoder_dealloc},
1101 : {Py_tp_getattro, PyObject_GenericGetAttr},
1102 : {Py_tp_traverse, mbiencoder_traverse},
1103 : {Py_tp_methods, mbiencoder_methods},
1104 : {Py_tp_getset, codecctx_getsets},
1105 : {Py_tp_init, mbiencoder_init},
1106 : {Py_tp_new, mbiencoder_new},
1107 : {0, NULL},
1108 : };
1109 :
1110 : static PyType_Spec encoder_spec = {
1111 : .name = MODULE_NAME ".MultibyteIncrementalEncoder",
1112 : .basicsize = sizeof(MultibyteIncrementalEncoderObject),
1113 : .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1114 : Py_TPFLAGS_IMMUTABLETYPE),
1115 : .slots = encoder_slots,
1116 : };
1117 :
1118 :
1119 : /*[clinic input]
1120 : _multibytecodec.MultibyteIncrementalDecoder.decode
1121 :
1122 : input: Py_buffer
1123 : final: bool(accept={int}) = False
1124 : [clinic start generated code]*/
1125 :
1126 : static PyObject *
1127 47789 : _multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self,
1128 : Py_buffer *input,
1129 : int final)
1130 : /*[clinic end generated code: output=b9b9090e8a9ce2ba input=c9132b24d503eb1d]*/
1131 : {
1132 : MultibyteDecodeBuffer buf;
1133 47789 : char *data, *wdata = NULL;
1134 : Py_ssize_t wsize, size, origpending;
1135 : PyObject *res;
1136 :
1137 47789 : data = input->buf;
1138 47789 : size = input->len;
1139 :
1140 47789 : _PyUnicodeWriter_Init(&buf.writer);
1141 47789 : buf.excobj = NULL;
1142 47789 : origpending = self->pendingsize;
1143 :
1144 47789 : if (self->pendingsize == 0) {
1145 28726 : wsize = size;
1146 28726 : wdata = data;
1147 : }
1148 : else {
1149 19063 : if (size > PY_SSIZE_T_MAX - self->pendingsize) {
1150 0 : PyErr_NoMemory();
1151 0 : goto errorexit;
1152 : }
1153 19063 : wsize = size + self->pendingsize;
1154 19063 : wdata = PyMem_Malloc(wsize);
1155 19063 : if (wdata == NULL) {
1156 0 : PyErr_NoMemory();
1157 0 : goto errorexit;
1158 : }
1159 19063 : memcpy(wdata, self->pending, self->pendingsize);
1160 19063 : memcpy(wdata + self->pendingsize, data, size);
1161 19063 : self->pendingsize = 0;
1162 : }
1163 :
1164 47789 : if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
1165 0 : goto errorexit;
1166 :
1167 47789 : if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
1168 0 : goto errorexit;
1169 :
1170 47789 : if (final && buf.inbuf < buf.inbuf_end) {
1171 3 : if (multibytecodec_decerror(self->codec, &self->state,
1172 : &buf, self->errors, MBERR_TOOFEW)) {
1173 : /* recover the original pending buffer */
1174 3 : memcpy(self->pending, wdata, origpending);
1175 3 : self->pendingsize = origpending;
1176 3 : goto errorexit;
1177 : }
1178 : }
1179 :
1180 47786 : if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
1181 19061 : if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
1182 0 : goto errorexit;
1183 : }
1184 :
1185 47786 : res = _PyUnicodeWriter_Finish(&buf.writer);
1186 47786 : if (res == NULL)
1187 0 : goto errorexit;
1188 :
1189 47786 : if (wdata != data)
1190 19060 : PyMem_Free(wdata);
1191 47786 : Py_XDECREF(buf.excobj);
1192 47786 : return res;
1193 :
1194 3 : errorexit:
1195 3 : if (wdata != NULL && wdata != data)
1196 3 : PyMem_Free(wdata);
1197 3 : Py_XDECREF(buf.excobj);
1198 3 : _PyUnicodeWriter_Dealloc(&buf.writer);
1199 3 : return NULL;
1200 : }
1201 :
1202 : /*[clinic input]
1203 : _multibytecodec.MultibyteIncrementalDecoder.getstate
1204 : [clinic start generated code]*/
1205 :
1206 : static PyObject *
1207 329 : _multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self)
1208 : /*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/
1209 : {
1210 : PyObject *buffer;
1211 : PyObject *statelong;
1212 :
1213 329 : buffer = PyBytes_FromStringAndSize((const char *)self->pending,
1214 : self->pendingsize);
1215 329 : if (buffer == NULL) {
1216 0 : return NULL;
1217 : }
1218 :
1219 329 : statelong = (PyObject *)_PyLong_FromByteArray(self->state.c,
1220 : sizeof(self->state.c),
1221 : 1 /* little-endian */ ,
1222 : 0 /* unsigned */ );
1223 329 : if (statelong == NULL) {
1224 0 : Py_DECREF(buffer);
1225 0 : return NULL;
1226 : }
1227 :
1228 329 : return Py_BuildValue("NN", buffer, statelong);
1229 : }
1230 :
1231 : /*[clinic input]
1232 : _multibytecodec.MultibyteIncrementalDecoder.setstate
1233 : state: object(subclass_of='&PyTuple_Type')
1234 : /
1235 : [clinic start generated code]*/
1236 :
1237 : static PyObject *
1238 305 : _multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
1239 : PyObject *state)
1240 : /*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/
1241 : {
1242 : PyObject *buffer;
1243 : PyLongObject *statelong;
1244 : Py_ssize_t buffersize;
1245 : const char *bufferstr;
1246 : unsigned char statebytes[8];
1247 :
1248 305 : if (!PyArg_ParseTuple(state, "SO!;setstate(): illegal state argument",
1249 : &buffer, &PyLong_Type, &statelong))
1250 : {
1251 2 : return NULL;
1252 : }
1253 :
1254 303 : if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
1255 : 1 /* little-endian */ ,
1256 : 0 /* unsigned */ ) < 0) {
1257 0 : return NULL;
1258 : }
1259 :
1260 303 : buffersize = PyBytes_Size(buffer);
1261 303 : if (buffersize == -1) {
1262 0 : return NULL;
1263 : }
1264 :
1265 303 : if (buffersize > MAXDECPENDING) {
1266 1 : PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
1267 1 : return NULL;
1268 : }
1269 :
1270 302 : bufferstr = PyBytes_AsString(buffer);
1271 302 : if (bufferstr == NULL) {
1272 0 : return NULL;
1273 : }
1274 302 : self->pendingsize = buffersize;
1275 302 : memcpy(self->pending, bufferstr, self->pendingsize);
1276 302 : memcpy(self->state.c, statebytes, sizeof(statebytes));
1277 :
1278 302 : Py_RETURN_NONE;
1279 : }
1280 :
1281 : /*[clinic input]
1282 : _multibytecodec.MultibyteIncrementalDecoder.reset
1283 : [clinic start generated code]*/
1284 :
1285 : static PyObject *
1286 4 : _multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self)
1287 : /*[clinic end generated code: output=da423b1782c23ed1 input=3b63b3be85b2fb45]*/
1288 : {
1289 5 : if (self->codec->decreset != NULL &&
1290 1 : self->codec->decreset(&self->state, self->codec->config) != 0)
1291 0 : return NULL;
1292 4 : self->pendingsize = 0;
1293 :
1294 4 : Py_RETURN_NONE;
1295 : }
1296 :
1297 : static struct PyMethodDef mbidecoder_methods[] = {
1298 : _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
1299 : _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF
1300 : _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF
1301 : _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
1302 : {NULL, NULL},
1303 : };
1304 :
1305 : static PyObject *
1306 1263 : mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1307 : {
1308 : MultibyteIncrementalDecoderObject *self;
1309 1263 : PyObject *codec = NULL;
1310 1263 : char *errors = NULL;
1311 :
1312 1263 : if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
1313 : incnewkwarglist, &errors))
1314 0 : return NULL;
1315 :
1316 1263 : self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
1317 1263 : if (self == NULL)
1318 0 : return NULL;
1319 :
1320 1263 : codec = PyObject_GetAttrString((PyObject *)type, "codec");
1321 1263 : if (codec == NULL)
1322 0 : goto errorexit;
1323 :
1324 1263 : _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1325 1263 : if (!MultibyteCodec_Check(state, codec)) {
1326 0 : PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1327 0 : goto errorexit;
1328 : }
1329 :
1330 1263 : self->codec = ((MultibyteCodecObject *)codec)->codec;
1331 1263 : self->pendingsize = 0;
1332 1263 : self->errors = internal_error_callback(errors);
1333 1263 : if (self->errors == NULL)
1334 0 : goto errorexit;
1335 1588 : if (self->codec->decinit != NULL &&
1336 325 : self->codec->decinit(&self->state, self->codec->config) != 0)
1337 0 : goto errorexit;
1338 :
1339 1263 : Py_DECREF(codec);
1340 1263 : return (PyObject *)self;
1341 :
1342 0 : errorexit:
1343 0 : Py_XDECREF(self);
1344 0 : Py_XDECREF(codec);
1345 0 : return NULL;
1346 : }
1347 :
1348 : static int
1349 1263 : mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1350 : {
1351 1263 : return 0;
1352 : }
1353 :
1354 : static int
1355 2 : mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
1356 : visitproc visit, void *arg)
1357 : {
1358 2 : if (ERROR_ISCUSTOM(self->errors))
1359 0 : Py_VISIT(self->errors);
1360 2 : return 0;
1361 : }
1362 :
1363 : static void
1364 1263 : mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
1365 : {
1366 1263 : PyTypeObject *tp = Py_TYPE(self);
1367 1263 : PyObject_GC_UnTrack(self);
1368 1263 : ERROR_DECREF(self->errors);
1369 1263 : tp->tp_free(self);
1370 1263 : Py_DECREF(tp);
1371 1263 : }
1372 :
1373 : static PyType_Slot decoder_slots[] = {
1374 : {Py_tp_dealloc, mbidecoder_dealloc},
1375 : {Py_tp_getattro, PyObject_GenericGetAttr},
1376 : {Py_tp_traverse, mbidecoder_traverse},
1377 : {Py_tp_methods, mbidecoder_methods},
1378 : {Py_tp_getset, codecctx_getsets},
1379 : {Py_tp_init, mbidecoder_init},
1380 : {Py_tp_new, mbidecoder_new},
1381 : {0, NULL},
1382 : };
1383 :
1384 : static PyType_Spec decoder_spec = {
1385 : .name = MODULE_NAME ".MultibyteIncrementalDecoder",
1386 : .basicsize = sizeof(MultibyteIncrementalDecoderObject),
1387 : .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1388 : Py_TPFLAGS_IMMUTABLETYPE),
1389 : .slots = decoder_slots,
1390 : };
1391 :
1392 : static PyObject *
1393 129158 : mbstreamreader_iread(MultibyteStreamReaderObject *self,
1394 : const char *method, Py_ssize_t sizehint)
1395 : {
1396 : MultibyteDecodeBuffer buf;
1397 : PyObject *cres, *res;
1398 : Py_ssize_t rsize;
1399 :
1400 129158 : if (sizehint == 0)
1401 0 : return PyUnicode_New(0, 0);
1402 :
1403 129158 : _PyUnicodeWriter_Init(&buf.writer);
1404 129158 : buf.excobj = NULL;
1405 129158 : cres = NULL;
1406 :
1407 16286 : for (;;) {
1408 : int endoffile;
1409 :
1410 145444 : if (sizehint < 0)
1411 746 : cres = PyObject_CallMethod(self->stream,
1412 : method, NULL);
1413 : else
1414 144698 : cres = PyObject_CallMethod(self->stream,
1415 : method, "i", sizehint);
1416 145444 : if (cres == NULL)
1417 0 : goto errorexit;
1418 :
1419 145444 : if (!PyBytes_Check(cres)) {
1420 0 : PyErr_Format(PyExc_TypeError,
1421 : "stream function returned a "
1422 : "non-bytes object (%.100s)",
1423 0 : Py_TYPE(cres)->tp_name);
1424 0 : goto errorexit;
1425 : }
1426 :
1427 145444 : endoffile = (PyBytes_GET_SIZE(cres) == 0);
1428 :
1429 145444 : if (self->pendingsize > 0) {
1430 : PyObject *ctr;
1431 : char *ctrdata;
1432 :
1433 53500 : if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
1434 0 : PyErr_NoMemory();
1435 0 : goto errorexit;
1436 : }
1437 53500 : rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
1438 53500 : ctr = PyBytes_FromStringAndSize(NULL, rsize);
1439 53500 : if (ctr == NULL)
1440 0 : goto errorexit;
1441 53500 : ctrdata = PyBytes_AS_STRING(ctr);
1442 53500 : memcpy(ctrdata, self->pending, self->pendingsize);
1443 107000 : memcpy(ctrdata + self->pendingsize,
1444 53500 : PyBytes_AS_STRING(cres),
1445 53500 : PyBytes_GET_SIZE(cres));
1446 53500 : Py_DECREF(cres);
1447 53500 : cres = ctr;
1448 53500 : self->pendingsize = 0;
1449 : }
1450 :
1451 145444 : rsize = PyBytes_GET_SIZE(cres);
1452 145444 : if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
1453 : rsize) != 0)
1454 0 : goto errorexit;
1455 :
1456 145444 : if (rsize > 0 && decoder_feed_buffer(
1457 : (MultibyteStatefulDecoderContext *)self, &buf))
1458 0 : goto errorexit;
1459 :
1460 145444 : if (endoffile || sizehint < 0) {
1461 2857 : if (buf.inbuf < buf.inbuf_end &&
1462 1 : multibytecodec_decerror(self->codec, &self->state,
1463 : &buf, self->errors, MBERR_TOOFEW))
1464 1 : goto errorexit;
1465 : }
1466 :
1467 145443 : if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
1468 53500 : if (decoder_append_pending(STATEFUL_DCTX(self),
1469 : &buf) != 0)
1470 0 : goto errorexit;
1471 : }
1472 :
1473 145443 : Py_DECREF(cres);
1474 145443 : cres = NULL;
1475 :
1476 145443 : if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
1477 : break;
1478 :
1479 16286 : sizehint = 1; /* read 1 more byte and retry */
1480 : }
1481 :
1482 129157 : res = _PyUnicodeWriter_Finish(&buf.writer);
1483 129157 : if (res == NULL)
1484 0 : goto errorexit;
1485 :
1486 129157 : Py_XDECREF(cres);
1487 129157 : Py_XDECREF(buf.excobj);
1488 129157 : return res;
1489 :
1490 1 : errorexit:
1491 1 : Py_XDECREF(cres);
1492 1 : Py_XDECREF(buf.excobj);
1493 1 : _PyUnicodeWriter_Dealloc(&buf.writer);
1494 1 : return NULL;
1495 : }
1496 :
1497 : /*[clinic input]
1498 : _multibytecodec.MultibyteStreamReader.read
1499 :
1500 : sizeobj: object = None
1501 : /
1502 : [clinic start generated code]*/
1503 :
1504 : static PyObject *
1505 42296 : _multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self,
1506 : PyObject *sizeobj)
1507 : /*[clinic end generated code: output=35621eb75355d5b8 input=015b0d3ff2fca485]*/
1508 : {
1509 : Py_ssize_t size;
1510 :
1511 42296 : if (sizeobj == Py_None)
1512 302 : size = -1;
1513 41994 : else if (PyLong_Check(sizeobj))
1514 41994 : size = PyLong_AsSsize_t(sizeobj);
1515 : else {
1516 0 : PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1517 0 : return NULL;
1518 : }
1519 :
1520 42296 : if (size == -1 && PyErr_Occurred())
1521 0 : return NULL;
1522 :
1523 42296 : return mbstreamreader_iread(self, "read", size);
1524 : }
1525 :
1526 : /*[clinic input]
1527 : _multibytecodec.MultibyteStreamReader.readline
1528 :
1529 : sizeobj: object = None
1530 : /
1531 : [clinic start generated code]*/
1532 :
1533 : static PyObject *
1534 44831 : _multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self,
1535 : PyObject *sizeobj)
1536 : /*[clinic end generated code: output=4fbfaae1ed457a11 input=41ccc64f9bb0cec3]*/
1537 : {
1538 : Py_ssize_t size;
1539 :
1540 44831 : if (sizeobj == Py_None)
1541 165 : size = -1;
1542 44666 : else if (PyLong_Check(sizeobj))
1543 44666 : size = PyLong_AsSsize_t(sizeobj);
1544 : else {
1545 0 : PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1546 0 : return NULL;
1547 : }
1548 :
1549 44831 : if (size == -1 && PyErr_Occurred())
1550 0 : return NULL;
1551 :
1552 44831 : return mbstreamreader_iread(self, "readline", size);
1553 : }
1554 :
1555 : /*[clinic input]
1556 : _multibytecodec.MultibyteStreamReader.readlines
1557 :
1558 : sizehintobj: object = None
1559 : /
1560 : [clinic start generated code]*/
1561 :
1562 : static PyObject *
1563 42031 : _multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self,
1564 : PyObject *sizehintobj)
1565 : /*[clinic end generated code: output=e7c4310768ed2ad4 input=54932f5d4d88e880]*/
1566 : {
1567 : PyObject *r, *sr;
1568 : Py_ssize_t sizehint;
1569 :
1570 42031 : if (sizehintobj == Py_None)
1571 38 : sizehint = -1;
1572 41993 : else if (PyLong_Check(sizehintobj))
1573 41993 : sizehint = PyLong_AsSsize_t(sizehintobj);
1574 : else {
1575 0 : PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1576 0 : return NULL;
1577 : }
1578 :
1579 42031 : if (sizehint == -1 && PyErr_Occurred())
1580 0 : return NULL;
1581 :
1582 42031 : r = mbstreamreader_iread(self, "read", sizehint);
1583 42031 : if (r == NULL)
1584 0 : return NULL;
1585 :
1586 42031 : sr = PyUnicode_Splitlines(r, 1);
1587 42031 : Py_DECREF(r);
1588 42031 : return sr;
1589 : }
1590 :
1591 : /*[clinic input]
1592 : _multibytecodec.MultibyteStreamReader.reset
1593 : [clinic start generated code]*/
1594 :
1595 : static PyObject *
1596 120 : _multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self)
1597 : /*[clinic end generated code: output=138490370a680abc input=5d4140db84b5e1e2]*/
1598 : {
1599 160 : if (self->codec->decreset != NULL &&
1600 40 : self->codec->decreset(&self->state, self->codec->config) != 0)
1601 0 : return NULL;
1602 120 : self->pendingsize = 0;
1603 :
1604 120 : Py_RETURN_NONE;
1605 : }
1606 :
1607 : static struct PyMethodDef mbstreamreader_methods[] = {
1608 : _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF
1609 : _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF
1610 : _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF
1611 : _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF
1612 : {NULL, NULL},
1613 : };
1614 :
1615 : static PyMemberDef mbstreamreader_members[] = {
1616 : {"stream", T_OBJECT,
1617 : offsetof(MultibyteStreamReaderObject, stream),
1618 : READONLY, NULL},
1619 : {NULL,}
1620 : };
1621 :
1622 : static PyObject *
1623 2273 : mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1624 : {
1625 : MultibyteStreamReaderObject *self;
1626 2273 : PyObject *stream, *codec = NULL;
1627 2273 : char *errors = NULL;
1628 :
1629 2273 : if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
1630 : streamkwarglist, &stream, &errors))
1631 0 : return NULL;
1632 :
1633 2273 : self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
1634 2273 : if (self == NULL)
1635 0 : return NULL;
1636 :
1637 2273 : codec = PyObject_GetAttrString((PyObject *)type, "codec");
1638 2273 : if (codec == NULL)
1639 1 : goto errorexit;
1640 :
1641 2272 : _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1642 2272 : if (!MultibyteCodec_Check(state, codec)) {
1643 0 : PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1644 0 : goto errorexit;
1645 : }
1646 :
1647 2272 : self->codec = ((MultibyteCodecObject *)codec)->codec;
1648 2272 : self->stream = stream;
1649 2272 : Py_INCREF(stream);
1650 2272 : self->pendingsize = 0;
1651 2272 : self->errors = internal_error_callback(errors);
1652 2272 : if (self->errors == NULL)
1653 0 : goto errorexit;
1654 2756 : if (self->codec->decinit != NULL &&
1655 484 : self->codec->decinit(&self->state, self->codec->config) != 0)
1656 0 : goto errorexit;
1657 :
1658 2272 : Py_DECREF(codec);
1659 2272 : return (PyObject *)self;
1660 :
1661 1 : errorexit:
1662 1 : Py_XDECREF(self);
1663 1 : Py_XDECREF(codec);
1664 1 : return NULL;
1665 : }
1666 :
1667 : static int
1668 2272 : mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
1669 : {
1670 2272 : return 0;
1671 : }
1672 :
1673 : static int
1674 0 : mbstreamreader_traverse(MultibyteStreamReaderObject *self,
1675 : visitproc visit, void *arg)
1676 : {
1677 0 : if (ERROR_ISCUSTOM(self->errors))
1678 0 : Py_VISIT(self->errors);
1679 0 : Py_VISIT(self->stream);
1680 0 : return 0;
1681 : }
1682 :
1683 : static void
1684 2273 : mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
1685 : {
1686 2273 : PyTypeObject *tp = Py_TYPE(self);
1687 2273 : PyObject_GC_UnTrack(self);
1688 2273 : ERROR_DECREF(self->errors);
1689 2273 : Py_XDECREF(self->stream);
1690 2273 : tp->tp_free(self);
1691 2273 : Py_DECREF(tp);
1692 2273 : }
1693 :
1694 : static PyType_Slot reader_slots[] = {
1695 : {Py_tp_dealloc, mbstreamreader_dealloc},
1696 : {Py_tp_getattro, PyObject_GenericGetAttr},
1697 : {Py_tp_traverse, mbstreamreader_traverse},
1698 : {Py_tp_methods, mbstreamreader_methods},
1699 : {Py_tp_members, mbstreamreader_members},
1700 : {Py_tp_getset, codecctx_getsets},
1701 : {Py_tp_init, mbstreamreader_init},
1702 : {Py_tp_new, mbstreamreader_new},
1703 : {0, NULL},
1704 : };
1705 :
1706 : static PyType_Spec reader_spec = {
1707 : .name = MODULE_NAME ".MultibyteStreamReader",
1708 : .basicsize = sizeof(MultibyteStreamReaderObject),
1709 : .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1710 : Py_TPFLAGS_IMMUTABLETYPE),
1711 : .slots = reader_slots,
1712 : };
1713 :
1714 : static int
1715 60698 : mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
1716 : PyObject *unistr, PyObject *str_write)
1717 : {
1718 : PyObject *str, *wr;
1719 :
1720 60698 : str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
1721 60698 : if (str == NULL)
1722 0 : return -1;
1723 :
1724 60698 : wr = _PyObject_CallMethodOneArg(self->stream, str_write, str);
1725 60698 : Py_DECREF(str);
1726 60698 : if (wr == NULL)
1727 0 : return -1;
1728 :
1729 60698 : Py_DECREF(wr);
1730 60698 : return 0;
1731 : }
1732 :
1733 : /*[clinic input]
1734 : _multibytecodec.MultibyteStreamWriter.write
1735 :
1736 : cls: defining_class
1737 : strobj: object
1738 : /
1739 : [clinic start generated code]*/
1740 :
1741 : static PyObject *
1742 55150 : _multibytecodec_MultibyteStreamWriter_write_impl(MultibyteStreamWriterObject *self,
1743 : PyTypeObject *cls,
1744 : PyObject *strobj)
1745 : /*[clinic end generated code: output=68ade3aea26410ac input=199f26f68bd8425a]*/
1746 : {
1747 55150 : _multibytecodec_state *state = PyType_GetModuleState(cls);
1748 55150 : assert(state != NULL);
1749 55150 : if (mbstreamwriter_iwrite(self, strobj, state->str_write)) {
1750 0 : return NULL;
1751 : }
1752 55150 : Py_RETURN_NONE;
1753 : }
1754 :
1755 : /*[clinic input]
1756 : _multibytecodec.MultibyteStreamWriter.writelines
1757 :
1758 : cls: defining_class
1759 : lines: object
1760 : /
1761 : [clinic start generated code]*/
1762 :
1763 : static PyObject *
1764 722 : _multibytecodec_MultibyteStreamWriter_writelines_impl(MultibyteStreamWriterObject *self,
1765 : PyTypeObject *cls,
1766 : PyObject *lines)
1767 : /*[clinic end generated code: output=b4c99d2cf23ffb88 input=a6d5fe7c74972a34]*/
1768 : {
1769 : PyObject *strobj;
1770 : int i, r;
1771 :
1772 722 : if (!PySequence_Check(lines)) {
1773 0 : PyErr_SetString(PyExc_TypeError,
1774 : "arg must be a sequence object");
1775 0 : return NULL;
1776 : }
1777 :
1778 722 : _multibytecodec_state *state = PyType_GetModuleState(cls);
1779 722 : assert(state != NULL);
1780 6270 : for (i = 0; i < PySequence_Length(lines); i++) {
1781 : /* length can be changed even within this loop */
1782 5548 : strobj = PySequence_GetItem(lines, i);
1783 5548 : if (strobj == NULL)
1784 0 : return NULL;
1785 :
1786 5548 : r = mbstreamwriter_iwrite(self, strobj, state->str_write);
1787 5548 : Py_DECREF(strobj);
1788 5548 : if (r == -1)
1789 0 : return NULL;
1790 : }
1791 : /* PySequence_Length() can fail */
1792 722 : if (PyErr_Occurred())
1793 0 : return NULL;
1794 :
1795 722 : Py_RETURN_NONE;
1796 : }
1797 :
1798 : /*[clinic input]
1799 : _multibytecodec.MultibyteStreamWriter.reset
1800 :
1801 : cls: defining_class
1802 : /
1803 :
1804 : [clinic start generated code]*/
1805 :
1806 : static PyObject *
1807 19 : _multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self,
1808 : PyTypeObject *cls)
1809 : /*[clinic end generated code: output=32ef224c2a38aa3d input=28af6a9cd38d1979]*/
1810 : {
1811 : PyObject *pwrt;
1812 :
1813 19 : if (!self->pending)
1814 19 : Py_RETURN_NONE;
1815 :
1816 0 : pwrt = multibytecodec_encode(self->codec, &self->state,
1817 : self->pending, NULL, self->errors,
1818 : MBENC_FLUSH | MBENC_RESET);
1819 : /* some pending buffer can be truncated when UnicodeEncodeError is
1820 : * raised on 'strict' mode. but, 'reset' method is designed to
1821 : * reset the pending buffer or states so failed string sequence
1822 : * ought to be missed */
1823 0 : Py_CLEAR(self->pending);
1824 0 : if (pwrt == NULL)
1825 0 : return NULL;
1826 :
1827 0 : assert(PyBytes_Check(pwrt));
1828 :
1829 0 : _multibytecodec_state *state = PyType_GetModuleState(cls);
1830 0 : assert(state != NULL);
1831 :
1832 0 : if (PyBytes_Size(pwrt) > 0) {
1833 : PyObject *wr;
1834 :
1835 0 : wr = _PyObject_CallMethodOneArg(self->stream, state->str_write, pwrt);
1836 0 : if (wr == NULL) {
1837 0 : Py_DECREF(pwrt);
1838 0 : return NULL;
1839 : }
1840 : }
1841 0 : Py_DECREF(pwrt);
1842 :
1843 0 : Py_RETURN_NONE;
1844 : }
1845 :
1846 : static PyObject *
1847 2213 : mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1848 : {
1849 : MultibyteStreamWriterObject *self;
1850 2213 : PyObject *stream, *codec = NULL;
1851 2213 : char *errors = NULL;
1852 :
1853 2213 : if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
1854 : streamkwarglist, &stream, &errors))
1855 0 : return NULL;
1856 :
1857 2213 : self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
1858 2213 : if (self == NULL)
1859 0 : return NULL;
1860 :
1861 2213 : codec = PyObject_GetAttrString((PyObject *)type, "codec");
1862 2213 : if (codec == NULL)
1863 1 : goto errorexit;
1864 :
1865 2212 : _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1866 2212 : if (!MultibyteCodec_Check(state, codec)) {
1867 0 : PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1868 0 : goto errorexit;
1869 : }
1870 :
1871 2212 : self->codec = ((MultibyteCodecObject *)codec)->codec;
1872 2212 : self->stream = stream;
1873 2212 : Py_INCREF(stream);
1874 2212 : self->pending = NULL;
1875 2212 : self->errors = internal_error_callback(errors);
1876 2212 : if (self->errors == NULL)
1877 0 : goto errorexit;
1878 2680 : if (self->codec->encinit != NULL &&
1879 468 : self->codec->encinit(&self->state, self->codec->config) != 0)
1880 0 : goto errorexit;
1881 :
1882 2212 : Py_DECREF(codec);
1883 2212 : return (PyObject *)self;
1884 :
1885 1 : errorexit:
1886 1 : Py_XDECREF(self);
1887 1 : Py_XDECREF(codec);
1888 1 : return NULL;
1889 : }
1890 :
1891 : static int
1892 2212 : mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
1893 : {
1894 2212 : return 0;
1895 : }
1896 :
1897 : static int
1898 0 : mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
1899 : visitproc visit, void *arg)
1900 : {
1901 0 : if (ERROR_ISCUSTOM(self->errors))
1902 0 : Py_VISIT(self->errors);
1903 0 : Py_VISIT(self->stream);
1904 0 : return 0;
1905 : }
1906 :
1907 : static void
1908 2213 : mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1909 : {
1910 2213 : PyTypeObject *tp = Py_TYPE(self);
1911 2213 : PyObject_GC_UnTrack(self);
1912 2213 : ERROR_DECREF(self->errors);
1913 2213 : Py_XDECREF(self->stream);
1914 2213 : tp->tp_free(self);
1915 2213 : Py_DECREF(tp);
1916 2213 : }
1917 :
1918 : static struct PyMethodDef mbstreamwriter_methods[] = {
1919 : _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF
1920 : _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF
1921 : _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF
1922 : {NULL, NULL},
1923 : };
1924 :
1925 : static PyMemberDef mbstreamwriter_members[] = {
1926 : {"stream", T_OBJECT,
1927 : offsetof(MultibyteStreamWriterObject, stream),
1928 : READONLY, NULL},
1929 : {NULL,}
1930 : };
1931 :
1932 : static PyType_Slot writer_slots[] = {
1933 : {Py_tp_dealloc, mbstreamwriter_dealloc},
1934 : {Py_tp_getattro, PyObject_GenericGetAttr},
1935 : {Py_tp_traverse, mbstreamwriter_traverse},
1936 : {Py_tp_methods, mbstreamwriter_methods},
1937 : {Py_tp_members, mbstreamwriter_members},
1938 : {Py_tp_getset, codecctx_getsets},
1939 : {Py_tp_init, mbstreamwriter_init},
1940 : {Py_tp_new, mbstreamwriter_new},
1941 : {0, NULL},
1942 : };
1943 :
1944 : static PyType_Spec writer_spec = {
1945 : .name = MODULE_NAME ".MultibyteStreamWriter",
1946 : .basicsize = sizeof(MultibyteStreamWriterObject),
1947 : .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1948 : Py_TPFLAGS_IMMUTABLETYPE),
1949 : .slots = writer_slots,
1950 : };
1951 :
1952 :
1953 : /*[clinic input]
1954 : _multibytecodec.__create_codec
1955 :
1956 : arg: object
1957 : /
1958 : [clinic start generated code]*/
1959 :
1960 : static PyObject *
1961 142 : _multibytecodec___create_codec(PyObject *module, PyObject *arg)
1962 : /*[clinic end generated code: output=cfa3dce8260e809d input=6840b2a6b183fcfa]*/
1963 : {
1964 : MultibyteCodecObject *self;
1965 : MultibyteCodec *codec;
1966 :
1967 142 : if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) {
1968 0 : PyErr_SetString(PyExc_ValueError, "argument type invalid");
1969 0 : return NULL;
1970 : }
1971 :
1972 142 : codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME);
1973 142 : if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
1974 0 : return NULL;
1975 :
1976 142 : _multibytecodec_state *state = _multibytecodec_get_state(module);
1977 142 : self = PyObject_GC_New(MultibyteCodecObject, state->multibytecodec_type);
1978 142 : if (self == NULL)
1979 0 : return NULL;
1980 142 : self->codec = codec;
1981 :
1982 142 : PyObject_GC_Track(self);
1983 142 : return (PyObject *)self;
1984 : }
1985 :
1986 : static int
1987 816 : _multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
1988 : {
1989 816 : _multibytecodec_state *state = _multibytecodec_get_state(mod);
1990 816 : Py_VISIT(state->multibytecodec_type);
1991 816 : Py_VISIT(state->encoder_type);
1992 816 : Py_VISIT(state->decoder_type);
1993 816 : Py_VISIT(state->reader_type);
1994 816 : Py_VISIT(state->writer_type);
1995 816 : return 0;
1996 : }
1997 :
1998 : static int
1999 32 : _multibytecodec_clear(PyObject *mod)
2000 : {
2001 32 : _multibytecodec_state *state = _multibytecodec_get_state(mod);
2002 32 : Py_CLEAR(state->multibytecodec_type);
2003 32 : Py_CLEAR(state->encoder_type);
2004 32 : Py_CLEAR(state->decoder_type);
2005 32 : Py_CLEAR(state->reader_type);
2006 32 : Py_CLEAR(state->writer_type);
2007 32 : Py_CLEAR(state->str_write);
2008 32 : return 0;
2009 : }
2010 :
2011 : static void
2012 25 : _multibytecodec_free(void *mod)
2013 : {
2014 25 : _multibytecodec_clear((PyObject *)mod);
2015 25 : }
2016 :
2017 : #define CREATE_TYPE(module, type, spec) \
2018 : do { \
2019 : type = (PyTypeObject *)PyType_FromModuleAndSpec(module, spec, NULL); \
2020 : if (!type) { \
2021 : return -1; \
2022 : } \
2023 : } while (0)
2024 :
2025 : #define ADD_TYPE(module, type) \
2026 : do { \
2027 : if (PyModule_AddType(module, type) < 0) { \
2028 : return -1; \
2029 : } \
2030 : } while (0)
2031 :
2032 : static int
2033 25 : _multibytecodec_exec(PyObject *mod)
2034 : {
2035 25 : _multibytecodec_state *state = _multibytecodec_get_state(mod);
2036 25 : state->str_write = PyUnicode_InternFromString("write");
2037 25 : if (state->str_write == NULL) {
2038 0 : return -1;
2039 : }
2040 25 : CREATE_TYPE(mod, state->multibytecodec_type, &multibytecodec_spec);
2041 25 : CREATE_TYPE(mod, state->encoder_type, &encoder_spec);
2042 25 : CREATE_TYPE(mod, state->decoder_type, &decoder_spec);
2043 25 : CREATE_TYPE(mod, state->reader_type, &reader_spec);
2044 25 : CREATE_TYPE(mod, state->writer_type, &writer_spec);
2045 :
2046 25 : ADD_TYPE(mod, state->encoder_type);
2047 25 : ADD_TYPE(mod, state->decoder_type);
2048 25 : ADD_TYPE(mod, state->reader_type);
2049 25 : ADD_TYPE(mod, state->writer_type);
2050 25 : return 0;
2051 : }
2052 :
2053 : #undef CREATE_TYPE
2054 : #undef ADD_TYPE
2055 :
2056 : static struct PyMethodDef _multibytecodec_methods[] = {
2057 : _MULTIBYTECODEC___CREATE_CODEC_METHODDEF
2058 : {NULL, NULL},
2059 : };
2060 :
2061 : static PyModuleDef_Slot _multibytecodec_slots[] = {
2062 : {Py_mod_exec, _multibytecodec_exec},
2063 : {0, NULL}
2064 : };
2065 :
2066 : static struct PyModuleDef _multibytecodecmodule = {
2067 : .m_base = PyModuleDef_HEAD_INIT,
2068 : .m_name = "_multibytecodec",
2069 : .m_size = sizeof(_multibytecodec_state),
2070 : .m_methods = _multibytecodec_methods,
2071 : .m_slots = _multibytecodec_slots,
2072 : .m_traverse = _multibytecodec_traverse,
2073 : .m_clear = _multibytecodec_clear,
2074 : .m_free = _multibytecodec_free,
2075 : };
2076 :
2077 : PyMODINIT_FUNC
2078 25 : PyInit__multibytecodec(void)
2079 : {
2080 25 : return PyModuleDef_Init(&_multibytecodecmodule);
2081 : }
|