Line data Source code
1 : /* bytes object implementation */
2 :
3 : #define PY_SSIZE_T_CLEAN
4 :
5 : #include "Python.h"
6 : #include "pycore_abstract.h" // _PyIndex_Check()
7 : #include "pycore_bytesobject.h" // _PyBytes_Find(), _PyBytes_Repeat()
8 : #include "pycore_bytes_methods.h" // _Py_bytes_startswith()
9 : #include "pycore_call.h" // _PyObject_CallNoArgs()
10 : #include "pycore_format.h" // F_LJUST
11 : #include "pycore_global_objects.h" // _Py_GET_GLOBAL_OBJECT()
12 : #include "pycore_initconfig.h" // _PyStatus_OK()
13 : #include "pycore_long.h" // _PyLong_DigitValue
14 : #include "pycore_object.h" // _PyObject_GC_TRACK
15 : #include "pycore_pymem.h" // PYMEM_CLEANBYTE
16 : #include "pycore_strhex.h" // _Py_strhex_with_sep()
17 :
18 : #include <stddef.h>
19 :
20 : /*[clinic input]
21 : class bytes "PyBytesObject *" "&PyBytes_Type"
22 : [clinic start generated code]*/
23 : /*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
24 :
25 : #include "clinic/bytesobject.c.h"
26 :
27 : /* PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation
28 : for a bytes object of length n should request PyBytesObject_SIZE + n bytes.
29 :
30 : Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
31 : 3 or 7 bytes per bytes object allocation on a typical system.
32 : */
33 : #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
34 :
35 : /* Forward declaration */
36 : Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
37 : char *str);
38 :
39 :
/* Shortcuts to the bytes singletons reached through _Py_SINGLETON(). */
#define CHARACTERS _Py_SINGLETON(bytes_characters)
/* Pointer to the cached single-byte bytes object for byte value `ch`.
   The expansion is a plain expression: it carries no trailing semicolon and
   parenthesizes its argument, so it can be used inside larger expressions. */
#define CHARACTER(ch) \
    ((PyBytesObject *)&(CHARACTERS[(ch)]))
/* Pointer to the empty bytes singleton. */
#define EMPTY (&_Py_SINGLETON(bytes_empty))
44 :
45 :
46 : // Return a borrowed reference to the empty bytes string singleton.
47 742 : static inline PyObject* bytes_get_empty(void)
48 : {
49 742 : return &EMPTY->ob_base.ob_base;
50 : }
51 :
52 :
53 : // Return a strong reference to the empty bytes string singleton.
54 8250890 : static inline PyObject* bytes_new_empty(void)
55 : {
56 8250890 : Py_INCREF(EMPTY);
57 8250890 : return (PyObject *)EMPTY;
58 : }
59 :
60 :
61 : /*
62 : For PyBytes_FromString(), the parameter `str' points to a null-terminated
63 : string containing exactly `size' bytes.
64 :
65 : For PyBytes_FromStringAndSize(), the parameter `str' is
66 : either NULL or else points to a string containing at least `size' bytes.
67 : For PyBytes_FromStringAndSize(), the string in the `str' parameter does
68 : not have to be null-terminated. (Therefore it is safe to construct a
69 : substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
70 : If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
71 : bytes (setting the last byte to the null terminating character) and you can
72 : fill in the data yourself. If `str' is non-NULL then the resulting
73 : PyBytes object must be treated as immutable and you must not fill in nor
74 : alter the data yourself, since the strings may be shared.
75 :
76 : The PyObject member `op->ob_size', which denotes the number of "extra
77 : items" in a variable-size object, will contain the number of bytes
78 : allocated for string data, not counting the null terminating character.
79 : It is therefore equal to the `size' parameter (for
80 : PyBytes_FromStringAndSize()) or the length of the string in the `str'
81 : parameter (for PyBytes_FromString()).
82 : */
83 : static PyObject *
84 64783200 : _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
85 : {
86 : PyBytesObject *op;
87 64783200 : assert(size >= 0);
88 :
89 64783200 : if (size == 0) {
90 3 : return bytes_new_empty();
91 : }
92 :
93 64783200 : if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
94 0 : PyErr_SetString(PyExc_OverflowError,
95 : "byte string is too large");
96 0 : return NULL;
97 : }
98 :
99 : /* Inline PyObject_NewVar */
100 64783200 : if (use_calloc)
101 127 : op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
102 : else
103 64783000 : op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
104 64783200 : if (op == NULL) {
105 1 : return PyErr_NoMemory();
106 : }
107 64783200 : _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
108 : _Py_COMP_DIAG_PUSH
109 : _Py_COMP_DIAG_IGNORE_DEPR_DECLS
110 64783200 : op->ob_shash = -1;
111 : _Py_COMP_DIAG_POP
112 64783200 : if (!use_calloc) {
113 64783000 : op->ob_sval[size] = '\0';
114 : }
115 64783200 : return (PyObject *) op;
116 : }
117 :
118 : PyObject *
119 101505000 : PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
120 : {
121 : PyBytesObject *op;
122 101505000 : if (size < 0) {
123 0 : PyErr_SetString(PyExc_SystemError,
124 : "Negative size passed to PyBytes_FromStringAndSize");
125 0 : return NULL;
126 : }
127 101505000 : if (size == 1 && str != NULL) {
128 29974500 : op = CHARACTER(*str & 255);
129 29974500 : Py_INCREF(op);
130 29974500 : return (PyObject *)op;
131 : }
132 71530800 : if (size == 0) {
133 6747780 : return bytes_new_empty();
134 : }
135 :
136 64783000 : op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
137 64783000 : if (op == NULL)
138 1 : return NULL;
139 64783000 : if (str == NULL)
140 34185700 : return (PyObject *) op;
141 :
142 30597400 : memcpy(op->ob_sval, str, size);
143 30597400 : return (PyObject *) op;
144 : }
145 :
146 : PyObject *
147 131364 : PyBytes_FromString(const char *str)
148 : {
149 : size_t size;
150 : PyBytesObject *op;
151 :
152 131364 : assert(str != NULL);
153 131364 : size = strlen(str);
154 131364 : if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
155 0 : PyErr_SetString(PyExc_OverflowError,
156 : "byte string is too long");
157 0 : return NULL;
158 : }
159 :
160 131364 : if (size == 0) {
161 2 : return bytes_new_empty();
162 : }
163 131362 : else if (size == 1) {
164 21 : op = CHARACTER(*str & 255);
165 21 : Py_INCREF(op);
166 21 : return (PyObject *)op;
167 : }
168 :
169 : /* Inline PyObject_NewVar */
170 131341 : op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
171 131341 : if (op == NULL) {
172 0 : return PyErr_NoMemory();
173 : }
174 131341 : _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
175 : _Py_COMP_DIAG_PUSH
176 : _Py_COMP_DIAG_IGNORE_DEPR_DECLS
177 131341 : op->ob_shash = -1;
178 : _Py_COMP_DIAG_POP
179 131341 : memcpy(op->ob_sval, str, size+1);
180 131341 : return (PyObject *) op;
181 : }
182 :
183 : PyObject *
184 10565 : PyBytes_FromFormatV(const char *format, va_list vargs)
185 : {
186 : char *s;
187 : const char *f;
188 : const char *p;
189 : Py_ssize_t prec;
190 : int longflag;
191 : int size_tflag;
192 : /* Longest 64-bit formatted numbers:
193 : - "18446744073709551615\0" (21 bytes)
194 : - "-9223372036854775808\0" (21 bytes)
195 : Decimal takes the most space (it isn't enough for octal.)
196 :
197 : Longest 64-bit pointer representation:
198 : "0xffffffffffffffff\0" (19 bytes). */
199 : char buffer[21];
200 : _PyBytesWriter writer;
201 :
202 10565 : _PyBytesWriter_Init(&writer);
203 :
204 10565 : s = _PyBytesWriter_Alloc(&writer, strlen(format));
205 10565 : if (s == NULL)
206 0 : return NULL;
207 10565 : writer.overallocate = 1;
208 :
209 : #define WRITE_BYTES(str) \
210 : do { \
211 : s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
212 : if (s == NULL) \
213 : goto error; \
214 : } while (0)
215 :
216 42264 : for (f = format; *f; f++) {
217 31703 : if (*f != '%') {
218 10601 : *s++ = *f;
219 10601 : continue;
220 : }
221 :
222 21102 : p = f++;
223 :
224 : /* ignore the width (ex: 10 in "%10s") */
225 21103 : while (Py_ISDIGIT(*f))
226 1 : f++;
227 :
228 : /* parse the precision (ex: 10 in "%.10s") */
229 21102 : prec = 0;
230 21102 : if (*f == '.') {
231 1 : f++;
232 2 : for (; Py_ISDIGIT(*f); f++) {
233 1 : prec = (prec * 10) + (*f - '0');
234 : }
235 : }
236 :
237 21102 : while (*f && *f != '%' && !Py_ISALPHA(*f))
238 0 : f++;
239 :
240 : /* handle the long flag ('l'), but only for %ld and %lu.
241 : others can be added when necessary. */
242 21102 : longflag = 0;
243 21102 : if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
244 6 : longflag = 1;
245 6 : ++f;
246 : }
247 :
248 : /* handle the size_t flag ('z'). */
249 21102 : size_tflag = 0;
250 21102 : if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
251 6 : size_tflag = 1;
252 6 : ++f;
253 : }
254 :
255 : /* subtract bytes preallocated for the format string
256 : (ex: 2 for "%s") */
257 21102 : writer.min_size -= (f - p + 1);
258 :
259 21102 : switch (*f) {
260 6 : case 'c':
261 : {
262 6 : int c = va_arg(vargs, int);
263 6 : if (c < 0 || c > 255) {
264 2 : PyErr_SetString(PyExc_OverflowError,
265 : "PyBytes_FromFormatV(): %c format "
266 : "expects an integer in range [0; 255]");
267 2 : goto error;
268 : }
269 4 : writer.min_size++;
270 4 : *s++ = (unsigned char)c;
271 4 : break;
272 : }
273 :
274 12 : case 'd':
275 12 : if (longflag) {
276 4 : sprintf(buffer, "%ld", va_arg(vargs, long));
277 : }
278 8 : else if (size_tflag) {
279 4 : sprintf(buffer, "%zd", va_arg(vargs, Py_ssize_t));
280 : }
281 : else {
282 4 : sprintf(buffer, "%d", va_arg(vargs, int));
283 : }
284 12 : assert(strlen(buffer) < sizeof(buffer));
285 12 : WRITE_BYTES(buffer);
286 12 : break;
287 :
288 5 : case 'u':
289 5 : if (longflag) {
290 2 : sprintf(buffer, "%lu", va_arg(vargs, unsigned long));
291 : }
292 3 : else if (size_tflag) {
293 2 : sprintf(buffer, "%zu", va_arg(vargs, size_t));
294 : }
295 : else {
296 1 : sprintf(buffer, "%u", va_arg(vargs, unsigned int));
297 : }
298 5 : assert(strlen(buffer) < sizeof(buffer));
299 5 : WRITE_BYTES(buffer);
300 5 : break;
301 :
302 3 : case 'i':
303 3 : sprintf(buffer, "%i", va_arg(vargs, int));
304 3 : assert(strlen(buffer) < sizeof(buffer));
305 3 : WRITE_BYTES(buffer);
306 3 : break;
307 :
308 1 : case 'x':
309 1 : sprintf(buffer, "%x", va_arg(vargs, int));
310 1 : assert(strlen(buffer) < sizeof(buffer));
311 1 : WRITE_BYTES(buffer);
312 1 : break;
313 :
314 21067 : case 's':
315 : {
316 : Py_ssize_t i;
317 :
318 21067 : p = va_arg(vargs, const char*);
319 21067 : if (prec <= 0) {
320 21066 : i = strlen(p);
321 : }
322 : else {
323 1 : i = 0;
324 4 : while (i < prec && p[i]) {
325 3 : i++;
326 : }
327 : }
328 21067 : s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
329 21067 : if (s == NULL)
330 0 : goto error;
331 21067 : break;
332 : }
333 :
334 2 : case 'p':
335 2 : sprintf(buffer, "%p", va_arg(vargs, void*));
336 2 : assert(strlen(buffer) < sizeof(buffer));
337 : /* %p is ill-defined: ensure leading 0x. */
338 2 : if (buffer[1] == 'X')
339 0 : buffer[1] = 'x';
340 2 : else if (buffer[1] != 'x') {
341 0 : memmove(buffer+2, buffer, strlen(buffer)+1);
342 0 : buffer[0] = '0';
343 0 : buffer[1] = 'x';
344 : }
345 2 : WRITE_BYTES(buffer);
346 2 : break;
347 :
348 4 : case '%':
349 4 : writer.min_size++;
350 4 : *s++ = '%';
351 4 : break;
352 :
353 2 : default:
354 2 : if (*f == 0) {
355 : /* fix min_size if we reached the end of the format string */
356 2 : writer.min_size++;
357 : }
358 :
359 : /* invalid format string: copy unformatted string and exit */
360 2 : WRITE_BYTES(p);
361 2 : return _PyBytesWriter_Finish(&writer, s);
362 : }
363 : }
364 :
365 : #undef WRITE_BYTES
366 :
367 10561 : return _PyBytesWriter_Finish(&writer, s);
368 :
369 2 : error:
370 2 : _PyBytesWriter_Dealloc(&writer);
371 2 : return NULL;
372 : }
373 :
374 : PyObject *
375 10565 : PyBytes_FromFormat(const char *format, ...)
376 : {
377 : PyObject* ret;
378 : va_list vargs;
379 :
380 10565 : va_start(vargs, format);
381 10565 : ret = PyBytes_FromFormatV(format, vargs);
382 10565 : va_end(vargs);
383 10565 : return ret;
384 : }
385 :
386 : /* Helpers for formatstring */
387 :
388 : Py_LOCAL_INLINE(PyObject *)
389 959 : getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
390 : {
391 959 : Py_ssize_t argidx = *p_argidx;
392 959 : if (argidx < arglen) {
393 959 : (*p_argidx)++;
394 959 : if (arglen < 0)
395 609 : return args;
396 : else
397 350 : return PyTuple_GetItem(args, argidx);
398 : }
399 0 : PyErr_SetString(PyExc_TypeError,
400 : "not enough arguments for format string");
401 0 : return NULL;
402 : }
403 :
404 : /* Returns a new reference to a PyBytes object, or NULL on failure. */
405 :
406 : static char*
407 24 : formatfloat(PyObject *v, int flags, int prec, int type,
408 : PyObject **p_result, _PyBytesWriter *writer, char *str)
409 : {
410 : char *p;
411 : PyObject *result;
412 : double x;
413 : size_t len;
414 24 : int dtoa_flags = 0;
415 :
416 24 : x = PyFloat_AsDouble(v);
417 24 : if (x == -1.0 && PyErr_Occurred()) {
418 2 : PyErr_Format(PyExc_TypeError, "float argument required, "
419 2 : "not %.200s", Py_TYPE(v)->tp_name);
420 2 : return NULL;
421 : }
422 :
423 22 : if (prec < 0)
424 6 : prec = 6;
425 :
426 22 : if (flags & F_ALT) {
427 16 : dtoa_flags |= Py_DTSF_ALT;
428 : }
429 22 : if (flags & F_NO_NEG_0) {
430 0 : dtoa_flags |= Py_DTSF_NO_NEG_0;
431 : }
432 22 : p = PyOS_double_to_string(x, type, prec, dtoa_flags, NULL);
433 :
434 22 : if (p == NULL)
435 0 : return NULL;
436 :
437 22 : len = strlen(p);
438 22 : if (writer != NULL) {
439 6 : str = _PyBytesWriter_Prepare(writer, str, len);
440 6 : if (str == NULL)
441 0 : return NULL;
442 6 : memcpy(str, p, len);
443 6 : PyMem_Free(p);
444 6 : str += len;
445 6 : return str;
446 : }
447 :
448 16 : result = PyBytes_FromStringAndSize(p, len);
449 16 : PyMem_Free(p);
450 16 : *p_result = result;
451 16 : return result != NULL ? str : NULL;
452 : }
453 :
454 : static PyObject *
455 190 : formatlong(PyObject *v, int flags, int prec, int type)
456 : {
457 : PyObject *result, *iobj;
458 190 : if (type == 'i')
459 0 : type = 'd';
460 190 : if (PyLong_Check(v))
461 182 : return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
462 8 : if (PyNumber_Check(v)) {
463 : /* make sure number is a type of integer for o, x, and X */
464 5 : if (type == 'o' || type == 'x' || type == 'X')
465 1 : iobj = _PyNumber_Index(v);
466 : else
467 4 : iobj = PyNumber_Long(v);
468 5 : if (iobj != NULL) {
469 4 : assert(PyLong_Check(iobj));
470 4 : result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
471 4 : Py_DECREF(iobj);
472 4 : return result;
473 : }
474 1 : if (!PyErr_ExceptionMatches(PyExc_TypeError))
475 0 : return NULL;
476 : }
477 8 : PyErr_Format(PyExc_TypeError,
478 : "%%%c format: %s is required, not %.200s", type,
479 4 : (type == 'o' || type == 'x' || type == 'X') ? "an integer"
480 : : "a real number",
481 4 : Py_TYPE(v)->tp_name);
482 4 : return NULL;
483 : }
484 :
485 : static int
486 18 : byte_converter(PyObject *arg, char *p)
487 : {
488 18 : if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
489 2 : *p = PyBytes_AS_STRING(arg)[0];
490 2 : return 1;
491 : }
492 16 : else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
493 2 : *p = PyByteArray_AS_STRING(arg)[0];
494 2 : return 1;
495 : }
496 : else {
497 : int overflow;
498 14 : long ival = PyLong_AsLongAndOverflow(arg, &overflow);
499 14 : if (ival == -1 && PyErr_Occurred()) {
500 3 : if (PyErr_ExceptionMatches(PyExc_TypeError)) {
501 3 : goto onError;
502 : }
503 11 : return 0;
504 : }
505 11 : if (!(0 <= ival && ival <= 255)) {
506 : /* this includes an overflow in converting to C long */
507 3 : PyErr_SetString(PyExc_OverflowError,
508 : "%c arg not in range(256)");
509 3 : return 0;
510 : }
511 8 : *p = (char)ival;
512 8 : return 1;
513 : }
514 3 : onError:
515 3 : PyErr_SetString(PyExc_TypeError,
516 : "%c requires an integer in range(256) or a single byte");
517 3 : return 0;
518 : }
519 :
520 : static PyObject *_PyBytes_FromBuffer(PyObject *x);
521 :
522 : static PyObject *
523 48 : format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
524 : {
525 : PyObject *func, *result;
526 : /* is it a bytes object? */
527 48 : if (PyBytes_Check(v)) {
528 34 : *pbuf = PyBytes_AS_STRING(v);
529 34 : *plen = PyBytes_GET_SIZE(v);
530 34 : Py_INCREF(v);
531 34 : return v;
532 : }
533 14 : if (PyByteArray_Check(v)) {
534 4 : *pbuf = PyByteArray_AS_STRING(v);
535 4 : *plen = PyByteArray_GET_SIZE(v);
536 4 : Py_INCREF(v);
537 4 : return v;
538 : }
539 : /* does it support __bytes__? */
540 10 : func = _PyObject_LookupSpecial(v, &_Py_ID(__bytes__));
541 10 : if (func != NULL) {
542 4 : result = _PyObject_CallNoArgs(func);
543 4 : Py_DECREF(func);
544 4 : if (result == NULL)
545 0 : return NULL;
546 4 : if (!PyBytes_Check(result)) {
547 0 : PyErr_Format(PyExc_TypeError,
548 : "__bytes__ returned non-bytes (type %.200s)",
549 0 : Py_TYPE(result)->tp_name);
550 0 : Py_DECREF(result);
551 0 : return NULL;
552 : }
553 4 : *pbuf = PyBytes_AS_STRING(result);
554 4 : *plen = PyBytes_GET_SIZE(result);
555 4 : return result;
556 : }
557 : /* does it support buffer protocol? */
558 6 : if (PyObject_CheckBuffer(v)) {
559 : /* maybe we can avoid making a copy of the buffer object here? */
560 4 : result = _PyBytes_FromBuffer(v);
561 4 : if (result == NULL)
562 0 : return NULL;
563 4 : *pbuf = PyBytes_AS_STRING(result);
564 4 : *plen = PyBytes_GET_SIZE(result);
565 4 : return result;
566 : }
567 2 : PyErr_Format(PyExc_TypeError,
568 : "%%b requires a bytes-like object, "
569 : "or an object that implements __bytes__, not '%.100s'",
570 2 : Py_TYPE(v)->tp_name);
571 2 : return NULL;
572 : }
573 :
574 : /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
575 :
576 : PyObject *
577 941 : _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
578 : PyObject *args, int use_bytearray)
579 : {
580 : const char *fmt;
581 : char *res;
582 : Py_ssize_t arglen, argidx;
583 : Py_ssize_t fmtcnt;
584 941 : int args_owned = 0;
585 941 : PyObject *dict = NULL;
586 : _PyBytesWriter writer;
587 :
588 941 : if (args == NULL) {
589 0 : PyErr_BadInternalCall();
590 0 : return NULL;
591 : }
592 941 : fmt = format;
593 941 : fmtcnt = format_len;
594 :
595 941 : _PyBytesWriter_Init(&writer);
596 941 : writer.use_bytearray = use_bytearray;
597 :
598 941 : res = _PyBytesWriter_Alloc(&writer, fmtcnt);
599 941 : if (res == NULL)
600 0 : return NULL;
601 941 : if (!use_bytearray)
602 776 : writer.overallocate = 1;
603 :
604 941 : if (PyTuple_Check(args)) {
605 329 : arglen = PyTuple_GET_SIZE(args);
606 329 : argidx = 0;
607 : }
608 : else {
609 612 : arglen = -1;
610 612 : argidx = -2;
611 : }
612 1298 : if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
613 386 : !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
614 1 : !PyByteArray_Check(args)) {
615 0 : dict = args;
616 : }
617 :
618 2519 : while (--fmtcnt >= 0) {
619 1596 : if (*fmt != '%') {
620 : Py_ssize_t len;
621 : char *pos;
622 :
623 646 : pos = (char *)memchr(fmt + 1, '%', fmtcnt);
624 646 : if (pos != NULL)
625 582 : len = pos - fmt;
626 : else
627 64 : len = fmtcnt + 1;
628 646 : assert(len != 0);
629 :
630 646 : memcpy(res, fmt, len);
631 646 : res += len;
632 646 : fmt += len;
633 646 : fmtcnt -= (len - 1);
634 : }
635 : else {
636 : /* Got a format specifier */
637 950 : int flags = 0;
638 950 : Py_ssize_t width = -1;
639 950 : int prec = -1;
640 950 : int c = '\0';
641 : int fill;
642 950 : PyObject *v = NULL;
643 950 : PyObject *temp = NULL;
644 950 : const char *pbuf = NULL;
645 : int sign;
646 950 : Py_ssize_t len = 0;
647 : char onechar; /* For byte_converter() */
648 : Py_ssize_t alloc;
649 :
650 950 : fmt++;
651 950 : if (*fmt == '%') {
652 6 : *res++ = '%';
653 6 : fmt++;
654 6 : fmtcnt--;
655 664 : continue;
656 : }
657 944 : if (*fmt == '(') {
658 : const char *keystart;
659 : Py_ssize_t keylen;
660 : PyObject *key;
661 0 : int pcount = 1;
662 :
663 0 : if (dict == NULL) {
664 0 : PyErr_SetString(PyExc_TypeError,
665 : "format requires a mapping");
666 18 : goto error;
667 : }
668 0 : ++fmt;
669 0 : --fmtcnt;
670 0 : keystart = fmt;
671 : /* Skip over balanced parentheses */
672 0 : while (pcount > 0 && --fmtcnt >= 0) {
673 0 : if (*fmt == ')')
674 0 : --pcount;
675 0 : else if (*fmt == '(')
676 0 : ++pcount;
677 0 : fmt++;
678 : }
679 0 : keylen = fmt - keystart - 1;
680 0 : if (fmtcnt < 0 || pcount > 0) {
681 0 : PyErr_SetString(PyExc_ValueError,
682 : "incomplete format key");
683 0 : goto error;
684 : }
685 0 : key = PyBytes_FromStringAndSize(keystart,
686 : keylen);
687 0 : if (key == NULL)
688 0 : goto error;
689 0 : if (args_owned) {
690 0 : Py_DECREF(args);
691 0 : args_owned = 0;
692 : }
693 0 : args = PyObject_GetItem(dict, key);
694 0 : Py_DECREF(key);
695 0 : if (args == NULL) {
696 0 : goto error;
697 : }
698 0 : args_owned = 1;
699 0 : arglen = -1;
700 0 : argidx = -2;
701 : }
702 :
703 : /* Parse flags. Example: "%+i" => flags=F_SIGN. */
704 1181 : while (--fmtcnt >= 0) {
705 1180 : switch (c = *fmt++) {
706 34 : case '-': flags |= F_LJUST; continue;
707 30 : case '+': flags |= F_SIGN; continue;
708 13 : case ' ': flags |= F_BLANK; continue;
709 110 : case '#': flags |= F_ALT; continue;
710 50 : case '0': flags |= F_ZERO; continue;
711 0 : case 'z': flags |= F_NO_NEG_0; continue;
712 : }
713 943 : break;
714 : }
715 :
716 : /* Parse width. Example: "%10s" => width=10 */
717 944 : if (c == '*') {
718 0 : v = getnextarg(args, arglen, &argidx);
719 0 : if (v == NULL)
720 0 : goto error;
721 0 : if (!PyLong_Check(v)) {
722 0 : PyErr_SetString(PyExc_TypeError,
723 : "* wants int");
724 0 : goto error;
725 : }
726 0 : width = PyLong_AsSsize_t(v);
727 0 : if (width == -1 && PyErr_Occurred())
728 0 : goto error;
729 0 : if (width < 0) {
730 0 : flags |= F_LJUST;
731 0 : width = -width;
732 : }
733 0 : if (--fmtcnt >= 0)
734 0 : c = *fmt++;
735 : }
736 944 : else if (c >= 0 && isdigit(c)) {
737 130 : width = c - '0';
738 250 : while (--fmtcnt >= 0) {
739 250 : c = Py_CHARMASK(*fmt++);
740 250 : if (!isdigit(c))
741 130 : break;
742 120 : if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
743 0 : PyErr_SetString(
744 : PyExc_ValueError,
745 : "width too big");
746 0 : goto error;
747 : }
748 120 : width = width*10 + (c - '0');
749 : }
750 : }
751 :
752 : /* Parse precision. Example: "%.3f" => prec=3 */
753 944 : if (c == '.') {
754 114 : prec = 0;
755 114 : if (--fmtcnt >= 0)
756 114 : c = *fmt++;
757 114 : if (c == '*') {
758 18 : v = getnextarg(args, arglen, &argidx);
759 18 : if (v == NULL)
760 0 : goto error;
761 18 : if (!PyLong_Check(v)) {
762 0 : PyErr_SetString(
763 : PyExc_TypeError,
764 : "* wants int");
765 0 : goto error;
766 : }
767 18 : prec = _PyLong_AsInt(v);
768 18 : if (prec == -1 && PyErr_Occurred())
769 2 : goto error;
770 16 : if (prec < 0)
771 0 : prec = 0;
772 16 : if (--fmtcnt >= 0)
773 16 : c = *fmt++;
774 : }
775 96 : else if (c >= 0 && isdigit(c)) {
776 96 : prec = c - '0';
777 190 : while (--fmtcnt >= 0) {
778 190 : c = Py_CHARMASK(*fmt++);
779 190 : if (!isdigit(c))
780 96 : break;
781 94 : if (prec > (INT_MAX - ((int)c - '0')) / 10) {
782 0 : PyErr_SetString(
783 : PyExc_ValueError,
784 : "prec too big");
785 0 : goto error;
786 : }
787 94 : prec = prec*10 + (c - '0');
788 : }
789 : }
790 : } /* prec */
791 942 : if (fmtcnt >= 0) {
792 941 : if (c == 'h' || c == 'l' || c == 'L') {
793 0 : if (--fmtcnt >= 0)
794 0 : c = *fmt++;
795 : }
796 : }
797 942 : if (fmtcnt < 0) {
798 1 : PyErr_SetString(PyExc_ValueError,
799 : "incomplete format");
800 1 : goto error;
801 : }
802 941 : v = getnextarg(args, arglen, &argidx);
803 941 : if (v == NULL)
804 0 : goto error;
805 :
806 941 : if (fmtcnt == 0) {
807 : /* last write: disable writer overallocation */
808 867 : writer.overallocate = 0;
809 : }
810 :
811 941 : sign = 0;
812 941 : fill = ' ';
813 941 : switch (c) {
814 16 : case 'r':
815 : // %r is only for 2/3 code; 3 only code should use %a
816 : case 'a':
817 16 : temp = PyObject_ASCII(v);
818 16 : if (temp == NULL)
819 0 : goto error;
820 16 : assert(PyUnicode_IS_ASCII(temp));
821 16 : pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
822 16 : len = PyUnicode_GET_LENGTH(temp);
823 16 : if (prec >= 0 && len > prec)
824 0 : len = prec;
825 16 : break;
826 :
827 48 : case 's':
828 : // %s is only for 2/3 code; 3 only code should use %b
829 : case 'b':
830 48 : temp = format_obj(v, &pbuf, &len);
831 48 : if (temp == NULL)
832 2 : goto error;
833 46 : if (prec >= 0 && len > prec)
834 0 : len = prec;
835 46 : break;
836 :
837 834 : case 'i':
838 : case 'd':
839 : case 'u':
840 : case 'o':
841 : case 'x':
842 : case 'X':
843 834 : if (PyLong_CheckExact(v)
844 826 : && width == -1 && prec == -1
845 652 : && !(flags & (F_SIGN | F_BLANK))
846 652 : && c != 'X')
847 : {
848 : /* Fast path */
849 644 : int alternate = flags & F_ALT;
850 : int base;
851 :
852 : switch(c)
853 : {
854 0 : default:
855 0 : Py_UNREACHABLE();
856 88 : case 'd':
857 : case 'i':
858 : case 'u':
859 88 : base = 10;
860 88 : break;
861 536 : case 'o':
862 536 : base = 8;
863 536 : break;
864 20 : case 'x':
865 : case 'X':
866 20 : base = 16;
867 20 : break;
868 : }
869 :
870 : /* Fast path */
871 644 : writer.min_size -= 2; /* size preallocated for "%d" */
872 644 : res = _PyLong_FormatBytesWriter(&writer, res,
873 : v, base, alternate);
874 644 : if (res == NULL)
875 0 : goto error;
876 644 : continue;
877 : }
878 :
879 190 : temp = formatlong(v, flags, prec, c);
880 190 : if (!temp)
881 4 : goto error;
882 186 : assert(PyUnicode_IS_ASCII(temp));
883 186 : pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
884 186 : len = PyUnicode_GET_LENGTH(temp);
885 186 : sign = 1;
886 186 : if (flags & F_ZERO)
887 50 : fill = '0';
888 186 : break;
889 :
890 24 : case 'e':
891 : case 'E':
892 : case 'f':
893 : case 'F':
894 : case 'g':
895 : case 'G':
896 24 : if (width == -1 && prec == -1
897 8 : && !(flags & (F_SIGN | F_BLANK)))
898 : {
899 : /* Fast path */
900 8 : writer.min_size -= 2; /* size preallocated for "%f" */
901 8 : res = formatfloat(v, flags, prec, c, NULL, &writer, res);
902 8 : if (res == NULL)
903 2 : goto error;
904 6 : continue;
905 : }
906 :
907 16 : if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
908 0 : goto error;
909 16 : pbuf = PyBytes_AS_STRING(temp);
910 16 : len = PyBytes_GET_SIZE(temp);
911 16 : sign = 1;
912 16 : if (flags & F_ZERO)
913 0 : fill = '0';
914 16 : break;
915 :
916 18 : case 'c':
917 18 : pbuf = &onechar;
918 18 : len = byte_converter(v, &onechar);
919 18 : if (!len)
920 6 : goto error;
921 12 : if (width == -1) {
922 : /* Fast path */
923 8 : *res++ = onechar;
924 8 : continue;
925 : }
926 4 : break;
927 :
928 1 : default:
929 1 : PyErr_Format(PyExc_ValueError,
930 : "unsupported format character '%c' (0x%x) "
931 : "at index %zd",
932 : c, c,
933 1 : (Py_ssize_t)(fmt - 1 - format));
934 1 : goto error;
935 : }
936 :
937 268 : if (sign) {
938 202 : if (*pbuf == '-' || *pbuf == '+') {
939 60 : sign = *pbuf++;
940 60 : len--;
941 : }
942 142 : else if (flags & F_SIGN)
943 30 : sign = '+';
944 112 : else if (flags & F_BLANK)
945 12 : sign = ' ';
946 : else
947 100 : sign = 0;
948 : }
949 268 : if (width < len)
950 146 : width = len;
951 :
952 268 : alloc = width;
953 268 : if (sign != 0 && len == width)
954 34 : alloc++;
955 : /* 2: size preallocated for %s */
956 268 : if (alloc > 2) {
957 262 : res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
958 262 : if (res == NULL)
959 0 : goto error;
960 : }
961 : #ifndef NDEBUG
962 268 : char *before = res;
963 : #endif
964 :
965 : /* Write the sign if needed */
966 268 : if (sign) {
967 102 : if (fill != ' ')
968 32 : *res++ = sign;
969 102 : if (width > len)
970 68 : width--;
971 : }
972 :
973 : /* Write the numeric prefix for "x", "X" and "o" formats
974 : if the alternate form is used.
975 : For example, write "0x" for the "%#x" format. */
976 268 : if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
977 78 : assert(pbuf[0] == '0');
978 78 : assert(pbuf[1] == c);
979 78 : if (fill != ' ') {
980 18 : *res++ = *pbuf++;
981 18 : *res++ = *pbuf++;
982 : }
983 78 : width -= 2;
984 78 : if (width < 0)
985 0 : width = 0;
986 78 : len -= 2;
987 : }
988 :
989 : /* Pad left with the fill character if needed */
990 268 : if (width > len && !(flags & F_LJUST)) {
991 80 : memset(res, fill, width - len);
992 80 : res += (width - len);
993 80 : width = len;
994 : }
995 :
996 : /* If padding with spaces: write sign if needed and/or numeric
997 : prefix if the alternate form is used */
998 268 : if (fill == ' ') {
999 218 : if (sign)
1000 70 : *res++ = sign;
1001 218 : if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1002 60 : assert(pbuf[0] == '0');
1003 60 : assert(pbuf[1] == c);
1004 60 : *res++ = *pbuf++;
1005 60 : *res++ = *pbuf++;
1006 : }
1007 : }
1008 :
1009 : /* Copy bytes */
1010 268 : memcpy(res, pbuf, len);
1011 268 : res += len;
1012 :
1013 : /* Pad right with the fill character if needed */
1014 268 : if (width > len) {
1015 34 : memset(res, ' ', width - len);
1016 34 : res += (width - len);
1017 : }
1018 :
1019 268 : if (dict && (argidx < arglen)) {
1020 0 : PyErr_SetString(PyExc_TypeError,
1021 : "not all arguments converted during bytes formatting");
1022 0 : Py_XDECREF(temp);
1023 0 : goto error;
1024 : }
1025 268 : Py_XDECREF(temp);
1026 :
1027 : #ifndef NDEBUG
1028 : /* check that we computed the exact size for this write */
1029 268 : assert((res - before) == alloc);
1030 : #endif
1031 : } /* '%' */
1032 :
1033 : /* If overallocation was disabled, ensure that it was the last
1034 : write. Otherwise, we missed an optimization */
1035 914 : assert(writer.overallocate || fmtcnt == 0 || use_bytearray);
1036 : } /* until end */
1037 :
1038 923 : if (argidx < arglen && !dict) {
1039 3 : PyErr_SetString(PyExc_TypeError,
1040 : "not all arguments converted during bytes formatting");
1041 3 : goto error;
1042 : }
1043 :
1044 920 : if (args_owned) {
1045 0 : Py_DECREF(args);
1046 : }
1047 920 : return _PyBytesWriter_Finish(&writer, res);
1048 :
1049 21 : error:
1050 21 : _PyBytesWriter_Dealloc(&writer);
1051 21 : if (args_owned) {
1052 0 : Py_DECREF(args);
1053 : }
1054 21 : return NULL;
1055 : }
1056 :
1057 : /* Unescape a backslash-escaped string. */
1058 27589 : PyObject *_PyBytes_DecodeEscape(const char *s,
1059 : Py_ssize_t len,
1060 : const char *errors,
1061 : const char **first_invalid_escape)
1062 : {
1063 : int c;
1064 : char *p;
1065 : const char *end;
1066 : _PyBytesWriter writer;
1067 :
1068 27589 : _PyBytesWriter_Init(&writer);
1069 :
1070 27589 : p = _PyBytesWriter_Alloc(&writer, len);
1071 27589 : if (p == NULL)
1072 0 : return NULL;
1073 27589 : writer.overallocate = 1;
1074 :
1075 27589 : *first_invalid_escape = NULL;
1076 :
1077 27589 : end = s + len;
1078 323939 : while (s < end) {
1079 296359 : if (*s != '\\') {
1080 218374 : *p++ = *s++;
1081 218374 : continue;
1082 : }
1083 :
1084 77985 : s++;
1085 77985 : if (s == end) {
1086 3 : PyErr_SetString(PyExc_ValueError,
1087 : "Trailing \\ in string");
1088 3 : goto failed;
1089 : }
1090 :
1091 77982 : switch (*s++) {
1092 : /* XXX This assumes ASCII! */
1093 57 : case '\n': break;
1094 1847 : case '\\': *p++ = '\\'; break;
1095 143 : case '\'': *p++ = '\''; break;
1096 65 : case '\"': *p++ = '\"'; break;
1097 2 : case 'b': *p++ = '\b'; break;
1098 59 : case 'f': *p++ = '\014'; break; /* FF */
1099 1181 : case 't': *p++ = '\t'; break;
1100 8729 : case 'n': *p++ = '\n'; break;
1101 3278 : case 'r': *p++ = '\r'; break;
1102 37 : case 'v': *p++ = '\013'; break; /* VT */
1103 2 : case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1104 3637 : case '0': case '1': case '2': case '3':
1105 : case '4': case '5': case '6': case '7':
1106 3637 : c = s[-1] - '0';
1107 3637 : if (s < end && '0' <= *s && *s <= '7') {
1108 2436 : c = (c<<3) + *s++ - '0';
1109 2436 : if (s < end && '0' <= *s && *s <= '7')
1110 2420 : c = (c<<3) + *s++ - '0';
1111 : }
1112 3637 : if (c > 0377) {
1113 514 : if (*first_invalid_escape == NULL) {
1114 514 : *first_invalid_escape = s-3; /* Back up 3 chars, since we've
1115 : already incremented s. */
1116 : }
1117 : }
1118 3637 : *p++ = c;
1119 3637 : break;
1120 58790 : case 'x':
1121 58790 : if (s+1 < end) {
1122 : int digit1, digit2;
1123 58781 : digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1124 58781 : digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1125 58781 : if (digit1 < 16 && digit2 < 16) {
1126 58776 : *p++ = (unsigned char)((digit1 << 4) + digit2);
1127 58776 : s += 2;
1128 58776 : break;
1129 : }
1130 : }
1131 : /* invalid hexadecimal digits */
1132 :
1133 14 : if (!errors || strcmp(errors, "strict") == 0) {
1134 6 : PyErr_Format(PyExc_ValueError,
1135 : "invalid \\x escape at position %zd",
1136 6 : s - 2 - (end - len));
1137 6 : goto failed;
1138 : }
1139 8 : if (strcmp(errors, "replace") == 0) {
1140 4 : *p++ = '?';
1141 4 : } else if (strcmp(errors, "ignore") == 0)
1142 : /* do nothing */;
1143 : else {
1144 0 : PyErr_Format(PyExc_ValueError,
1145 : "decoding error; unknown "
1146 : "error handling code: %.400s",
1147 : errors);
1148 0 : goto failed;
1149 : }
1150 : /* skip \x */
1151 8 : if (s < end && Py_ISXDIGIT(s[0]))
1152 4 : s++; /* and a hexdigit */
1153 8 : break;
1154 :
1155 155 : default:
1156 155 : if (*first_invalid_escape == NULL) {
1157 155 : *first_invalid_escape = s-1; /* Back up one char, since we've
1158 : already incremented s. */
1159 : }
1160 155 : *p++ = '\\';
1161 155 : s--;
1162 : }
1163 : }
1164 :
1165 27580 : return _PyBytesWriter_Finish(&writer, p);
1166 :
1167 9 : failed:
1168 9 : _PyBytesWriter_Dealloc(&writer);
1169 9 : return NULL;
1170 : }
1171 :
1172 8403 : PyObject *PyBytes_DecodeEscape(const char *s,
1173 : Py_ssize_t len,
1174 : const char *errors,
1175 : Py_ssize_t Py_UNUSED(unicode),
1176 : const char *Py_UNUSED(recode_encoding))
1177 : {
1178 : const char* first_invalid_escape;
1179 8403 : PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
1180 : &first_invalid_escape);
1181 8403 : if (result == NULL)
1182 7 : return NULL;
1183 8396 : if (first_invalid_escape != NULL) {
1184 303 : unsigned char c = *first_invalid_escape;
1185 303 : if ('4' <= c && c <= '7') {
1186 256 : if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1187 : "invalid octal escape sequence '\\%.3s'",
1188 : first_invalid_escape) < 0)
1189 : {
1190 0 : Py_DECREF(result);
1191 0 : return NULL;
1192 : }
1193 : }
1194 : else {
1195 47 : if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1196 : "invalid escape sequence '\\%c'",
1197 : c) < 0)
1198 : {
1199 0 : Py_DECREF(result);
1200 0 : return NULL;
1201 : }
1202 : }
1203 : }
1204 8396 : return result;
1205 :
1206 : }
1207 : /* -------------------------------------------------------------------- */
1208 : /* object api */
1209 :
1210 : Py_ssize_t
1211 185145 : PyBytes_Size(PyObject *op)
1212 : {
1213 185145 : if (!PyBytes_Check(op)) {
1214 0 : PyErr_Format(PyExc_TypeError,
1215 0 : "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1216 0 : return -1;
1217 : }
1218 185145 : return Py_SIZE(op);
1219 : }
1220 :
1221 : char *
1222 45015500 : PyBytes_AsString(PyObject *op)
1223 : {
1224 45015500 : if (!PyBytes_Check(op)) {
1225 0 : PyErr_Format(PyExc_TypeError,
1226 0 : "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1227 0 : return NULL;
1228 : }
1229 45015500 : return ((PyBytesObject *)op)->ob_sval;
1230 : }
1231 :
1232 : int
1233 283477 : PyBytes_AsStringAndSize(PyObject *obj,
1234 : char **s,
1235 : Py_ssize_t *len)
1236 : {
1237 283477 : if (s == NULL) {
1238 0 : PyErr_BadInternalCall();
1239 0 : return -1;
1240 : }
1241 :
1242 283477 : if (!PyBytes_Check(obj)) {
1243 0 : PyErr_Format(PyExc_TypeError,
1244 0 : "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1245 0 : return -1;
1246 : }
1247 :
1248 283477 : *s = PyBytes_AS_STRING(obj);
1249 283477 : if (len != NULL)
1250 1819 : *len = PyBytes_GET_SIZE(obj);
1251 281658 : else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1252 7 : PyErr_SetString(PyExc_ValueError,
1253 : "embedded null byte");
1254 7 : return -1;
1255 : }
1256 283470 : return 0;
1257 : }
1258 :
1259 : /* -------------------------------------------------------------------- */
1260 : /* Methods */
1261 :
1262 : #define STRINGLIB_GET_EMPTY() bytes_get_empty()
1263 :
1264 : #include "stringlib/stringdefs.h"
1265 : #define STRINGLIB_MUTABLE 0
1266 :
1267 : #include "stringlib/fastsearch.h"
1268 : #include "stringlib/count.h"
1269 : #include "stringlib/find.h"
1270 : #include "stringlib/join.h"
1271 : #include "stringlib/partition.h"
1272 : #include "stringlib/split.h"
1273 : #include "stringlib/ctype.h"
1274 :
1275 : #include "stringlib/transmogrify.h"
1276 :
1277 : #undef STRINGLIB_GET_EMPTY
1278 :
1279 : Py_ssize_t
1280 314 : _PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
1281 : const char *needle, Py_ssize_t len_needle,
1282 : Py_ssize_t offset)
1283 : {
1284 314 : return stringlib_find(haystack, len_haystack,
1285 : needle, len_needle, offset);
1286 : }
1287 :
1288 : Py_ssize_t
1289 7 : _PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
1290 : const char *needle, Py_ssize_t len_needle,
1291 : Py_ssize_t offset)
1292 : {
1293 7 : return stringlib_rfind(haystack, len_haystack,
1294 : needle, len_needle, offset);
1295 : }
1296 :
1297 : PyObject *
1298 76463 : PyBytes_Repr(PyObject *obj, int smartquotes)
1299 : {
1300 76463 : PyBytesObject* op = (PyBytesObject*) obj;
1301 76463 : Py_ssize_t i, length = Py_SIZE(op);
1302 : Py_ssize_t newsize, squotes, dquotes;
1303 : PyObject *v;
1304 : unsigned char quote;
1305 : const unsigned char *s;
1306 : Py_UCS1 *p;
1307 :
1308 : /* Compute size of output string */
1309 76463 : squotes = dquotes = 0;
1310 76463 : newsize = 3; /* b'' */
1311 76463 : s = (const unsigned char*)op->ob_sval;
1312 1452880 : for (i = 0; i < length; i++) {
1313 1376410 : Py_ssize_t incr = 1;
1314 1376410 : switch(s[i]) {
1315 2693 : case '\'': squotes++; break;
1316 4065 : case '"': dquotes++; break;
1317 42528 : case '\\': case '\t': case '\n': case '\r':
1318 42528 : incr = 2; break; /* \C */
1319 1327130 : default:
1320 1327130 : if (s[i] < ' ' || s[i] >= 0x7f)
1321 820125 : incr = 4; /* \xHH */
1322 : }
1323 1376410 : if (newsize > PY_SSIZE_T_MAX - incr)
1324 0 : goto overflow;
1325 1376410 : newsize += incr;
1326 : }
1327 76463 : quote = '\'';
1328 76463 : if (smartquotes && squotes && !dquotes)
1329 838 : quote = '"';
1330 76463 : if (squotes && quote == '\'') {
1331 413 : if (newsize > PY_SSIZE_T_MAX - squotes)
1332 0 : goto overflow;
1333 413 : newsize += squotes;
1334 : }
1335 :
1336 76463 : v = PyUnicode_New(newsize, 127);
1337 76463 : if (v == NULL) {
1338 0 : return NULL;
1339 : }
1340 76463 : p = PyUnicode_1BYTE_DATA(v);
1341 :
1342 76463 : *p++ = 'b', *p++ = quote;
1343 1452880 : for (i = 0; i < length; i++) {
1344 1376410 : unsigned char c = op->ob_sval[i];
1345 1376410 : if (c == quote || c == '\\')
1346 2690 : *p++ = '\\', *p++ = c;
1347 1373720 : else if (c == '\t')
1348 13679 : *p++ = '\\', *p++ = 't';
1349 1360040 : else if (c == '\n')
1350 15304 : *p++ = '\\', *p++ = 'n';
1351 1344740 : else if (c == '\r')
1352 11820 : *p++ = '\\', *p++ = 'r';
1353 1332920 : else if (c < ' ' || c >= 0x7f) {
1354 820125 : *p++ = '\\';
1355 820125 : *p++ = 'x';
1356 820125 : *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1357 820125 : *p++ = Py_hexdigits[c & 0xf];
1358 : }
1359 : else
1360 512794 : *p++ = c;
1361 : }
1362 76463 : *p++ = quote;
1363 76463 : assert(_PyUnicode_CheckConsistency(v, 1));
1364 76463 : return v;
1365 :
1366 0 : overflow:
1367 0 : PyErr_SetString(PyExc_OverflowError,
1368 : "bytes object is too large to make repr");
1369 0 : return NULL;
1370 : }
1371 :
1372 : static PyObject *
1373 76463 : bytes_repr(PyObject *op)
1374 : {
1375 76463 : return PyBytes_Repr(op, 1);
1376 : }
1377 :
1378 : static PyObject *
1379 38 : bytes_str(PyObject *op)
1380 : {
1381 38 : if (_Py_GetConfig()->bytes_warning) {
1382 28 : if (PyErr_WarnEx(PyExc_BytesWarning,
1383 : "str() on a bytes instance", 1)) {
1384 2 : return NULL;
1385 : }
1386 : }
1387 36 : return bytes_repr(op);
1388 : }
1389 :
1390 : static Py_ssize_t
1391 14094200 : bytes_length(PyBytesObject *a)
1392 : {
1393 14094200 : return Py_SIZE(a);
1394 : }
1395 :
1396 : /* This is also used by PyBytes_Concat() */
1397 : static PyObject *
1398 2389720 : bytes_concat(PyObject *a, PyObject *b)
1399 : {
1400 : Py_buffer va, vb;
1401 2389720 : PyObject *result = NULL;
1402 :
1403 2389720 : va.len = -1;
1404 2389720 : vb.len = -1;
1405 4779450 : if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1406 2389720 : PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1407 6 : PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1408 6 : Py_TYPE(b)->tp_name, Py_TYPE(a)->tp_name);
1409 6 : goto done;
1410 : }
1411 :
1412 : /* Optimize end cases */
1413 2389720 : if (va.len == 0 && PyBytes_CheckExact(b)) {
1414 315293 : result = b;
1415 315293 : Py_INCREF(result);
1416 315293 : goto done;
1417 : }
1418 2074420 : if (vb.len == 0 && PyBytes_CheckExact(a)) {
1419 39765 : result = a;
1420 39765 : Py_INCREF(result);
1421 39765 : goto done;
1422 : }
1423 :
1424 2034660 : if (va.len > PY_SSIZE_T_MAX - vb.len) {
1425 0 : PyErr_NoMemory();
1426 0 : goto done;
1427 : }
1428 :
1429 2034660 : result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1430 2034660 : if (result != NULL) {
1431 2034660 : memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1432 2034660 : memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1433 : }
1434 :
1435 0 : done:
1436 2389720 : if (va.len != -1)
1437 2389720 : PyBuffer_Release(&va);
1438 2389720 : if (vb.len != -1)
1439 2389720 : PyBuffer_Release(&vb);
1440 2389720 : return result;
1441 : }
1442 :
1443 : static PyObject *
1444 44398 : bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1445 : {
1446 : Py_ssize_t size;
1447 : PyBytesObject *op;
1448 : size_t nbytes;
1449 44398 : if (n < 0)
1450 92 : n = 0;
1451 : /* watch out for overflows: the size can overflow int,
1452 : * and the # of bytes needed can overflow size_t
1453 : */
1454 44398 : if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1455 6 : PyErr_SetString(PyExc_OverflowError,
1456 : "repeated bytes are too long");
1457 6 : return NULL;
1458 : }
1459 44392 : size = Py_SIZE(a) * n;
1460 44392 : if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1461 1889 : Py_INCREF(a);
1462 1889 : return (PyObject *)a;
1463 : }
1464 42503 : nbytes = (size_t)size;
1465 42503 : if (nbytes + PyBytesObject_SIZE <= nbytes) {
1466 0 : PyErr_SetString(PyExc_OverflowError,
1467 : "repeated bytes are too long");
1468 0 : return NULL;
1469 : }
1470 42503 : op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + nbytes);
1471 42503 : if (op == NULL) {
1472 0 : return PyErr_NoMemory();
1473 : }
1474 42503 : _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
1475 : _Py_COMP_DIAG_PUSH
1476 : _Py_COMP_DIAG_IGNORE_DEPR_DECLS
1477 42503 : op->ob_shash = -1;
1478 : _Py_COMP_DIAG_POP
1479 42503 : op->ob_sval[size] = '\0';
1480 :
1481 42503 : _PyBytes_Repeat(op->ob_sval, size, a->ob_sval, Py_SIZE(a));
1482 :
1483 42503 : return (PyObject *) op;
1484 : }
1485 :
1486 : static int
1487 495256 : bytes_contains(PyObject *self, PyObject *arg)
1488 : {
1489 495256 : return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1490 : }
1491 :
1492 : static PyObject *
1493 116 : bytes_item(PyBytesObject *a, Py_ssize_t i)
1494 : {
1495 116 : if (i < 0 || i >= Py_SIZE(a)) {
1496 2 : PyErr_SetString(PyExc_IndexError, "index out of range");
1497 2 : return NULL;
1498 : }
1499 114 : return _PyLong_FromUnsignedChar((unsigned char)a->ob_sval[i]);
1500 : }
1501 :
1502 : static int
1503 7649540 : bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1504 : {
1505 : int cmp;
1506 : Py_ssize_t len;
1507 :
1508 7649540 : len = Py_SIZE(a);
1509 7649540 : if (Py_SIZE(b) != len)
1510 1309980 : return 0;
1511 :
1512 6339560 : if (a->ob_sval[0] != b->ob_sval[0])
1513 3462210 : return 0;
1514 :
1515 2877350 : cmp = memcmp(a->ob_sval, b->ob_sval, len);
1516 2877350 : return (cmp == 0);
1517 : }
1518 :
1519 : static PyObject*
1520 7884940 : bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1521 : {
1522 : int c;
1523 : Py_ssize_t len_a, len_b;
1524 : Py_ssize_t min_len;
1525 :
1526 : /* Make sure both arguments are strings. */
1527 7884940 : if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1528 79783 : if (_Py_GetConfig()->bytes_warning && (op == Py_EQ || op == Py_NE)) {
1529 79744 : if (PyUnicode_Check(a) || PyUnicode_Check(b)) {
1530 448 : if (PyErr_WarnEx(PyExc_BytesWarning,
1531 : "Comparison between bytes and string", 1))
1532 1 : return NULL;
1533 : }
1534 79743 : if (PyLong_Check(a) || PyLong_Check(b)) {
1535 28 : if (PyErr_WarnEx(PyExc_BytesWarning,
1536 : "Comparison between bytes and int", 1))
1537 0 : return NULL;
1538 : }
1539 : }
1540 79782 : Py_RETURN_NOTIMPLEMENTED;
1541 : }
1542 7805160 : else if (a == b) {
1543 112069 : switch (op) {
1544 24376 : case Py_EQ:
1545 : case Py_LE:
1546 : case Py_GE:
1547 : /* a byte string is equal to itself */
1548 24376 : Py_RETURN_TRUE;
1549 87693 : case Py_NE:
1550 : case Py_LT:
1551 : case Py_GT:
1552 87693 : Py_RETURN_FALSE;
1553 0 : default:
1554 0 : PyErr_BadArgument();
1555 0 : return NULL;
1556 : }
1557 : }
1558 7693090 : else if (op == Py_EQ || op == Py_NE) {
1559 7649540 : int eq = bytes_compare_eq(a, b);
1560 7649540 : eq ^= (op == Py_NE);
1561 7649540 : return PyBool_FromLong(eq);
1562 : }
1563 : else {
1564 43556 : len_a = Py_SIZE(a);
1565 43556 : len_b = Py_SIZE(b);
1566 43556 : min_len = Py_MIN(len_a, len_b);
1567 43556 : if (min_len > 0) {
1568 43522 : c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1569 43522 : if (c == 0)
1570 32260 : c = memcmp(a->ob_sval, b->ob_sval, min_len);
1571 : }
1572 : else
1573 34 : c = 0;
1574 43556 : if (c != 0)
1575 39588 : Py_RETURN_RICHCOMPARE(c, 0, op);
1576 3968 : Py_RETURN_RICHCOMPARE(len_a, len_b, op);
1577 : }
1578 : }
1579 :
1580 : static Py_hash_t
1581 4219360 : bytes_hash(PyBytesObject *a)
1582 : {
1583 : _Py_COMP_DIAG_PUSH
1584 : _Py_COMP_DIAG_IGNORE_DEPR_DECLS
1585 4219360 : if (a->ob_shash == -1) {
1586 : /* Can't fail */
1587 2742220 : a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1588 : }
1589 4219360 : return a->ob_shash;
1590 : _Py_COMP_DIAG_POP
1591 : }
1592 :
1593 : static PyObject*
1594 7012000 : bytes_subscript(PyBytesObject* self, PyObject* item)
1595 : {
1596 7012000 : if (_PyIndex_Check(item)) {
1597 3058400 : Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1598 3058400 : if (i == -1 && PyErr_Occurred())
1599 6 : return NULL;
1600 3058400 : if (i < 0)
1601 5016 : i += PyBytes_GET_SIZE(self);
1602 3058400 : if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1603 20 : PyErr_SetString(PyExc_IndexError,
1604 : "index out of range");
1605 20 : return NULL;
1606 : }
1607 3058380 : return _PyLong_FromUnsignedChar((unsigned char)self->ob_sval[i]);
1608 : }
1609 3953590 : else if (PySlice_Check(item)) {
1610 : Py_ssize_t start, stop, step, slicelength, i;
1611 : size_t cur;
1612 : const char* source_buf;
1613 : char* result_buf;
1614 : PyObject* result;
1615 :
1616 3953590 : if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1617 2 : return NULL;
1618 : }
1619 3953590 : slicelength = PySlice_AdjustIndices(PyBytes_GET_SIZE(self), &start,
1620 : &stop, step);
1621 :
1622 3953590 : if (slicelength <= 0) {
1623 340650 : return PyBytes_FromStringAndSize("", 0);
1624 : }
1625 4355810 : else if (start == 0 && step == 1 &&
1626 858204 : slicelength == PyBytes_GET_SIZE(self) &&
1627 115331 : PyBytes_CheckExact(self)) {
1628 115331 : Py_INCREF(self);
1629 115331 : return (PyObject *)self;
1630 : }
1631 3497610 : else if (step == 1) {
1632 3482340 : return PyBytes_FromStringAndSize(
1633 3482340 : PyBytes_AS_STRING(self) + start,
1634 : slicelength);
1635 : }
1636 : else {
1637 15263 : source_buf = PyBytes_AS_STRING(self);
1638 15263 : result = PyBytes_FromStringAndSize(NULL, slicelength);
1639 15263 : if (result == NULL)
1640 0 : return NULL;
1641 :
1642 15263 : result_buf = PyBytes_AS_STRING(result);
1643 200366 : for (cur = start, i = 0; i < slicelength;
1644 185103 : cur += step, i++) {
1645 185103 : result_buf[i] = source_buf[cur];
1646 : }
1647 :
1648 15263 : return result;
1649 : }
1650 : }
1651 : else {
1652 4 : PyErr_Format(PyExc_TypeError,
1653 : "byte indices must be integers or slices, not %.200s",
1654 4 : Py_TYPE(item)->tp_name);
1655 4 : return NULL;
1656 : }
1657 : }
1658 :
1659 : static int
1660 18835900 : bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1661 : {
1662 18835900 : return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1663 : 1, flags);
1664 : }
1665 :
1666 : static PySequenceMethods bytes_as_sequence = {
1667 : (lenfunc)bytes_length, /*sq_length*/
1668 : (binaryfunc)bytes_concat, /*sq_concat*/
1669 : (ssizeargfunc)bytes_repeat, /*sq_repeat*/
1670 : (ssizeargfunc)bytes_item, /*sq_item*/
1671 : 0, /*sq_slice*/
1672 : 0, /*sq_ass_item*/
1673 : 0, /*sq_ass_slice*/
1674 : (objobjproc)bytes_contains /*sq_contains*/
1675 : };
1676 :
1677 : static PyMappingMethods bytes_as_mapping = {
1678 : (lenfunc)bytes_length,
1679 : (binaryfunc)bytes_subscript,
1680 : 0,
1681 : };
1682 :
1683 : static PyBufferProcs bytes_as_buffer = {
1684 : (getbufferproc)bytes_buffer_getbuffer,
1685 : NULL,
1686 : };
1687 :
1688 :
1689 : /*[clinic input]
1690 : bytes.__bytes__
1691 : Convert this value to exact type bytes.
1692 : [clinic start generated code]*/
1693 :
1694 : static PyObject *
1695 115419 : bytes___bytes___impl(PyBytesObject *self)
1696 : /*[clinic end generated code: output=63a306a9bc0caac5 input=34ec5ddba98bd6bb]*/
1697 : {
1698 115419 : if (PyBytes_CheckExact(self)) {
1699 110771 : Py_INCREF(self);
1700 110771 : return (PyObject *)self;
1701 : }
1702 : else {
1703 4648 : return PyBytes_FromStringAndSize(self->ob_sval, Py_SIZE(self));
1704 : }
1705 : }
1706 :
1707 :
1708 : #define LEFTSTRIP 0
1709 : #define RIGHTSTRIP 1
1710 : #define BOTHSTRIP 2
1711 :
1712 : /*[clinic input]
1713 : bytes.split
1714 :
1715 : sep: object = None
1716 : The delimiter according which to split the bytes.
1717 : None (the default value) means split on ASCII whitespace characters
1718 : (space, tab, return, newline, formfeed, vertical tab).
1719 : maxsplit: Py_ssize_t = -1
1720 : Maximum number of splits to do.
1721 : -1 (the default value) means no limit.
1722 :
1723 : Return a list of the sections in the bytes, using sep as the delimiter.
1724 : [clinic start generated code]*/
1725 :
1726 : static PyObject *
1727 7991 : bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1728 : /*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1729 : {
1730 7991 : Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1731 7991 : const char *s = PyBytes_AS_STRING(self), *sub;
1732 : Py_buffer vsub;
1733 : PyObject *list;
1734 :
1735 7991 : if (maxsplit < 0)
1736 3322 : maxsplit = PY_SSIZE_T_MAX;
1737 7991 : if (sep == Py_None)
1738 122 : return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1739 7869 : if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1740 7 : return NULL;
1741 7862 : sub = vsub.buf;
1742 7862 : n = vsub.len;
1743 :
1744 7862 : list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1745 7862 : PyBuffer_Release(&vsub);
1746 7862 : return list;
1747 : }
1748 :
1749 : /*[clinic input]
1750 : bytes.partition
1751 :
1752 : sep: Py_buffer
1753 : /
1754 :
1755 : Partition the bytes into three parts using the given separator.
1756 :
1757 : This will search for the separator sep in the bytes. If the separator is found,
1758 : returns a 3-tuple containing the part before the separator, the separator
1759 : itself, and the part after it.
1760 :
1761 : If the separator is not found, returns a 3-tuple containing the original bytes
1762 : object and two empty bytes objects.
1763 : [clinic start generated code]*/
1764 :
1765 : static PyObject *
1766 889 : bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1767 : /*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1768 : {
1769 889 : return stringlib_partition(
1770 : (PyObject*) self,
1771 889 : PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1772 889 : sep->obj, (const char *)sep->buf, sep->len
1773 : );
1774 : }
1775 :
1776 : /*[clinic input]
1777 : bytes.rpartition
1778 :
1779 : sep: Py_buffer
1780 : /
1781 :
1782 : Partition the bytes into three parts using the given separator.
1783 :
1784 : This will search for the separator sep in the bytes, starting at the end. If
1785 : the separator is found, returns a 3-tuple containing the part before the
1786 : separator, the separator itself, and the part after it.
1787 :
1788 : If the separator is not found, returns a 3-tuple containing two empty bytes
1789 : objects and the original bytes object.
1790 : [clinic start generated code]*/
1791 :
1792 : static PyObject *
1793 341 : bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1794 : /*[clinic end generated code: output=191b114cbb028e50 input=d78db010c8cfdbe1]*/
1795 : {
1796 341 : return stringlib_rpartition(
1797 : (PyObject*) self,
1798 341 : PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1799 341 : sep->obj, (const char *)sep->buf, sep->len
1800 : );
1801 : }
1802 :
1803 : /*[clinic input]
1804 : bytes.rsplit = bytes.split
1805 :
1806 : Return a list of the sections in the bytes, using sep as the delimiter.
1807 :
1808 : Splitting is done starting at the end of the bytes and working to the front.
1809 : [clinic start generated code]*/
1810 :
1811 : static PyObject *
1812 87 : bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1813 : /*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1814 : {
1815 87 : Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1816 87 : const char *s = PyBytes_AS_STRING(self), *sub;
1817 : Py_buffer vsub;
1818 : PyObject *list;
1819 :
1820 87 : if (maxsplit < 0)
1821 42 : maxsplit = PY_SSIZE_T_MAX;
1822 87 : if (sep == Py_None)
1823 39 : return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1824 48 : if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1825 2 : return NULL;
1826 46 : sub = vsub.buf;
1827 46 : n = vsub.len;
1828 :
1829 46 : list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1830 46 : PyBuffer_Release(&vsub);
1831 46 : return list;
1832 : }
1833 :
1834 :
1835 : /*[clinic input]
1836 : bytes.join
1837 :
1838 : iterable_of_bytes: object
1839 : /
1840 :
1841 : Concatenate any number of bytes objects.
1842 :
1843 : The bytes whose method is called is inserted in between each pair.
1844 :
1845 : The result is returned as a new bytes object.
1846 :
1847 : Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1848 : [clinic start generated code]*/
1849 :
1850 : static PyObject *
1851 73431 : bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1852 : /*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1853 : {
1854 73431 : return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1855 : }
1856 :
1857 : PyObject *
1858 25606 : _PyBytes_Join(PyObject *sep, PyObject *x)
1859 : {
1860 25606 : assert(sep != NULL && PyBytes_Check(sep));
1861 25606 : assert(x != NULL);
1862 25606 : return bytes_join((PyBytesObject*)sep, x);
1863 : }
1864 :
1865 : static PyObject *
1866 360579 : bytes_find(PyBytesObject *self, PyObject *args)
1867 : {
1868 360579 : return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1869 : }
1870 :
1871 : static PyObject *
1872 92 : bytes_index(PyBytesObject *self, PyObject *args)
1873 : {
1874 92 : return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1875 : }
1876 :
1877 :
1878 : static PyObject *
1879 138938 : bytes_rfind(PyBytesObject *self, PyObject *args)
1880 : {
1881 138938 : return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1882 : }
1883 :
1884 :
1885 : static PyObject *
1886 47 : bytes_rindex(PyBytesObject *self, PyObject *args)
1887 : {
1888 47 : return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1889 : }
1890 :
1891 :
1892 : Py_LOCAL_INLINE(PyObject *)
1893 6407 : do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1894 : {
1895 : Py_buffer vsep;
1896 6407 : const char *s = PyBytes_AS_STRING(self);
1897 6407 : Py_ssize_t len = PyBytes_GET_SIZE(self);
1898 : char *sep;
1899 : Py_ssize_t seplen;
1900 : Py_ssize_t i, j;
1901 :
1902 6407 : if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1903 6 : return NULL;
1904 6401 : sep = vsep.buf;
1905 6401 : seplen = vsep.len;
1906 :
1907 6401 : i = 0;
1908 6401 : if (striptype != RIGHTSTRIP) {
1909 31878 : while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1910 31134 : i++;
1911 : }
1912 : }
1913 :
1914 6401 : j = len;
1915 6401 : if (striptype != LEFTSTRIP) {
1916 : do {
1917 25651 : j--;
1918 25651 : } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1919 6272 : j++;
1920 : }
1921 :
1922 6401 : PyBuffer_Release(&vsep);
1923 :
1924 6401 : if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1925 598 : Py_INCREF(self);
1926 598 : return (PyObject*)self;
1927 : }
1928 : else
1929 5803 : return PyBytes_FromStringAndSize(s+i, j-i);
1930 : }
1931 :
1932 :
1933 : Py_LOCAL_INLINE(PyObject *)
1934 14080 : do_strip(PyBytesObject *self, int striptype)
1935 : {
1936 14080 : const char *s = PyBytes_AS_STRING(self);
1937 14080 : Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1938 :
1939 14080 : i = 0;
1940 14080 : if (striptype != RIGHTSTRIP) {
1941 127758 : while (i < len && Py_ISSPACE(s[i])) {
1942 113890 : i++;
1943 : }
1944 : }
1945 :
1946 14080 : j = len;
1947 14080 : if (striptype != LEFTSTRIP) {
1948 : do {
1949 50958 : j--;
1950 50958 : } while (j >= i && Py_ISSPACE(s[j]));
1951 14072 : j++;
1952 : }
1953 :
1954 14080 : if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1955 159 : Py_INCREF(self);
1956 159 : return (PyObject*)self;
1957 : }
1958 : else
1959 13921 : return PyBytes_FromStringAndSize(s+i, j-i);
1960 : }
1961 :
1962 :
1963 : Py_LOCAL_INLINE(PyObject *)
1964 20487 : do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
1965 : {
1966 20487 : if (bytes != Py_None) {
1967 6407 : return do_xstrip(self, striptype, bytes);
1968 : }
1969 14080 : return do_strip(self, striptype);
1970 : }
1971 :
1972 : /*[clinic input]
1973 : bytes.strip
1974 :
1975 : bytes: object = None
1976 : /
1977 :
1978 : Strip leading and trailing bytes contained in the argument.
1979 :
1980 : If the argument is omitted or None, strip leading and trailing ASCII whitespace.
1981 : [clinic start generated code]*/
1982 :
1983 : static PyObject *
1984 14477 : bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
1985 : /*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
1986 : {
1987 14477 : return do_argstrip(self, BOTHSTRIP, bytes);
1988 : }
1989 :
1990 : /*[clinic input]
1991 : bytes.lstrip
1992 :
1993 : bytes: object = None
1994 : /
1995 :
1996 : Strip leading bytes contained in the argument.
1997 :
1998 : If the argument is omitted or None, strip leading ASCII whitespace.
1999 : [clinic start generated code]*/
2000 :
2001 : static PyObject *
2002 139 : bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
2003 : /*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
2004 : {
2005 139 : return do_argstrip(self, LEFTSTRIP, bytes);
2006 : }
2007 :
2008 : /*[clinic input]
2009 : bytes.rstrip
2010 :
2011 : bytes: object = None
2012 : /
2013 :
2014 : Strip trailing bytes contained in the argument.
2015 :
2016 : If the argument is omitted or None, strip trailing ASCII whitespace.
2017 : [clinic start generated code]*/
2018 :
2019 : static PyObject *
2020 5871 : bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
2021 : /*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
2022 : {
2023 5871 : return do_argstrip(self, RIGHTSTRIP, bytes);
2024 : }
2025 :
2026 :
2027 : static PyObject *
2028 79830 : bytes_count(PyBytesObject *self, PyObject *args)
2029 : {
2030 79830 : return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2031 : }
2032 :
2033 :
2034 : /*[clinic input]
2035 : bytes.translate
2036 :
2037 : table: object
2038 : Translation table, which must be a bytes object of length 256.
2039 : /
2040 : delete as deletechars: object(c_default="NULL") = b''
2041 :
2042 : Return a copy with each character mapped by the given translation table.
2043 :
2044 : All characters occurring in the optional argument delete are removed.
2045 : The remaining characters are mapped through the given translation table.
2046 : [clinic start generated code]*/
2047 :
static PyObject *
bytes_translate_impl(PyBytesObject *self, PyObject *table,
                     PyObject *deletechars)
/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
{
    /* Overview:
         1. Resolve `table` to 256 bytes (or NULL for the identity map).
         2. Resolve `deletechars` to a byte set (possibly empty).
         3. Fast path: a real table and nothing to delete -> map 1:1.
         4. Slow path: build an int table where -1 marks deleted bytes,
            then copy/map the surviving bytes and shrink the result.
       When no byte changed and self is an exact bytes object, self is
       returned instead of the freshly built copy. */
    const char *input;
    char *output;
    /* Both views start out with NULL buf/obj so the PyBuffer_Release()
       calls below can be made on every path, including those where no
       buffer was acquired (presumably a no-op then -- TODO confirm). */
    Py_buffer table_view = {NULL, NULL};
    Py_buffer del_table_view = {NULL, NULL};
    const char *table_chars;
    Py_ssize_t i, c, changed = 0;
    PyObject *input_obj = (PyObject*)self;
    const char *output_start, *del_table_chars=NULL;
    Py_ssize_t inlen, tablen, dellen = 0;
    PyObject *result;
    int trans_table[256];

    /* Step 1: the table may be a bytes object (read directly), None
       (identity mapping, represented by table_chars == NULL) or any
       object exporting a simple buffer. */
    if (PyBytes_Check(table)) {
        table_chars = PyBytes_AS_STRING(table);
        tablen = PyBytes_GET_SIZE(table);
    }
    else if (table == Py_None) {
        table_chars = NULL;
        tablen = 256;
    }
    else {
        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
            return NULL;
        table_chars = table_view.buf;
        tablen = table_view.len;
    }

    if (tablen != 256) {
        PyErr_SetString(PyExc_ValueError,
              "translation table must be 256 characters long");
        PyBuffer_Release(&table_view);
        return NULL;
    }

    /* Step 2: deletechars is NULL when the argument was omitted (see the
       c_default in the clinic input). */
    if (deletechars != NULL) {
        if (PyBytes_Check(deletechars)) {
            del_table_chars = PyBytes_AS_STRING(deletechars);
            dellen = PyBytes_GET_SIZE(deletechars);
        }
        else {
            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
                PyBuffer_Release(&table_view);
                return NULL;
            }
            del_table_chars = del_table_view.buf;
            dellen = del_table_view.len;
        }
    }
    else {
        del_table_chars = NULL;
        dellen = 0;
    }

    /* The output can never be longer than the input, so allocate inlen
       bytes up front and shrink at the end if bytes were deleted. */
    inlen = PyBytes_GET_SIZE(input_obj);
    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
    if (result == NULL) {
        PyBuffer_Release(&del_table_view);
        PyBuffer_Release(&table_view);
        return NULL;
    }
    output_start = output = PyBytes_AS_STRING(result);
    input = PyBytes_AS_STRING(input_obj);

    /* Step 3: fast path. */
    if (dellen == 0 && table_chars != NULL) {
        /* If no deletions are required, use faster code */
        for (i = inlen; --i >= 0; ) {
            c = Py_CHARMASK(*input++);
            /* Store the mapped byte and note whether it differs. */
            if (Py_CHARMASK((*output++ = table_chars[c])) != c)
                changed = 1;
        }
        if (!changed && PyBytes_CheckExact(input_obj)) {
            /* Nothing changed: return the input and drop the copy. */
            Py_INCREF(input_obj);
            Py_DECREF(result);
            result = input_obj;
        }
        PyBuffer_Release(&del_table_view);
        PyBuffer_Release(&table_view);
        return result;
    }

    /* Step 4: slow path.  Widen the table to int so that -1 can serve as
       the "delete this byte" marker. */
    if (table_chars == NULL) {
        for (i = 0; i < 256; i++)
            trans_table[i] = Py_CHARMASK(i);
    } else {
        for (i = 0; i < 256; i++)
            trans_table[i] = Py_CHARMASK(table_chars[i]);
    }
    /* table_chars is not read past this point. */
    PyBuffer_Release(&table_view);

    for (i = 0; i < dellen; i++)
        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
    /* del_table_chars is not read past this point. */
    PyBuffer_Release(&del_table_view);

    for (i = inlen; --i >= 0; ) {
        c = Py_CHARMASK(*input++);
        /* A surviving byte is written to the output; only when it also
           maps to itself does `continue` skip the changed flag.  Deleted
           bytes and bytes mapped to a different value both fall through
           to `changed = 1`. */
        if (trans_table[c] != -1)
            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
                continue;
        changed = 1;
    }
    if (!changed && PyBytes_CheckExact(input_obj)) {
        Py_DECREF(result);
        Py_INCREF(input_obj);
        return input_obj;
    }
    /* Fix the size of the resulting byte string */
    /* NOTE(review): the return value of _PyBytes_Resize() is not checked
       here; result is returned as the call left it -- confirm it is NULL
       with an exception set when resizing fails. */
    if (inlen > 0)
        _PyBytes_Resize(&result, output - output_start);
    return result;
}
2163 :
2164 :
2165 : /*[clinic input]
2166 :
2167 : @staticmethod
2168 : bytes.maketrans
2169 :
2170 : frm: Py_buffer
2171 : to: Py_buffer
2172 : /
2173 :
2174 : Return a translation table useable for the bytes or bytearray translate method.
2175 :
2176 : The returned table will be one where each byte in frm is mapped to the byte at
2177 : the same position in to.
2178 :
2179 : The bytes objects frm and to must be of the same length.
2180 : [clinic start generated code]*/
2181 :
2182 : static PyObject *
2183 978 : bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
2184 : /*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
2185 : {
2186 978 : return _Py_bytes_maketrans(frm, to);
2187 : }
2188 :
2189 :
2190 : /*[clinic input]
2191 : bytes.replace
2192 :
2193 : old: Py_buffer
2194 : new: Py_buffer
2195 : count: Py_ssize_t = -1
2196 : Maximum number of occurrences to replace.
2197 : -1 (the default value) means replace all occurrences.
2198 : /
2199 :
2200 : Return a copy with all occurrences of substring old replaced by new.
2201 :
2202 : If the optional argument count is given, only the first count occurrences are
2203 : replaced.
2204 : [clinic start generated code]*/
2205 :
2206 : static PyObject *
2207 108215 : bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
2208 : Py_ssize_t count)
2209 : /*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
2210 : {
2211 216430 : return stringlib_replace((PyObject *)self,
2212 108215 : (const char *)old->buf, old->len,
2213 108215 : (const char *)new->buf, new->len, count);
2214 : }
2215 :
2216 : /** End DALKE **/
2217 :
2218 : /*[clinic input]
2219 : bytes.removeprefix as bytes_removeprefix
2220 :
2221 : prefix: Py_buffer
2222 : /
2223 :
2224 : Return a bytes object with the given prefix string removed if present.
2225 :
2226 : If the bytes starts with the prefix string, return bytes[len(prefix):].
2227 : Otherwise, return a copy of the original bytes.
2228 : [clinic start generated code]*/
2229 :
2230 : static PyObject *
2231 27 : bytes_removeprefix_impl(PyBytesObject *self, Py_buffer *prefix)
2232 : /*[clinic end generated code: output=f006865331a06ab6 input=0c93bac817a8502c]*/
2233 : {
2234 27 : const char *self_start = PyBytes_AS_STRING(self);
2235 27 : Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2236 27 : const char *prefix_start = prefix->buf;
2237 27 : Py_ssize_t prefix_len = prefix->len;
2238 :
2239 27 : if (self_len >= prefix_len
2240 19 : && prefix_len > 0
2241 15 : && memcmp(self_start, prefix_start, prefix_len) == 0)
2242 : {
2243 15 : return PyBytes_FromStringAndSize(self_start + prefix_len,
2244 : self_len - prefix_len);
2245 : }
2246 :
2247 12 : if (PyBytes_CheckExact(self)) {
2248 6 : Py_INCREF(self);
2249 6 : return (PyObject *)self;
2250 : }
2251 :
2252 6 : return PyBytes_FromStringAndSize(self_start, self_len);
2253 : }
2254 :
2255 : /*[clinic input]
2256 : bytes.removesuffix as bytes_removesuffix
2257 :
2258 : suffix: Py_buffer
2259 : /
2260 :
2261 : Return a bytes object with the given suffix string removed if present.
2262 :
2263 : If the bytes ends with the suffix string and that suffix is not empty,
2264 : return bytes[:-len(prefix)]. Otherwise, return a copy of the original
2265 : bytes.
2266 : [clinic start generated code]*/
2267 :
2268 : static PyObject *
2269 15 : bytes_removesuffix_impl(PyBytesObject *self, Py_buffer *suffix)
2270 : /*[clinic end generated code: output=d887d308e3242eeb input=9f4e1da8c637bbf1]*/
2271 : {
2272 15 : const char *self_start = PyBytes_AS_STRING(self);
2273 15 : Py_ssize_t self_len = PyBytes_GET_SIZE(self);
2274 15 : const char *suffix_start = suffix->buf;
2275 15 : Py_ssize_t suffix_len = suffix->len;
2276 :
2277 15 : if (self_len >= suffix_len
2278 9 : && suffix_len > 0
2279 5 : && memcmp(self_start + self_len - suffix_len,
2280 : suffix_start, suffix_len) == 0)
2281 : {
2282 3 : return PyBytes_FromStringAndSize(self_start,
2283 : self_len - suffix_len);
2284 : }
2285 :
2286 12 : if (PyBytes_CheckExact(self)) {
2287 6 : Py_INCREF(self);
2288 6 : return (PyObject *)self;
2289 : }
2290 :
2291 6 : return PyBytes_FromStringAndSize(self_start, self_len);
2292 : }
2293 :
2294 : static PyObject *
2295 60153 : bytes_startswith(PyBytesObject *self, PyObject *args)
2296 : {
2297 60153 : return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2298 : }
2299 :
2300 : static PyObject *
2301 129760 : bytes_endswith(PyBytesObject *self, PyObject *args)
2302 : {
2303 129760 : return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
2304 : }
2305 :
2306 :
2307 : /*[clinic input]
2308 : bytes.decode
2309 :
2310 : encoding: str(c_default="NULL") = 'utf-8'
2311 : The encoding with which to decode the bytes.
2312 : errors: str(c_default="NULL") = 'strict'
2313 : The error handling scheme to use for the handling of decoding errors.
2314 : The default is 'strict' meaning that decoding errors raise a
2315 : UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2316 : as well as any other name registered with codecs.register_error that
2317 : can handle UnicodeDecodeErrors.
2318 :
2319 : Decode the bytes using the codec registered for encoding.
2320 : [clinic start generated code]*/
2321 :
2322 : static PyObject *
2323 7345400 : bytes_decode_impl(PyBytesObject *self, const char *encoding,
2324 : const char *errors)
2325 : /*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
2326 : {
2327 7345400 : return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
2328 : }
2329 :
2330 :
2331 : /*[clinic input]
2332 : bytes.splitlines
2333 :
2334 : keepends: bool(accept={int}) = False
2335 :
2336 : Return a list of the lines in the bytes, breaking at line boundaries.
2337 :
2338 : Line breaks are not included in the resulting list unless keepends is given and
2339 : true.
2340 : [clinic start generated code]*/
2341 :
2342 : static PyObject *
2343 1868 : bytes_splitlines_impl(PyBytesObject *self, int keepends)
2344 : /*[clinic end generated code: output=3484149a5d880ffb input=a8b32eb01ff5a5ed]*/
2345 : {
2346 3736 : return stringlib_splitlines(
2347 1868 : (PyObject*) self, PyBytes_AS_STRING(self),
2348 : PyBytes_GET_SIZE(self), keepends
2349 : );
2350 : }
2351 :
2352 : /*[clinic input]
2353 : @classmethod
2354 : bytes.fromhex
2355 :
2356 : string: unicode
2357 : /
2358 :
2359 : Create a bytes object from a string of hexadecimal numbers.
2360 :
2361 : Spaces between two numbers are accepted.
2362 : Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2363 : [clinic start generated code]*/
2364 :
2365 : static PyObject *
2366 200208 : bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2367 : /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2368 : {
2369 200208 : PyObject *result = _PyBytes_FromHex(string, 0);
2370 200208 : if (type != &PyBytes_Type && result != NULL) {
2371 3 : Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result));
2372 : }
2373 200208 : return result;
2374 : }
2375 :
2376 : PyObject*
2377 200243 : _PyBytes_FromHex(PyObject *string, int use_bytearray)
2378 : {
2379 : char *buf;
2380 : Py_ssize_t hexlen, invalid_char;
2381 : unsigned int top, bot;
2382 : const Py_UCS1 *str, *end;
2383 : _PyBytesWriter writer;
2384 :
2385 200243 : _PyBytesWriter_Init(&writer);
2386 200243 : writer.use_bytearray = use_bytearray;
2387 :
2388 200243 : assert(PyUnicode_Check(string));
2389 200243 : if (PyUnicode_READY(string))
2390 0 : return NULL;
2391 200243 : hexlen = PyUnicode_GET_LENGTH(string);
2392 :
2393 200243 : if (!PyUnicode_IS_ASCII(string)) {
2394 12 : const void *data = PyUnicode_DATA(string);
2395 12 : int kind = PyUnicode_KIND(string);
2396 : Py_ssize_t i;
2397 :
2398 : /* search for the first non-ASCII character */
2399 20 : for (i = 0; i < hexlen; i++) {
2400 20 : if (PyUnicode_READ(kind, data, i) >= 128)
2401 12 : break;
2402 : }
2403 12 : invalid_char = i;
2404 12 : goto error;
2405 : }
2406 :
2407 200231 : assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2408 200231 : str = PyUnicode_1BYTE_DATA(string);
2409 :
2410 : /* This overestimates if there are spaces */
2411 200231 : buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2412 200231 : if (buf == NULL)
2413 0 : return NULL;
2414 :
2415 200231 : end = str + hexlen;
2416 1861340 : while (str < end) {
2417 : /* skip over spaces in the input */
2418 1661170 : if (Py_ISSPACE(*str)) {
2419 : do {
2420 46779 : str++;
2421 46779 : } while (Py_ISSPACE(*str));
2422 45748 : if (str >= end)
2423 35 : break;
2424 : }
2425 :
2426 1661140 : top = _PyLong_DigitValue[*str];
2427 1661140 : if (top >= 16) {
2428 18 : invalid_char = str - PyUnicode_1BYTE_DATA(string);
2429 18 : goto error;
2430 : }
2431 1661120 : str++;
2432 :
2433 1661120 : bot = _PyLong_DigitValue[*str];
2434 1661120 : if (bot >= 16) {
2435 6 : invalid_char = str - PyUnicode_1BYTE_DATA(string);
2436 6 : goto error;
2437 : }
2438 1661110 : str++;
2439 :
2440 1661110 : *buf++ = (unsigned char)((top << 4) + bot);
2441 : }
2442 :
2443 200207 : return _PyBytesWriter_Finish(&writer, buf);
2444 :
2445 36 : error:
2446 36 : PyErr_Format(PyExc_ValueError,
2447 : "non-hexadecimal number found in "
2448 : "fromhex() arg at position %zd", invalid_char);
2449 36 : _PyBytesWriter_Dealloc(&writer);
2450 36 : return NULL;
2451 : }
2452 :
2453 : /*[clinic input]
2454 : bytes.hex
2455 :
2456 : sep: object = NULL
2457 : An optional single character or byte to separate hex bytes.
2458 : bytes_per_sep: int = 1
2459 : How many bytes between separators. Positive values count from the
2460 : right, negative values count from the left.
2461 :
2462 : Create a string of hexadecimal numbers from a bytes object.
2463 :
2464 : Example:
2465 : >>> value = b'\xb9\x01\xef'
2466 : >>> value.hex()
2467 : 'b901ef'
2468 : >>> value.hex(':')
2469 : 'b9:01:ef'
2470 : >>> value.hex(':', 2)
2471 : 'b9:01ef'
2472 : >>> value.hex(':', -2)
2473 : 'b901:ef'
2474 : [clinic start generated code]*/
2475 :
2476 : static PyObject *
2477 58 : bytes_hex_impl(PyBytesObject *self, PyObject *sep, int bytes_per_sep)
2478 : /*[clinic end generated code: output=1f134da504064139 input=1a21282b1f1ae595]*/
2479 : {
2480 58 : const char *argbuf = PyBytes_AS_STRING(self);
2481 58 : Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2482 58 : return _Py_strhex_with_sep(argbuf, arglen, sep, bytes_per_sep);
2483 : }
2484 :
2485 : static PyObject *
2486 11 : bytes_getnewargs(PyBytesObject *v, PyObject *Py_UNUSED(ignored))
2487 : {
2488 11 : return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
2489 : }
2490 :
2491 :
2492 : static PyMethodDef
2493 : bytes_methods[] = {
2494 : {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS},
2495 : BYTES___BYTES___METHODDEF
2496 : {"capitalize", stringlib_capitalize, METH_NOARGS,
2497 : _Py_capitalize__doc__},
2498 : STRINGLIB_CENTER_METHODDEF
2499 : {"count", (PyCFunction)bytes_count, METH_VARARGS,
2500 : _Py_count__doc__},
2501 : BYTES_DECODE_METHODDEF
2502 : {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
2503 : _Py_endswith__doc__},
2504 : STRINGLIB_EXPANDTABS_METHODDEF
2505 : {"find", (PyCFunction)bytes_find, METH_VARARGS,
2506 : _Py_find__doc__},
2507 : BYTES_FROMHEX_METHODDEF
2508 : BYTES_HEX_METHODDEF
2509 : {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
2510 : {"isalnum", stringlib_isalnum, METH_NOARGS,
2511 : _Py_isalnum__doc__},
2512 : {"isalpha", stringlib_isalpha, METH_NOARGS,
2513 : _Py_isalpha__doc__},
2514 : {"isascii", stringlib_isascii, METH_NOARGS,
2515 : _Py_isascii__doc__},
2516 : {"isdigit", stringlib_isdigit, METH_NOARGS,
2517 : _Py_isdigit__doc__},
2518 : {"islower", stringlib_islower, METH_NOARGS,
2519 : _Py_islower__doc__},
2520 : {"isspace", stringlib_isspace, METH_NOARGS,
2521 : _Py_isspace__doc__},
2522 : {"istitle", stringlib_istitle, METH_NOARGS,
2523 : _Py_istitle__doc__},
2524 : {"isupper", stringlib_isupper, METH_NOARGS,
2525 : _Py_isupper__doc__},
2526 : BYTES_JOIN_METHODDEF
2527 : STRINGLIB_LJUST_METHODDEF
2528 : {"lower", stringlib_lower, METH_NOARGS, _Py_lower__doc__},
2529 : BYTES_LSTRIP_METHODDEF
2530 : BYTES_MAKETRANS_METHODDEF
2531 : BYTES_PARTITION_METHODDEF
2532 : BYTES_REPLACE_METHODDEF
2533 : BYTES_REMOVEPREFIX_METHODDEF
2534 : BYTES_REMOVESUFFIX_METHODDEF
2535 : {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
2536 : {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
2537 : STRINGLIB_RJUST_METHODDEF
2538 : BYTES_RPARTITION_METHODDEF
2539 : BYTES_RSPLIT_METHODDEF
2540 : BYTES_RSTRIP_METHODDEF
2541 : BYTES_SPLIT_METHODDEF
2542 : BYTES_SPLITLINES_METHODDEF
2543 : {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
2544 : _Py_startswith__doc__},
2545 : BYTES_STRIP_METHODDEF
2546 : {"swapcase", stringlib_swapcase, METH_NOARGS,
2547 : _Py_swapcase__doc__},
2548 : {"title", stringlib_title, METH_NOARGS, _Py_title__doc__},
2549 : BYTES_TRANSLATE_METHODDEF
2550 : {"upper", stringlib_upper, METH_NOARGS, _Py_upper__doc__},
2551 : STRINGLIB_ZFILL_METHODDEF
2552 : {NULL, NULL} /* sentinel */
2553 : };
2554 :
2555 : static PyObject *
2556 778 : bytes_mod(PyObject *self, PyObject *arg)
2557 : {
2558 778 : if (!PyBytes_Check(self)) {
2559 2 : Py_RETURN_NOTIMPLEMENTED;
2560 : }
2561 776 : return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2562 : arg, 0);
2563 : }
2564 :
2565 : static PyNumberMethods bytes_as_number = {
2566 : 0, /*nb_add*/
2567 : 0, /*nb_subtract*/
2568 : 0, /*nb_multiply*/
2569 : bytes_mod, /*nb_remainder*/
2570 : };
2571 :
2572 : static PyObject *
2573 : bytes_subtype_new(PyTypeObject *, PyObject *);
2574 :
2575 : /*[clinic input]
2576 : @classmethod
2577 : bytes.__new__ as bytes_new
2578 :
2579 : source as x: object = NULL
2580 : encoding: str = NULL
2581 : errors: str = NULL
2582 :
2583 : [clinic start generated code]*/
2584 :
2585 : static PyObject *
2586 450056 : bytes_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
2587 : const char *errors)
2588 : /*[clinic end generated code: output=1e0c471be311a425 input=f0a966d19b7262b4]*/
2589 : {
2590 : PyObject *bytes;
2591 : PyObject *func;
2592 : Py_ssize_t size;
2593 :
2594 450056 : if (x == NULL) {
2595 121 : if (encoding != NULL || errors != NULL) {
2596 2 : PyErr_SetString(PyExc_TypeError,
2597 : encoding != NULL ?
2598 : "encoding without a string argument" :
2599 : "errors without a string argument");
2600 2 : return NULL;
2601 : }
2602 119 : bytes = PyBytes_FromStringAndSize(NULL, 0);
2603 : }
2604 449935 : else if (encoding != NULL) {
2605 : /* Encode via the codec registry */
2606 37020 : if (!PyUnicode_Check(x)) {
2607 2 : PyErr_SetString(PyExc_TypeError,
2608 : "encoding without a string argument");
2609 2 : return NULL;
2610 : }
2611 37018 : bytes = PyUnicode_AsEncodedString(x, encoding, errors);
2612 : }
2613 412915 : else if (errors != NULL) {
2614 3 : PyErr_SetString(PyExc_TypeError,
2615 3 : PyUnicode_Check(x) ?
2616 : "string argument without an encoding" :
2617 : "errors without a string argument");
2618 3 : return NULL;
2619 : }
2620 : /* We'd like to call PyObject_Bytes here, but we need to check for an
2621 : integer argument before deferring to PyBytes_FromObject, something
2622 : PyObject_Bytes doesn't do. */
2623 412912 : else if ((func = _PyObject_LookupSpecial(x, &_Py_ID(__bytes__))) != NULL) {
2624 115567 : bytes = _PyObject_CallNoArgs(func);
2625 115567 : Py_DECREF(func);
2626 115567 : if (bytes == NULL)
2627 4 : return NULL;
2628 115563 : if (!PyBytes_Check(bytes)) {
2629 2 : PyErr_Format(PyExc_TypeError,
2630 : "__bytes__ returned non-bytes (type %.200s)",
2631 2 : Py_TYPE(bytes)->tp_name);
2632 2 : Py_DECREF(bytes);
2633 2 : return NULL;
2634 : }
2635 : }
2636 297345 : else if (PyErr_Occurred())
2637 1 : return NULL;
2638 297344 : else if (PyUnicode_Check(x)) {
2639 1 : PyErr_SetString(PyExc_TypeError,
2640 : "string argument without an encoding");
2641 1 : return NULL;
2642 : }
2643 : /* Is it an integer? */
2644 297343 : else if (_PyIndex_Check(x)) {
2645 133 : size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2646 133 : if (size == -1 && PyErr_Occurred()) {
2647 2 : if (!PyErr_ExceptionMatches(PyExc_TypeError))
2648 2 : return NULL;
2649 0 : PyErr_Clear(); /* fall through */
2650 0 : bytes = PyBytes_FromObject(x);
2651 : }
2652 : else {
2653 131 : if (size < 0) {
2654 1 : PyErr_SetString(PyExc_ValueError, "negative count");
2655 1 : return NULL;
2656 : }
2657 130 : bytes = _PyBytes_FromSize(size, 1);
2658 : }
2659 : }
2660 : else {
2661 297210 : bytes = PyBytes_FromObject(x);
2662 : }
2663 :
2664 450038 : if (bytes != NULL && type != &PyBytes_Type) {
2665 3463 : Py_SETREF(bytes, bytes_subtype_new(type, bytes));
2666 : }
2667 :
2668 450038 : return bytes;
2669 : }
2670 :
2671 : static PyObject*
2672 160816 : _PyBytes_FromBuffer(PyObject *x)
2673 : {
2674 : PyObject *new;
2675 : Py_buffer view;
2676 :
2677 160816 : if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2678 27 : return NULL;
2679 :
2680 160789 : new = PyBytes_FromStringAndSize(NULL, view.len);
2681 160789 : if (!new)
2682 0 : goto fail;
2683 160789 : if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2684 : &view, view.len, 'C') < 0)
2685 0 : goto fail;
2686 160789 : PyBuffer_Release(&view);
2687 160789 : return new;
2688 :
2689 0 : fail:
2690 0 : Py_XDECREF(new);
2691 0 : PyBuffer_Release(&view);
2692 0 : return NULL;
2693 : }
2694 :
2695 : static PyObject*
2696 128725 : _PyBytes_FromList(PyObject *x)
2697 : {
2698 128725 : Py_ssize_t i, size = PyList_GET_SIZE(x);
2699 : Py_ssize_t value;
2700 : char *str;
2701 : PyObject *item;
2702 : _PyBytesWriter writer;
2703 :
2704 128725 : _PyBytesWriter_Init(&writer);
2705 128725 : str = _PyBytesWriter_Alloc(&writer, size);
2706 128725 : if (str == NULL)
2707 0 : return NULL;
2708 128725 : writer.overallocate = 1;
2709 128725 : size = writer.allocated;
2710 :
2711 625809 : for (i = 0; i < PyList_GET_SIZE(x); i++) {
2712 497102 : item = PyList_GET_ITEM(x, i);
2713 497102 : Py_INCREF(item);
2714 497102 : value = PyNumber_AsSsize_t(item, NULL);
2715 497102 : Py_DECREF(item);
2716 497102 : if (value == -1 && PyErr_Occurred())
2717 5 : goto error;
2718 :
2719 497097 : if (value < 0 || value >= 256) {
2720 13 : PyErr_SetString(PyExc_ValueError,
2721 : "bytes must be in range(0, 256)");
2722 13 : goto error;
2723 : }
2724 :
2725 497084 : if (i >= size) {
2726 40 : str = _PyBytesWriter_Resize(&writer, str, size+1);
2727 40 : if (str == NULL)
2728 0 : return NULL;
2729 40 : size = writer.allocated;
2730 : }
2731 497084 : *str++ = (char) value;
2732 : }
2733 128707 : return _PyBytesWriter_Finish(&writer, str);
2734 :
2735 18 : error:
2736 18 : _PyBytesWriter_Dealloc(&writer);
2737 18 : return NULL;
2738 : }
2739 :
2740 : static PyObject*
2741 4458 : _PyBytes_FromTuple(PyObject *x)
2742 : {
2743 : PyObject *bytes;
2744 4458 : Py_ssize_t i, size = PyTuple_GET_SIZE(x);
2745 : Py_ssize_t value;
2746 : char *str;
2747 : PyObject *item;
2748 :
2749 4458 : bytes = PyBytes_FromStringAndSize(NULL, size);
2750 4458 : if (bytes == NULL)
2751 0 : return NULL;
2752 4458 : str = ((PyBytesObject *)bytes)->ob_sval;
2753 :
2754 450536 : for (i = 0; i < size; i++) {
2755 446078 : item = PyTuple_GET_ITEM(x, i);
2756 446078 : value = PyNumber_AsSsize_t(item, NULL);
2757 446078 : if (value == -1 && PyErr_Occurred())
2758 0 : goto error;
2759 :
2760 446078 : if (value < 0 || value >= 256) {
2761 0 : PyErr_SetString(PyExc_ValueError,
2762 : "bytes must be in range(0, 256)");
2763 0 : goto error;
2764 : }
2765 446078 : *str++ = (char) value;
2766 : }
2767 4458 : return bytes;
2768 :
2769 0 : error:
2770 0 : Py_DECREF(bytes);
2771 0 : return NULL;
2772 : }
2773 :
2774 : static PyObject *
2775 5079 : _PyBytes_FromIterator(PyObject *it, PyObject *x)
2776 : {
2777 : char *str;
2778 : Py_ssize_t i, size;
2779 : _PyBytesWriter writer;
2780 :
2781 : /* For iterator version, create a bytes object and resize as needed */
2782 5079 : size = PyObject_LengthHint(x, 64);
2783 5079 : if (size == -1 && PyErr_Occurred())
2784 0 : return NULL;
2785 :
2786 5079 : _PyBytesWriter_Init(&writer);
2787 5079 : str = _PyBytesWriter_Alloc(&writer, size);
2788 5079 : if (str == NULL)
2789 0 : return NULL;
2790 5079 : writer.overallocate = 1;
2791 5079 : size = writer.allocated;
2792 :
2793 : /* Run the iterator to exhaustion */
2794 5079 : for (i = 0; ; i++) {
2795 : PyObject *item;
2796 : Py_ssize_t value;
2797 :
2798 : /* Get the next item */
2799 6284580 : item = PyIter_Next(it);
2800 6284580 : if (item == NULL) {
2801 5077 : if (PyErr_Occurred())
2802 64 : goto error;
2803 5013 : break;
2804 : }
2805 :
2806 : /* Interpret it as an int (__index__) */
2807 6279500 : value = PyNumber_AsSsize_t(item, NULL);
2808 6279500 : Py_DECREF(item);
2809 6279500 : if (value == -1 && PyErr_Occurred())
2810 2 : goto error;
2811 :
2812 : /* Range check */
2813 6279500 : if (value < 0 || value >= 256) {
2814 0 : PyErr_SetString(PyExc_ValueError,
2815 : "bytes must be in range(0, 256)");
2816 0 : goto error;
2817 : }
2818 :
2819 : /* Append the byte */
2820 6279500 : if (i >= size) {
2821 5280 : str = _PyBytesWriter_Resize(&writer, str, size+1);
2822 5280 : if (str == NULL)
2823 0 : return NULL;
2824 5280 : size = writer.allocated;
2825 : }
2826 6279500 : *str++ = (char) value;
2827 : }
2828 :
2829 5013 : return _PyBytesWriter_Finish(&writer, str);
2830 :
2831 66 : error:
2832 66 : _PyBytesWriter_Dealloc(&writer);
2833 66 : return NULL;
2834 : }
2835 :
2836 : PyObject *
2837 299089 : PyBytes_FromObject(PyObject *x)
2838 : {
2839 : PyObject *it, *result;
2840 :
2841 299089 : if (x == NULL) {
2842 0 : PyErr_BadInternalCall();
2843 0 : return NULL;
2844 : }
2845 :
2846 299089 : if (PyBytes_CheckExact(x)) {
2847 2 : Py_INCREF(x);
2848 2 : return x;
2849 : }
2850 :
2851 : /* Use the modern buffer interface */
2852 299087 : if (PyObject_CheckBuffer(x))
2853 160812 : return _PyBytes_FromBuffer(x);
2854 :
2855 138275 : if (PyList_CheckExact(x))
2856 128725 : return _PyBytes_FromList(x);
2857 :
2858 9550 : if (PyTuple_CheckExact(x))
2859 4458 : return _PyBytes_FromTuple(x);
2860 :
2861 5092 : if (!PyUnicode_Check(x)) {
2862 5088 : it = PyObject_GetIter(x);
2863 5088 : if (it != NULL) {
2864 5079 : result = _PyBytes_FromIterator(it, x);
2865 5079 : Py_DECREF(it);
2866 5079 : return result;
2867 : }
2868 9 : if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
2869 1 : return NULL;
2870 : }
2871 : }
2872 :
2873 12 : PyErr_Format(PyExc_TypeError,
2874 : "cannot convert '%.200s' object to bytes",
2875 12 : Py_TYPE(x)->tp_name);
2876 12 : return NULL;
2877 : }
2878 :
2879 : /* This allocator is needed for subclasses don't want to use __new__.
2880 : * See https://github.com/python/cpython/issues/91020#issuecomment-1096793239
2881 : *
2882 : * This allocator will be removed when ob_shash is removed.
2883 : */
2884 : static PyObject *
2885 0 : bytes_alloc(PyTypeObject *self, Py_ssize_t nitems)
2886 : {
2887 0 : PyBytesObject *obj = (PyBytesObject*)PyType_GenericAlloc(self, nitems);
2888 0 : if (obj == NULL) {
2889 0 : return NULL;
2890 : }
2891 : _Py_COMP_DIAG_PUSH
2892 : _Py_COMP_DIAG_IGNORE_DEPR_DECLS
2893 0 : obj->ob_shash = -1;
2894 : _Py_COMP_DIAG_POP
2895 0 : return (PyObject*)obj;
2896 : }
2897 :
2898 : static PyObject *
2899 3463 : bytes_subtype_new(PyTypeObject *type, PyObject *tmp)
2900 : {
2901 : PyObject *pnew;
2902 : Py_ssize_t n;
2903 :
2904 3463 : assert(PyType_IsSubtype(type, &PyBytes_Type));
2905 3463 : assert(PyBytes_Check(tmp));
2906 3463 : n = PyBytes_GET_SIZE(tmp);
2907 3463 : pnew = type->tp_alloc(type, n);
2908 3463 : if (pnew != NULL) {
2909 3463 : memcpy(PyBytes_AS_STRING(pnew),
2910 3463 : PyBytes_AS_STRING(tmp), n+1);
2911 : _Py_COMP_DIAG_PUSH
2912 : _Py_COMP_DIAG_IGNORE_DEPR_DECLS
2913 3463 : ((PyBytesObject *)pnew)->ob_shash =
2914 3463 : ((PyBytesObject *)tmp)->ob_shash;
2915 : _Py_COMP_DIAG_POP
2916 : }
2917 3463 : return pnew;
2918 : }
2919 :
2920 : PyDoc_STRVAR(bytes_doc,
2921 : "bytes(iterable_of_ints) -> bytes\n\
2922 : bytes(string, encoding[, errors]) -> bytes\n\
2923 : bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
2924 : bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
2925 : bytes() -> empty bytes object\n\
2926 : \n\
2927 : Construct an immutable array of bytes from:\n\
2928 : - an iterable yielding integers in range(256)\n\
2929 : - a text string encoded using the specified encoding\n\
2930 : - any object implementing the buffer API.\n\
2931 : - an integer");
2932 :
2933 : static PyObject *bytes_iter(PyObject *seq);
2934 :
2935 : PyTypeObject PyBytes_Type = {
2936 : PyVarObject_HEAD_INIT(&PyType_Type, 0)
2937 : "bytes",
2938 : PyBytesObject_SIZE,
2939 : sizeof(char),
2940 : 0, /* tp_dealloc */
2941 : 0, /* tp_vectorcall_offset */
2942 : 0, /* tp_getattr */
2943 : 0, /* tp_setattr */
2944 : 0, /* tp_as_async */
2945 : (reprfunc)bytes_repr, /* tp_repr */
2946 : &bytes_as_number, /* tp_as_number */
2947 : &bytes_as_sequence, /* tp_as_sequence */
2948 : &bytes_as_mapping, /* tp_as_mapping */
2949 : (hashfunc)bytes_hash, /* tp_hash */
2950 : 0, /* tp_call */
2951 : bytes_str, /* tp_str */
2952 : PyObject_GenericGetAttr, /* tp_getattro */
2953 : 0, /* tp_setattro */
2954 : &bytes_as_buffer, /* tp_as_buffer */
2955 : Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
2956 : Py_TPFLAGS_BYTES_SUBCLASS |
2957 : _Py_TPFLAGS_MATCH_SELF, /* tp_flags */
2958 : bytes_doc, /* tp_doc */
2959 : 0, /* tp_traverse */
2960 : 0, /* tp_clear */
2961 : (richcmpfunc)bytes_richcompare, /* tp_richcompare */
2962 : 0, /* tp_weaklistoffset */
2963 : bytes_iter, /* tp_iter */
2964 : 0, /* tp_iternext */
2965 : bytes_methods, /* tp_methods */
2966 : 0, /* tp_members */
2967 : 0, /* tp_getset */
2968 : 0, /* tp_base */
2969 : 0, /* tp_dict */
2970 : 0, /* tp_descr_get */
2971 : 0, /* tp_descr_set */
2972 : 0, /* tp_dictoffset */
2973 : 0, /* tp_init */
2974 : bytes_alloc, /* tp_alloc */
2975 : bytes_new, /* tp_new */
2976 : PyObject_Del, /* tp_free */
2977 : };
2978 :
2979 : void
2980 21531 : PyBytes_Concat(PyObject **pv, PyObject *w)
2981 : {
2982 21531 : assert(pv != NULL);
2983 21531 : if (*pv == NULL)
2984 0 : return;
2985 21531 : if (w == NULL) {
2986 0 : Py_CLEAR(*pv);
2987 0 : return;
2988 : }
2989 :
2990 21531 : if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2991 : /* Only one reference, so we can resize in place */
2992 : Py_ssize_t oldsize;
2993 : Py_buffer wb;
2994 :
2995 7335 : if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
2996 0 : PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2997 0 : Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2998 0 : Py_CLEAR(*pv);
2999 0 : return;
3000 : }
3001 :
3002 7335 : oldsize = PyBytes_GET_SIZE(*pv);
3003 7335 : if (oldsize > PY_SSIZE_T_MAX - wb.len) {
3004 0 : PyErr_NoMemory();
3005 0 : goto error;
3006 : }
3007 7335 : if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
3008 0 : goto error;
3009 :
3010 7335 : memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
3011 7335 : PyBuffer_Release(&wb);
3012 7335 : return;
3013 :
3014 0 : error:
3015 0 : PyBuffer_Release(&wb);
3016 0 : Py_CLEAR(*pv);
3017 0 : return;
3018 : }
3019 :
3020 : else {
3021 : /* Multiple references, need to create new object */
3022 : PyObject *v;
3023 14196 : v = bytes_concat(*pv, w);
3024 14196 : Py_SETREF(*pv, v);
3025 : }
3026 : }
3027 :
3028 : void
3029 3564 : PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
3030 : {
3031 3564 : PyBytes_Concat(pv, w);
3032 3564 : Py_XDECREF(w);
3033 3564 : }
3034 :
3035 :
3036 : /* The following function breaks the notion that bytes are immutable:
3037 : it changes the size of a bytes object. We get away with this only if there
3038 : is only one module referencing the object. You can also think of it
3039 : as creating a new bytes object and destroying the old one, only
3040 : more efficiently. In any case, don't use this if the bytes object may
3041 : already be known to some other part of the code...
3042 : Note that if there's not enough memory to resize the bytes object, the
3043 : original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
3044 : memory" exception is set, and -1 is returned. Else (on success) 0 is
3045 : returned, and the value in *pv may or may not be the same as on input.
3046 : As always, an extra byte is allocated for a trailing \0 byte (newsize
3047 : does *not* include that), and a trailing \0 byte is stored.
3048 : */
3049 :
3050 : int
3051 5955310 : _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
3052 : {
3053 : PyObject *v;
3054 : PyBytesObject *sv;
3055 5955310 : v = *pv;
3056 5955310 : if (!PyBytes_Check(v) || newsize < 0) {
3057 0 : goto error;
3058 : }
3059 5955310 : if (Py_SIZE(v) == newsize) {
3060 : /* return early if newsize equals to v->ob_size */
3061 4428 : return 0;
3062 : }
3063 5950880 : if (Py_SIZE(v) == 0) {
3064 0 : if (newsize == 0) {
3065 0 : return 0;
3066 : }
3067 0 : *pv = _PyBytes_FromSize(newsize, 0);
3068 0 : Py_DECREF(v);
3069 0 : return (*pv == NULL) ? -1 : 0;
3070 : }
3071 5950880 : if (Py_REFCNT(v) != 1) {
3072 0 : goto error;
3073 : }
3074 5950880 : if (newsize == 0) {
3075 1503110 : *pv = bytes_new_empty();
3076 1503110 : Py_DECREF(v);
3077 1503110 : return 0;
3078 : }
3079 : /* XXX UNREF/NEWREF interface should be more symmetrical */
3080 : #ifdef Py_REF_DEBUG
3081 4447770 : _Py_RefTotal--;
3082 : #endif
3083 : #ifdef Py_TRACE_REFS
3084 : _Py_ForgetReference(v);
3085 : #endif
3086 4447770 : *pv = (PyObject *)
3087 4447770 : PyObject_Realloc(v, PyBytesObject_SIZE + newsize);
3088 4447770 : if (*pv == NULL) {
3089 0 : PyObject_Free(v);
3090 0 : PyErr_NoMemory();
3091 0 : return -1;
3092 : }
3093 4447770 : _Py_NewReference(*pv);
3094 4447770 : sv = (PyBytesObject *) *pv;
3095 4447770 : Py_SET_SIZE(sv, newsize);
3096 4447770 : sv->ob_sval[newsize] = '\0';
3097 : _Py_COMP_DIAG_PUSH
3098 : _Py_COMP_DIAG_IGNORE_DEPR_DECLS
3099 4447770 : sv->ob_shash = -1; /* invalidate cached hash value */
3100 : _Py_COMP_DIAG_POP
3101 4447770 : return 0;
3102 0 : error:
3103 0 : *pv = 0;
3104 0 : Py_DECREF(v);
3105 0 : PyErr_BadInternalCall();
3106 0 : return -1;
3107 : }
3108 :
3109 :
3110 : PyStatus
3111 3134 : _PyBytes_InitTypes(PyInterpreterState *interp)
3112 : {
3113 3134 : if (!_Py_IsMainInterpreter(interp)) {
3114 171 : return _PyStatus_OK();
3115 : }
3116 :
3117 2963 : if (PyType_Ready(&PyBytes_Type) < 0) {
3118 0 : return _PyStatus_ERR("Can't initialize bytes type");
3119 : }
3120 :
3121 2963 : if (PyType_Ready(&PyBytesIter_Type) < 0) {
3122 0 : return _PyStatus_ERR("Can't initialize bytes iterator type");
3123 : }
3124 :
3125 2963 : return _PyStatus_OK();
3126 : }
3127 :
3128 :
3129 : /*********************** Bytes Iterator ****************************/
3130 :
3131 : typedef struct {
3132 : PyObject_HEAD
3133 : Py_ssize_t it_index;
3134 : PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
3135 : } striterobject;
3136 :
3137 : static void
3138 101578 : striter_dealloc(striterobject *it)
3139 : {
3140 101578 : _PyObject_GC_UNTRACK(it);
3141 101578 : Py_XDECREF(it->it_seq);
3142 101578 : PyObject_GC_Del(it);
3143 101578 : }
3144 :
3145 : static int
3146 10 : striter_traverse(striterobject *it, visitproc visit, void *arg)
3147 : {
3148 10 : Py_VISIT(it->it_seq);
3149 10 : return 0;
3150 : }
3151 :
3152 : static PyObject *
3153 2236570 : striter_next(striterobject *it)
3154 : {
3155 : PyBytesObject *seq;
3156 :
3157 2236570 : assert(it != NULL);
3158 2236570 : seq = it->it_seq;
3159 2236570 : if (seq == NULL)
3160 1 : return NULL;
3161 2236570 : assert(PyBytes_Check(seq));
3162 :
3163 2236570 : if (it->it_index < PyBytes_GET_SIZE(seq)) {
3164 2148600 : return _PyLong_FromUnsignedChar(
3165 2148600 : (unsigned char)seq->ob_sval[it->it_index++]);
3166 : }
3167 :
3168 87967 : it->it_seq = NULL;
3169 87967 : Py_DECREF(seq);
3170 87967 : return NULL;
3171 : }
3172 :
3173 : static PyObject *
3174 1271 : striter_len(striterobject *it, PyObject *Py_UNUSED(ignored))
3175 : {
3176 1271 : Py_ssize_t len = 0;
3177 1271 : if (it->it_seq)
3178 1271 : len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3179 1271 : return PyLong_FromSsize_t(len);
3180 : }
3181 :
3182 : PyDoc_STRVAR(length_hint_doc,
3183 : "Private method returning an estimate of len(list(it)).");
3184 :
3185 : static PyObject *
3186 54 : striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
3187 : {
3188 54 : if (it->it_seq != NULL) {
3189 54 : return Py_BuildValue("N(O)n", _PyEval_GetBuiltin(&_Py_ID(iter)),
3190 : it->it_seq, it->it_index);
3191 : } else {
3192 0 : return Py_BuildValue("N(())", _PyEval_GetBuiltin(&_Py_ID(iter)));
3193 : }
3194 : }
3195 :
3196 : PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3197 :
3198 : static PyObject *
3199 84 : striter_setstate(striterobject *it, PyObject *state)
3200 : {
3201 84 : Py_ssize_t index = PyLong_AsSsize_t(state);
3202 84 : if (index == -1 && PyErr_Occurred())
3203 0 : return NULL;
3204 84 : if (it->it_seq != NULL) {
3205 84 : if (index < 0)
3206 0 : index = 0;
3207 84 : else if (index > PyBytes_GET_SIZE(it->it_seq))
3208 0 : index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3209 84 : it->it_index = index;
3210 : }
3211 84 : Py_RETURN_NONE;
3212 : }
3213 :
3214 : PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3215 :
3216 : static PyMethodDef striter_methods[] = {
3217 : {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3218 : length_hint_doc},
3219 : {"__reduce__", (PyCFunction)striter_reduce, METH_NOARGS,
3220 : reduce_doc},
3221 : {"__setstate__", (PyCFunction)striter_setstate, METH_O,
3222 : setstate_doc},
3223 : {NULL, NULL} /* sentinel */
3224 : };
3225 :
3226 : PyTypeObject PyBytesIter_Type = {
3227 : PyVarObject_HEAD_INIT(&PyType_Type, 0)
3228 : "bytes_iterator", /* tp_name */
3229 : sizeof(striterobject), /* tp_basicsize */
3230 : 0, /* tp_itemsize */
3231 : /* methods */
3232 : (destructor)striter_dealloc, /* tp_dealloc */
3233 : 0, /* tp_vectorcall_offset */
3234 : 0, /* tp_getattr */
3235 : 0, /* tp_setattr */
3236 : 0, /* tp_as_async */
3237 : 0, /* tp_repr */
3238 : 0, /* tp_as_number */
3239 : 0, /* tp_as_sequence */
3240 : 0, /* tp_as_mapping */
3241 : 0, /* tp_hash */
3242 : 0, /* tp_call */
3243 : 0, /* tp_str */
3244 : PyObject_GenericGetAttr, /* tp_getattro */
3245 : 0, /* tp_setattro */
3246 : 0, /* tp_as_buffer */
3247 : Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3248 : 0, /* tp_doc */
3249 : (traverseproc)striter_traverse, /* tp_traverse */
3250 : 0, /* tp_clear */
3251 : 0, /* tp_richcompare */
3252 : 0, /* tp_weaklistoffset */
3253 : PyObject_SelfIter, /* tp_iter */
3254 : (iternextfunc)striter_next, /* tp_iternext */
3255 : striter_methods, /* tp_methods */
3256 : 0,
3257 : };
3258 :
3259 : static PyObject *
3260 101578 : bytes_iter(PyObject *seq)
3261 : {
3262 : striterobject *it;
3263 :
3264 101578 : if (!PyBytes_Check(seq)) {
3265 0 : PyErr_BadInternalCall();
3266 0 : return NULL;
3267 : }
3268 101578 : it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3269 101578 : if (it == NULL)
3270 0 : return NULL;
3271 101578 : it->it_index = 0;
3272 101578 : Py_INCREF(seq);
3273 101578 : it->it_seq = (PyBytesObject *)seq;
3274 101578 : _PyObject_GC_TRACK(it);
3275 101578 : return (PyObject *)it;
3276 : }
3277 :
3278 :
3279 : /* _PyBytesWriter API */
3280 :
/* Divisor used when the writer overallocates: a request for N bytes is
   grown by N / OVERALLOCATE_FACTOR extra bytes. */
#if defined(MS_WINDOWS)
   /* On Windows, overallocating by 50% is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3288 :
3289 : void
3290 3093770 : _PyBytesWriter_Init(_PyBytesWriter *writer)
3291 : {
3292 : /* Set all attributes before small_buffer to 0 */
3293 3093770 : memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3294 : #ifndef NDEBUG
3295 3093770 : memset(writer->small_buffer, PYMEM_CLEANBYTE,
3296 : sizeof(writer->small_buffer));
3297 : #endif
3298 3093770 : }
3299 :
3300 : void
3301 19216 : _PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3302 : {
3303 19216 : Py_CLEAR(writer->buffer);
3304 19216 : }
3305 :
3306 : Py_LOCAL_INLINE(char*)
3307 15200500 : _PyBytesWriter_AsString(_PyBytesWriter *writer)
3308 : {
3309 15200500 : if (writer->use_small_buffer) {
3310 9265570 : assert(writer->buffer == NULL);
3311 9265570 : return writer->small_buffer;
3312 : }
3313 5934950 : else if (writer->use_bytearray) {
3314 476 : assert(writer->buffer != NULL);
3315 476 : return PyByteArray_AS_STRING(writer->buffer);
3316 : }
3317 : else {
3318 5934470 : assert(writer->buffer != NULL);
3319 5934470 : return PyBytes_AS_STRING(writer->buffer);
3320 : }
3321 : }
3322 :
3323 : Py_LOCAL_INLINE(Py_ssize_t)
3324 4554390 : _PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3325 : {
3326 4554390 : const char *start = _PyBytesWriter_AsString(writer);
3327 4554390 : assert(str != NULL);
3328 4554390 : assert(str >= start);
3329 4554390 : assert(str - start <= writer->allocated);
3330 4554390 : return str - start;
3331 : }
3332 :
3333 : #ifndef NDEBUG
3334 : Py_LOCAL_INLINE(int)
3335 9166290 : _PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3336 : {
3337 : const char *start, *end;
3338 :
3339 9166290 : if (writer->use_small_buffer) {
3340 6186920 : assert(writer->buffer == NULL);
3341 : }
3342 : else {
3343 2979370 : assert(writer->buffer != NULL);
3344 2979370 : if (writer->use_bytearray)
3345 240 : assert(PyByteArray_CheckExact(writer->buffer));
3346 : else
3347 2979130 : assert(PyBytes_CheckExact(writer->buffer));
3348 2979370 : assert(Py_REFCNT(writer->buffer) == 1);
3349 : }
3350 :
3351 9166290 : if (writer->use_bytearray) {
3352 : /* bytearray has its own overallocation algorithm,
3353 : writer overallocation must be disabled */
3354 757 : assert(!writer->overallocate);
3355 : }
3356 :
3357 9166290 : assert(0 <= writer->allocated);
3358 9166290 : assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3359 : /* the last byte must always be null */
3360 9166290 : start = _PyBytesWriter_AsString(writer);
3361 9166290 : assert(start[writer->allocated] == 0);
3362 :
3363 9166290 : end = start + writer->allocated;
3364 9166290 : assert(str != NULL);
3365 9166290 : assert(start <= str && str <= end);
3366 9166290 : return 1;
3367 : }
3368 : #endif
3369 :
3370 : void*
3371 1479840 : _PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3372 : {
3373 : Py_ssize_t allocated, pos;
3374 :
3375 1479840 : assert(_PyBytesWriter_CheckConsistency(writer, str));
3376 1479840 : assert(writer->allocated < size);
3377 :
3378 1479840 : allocated = size;
3379 1479840 : if (writer->overallocate
3380 28437 : && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3381 : /* overallocate to limit the number of realloc() */
3382 28437 : allocated += allocated / OVERALLOCATE_FACTOR;
3383 : }
3384 :
3385 1479840 : pos = _PyBytesWriter_GetSize(writer, str);
3386 1479840 : if (!writer->use_small_buffer) {
3387 16979 : if (writer->use_bytearray) {
3388 2 : if (PyByteArray_Resize(writer->buffer, allocated))
3389 0 : goto error;
3390 : /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3391 : but we cannot use ob_alloc because bytes may need to be moved
3392 : to use the whole buffer. bytearray uses an internal optimization
3393 : to avoid moving or copying bytes when bytes are removed at the
3394 : beginning (ex: del bytearray[:1]). */
3395 : }
3396 : else {
3397 16977 : if (_PyBytes_Resize(&writer->buffer, allocated))
3398 0 : goto error;
3399 : }
3400 : }
3401 : else {
3402 : /* convert from stack buffer to bytes object buffer */
3403 1462870 : assert(writer->buffer == NULL);
3404 :
3405 1462870 : if (writer->use_bytearray)
3406 116 : writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3407 : else
3408 1462750 : writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3409 1462870 : if (writer->buffer == NULL)
3410 0 : goto error;
3411 :
3412 1462870 : if (pos != 0) {
3413 : char *dest;
3414 2614 : if (writer->use_bytearray)
3415 4 : dest = PyByteArray_AS_STRING(writer->buffer);
3416 : else
3417 2610 : dest = PyBytes_AS_STRING(writer->buffer);
3418 2614 : memcpy(dest,
3419 2614 : writer->small_buffer,
3420 : pos);
3421 : }
3422 :
3423 1462870 : writer->use_small_buffer = 0;
3424 : #ifndef NDEBUG
3425 1462870 : memset(writer->small_buffer, PYMEM_CLEANBYTE,
3426 : sizeof(writer->small_buffer));
3427 : #endif
3428 : }
3429 1479840 : writer->allocated = allocated;
3430 :
3431 1479840 : str = _PyBytesWriter_AsString(writer) + pos;
3432 1479840 : assert(_PyBytesWriter_CheckConsistency(writer, str));
3433 1479840 : return str;
3434 :
3435 0 : error:
3436 0 : _PyBytesWriter_Dealloc(writer);
3437 0 : return NULL;
3438 : }
3439 :
3440 : void*
3441 3132050 : _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3442 : {
3443 : Py_ssize_t new_min_size;
3444 :
3445 3132050 : assert(_PyBytesWriter_CheckConsistency(writer, str));
3446 3132050 : assert(size >= 0);
3447 :
3448 3132050 : if (size == 0) {
3449 : /* nothing to do */
3450 2697 : return str;
3451 : }
3452 :
3453 3129350 : if (writer->min_size > PY_SSIZE_T_MAX - size) {
3454 0 : PyErr_NoMemory();
3455 0 : _PyBytesWriter_Dealloc(writer);
3456 0 : return NULL;
3457 : }
3458 3129350 : new_min_size = writer->min_size + size;
3459 :
3460 3129350 : if (new_min_size > writer->allocated)
3461 1474520 : str = _PyBytesWriter_Resize(writer, str, new_min_size);
3462 :
3463 3129350 : writer->min_size = new_min_size;
3464 3129350 : return str;
3465 : }
3466 :
3467 : /* Allocate the buffer to write size bytes.
3468 : Return the pointer to the beginning of buffer data.
3469 : Raise an exception and return NULL on error. */
3470 : void*
3471 3093750 : _PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3472 : {
3473 : /* ensure that _PyBytesWriter_Alloc() is only called once */
3474 3093750 : assert(writer->min_size == 0 && writer->buffer == NULL);
3475 3093750 : assert(size >= 0);
3476 :
3477 3093750 : writer->use_small_buffer = 1;
3478 : #ifndef NDEBUG
3479 3093750 : writer->allocated = sizeof(writer->small_buffer) - 1;
3480 : /* In debug mode, don't use the full small buffer because it is less
3481 : efficient than bytes and bytearray objects to detect buffer underflow
3482 : and buffer overflow. Use 10 bytes of the small buffer to test also
3483 : code using the smaller buffer in debug mode.
3484 :
3485 : Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3486 : in debug mode to also be able to detect stack overflow when running
3487 : tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3488 : if _Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3489 : stack overflow. */
3490 3093750 : writer->allocated = Py_MIN(writer->allocated, 10);
3491 : /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3492 : to detect buffer overflow */
3493 3093750 : writer->small_buffer[writer->allocated] = 0;
3494 : #else
3495 : writer->allocated = sizeof(writer->small_buffer);
3496 : #endif
3497 3093750 : return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3498 : }
3499 :
3500 : PyObject *
3501 3074550 : _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3502 : {
3503 : Py_ssize_t size;
3504 : PyObject *result;
3505 :
3506 3074550 : assert(_PyBytesWriter_CheckConsistency(writer, str));
3507 :
3508 3074550 : size = _PyBytesWriter_GetSize(writer, str);
3509 3074550 : if (size == 0 && !writer->use_bytearray) {
3510 1544 : Py_CLEAR(writer->buffer);
3511 : /* Get the empty byte string singleton */
3512 1544 : result = PyBytes_FromStringAndSize(NULL, 0);
3513 : }
3514 3073000 : else if (writer->use_small_buffer) {
3515 1614260 : if (writer->use_bytearray) {
3516 59 : result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3517 : }
3518 : else {
3519 1614200 : result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3520 : }
3521 : }
3522 : else {
3523 1458750 : result = writer->buffer;
3524 1458750 : writer->buffer = NULL;
3525 :
3526 1458750 : if (size != writer->allocated) {
3527 1377610 : if (writer->use_bytearray) {
3528 101 : if (PyByteArray_Resize(result, size)) {
3529 0 : Py_DECREF(result);
3530 0 : return NULL;
3531 : }
3532 : }
3533 : else {
3534 1377510 : if (_PyBytes_Resize(&result, size)) {
3535 0 : assert(result == NULL);
3536 0 : return NULL;
3537 : }
3538 : }
3539 : }
3540 : }
3541 3074550 : return result;
3542 : }
3543 :
3544 : void*
3545 27286 : _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3546 : const void *bytes, Py_ssize_t size)
3547 : {
3548 27286 : char *str = (char *)ptr;
3549 :
3550 27286 : str = _PyBytesWriter_Prepare(writer, str, size);
3551 27286 : if (str == NULL)
3552 0 : return NULL;
3553 :
3554 27286 : memcpy(str, bytes, size);
3555 27286 : str += size;
3556 :
3557 27286 : return str;
3558 : }
3559 :
3560 :
3561 : void
3562 425161 : _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
3563 : const char* src, Py_ssize_t len_src)
3564 : {
3565 425161 : if (len_dest == 0) {
3566 13599 : return;
3567 : }
3568 411562 : if (len_src == 1) {
3569 28726 : memset(dest, src[0], len_dest);
3570 : }
3571 : else {
3572 382836 : if (src != dest) {
3573 382822 : memcpy(dest, src, len_src);
3574 : }
3575 382836 : Py_ssize_t copied = len_src;
3576 1071030 : while (copied < len_dest) {
3577 688195 : Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
3578 688195 : memcpy(dest + copied, dest, bytes_to_copy);
3579 688195 : copied += bytes_to_copy;
3580 : }
3581 : }
3582 : }
3583 :
|