Line data Source code
1 : #define PY_SSIZE_T_CLEAN
2 : #include "Python.h"
3 : #include "pycore_abstract.h" // _PyIndex_Check()
4 : #include "pycore_bytes_methods.h"
5 :
6 : PyDoc_STRVAR_shared(_Py_isspace__doc__,
7 : "B.isspace() -> bool\n\
8 : \n\
9 : Return True if all characters in B are whitespace\n\
10 : and there is at least one character in B, False otherwise.");
11 :
12 : PyObject*
13 276 : _Py_bytes_isspace(const char *cptr, Py_ssize_t len)
14 : {
15 276 : const unsigned char *p
16 : = (const unsigned char *) cptr;
17 : const unsigned char *e;
18 :
19 : /* Shortcut for single character strings */
20 276 : if (len == 1 && Py_ISSPACE(*p))
21 14 : Py_RETURN_TRUE;
22 :
23 : /* Special case for empty strings */
24 262 : if (len == 0)
25 2 : Py_RETURN_FALSE;
26 :
27 260 : e = p + len;
28 20892 : for (; p < e; p++) {
29 20888 : if (!Py_ISSPACE(*p))
30 256 : Py_RETURN_FALSE;
31 : }
32 4 : Py_RETURN_TRUE;
33 : }
34 :
35 :
36 : PyDoc_STRVAR_shared(_Py_isalpha__doc__,
37 : "B.isalpha() -> bool\n\
38 : \n\
39 : Return True if all characters in B are alphabetic\n\
40 : and there is at least one character in B, False otherwise.");
41 :
42 : PyObject*
43 274 : _Py_bytes_isalpha(const char *cptr, Py_ssize_t len)
44 : {
45 274 : const unsigned char *p
46 : = (const unsigned char *) cptr;
47 : const unsigned char *e;
48 :
49 : /* Shortcut for single character strings */
50 274 : if (len == 1 && Py_ISALPHA(*p))
51 56 : Py_RETURN_TRUE;
52 :
53 : /* Special case for empty strings */
54 218 : if (len == 0)
55 2 : Py_RETURN_FALSE;
56 :
57 216 : e = p + len;
58 20850 : for (; p < e; p++) {
59 20846 : if (!Py_ISALPHA(*p))
60 212 : Py_RETURN_FALSE;
61 : }
62 4 : Py_RETURN_TRUE;
63 : }
64 :
65 :
66 : PyDoc_STRVAR_shared(_Py_isalnum__doc__,
67 : "B.isalnum() -> bool\n\
68 : \n\
69 : Return True if all characters in B are alphanumeric\n\
70 : and there is at least one character in B, False otherwise.");
71 :
72 : PyObject*
73 276 : _Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
74 : {
75 276 : const unsigned char *p
76 : = (const unsigned char *) cptr;
77 : const unsigned char *e;
78 :
79 : /* Shortcut for single character strings */
80 276 : if (len == 1 && Py_ISALNUM(*p))
81 66 : Py_RETURN_TRUE;
82 :
83 : /* Special case for empty strings */
84 210 : if (len == 0)
85 2 : Py_RETURN_FALSE;
86 :
87 208 : e = p + len;
88 20866 : for (; p < e; p++) {
89 20860 : if (!Py_ISALNUM(*p))
90 202 : Py_RETURN_FALSE;
91 : }
92 6 : Py_RETURN_TRUE;
93 : }
94 :
95 :
96 : PyDoc_STRVAR_shared(_Py_isascii__doc__,
97 : "B.isascii() -> bool\n\
98 : \n\
99 : Return True if B is empty or all characters in B are ASCII,\n\
100 : False otherwise.");
101 :
102 : // Optimization is copied from ascii_decode in unicodeobject.c
103 : /* Mask to quickly check whether a C 'size_t' contains a
104 : non-ASCII, UTF8-encoded char. */
105 : #if (SIZEOF_SIZE_T == 8)
106 : # define ASCII_CHAR_MASK 0x8080808080808080ULL
107 : #elif (SIZEOF_SIZE_T == 4)
108 : # define ASCII_CHAR_MASK 0x80808080U
109 : #else
110 : # error C 'size_t' size should be either 4 or 8!
111 : #endif
112 :
113 : PyObject*
114 76 : _Py_bytes_isascii(const char *cptr, Py_ssize_t len)
115 : {
116 76 : const char *p = cptr;
117 76 : const char *end = p + len;
118 :
119 226 : while (p < end) {
120 : /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
121 : for an explanation. */
122 190 : if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
123 : /* Help allocation */
124 74 : const char *_p = p;
125 94 : while (_p + SIZEOF_SIZE_T <= end) {
126 40 : size_t value = *(const size_t *) _p;
127 40 : if (value & ASCII_CHAR_MASK) {
128 20 : Py_RETURN_FALSE;
129 : }
130 20 : _p += SIZEOF_SIZE_T;
131 : }
132 54 : p = _p;
133 54 : if (_p == end)
134 4 : break;
135 : }
136 166 : if ((unsigned char)*p & 0x80) {
137 16 : Py_RETURN_FALSE;
138 : }
139 150 : p++;
140 : }
141 40 : Py_RETURN_TRUE;
142 : }
143 :
144 : #undef ASCII_CHAR_MASK
145 :
146 :
147 : PyDoc_STRVAR_shared(_Py_isdigit__doc__,
148 : "B.isdigit() -> bool\n\
149 : \n\
150 : Return True if all characters in B are digits\n\
151 : and there is at least one character in B, False otherwise.");
152 :
153 : PyObject*
154 270 : _Py_bytes_isdigit(const char *cptr, Py_ssize_t len)
155 : {
156 270 : const unsigned char *p
157 : = (const unsigned char *) cptr;
158 : const unsigned char *e;
159 :
160 : /* Shortcut for single character strings */
161 270 : if (len == 1 && Py_ISDIGIT(*p))
162 12 : Py_RETURN_TRUE;
163 :
164 : /* Special case for empty strings */
165 258 : if (len == 0)
166 2 : Py_RETURN_FALSE;
167 :
168 256 : e = p + len;
169 20908 : for (; p < e; p++) {
170 20904 : if (!Py_ISDIGIT(*p))
171 252 : Py_RETURN_FALSE;
172 : }
173 4 : Py_RETURN_TRUE;
174 : }
175 :
176 :
177 : PyDoc_STRVAR_shared(_Py_islower__doc__,
178 : "B.islower() -> bool\n\
179 : \n\
180 : Return True if all cased characters in B are lowercase and there is\n\
181 : at least one cased character in B, False otherwise.");
182 :
183 : PyObject*
184 274 : _Py_bytes_islower(const char *cptr, Py_ssize_t len)
185 : {
186 274 : const unsigned char *p
187 : = (const unsigned char *) cptr;
188 : const unsigned char *e;
189 : int cased;
190 :
191 : /* Shortcut for single character strings */
192 274 : if (len == 1)
193 262 : return PyBool_FromLong(Py_ISLOWER(*p));
194 :
195 : /* Special case for empty strings */
196 12 : if (len == 0)
197 2 : Py_RETURN_FALSE;
198 :
199 10 : e = p + len;
200 10 : cased = 0;
201 21518 : for (; p < e; p++) {
202 21512 : if (Py_ISUPPER(*p))
203 4 : Py_RETURN_FALSE;
204 21508 : else if (!cased && Py_ISLOWER(*p))
205 10 : cased = 1;
206 : }
207 6 : return PyBool_FromLong(cased);
208 : }
209 :
210 :
211 : PyDoc_STRVAR_shared(_Py_isupper__doc__,
212 : "B.isupper() -> bool\n\
213 : \n\
214 : Return True if all cased characters in B are uppercase and there is\n\
215 : at least one cased character in B, False otherwise.");
216 :
217 : PyObject*
218 274 : _Py_bytes_isupper(const char *cptr, Py_ssize_t len)
219 : {
220 274 : const unsigned char *p
221 : = (const unsigned char *) cptr;
222 : const unsigned char *e;
223 : int cased;
224 :
225 : /* Shortcut for single character strings */
226 274 : if (len == 1)
227 262 : return PyBool_FromLong(Py_ISUPPER(*p));
228 :
229 : /* Special case for empty strings */
230 12 : if (len == 0)
231 2 : Py_RETURN_FALSE;
232 :
233 10 : e = p + len;
234 10 : cased = 0;
235 21866 : for (; p < e; p++) {
236 21860 : if (Py_ISLOWER(*p))
237 4 : Py_RETURN_FALSE;
238 21856 : else if (!cased && Py_ISUPPER(*p))
239 10 : cased = 1;
240 : }
241 6 : return PyBool_FromLong(cased);
242 : }
243 :
244 :
245 : PyDoc_STRVAR_shared(_Py_istitle__doc__,
246 : "B.istitle() -> bool\n\
247 : \n\
248 : Return True if B is a titlecased string and there is at least one\n\
249 : character in B, i.e. uppercase characters may only follow uncased\n\
250 : characters and lowercase characters only cased ones. Return False\n\
251 : otherwise.");
252 :
253 : PyObject*
254 28 : _Py_bytes_istitle(const char *cptr, Py_ssize_t len)
255 : {
256 28 : const unsigned char *p
257 : = (const unsigned char *) cptr;
258 : const unsigned char *e;
259 : int cased, previous_is_cased;
260 :
261 : /* Shortcut for single character strings */
262 28 : if (len == 1)
263 6 : return PyBool_FromLong(Py_ISUPPER(*p));
264 :
265 : /* Special case for empty strings */
266 22 : if (len == 0)
267 2 : Py_RETURN_FALSE;
268 :
269 20 : e = p + len;
270 20 : cased = 0;
271 20 : previous_is_cased = 0;
272 31082 : for (; p < e; p++) {
273 31072 : const unsigned char ch = *p;
274 :
275 31072 : if (Py_ISUPPER(ch)) {
276 40 : if (previous_is_cased)
277 4 : Py_RETURN_FALSE;
278 36 : previous_is_cased = 1;
279 36 : cased = 1;
280 : }
281 31032 : else if (Py_ISLOWER(ch)) {
282 30974 : if (!previous_is_cased)
283 6 : Py_RETURN_FALSE;
284 30968 : previous_is_cased = 1;
285 30968 : cased = 1;
286 : }
287 : else
288 58 : previous_is_cased = 0;
289 : }
290 10 : return PyBool_FromLong(cased);
291 : }
292 :
293 :
294 : PyDoc_STRVAR_shared(_Py_lower__doc__,
295 : "B.lower() -> copy of B\n\
296 : \n\
297 : Return a copy of B with all ASCII characters converted to lowercase.");
298 :
299 : void
300 695200 : _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len)
301 : {
302 : Py_ssize_t i;
303 :
304 7707870 : for (i = 0; i < len; i++) {
305 7012670 : result[i] = Py_TOLOWER((unsigned char) cptr[i]);
306 : }
307 695200 : }
308 :
309 :
310 : PyDoc_STRVAR_shared(_Py_upper__doc__,
311 : "B.upper() -> copy of B\n\
312 : \n\
313 : Return a copy of B with all ASCII characters converted to uppercase.");
314 :
315 : void
316 64235 : _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len)
317 : {
318 : Py_ssize_t i;
319 :
320 243360 : for (i = 0; i < len; i++) {
321 179125 : result[i] = Py_TOUPPER((unsigned char) cptr[i]);
322 : }
323 64235 : }
324 :
325 :
326 : PyDoc_STRVAR_shared(_Py_title__doc__,
327 : "B.title() -> copy of B\n\
328 : \n\
329 : Return a titlecased version of B, i.e. ASCII words start with uppercase\n\
330 : characters, all remaining cased characters have lowercase.");
331 :
332 : void
333 16 : _Py_bytes_title(char *result, const char *s, Py_ssize_t len)
334 : {
335 : Py_ssize_t i;
336 16 : int previous_is_cased = 0;
337 :
338 10574 : for (i = 0; i < len; i++) {
339 10558 : int c = Py_CHARMASK(*s++);
340 10558 : if (Py_ISLOWER(c)) {
341 7302 : if (!previous_is_cased)
342 22 : c = Py_TOUPPER(c);
343 7302 : previous_is_cased = 1;
344 3256 : } else if (Py_ISUPPER(c)) {
345 3232 : if (previous_is_cased)
346 3222 : c = Py_TOLOWER(c);
347 3232 : previous_is_cased = 1;
348 : } else
349 24 : previous_is_cased = 0;
350 10558 : *result++ = c;
351 : }
352 16 : }
353 :
354 :
355 : PyDoc_STRVAR_shared(_Py_capitalize__doc__,
356 : "B.capitalize() -> copy of B\n\
357 : \n\
358 : Return a copy of B with only its first character capitalized (ASCII)\n\
359 : and the rest lower-cased.");
360 :
361 : void
362 14 : _Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len)
363 : {
364 14 : if (len > 0) {
365 14 : *result = Py_TOUPPER(*s);
366 14 : _Py_bytes_lower(result + 1, s + 1, len - 1);
367 : }
368 14 : }
369 :
370 :
371 : PyDoc_STRVAR_shared(_Py_swapcase__doc__,
372 : "B.swapcase() -> copy of B\n\
373 : \n\
374 : Return a copy of B with uppercase ASCII characters converted\n\
375 : to lowercase ASCII and vice versa.");
376 :
377 : void
378 8 : _Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len)
379 : {
380 : Py_ssize_t i;
381 :
382 10412 : for (i = 0; i < len; i++) {
383 10404 : int c = Py_CHARMASK(*s++);
384 10404 : if (Py_ISLOWER(c)) {
385 2410 : *result = Py_TOUPPER(c);
386 : }
387 7994 : else if (Py_ISUPPER(c)) {
388 3204 : *result = Py_TOLOWER(c);
389 : }
390 : else
391 4790 : *result = c;
392 10404 : result++;
393 : }
394 8 : }
395 :
396 :
397 : PyDoc_STRVAR_shared(_Py_maketrans__doc__,
398 : "B.maketrans(frm, to) -> translation table\n\
399 : \n\
400 : Return a translation table (a bytes object of length 256) suitable\n\
401 : for use in the bytes or bytearray translate method where each byte\n\
402 : in frm is mapped to the byte at the same position in to.\n\
403 : The bytes objects frm and to must be of the same length.");
404 :
405 : PyObject *
406 1053 : _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
407 : {
408 1053 : PyObject *res = NULL;
409 : Py_ssize_t i;
410 : char *p;
411 :
412 1053 : if (frm->len != to->len) {
413 2 : PyErr_Format(PyExc_ValueError,
414 : "maketrans arguments must have same length");
415 2 : return NULL;
416 : }
417 1051 : res = PyBytes_FromStringAndSize(NULL, 256);
418 1051 : if (!res)
419 0 : return NULL;
420 1051 : p = PyBytes_AS_STRING(res);
421 270107 : for (i = 0; i < 256; i++)
422 269056 : p[i] = (char) i;
423 3231 : for (i = 0; i < frm->len; i++) {
424 2180 : p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i];
425 : }
426 :
427 1051 : return res;
428 : }
429 :
430 : #define FASTSEARCH fastsearch
431 : #define STRINGLIB(F) stringlib_##F
432 : #define STRINGLIB_CHAR char
433 : #define STRINGLIB_SIZEOF_CHAR 1
434 : #define STRINGLIB_FAST_MEMCHR memchr
435 :
436 : #include "stringlib/fastsearch.h"
437 : #include "stringlib/count.h"
438 : #include "stringlib/find.h"
439 :
440 : /*
441 : Wraps stringlib_parse_args_finds() and additionally checks the first
442 : argument type.
443 :
444 : In case the first argument is a bytes-like object, sets it to subobj,
445 : and doesn't touch the byte parameter.
446 : In case it is an integer in range(0, 256), writes the integer value
447 : to byte, and sets subobj to NULL.
448 :
449 : The other parameters are similar to those of
450 : stringlib_parse_args_finds().
451 : */
452 :
453 : Py_LOCAL_INLINE(int)
454 1377020 : parse_args_finds_byte(const char *function_name, PyObject *args,
455 : PyObject **subobj, char *byte,
456 : Py_ssize_t *start, Py_ssize_t *end)
457 : {
458 : PyObject *tmp_subobj;
459 : Py_ssize_t ival;
460 :
461 1377020 : if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj,
462 : start, end))
463 20 : return 0;
464 :
465 1377000 : if (PyObject_CheckBuffer(tmp_subobj)) {
466 910652 : *subobj = tmp_subobj;
467 910652 : return 1;
468 : }
469 :
470 466350 : if (!_PyIndex_Check(tmp_subobj)) {
471 0 : PyErr_Format(PyExc_TypeError,
472 : "argument should be integer or bytes-like object, "
473 : "not '%.200s'",
474 0 : Py_TYPE(tmp_subobj)->tp_name);
475 0 : return 0;
476 : }
477 :
478 466350 : ival = PyNumber_AsSsize_t(tmp_subobj, NULL);
479 466350 : if (ival == -1 && PyErr_Occurred()) {
480 0 : return 0;
481 : }
482 466350 : if (ival < 0 || ival > 255) {
483 36 : PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
484 36 : return 0;
485 : }
486 :
487 466314 : *subobj = NULL;
488 466314 : *byte = (char)ival;
489 466314 : return 1;
490 : }
491 :
/* Helper macro to fix up start/end slice values for a sequence of length
 * `len`.
 *
 *   - `end` greater than `len` is clamped to `len`; a negative `end` is
 *     taken relative to `len` and then clamped to 0.
 *   - A negative `start` is taken relative to `len` and then clamped to 0.
 *     `start` is NOT clamped to `len`, so it may still exceed `end`
 *     afterwards; callers have to cope with an empty/negative window.
 *
 * `start` and `end` must be modifiable lvalues.  The body is wrapped in
 * do { } while (0) so the macro behaves as a single statement, e.g. inside
 * an unbraced if/else.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len)) {                    \
            (end) = (len);                      \
        }                                       \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0) {                    \
                (end) = 0;                      \
            }                                   \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0) {                  \
                (start) = 0;                    \
            }                                   \
        }                                       \
    } while (0)
506 :
507 : Py_LOCAL_INLINE(Py_ssize_t)
508 1232100 : find_internal(const char *str, Py_ssize_t len,
509 : const char *function_name, PyObject *args, int dir)
510 : {
511 : PyObject *subobj;
512 : char byte;
513 : Py_buffer subbuf;
514 : const char *sub;
515 : Py_ssize_t sub_len;
516 1232100 : Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
517 : Py_ssize_t res;
518 :
519 1232100 : if (!parse_args_finds_byte(function_name, args,
520 : &subobj, &byte, &start, &end))
521 46 : return -2;
522 :
523 1232060 : if (subobj) {
524 765755 : if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
525 0 : return -2;
526 :
527 765755 : sub = subbuf.buf;
528 765755 : sub_len = subbuf.len;
529 : }
530 : else {
531 466300 : sub = &byte;
532 466300 : sub_len = 1;
533 : }
534 :
535 1232060 : ADJUST_INDICES(start, end, len);
536 1232060 : if (end - start < sub_len)
537 132416 : res = -1;
538 1099640 : else if (sub_len == 1) {
539 701860 : if (dir > 0)
540 693700 : res = stringlib_find_char(
541 : str + start, end - start,
542 693700 : *sub);
543 : else
544 8160 : res = stringlib_rfind_char(
545 : str + start, end - start,
546 8160 : *sub);
547 701860 : if (res >= 0)
548 566843 : res += start;
549 : }
550 : else {
551 397779 : if (dir > 0)
552 200476 : res = stringlib_find_slice(
553 : str, len,
554 : sub, sub_len, start, end);
555 : else
556 197303 : res = stringlib_rfind_slice(
557 : str, len,
558 : sub, sub_len, start, end);
559 : }
560 :
561 1232060 : if (subobj)
562 765755 : PyBuffer_Release(&subbuf);
563 :
564 1232060 : return res;
565 : }
566 :
567 : PyDoc_STRVAR_shared(_Py_find__doc__,
568 : "B.find(sub[, start[, end]]) -> int\n\
569 : \n\
570 : Return the lowest index in B where subsection sub is found,\n\
571 : such that sub is contained within B[start,end]. Optional\n\
572 : arguments start and end are interpreted as in slice notation.\n\
573 : \n\
574 : Return -1 on failure.");
575 :
576 : PyObject *
577 960389 : _Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args)
578 : {
579 960389 : Py_ssize_t result = find_internal(str, len, "find", args, +1);
580 960389 : if (result == -2)
581 16 : return NULL;
582 960373 : return PyLong_FromSsize_t(result);
583 : }
584 :
585 : PyDoc_STRVAR_shared(_Py_index__doc__,
586 : "B.index(sub[, start[, end]]) -> int\n\
587 : \n\
588 : Return the lowest index in B where subsection sub is found,\n\
589 : such that sub is contained within B[start,end]. Optional\n\
590 : arguments start and end are interpreted as in slice notation.\n\
591 : \n\
592 : Raises ValueError when the subsection is not found.");
593 :
594 : PyObject *
595 137 : _Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
596 : {
597 137 : Py_ssize_t result = find_internal(str, len, "index", args, +1);
598 137 : if (result == -2)
599 10 : return NULL;
600 127 : if (result == -1) {
601 33 : PyErr_SetString(PyExc_ValueError,
602 : "subsection not found");
603 33 : return NULL;
604 : }
605 94 : return PyLong_FromSsize_t(result);
606 : }
607 :
608 : PyDoc_STRVAR_shared(_Py_rfind__doc__,
609 : "B.rfind(sub[, start[, end]]) -> int\n\
610 : \n\
611 : Return the highest index in B where subsection sub is found,\n\
612 : such that sub is contained within B[start,end]. Optional\n\
613 : arguments start and end are interpreted as in slice notation.\n\
614 : \n\
615 : Return -1 on failure.");
616 :
617 : PyObject *
618 271481 : _Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args)
619 : {
620 271481 : Py_ssize_t result = find_internal(str, len, "rfind", args, -1);
621 271481 : if (result == -2)
622 10 : return NULL;
623 271471 : return PyLong_FromSsize_t(result);
624 : }
625 :
626 : PyDoc_STRVAR_shared(_Py_rindex__doc__,
627 : "B.rindex(sub[, start[, end]]) -> int\n\
628 : \n\
629 : Return the highest index in B where subsection sub is found,\n\
630 : such that sub is contained within B[start,end]. Optional\n\
631 : arguments start and end are interpreted as in slice notation.\n\
632 : \n\
633 : Raise ValueError when the subsection is not found.");
634 :
635 : PyObject *
636 94 : _Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
637 : {
638 94 : Py_ssize_t result = find_internal(str, len, "rindex", args, -1);
639 94 : if (result == -2)
640 10 : return NULL;
641 84 : if (result == -1) {
642 24 : PyErr_SetString(PyExc_ValueError,
643 : "subsection not found");
644 24 : return NULL;
645 : }
646 60 : return PyLong_FromSsize_t(result);
647 : }
648 :
649 : PyDoc_STRVAR_shared(_Py_count__doc__,
650 : "B.count(sub[, start[, end]]) -> int\n\
651 : \n\
652 : Return the number of non-overlapping occurrences of subsection sub in\n\
653 : bytes B[start:end]. Optional arguments start and end are interpreted\n\
654 : as in slice notation.");
655 :
656 : PyObject *
657 144921 : _Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args)
658 : {
659 : PyObject *sub_obj;
660 : const char *sub;
661 : Py_ssize_t sub_len;
662 : char byte;
663 144921 : Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
664 :
665 : Py_buffer vsub;
666 : PyObject *count_obj;
667 :
668 144921 : if (!parse_args_finds_byte("count", args,
669 : &sub_obj, &byte, &start, &end))
670 10 : return NULL;
671 :
672 144911 : if (sub_obj) {
673 144897 : if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
674 0 : return NULL;
675 :
676 144897 : sub = vsub.buf;
677 144897 : sub_len = vsub.len;
678 : }
679 : else {
680 14 : sub = &byte;
681 14 : sub_len = 1;
682 : }
683 :
684 144911 : ADJUST_INDICES(start, end, len);
685 :
686 144911 : count_obj = PyLong_FromSsize_t(
687 : stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
688 : );
689 :
690 144911 : if (sub_obj)
691 144897 : PyBuffer_Release(&vsub);
692 :
693 144911 : return count_obj;
694 : }
695 :
696 : int
697 760293 : _Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg)
698 : {
699 760293 : Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL);
700 760293 : if (ival == -1 && PyErr_Occurred()) {
701 : Py_buffer varg;
702 : Py_ssize_t pos;
703 753185 : PyErr_Clear();
704 753185 : if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
705 9 : return -1;
706 753176 : pos = stringlib_find(str, len,
707 753176 : varg.buf, varg.len, 0);
708 753176 : PyBuffer_Release(&varg);
709 753176 : return pos >= 0;
710 : }
711 7108 : if (ival < 0 || ival >= 256) {
712 6 : PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
713 6 : return -1;
714 : }
715 :
716 7102 : return memchr(str, (int) ival, len) != NULL;
717 : }
718 :
719 :
720 : /* Matches the end (direction >= 0) or start (direction < 0) of the buffer
721 : * against substr, using the start and end arguments. Returns
722 : * -1 on error, 0 if not found and 1 if found.
723 : */
724 : static int
725 203762 : tailmatch(const char *str, Py_ssize_t len, PyObject *substr,
726 : Py_ssize_t start, Py_ssize_t end, int direction)
727 : {
728 203762 : Py_buffer sub_view = {NULL, NULL};
729 : const char *sub;
730 : Py_ssize_t slen;
731 :
732 203762 : if (PyBytes_Check(substr)) {
733 203733 : sub = PyBytes_AS_STRING(substr);
734 203733 : slen = PyBytes_GET_SIZE(substr);
735 : }
736 : else {
737 29 : if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
738 5 : return -1;
739 24 : sub = sub_view.buf;
740 24 : slen = sub_view.len;
741 : }
742 :
743 203757 : ADJUST_INDICES(start, end, len);
744 :
745 203757 : if (direction < 0) {
746 : /* startswith */
747 60508 : if (start > len - slen)
748 892 : goto notfound;
749 : } else {
750 : /* endswith */
751 143249 : if (end - start < slen || start > len)
752 651 : goto notfound;
753 :
754 142598 : if (end - slen > start)
755 141287 : start = end - slen;
756 : }
757 202214 : if (end - start < slen)
758 0 : goto notfound;
759 202214 : if (memcmp(str + start, sub, slen) != 0)
760 116840 : goto notfound;
761 :
762 85374 : PyBuffer_Release(&sub_view);
763 85374 : return 1;
764 :
765 118383 : notfound:
766 118383 : PyBuffer_Release(&sub_view);
767 118383 : return 0;
768 : }
769 :
770 : static PyObject *
771 203549 : _Py_bytes_tailmatch(const char *str, Py_ssize_t len,
772 : const char *function_name, PyObject *args,
773 : int direction)
774 : {
775 203549 : Py_ssize_t start = 0;
776 203549 : Py_ssize_t end = PY_SSIZE_T_MAX;
777 : PyObject *subobj;
778 : int result;
779 :
780 203549 : if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end))
781 4 : return NULL;
782 203545 : if (PyTuple_Check(subobj)) {
783 : Py_ssize_t i;
784 651 : for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
785 441 : result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i),
786 : start, end, direction);
787 441 : if (result == -1)
788 0 : return NULL;
789 441 : else if (result) {
790 14 : Py_RETURN_TRUE;
791 : }
792 : }
793 210 : Py_RETURN_FALSE;
794 : }
795 203321 : result = tailmatch(str, len, subobj, start, end, direction);
796 203321 : if (result == -1) {
797 5 : if (PyErr_ExceptionMatches(PyExc_TypeError))
798 5 : PyErr_Format(PyExc_TypeError,
799 : "%s first arg must be bytes or a tuple of bytes, "
800 : "not %s",
801 5 : function_name, Py_TYPE(subobj)->tp_name);
802 5 : return NULL;
803 : }
804 : else
805 203316 : return PyBool_FromLong(result);
806 : }
807 :
808 : PyDoc_STRVAR_shared(_Py_startswith__doc__,
809 : "B.startswith(prefix[, start[, end]]) -> bool\n\
810 : \n\
811 : Return True if B starts with the specified prefix, False otherwise.\n\
812 : With optional start, test B beginning at that position.\n\
813 : With optional end, stop comparing B at that position.\n\
814 : prefix can also be a tuple of bytes to try.");
815 :
816 : PyObject *
817 60296 : _Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args)
818 : {
819 60296 : return _Py_bytes_tailmatch(str, len, "startswith", args, -1);
820 : }
821 :
822 : PyDoc_STRVAR_shared(_Py_endswith__doc__,
823 : "B.endswith(suffix[, start[, end]]) -> bool\n\
824 : \n\
825 : Return True if B ends with the specified suffix, False otherwise.\n\
826 : With optional start, test B beginning at that position.\n\
827 : With optional end, stop comparing B at that position.\n\
828 : suffix can also be a tuple of bytes to try.");
829 :
830 : PyObject *
831 143253 : _Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args)
832 : {
833 143253 : return _Py_bytes_tailmatch(str, len, "endswith", args, +1);
834 : }
|