|           Line data    Source code 
       1             : #define PY_SSIZE_T_CLEAN
       2             : #include "Python.h"
       3             : #include "pycore_abstract.h"   // _PyIndex_Check()
       4             : #include "pycore_bytes_methods.h"
       5             : 
       6             : PyDoc_STRVAR_shared(_Py_isspace__doc__,
       7             : "B.isspace() -> bool\n\
       8             : \n\
       9             : Return True if all characters in B are whitespace\n\
      10             : and there is at least one character in B, False otherwise.");
      11             : 
      12             : PyObject*
      13         276 : _Py_bytes_isspace(const char *cptr, Py_ssize_t len)
      14             : {
      15         276 :     const unsigned char *p
      16             :         = (const unsigned char *) cptr;
      17             :     const unsigned char *e;
      18             : 
      19             :     /* Shortcut for single character strings */
      20         276 :     if (len == 1 && Py_ISSPACE(*p))
      21          14 :         Py_RETURN_TRUE;
      22             : 
      23             :     /* Special case for empty strings */
      24         262 :     if (len == 0)
      25           2 :         Py_RETURN_FALSE;
      26             : 
      27         260 :     e = p + len;
      28       20892 :     for (; p < e; p++) {
      29       20888 :         if (!Py_ISSPACE(*p))
      30         256 :             Py_RETURN_FALSE;
      31             :     }
      32           4 :     Py_RETURN_TRUE;
      33             : }
      34             : 
      35             : 
      36             : PyDoc_STRVAR_shared(_Py_isalpha__doc__,
      37             : "B.isalpha() -> bool\n\
      38             : \n\
      39             : Return True if all characters in B are alphabetic\n\
      40             : and there is at least one character in B, False otherwise.");
      41             : 
      42             : PyObject*
      43         274 : _Py_bytes_isalpha(const char *cptr, Py_ssize_t len)
      44             : {
      45         274 :     const unsigned char *p
      46             :         = (const unsigned char *) cptr;
      47             :     const unsigned char *e;
      48             : 
      49             :     /* Shortcut for single character strings */
      50         274 :     if (len == 1 && Py_ISALPHA(*p))
      51          56 :         Py_RETURN_TRUE;
      52             : 
      53             :     /* Special case for empty strings */
      54         218 :     if (len == 0)
      55           2 :         Py_RETURN_FALSE;
      56             : 
      57         216 :     e = p + len;
      58       20850 :     for (; p < e; p++) {
      59       20846 :         if (!Py_ISALPHA(*p))
      60         212 :             Py_RETURN_FALSE;
      61             :     }
      62           4 :     Py_RETURN_TRUE;
      63             : }
      64             : 
      65             : 
      66             : PyDoc_STRVAR_shared(_Py_isalnum__doc__,
      67             : "B.isalnum() -> bool\n\
      68             : \n\
      69             : Return True if all characters in B are alphanumeric\n\
      70             : and there is at least one character in B, False otherwise.");
      71             : 
      72             : PyObject*
      73         276 : _Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
      74             : {
      75         276 :     const unsigned char *p
      76             :         = (const unsigned char *) cptr;
      77             :     const unsigned char *e;
      78             : 
      79             :     /* Shortcut for single character strings */
      80         276 :     if (len == 1 && Py_ISALNUM(*p))
      81          66 :         Py_RETURN_TRUE;
      82             : 
      83             :     /* Special case for empty strings */
      84         210 :     if (len == 0)
      85           2 :         Py_RETURN_FALSE;
      86             : 
      87         208 :     e = p + len;
      88       20866 :     for (; p < e; p++) {
      89       20860 :         if (!Py_ISALNUM(*p))
      90         202 :             Py_RETURN_FALSE;
      91             :     }
      92           6 :     Py_RETURN_TRUE;
      93             : }
      94             : 
      95             : 
      96             : PyDoc_STRVAR_shared(_Py_isascii__doc__,
      97             : "B.isascii() -> bool\n\
      98             : \n\
      99             : Return True if B is empty or all characters in B are ASCII,\n\
     100             : False otherwise.");
     101             : 
// Optimization is copied from ascii_decode in unicodeobject.c
/* Mask to quickly check whether a C 'size_t' contains a
   non-ASCII, UTF8-encoded char.  The mask has the top bit (0x80) of every
   byte set, so a non-zero AND means at least one byte in the word has its
   high bit set. */
#if (SIZEOF_SIZE_T == 8)
# define ASCII_CHAR_MASK 0x8080808080808080ULL
#elif (SIZEOF_SIZE_T == 4)
# define ASCII_CHAR_MASK 0x80808080U
#else
# error C 'size_t' size should be either 4 or 8!
#endif

/* Test whether none of the len bytes starting at cptr has its high bit
   (0x80) set.

   Returns True for an empty buffer (the loop body never runs) and for a
   buffer made only of bytes below 0x80; returns False as soon as a byte
   with the high bit set is seen. */
PyObject*
_Py_bytes_isascii(const char *cptr, Py_ssize_t len)
{
    const char *p = cptr;
    const char *end = p + len;

    while (p < end) {
        /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
           for an explanation. */
        if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
            /* Help allocation */
            /* NOTE(review): presumably "register allocation", as in the
               unicodeobject.c code this was copied from -- confirm. */
            const char *_p = p;
            /* Consume whole size_t words while a full word still fits
               before end, testing all of its bytes with one AND. */
            while (_p + SIZEOF_SIZE_T <= end) {
                size_t value = *(const size_t *) _p;
                if (value & ASCII_CHAR_MASK) {
                    Py_RETURN_FALSE;
                }
                _p += SIZEOF_SIZE_T;
            }
            p = _p;
            /* The word loop consumed everything: do not read *p below. */
            if (_p == end)
                break;
        }
        /* Byte-at-a-time path: unaligned positions and the tail that is
           shorter than one word. */
        if ((unsigned char)*p & 0x80) {
            Py_RETURN_FALSE;
        }
        p++;
    }
    Py_RETURN_TRUE;
}

#undef ASCII_CHAR_MASK
     145             : 
     146             : 
     147             : PyDoc_STRVAR_shared(_Py_isdigit__doc__,
     148             : "B.isdigit() -> bool\n\
     149             : \n\
     150             : Return True if all characters in B are digits\n\
     151             : and there is at least one character in B, False otherwise.");
     152             : 
     153             : PyObject*
     154         270 : _Py_bytes_isdigit(const char *cptr, Py_ssize_t len)
     155             : {
     156         270 :     const unsigned char *p
     157             :         = (const unsigned char *) cptr;
     158             :     const unsigned char *e;
     159             : 
     160             :     /* Shortcut for single character strings */
     161         270 :     if (len == 1 && Py_ISDIGIT(*p))
     162          12 :         Py_RETURN_TRUE;
     163             : 
     164             :     /* Special case for empty strings */
     165         258 :     if (len == 0)
     166           2 :         Py_RETURN_FALSE;
     167             : 
     168         256 :     e = p + len;
     169       20908 :     for (; p < e; p++) {
     170       20904 :         if (!Py_ISDIGIT(*p))
     171         252 :             Py_RETURN_FALSE;
     172             :     }
     173           4 :     Py_RETURN_TRUE;
     174             : }
     175             : 
     176             : 
     177             : PyDoc_STRVAR_shared(_Py_islower__doc__,
     178             : "B.islower() -> bool\n\
     179             : \n\
     180             : Return True if all cased characters in B are lowercase and there is\n\
     181             : at least one cased character in B, False otherwise.");
     182             : 
     183             : PyObject*
     184         274 : _Py_bytes_islower(const char *cptr, Py_ssize_t len)
     185             : {
     186         274 :     const unsigned char *p
     187             :         = (const unsigned char *) cptr;
     188             :     const unsigned char *e;
     189             :     int cased;
     190             : 
     191             :     /* Shortcut for single character strings */
     192         274 :     if (len == 1)
     193         262 :         return PyBool_FromLong(Py_ISLOWER(*p));
     194             : 
     195             :     /* Special case for empty strings */
     196          12 :     if (len == 0)
     197           2 :         Py_RETURN_FALSE;
     198             : 
     199          10 :     e = p + len;
     200          10 :     cased = 0;
     201       21518 :     for (; p < e; p++) {
     202       21512 :         if (Py_ISUPPER(*p))
     203           4 :             Py_RETURN_FALSE;
     204       21508 :         else if (!cased && Py_ISLOWER(*p))
     205          10 :             cased = 1;
     206             :     }
     207           6 :     return PyBool_FromLong(cased);
     208             : }
     209             : 
     210             : 
     211             : PyDoc_STRVAR_shared(_Py_isupper__doc__,
     212             : "B.isupper() -> bool\n\
     213             : \n\
     214             : Return True if all cased characters in B are uppercase and there is\n\
     215             : at least one cased character in B, False otherwise.");
     216             : 
     217             : PyObject*
     218         274 : _Py_bytes_isupper(const char *cptr, Py_ssize_t len)
     219             : {
     220         274 :     const unsigned char *p
     221             :         = (const unsigned char *) cptr;
     222             :     const unsigned char *e;
     223             :     int cased;
     224             : 
     225             :     /* Shortcut for single character strings */
     226         274 :     if (len == 1)
     227         262 :         return PyBool_FromLong(Py_ISUPPER(*p));
     228             : 
     229             :     /* Special case for empty strings */
     230          12 :     if (len == 0)
     231           2 :         Py_RETURN_FALSE;
     232             : 
     233          10 :     e = p + len;
     234          10 :     cased = 0;
     235       21866 :     for (; p < e; p++) {
     236       21860 :         if (Py_ISLOWER(*p))
     237           4 :             Py_RETURN_FALSE;
     238       21856 :         else if (!cased && Py_ISUPPER(*p))
     239          10 :             cased = 1;
     240             :     }
     241           6 :     return PyBool_FromLong(cased);
     242             : }
     243             : 
     244             : 
     245             : PyDoc_STRVAR_shared(_Py_istitle__doc__,
     246             : "B.istitle() -> bool\n\
     247             : \n\
     248             : Return True if B is a titlecased string and there is at least one\n\
     249             : character in B, i.e. uppercase characters may only follow uncased\n\
     250             : characters and lowercase characters only cased ones. Return False\n\
     251             : otherwise.");
     252             : 
     253             : PyObject*
     254          28 : _Py_bytes_istitle(const char *cptr, Py_ssize_t len)
     255             : {
     256          28 :     const unsigned char *p
     257             :         = (const unsigned char *) cptr;
     258             :     const unsigned char *e;
     259             :     int cased, previous_is_cased;
     260             : 
     261             :     /* Shortcut for single character strings */
     262          28 :     if (len == 1)
     263           6 :         return PyBool_FromLong(Py_ISUPPER(*p));
     264             : 
     265             :     /* Special case for empty strings */
     266          22 :     if (len == 0)
     267           2 :         Py_RETURN_FALSE;
     268             : 
     269          20 :     e = p + len;
     270          20 :     cased = 0;
     271          20 :     previous_is_cased = 0;
     272       31082 :     for (; p < e; p++) {
     273       31072 :         const unsigned char ch = *p;
     274             : 
     275       31072 :         if (Py_ISUPPER(ch)) {
     276          40 :             if (previous_is_cased)
     277           4 :                 Py_RETURN_FALSE;
     278          36 :             previous_is_cased = 1;
     279          36 :             cased = 1;
     280             :         }
     281       31032 :         else if (Py_ISLOWER(ch)) {
     282       30974 :             if (!previous_is_cased)
     283           6 :                 Py_RETURN_FALSE;
     284       30968 :             previous_is_cased = 1;
     285       30968 :             cased = 1;
     286             :         }
     287             :         else
     288          58 :             previous_is_cased = 0;
     289             :     }
     290          10 :     return PyBool_FromLong(cased);
     291             : }
     292             : 
     293             : 
     294             : PyDoc_STRVAR_shared(_Py_lower__doc__,
     295             : "B.lower() -> copy of B\n\
     296             : \n\
     297             : Return a copy of B with all ASCII characters converted to lowercase.");
     298             : 
     299             : void
     300      695200 : _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len)
     301             : {
     302             :     Py_ssize_t i;
     303             : 
     304     7707870 :     for (i = 0; i < len; i++) {
     305     7012670 :         result[i] = Py_TOLOWER((unsigned char) cptr[i]);
     306             :     }
     307      695200 : }
     308             : 
     309             : 
     310             : PyDoc_STRVAR_shared(_Py_upper__doc__,
     311             : "B.upper() -> copy of B\n\
     312             : \n\
     313             : Return a copy of B with all ASCII characters converted to uppercase.");
     314             : 
     315             : void
     316       64235 : _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len)
     317             : {
     318             :     Py_ssize_t i;
     319             : 
     320      243360 :     for (i = 0; i < len; i++) {
     321      179125 :         result[i] = Py_TOUPPER((unsigned char) cptr[i]);
     322             :     }
     323       64235 : }
     324             : 
     325             : 
     326             : PyDoc_STRVAR_shared(_Py_title__doc__,
     327             : "B.title() -> copy of B\n\
     328             : \n\
     329             : Return a titlecased version of B, i.e. ASCII words start with uppercase\n\
     330             : characters, all remaining cased characters have lowercase.");
     331             : 
     332             : void
     333          16 : _Py_bytes_title(char *result, const char *s, Py_ssize_t len)
     334             : {
     335             :     Py_ssize_t i;
     336          16 :     int previous_is_cased = 0;
     337             : 
     338       10574 :     for (i = 0; i < len; i++) {
     339       10558 :         int c = Py_CHARMASK(*s++);
     340       10558 :         if (Py_ISLOWER(c)) {
     341        7302 :             if (!previous_is_cased)
     342          22 :                 c = Py_TOUPPER(c);
     343        7302 :             previous_is_cased = 1;
     344        3256 :         } else if (Py_ISUPPER(c)) {
     345        3232 :             if (previous_is_cased)
     346        3222 :                 c = Py_TOLOWER(c);
     347        3232 :             previous_is_cased = 1;
     348             :         } else
     349          24 :             previous_is_cased = 0;
     350       10558 :         *result++ = c;
     351             :     }
     352          16 : }
     353             : 
     354             : 
     355             : PyDoc_STRVAR_shared(_Py_capitalize__doc__,
     356             : "B.capitalize() -> copy of B\n\
     357             : \n\
     358             : Return a copy of B with only its first character capitalized (ASCII)\n\
     359             : and the rest lower-cased.");
     360             : 
     361             : void
     362          14 : _Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len)
     363             : {
     364          14 :     if (len > 0) {
     365          14 :         *result = Py_TOUPPER(*s);
     366          14 :         _Py_bytes_lower(result + 1, s + 1, len - 1);
     367             :     }
     368          14 : }
     369             : 
     370             : 
     371             : PyDoc_STRVAR_shared(_Py_swapcase__doc__,
     372             : "B.swapcase() -> copy of B\n\
     373             : \n\
     374             : Return a copy of B with uppercase ASCII characters converted\n\
     375             : to lowercase ASCII and vice versa.");
     376             : 
     377             : void
     378           8 : _Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len)
     379             : {
     380             :     Py_ssize_t i;
     381             : 
     382       10412 :     for (i = 0; i < len; i++) {
     383       10404 :         int c = Py_CHARMASK(*s++);
     384       10404 :         if (Py_ISLOWER(c)) {
     385        2410 :             *result = Py_TOUPPER(c);
     386             :         }
     387        7994 :         else if (Py_ISUPPER(c)) {
     388        3204 :             *result = Py_TOLOWER(c);
     389             :         }
     390             :         else
     391        4790 :             *result = c;
     392       10404 :         result++;
     393             :     }
     394           8 : }
     395             : 
     396             : 
     397             : PyDoc_STRVAR_shared(_Py_maketrans__doc__,
     398             : "B.maketrans(frm, to) -> translation table\n\
     399             : \n\
     400             : Return a translation table (a bytes object of length 256) suitable\n\
     401             : for use in the bytes or bytearray translate method where each byte\n\
     402             : in frm is mapped to the byte at the same position in to.\n\
     403             : The bytes objects frm and to must be of the same length.");
     404             : 
     405             : PyObject *
     406        1053 : _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to)
     407             : {
     408        1053 :     PyObject *res = NULL;
     409             :     Py_ssize_t i;
     410             :     char *p;
     411             : 
     412        1053 :     if (frm->len != to->len) {
     413           2 :         PyErr_Format(PyExc_ValueError,
     414             :                      "maketrans arguments must have same length");
     415           2 :         return NULL;
     416             :     }
     417        1051 :     res = PyBytes_FromStringAndSize(NULL, 256);
     418        1051 :     if (!res)
     419           0 :         return NULL;
     420        1051 :     p = PyBytes_AS_STRING(res);
     421      270107 :     for (i = 0; i < 256; i++)
     422      269056 :         p[i] = (char) i;
     423        3231 :     for (i = 0; i < frm->len; i++) {
     424        2180 :         p[((unsigned char *)frm->buf)[i]] = ((char *)to->buf)[i];
     425             :     }
     426             : 
     427        1051 :     return res;
     428             : }
     429             : 
     430             : #define FASTSEARCH fastsearch
     431             : #define STRINGLIB(F) stringlib_##F
     432             : #define STRINGLIB_CHAR char
     433             : #define STRINGLIB_SIZEOF_CHAR 1
     434             : #define STRINGLIB_FAST_MEMCHR memchr
     435             : 
     436             : #include "stringlib/fastsearch.h"
     437             : #include "stringlib/count.h"
     438             : #include "stringlib/find.h"
     439             : 
     440             : /*
     441             : Wraps stringlib_parse_args_finds() and additionally checks the first
     442             : argument type.
     443             : 
     444             : In case the first argument is a bytes-like object, sets it to subobj,
     445             : and doesn't touch the byte parameter.
     446             : In case it is an integer in range(0, 256), writes the integer value
     447             : to byte, and sets subobj to NULL.
     448             : 
     449             : The other parameters are similar to those of
     450             : stringlib_parse_args_finds().
     451             : */
     452             : 
     453             : Py_LOCAL_INLINE(int)
     454     1377020 : parse_args_finds_byte(const char *function_name, PyObject *args,
     455             :                       PyObject **subobj, char *byte,
     456             :                       Py_ssize_t *start, Py_ssize_t *end)
     457             : {
     458             :     PyObject *tmp_subobj;
     459             :     Py_ssize_t ival;
     460             : 
     461     1377020 :     if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj,
     462             :                                    start, end))
     463          20 :         return 0;
     464             : 
     465     1377000 :     if (PyObject_CheckBuffer(tmp_subobj)) {
     466      910652 :         *subobj = tmp_subobj;
     467      910652 :         return 1;
     468             :     }
     469             : 
     470      466350 :     if (!_PyIndex_Check(tmp_subobj)) {
     471           0 :         PyErr_Format(PyExc_TypeError,
     472             :                      "argument should be integer or bytes-like object, "
     473             :                      "not '%.200s'",
     474           0 :                      Py_TYPE(tmp_subobj)->tp_name);
     475           0 :         return 0;
     476             :     }
     477             : 
     478      466350 :     ival = PyNumber_AsSsize_t(tmp_subobj, NULL);
     479      466350 :     if (ival == -1 && PyErr_Occurred()) {
     480           0 :         return 0;
     481             :     }
     482      466350 :     if (ival < 0 || ival > 255) {
     483          36 :         PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
     484          36 :         return 0;
     485             :     }
     486             : 
     487      466314 :     *subobj = NULL;
     488      466314 :     *byte = (char)ival;
     489      466314 :     return 1;
     490             : }
     491             : 
     492             : /* helper macro to fixup start/end slice values */
     493             : #define ADJUST_INDICES(start, end, len)         \
     494             :     if (end > len)                          \
     495             :         end = len;                          \
     496             :     else if (end < 0) {                     \
     497             :         end += len;                         \
     498             :         if (end < 0)                        \
     499             :         end = 0;                        \
     500             :     }                                       \
     501             :     if (start < 0) {                        \
     502             :         start += len;                       \
     503             :         if (start < 0)                      \
     504             :         start = 0;                      \
     505             :     }
     506             : 
     507             : Py_LOCAL_INLINE(Py_ssize_t)
     508     1232100 : find_internal(const char *str, Py_ssize_t len,
     509             :               const char *function_name, PyObject *args, int dir)
     510             : {
     511             :     PyObject *subobj;
     512             :     char byte;
     513             :     Py_buffer subbuf;
     514             :     const char *sub;
     515             :     Py_ssize_t sub_len;
     516     1232100 :     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
     517             :     Py_ssize_t res;
     518             : 
     519     1232100 :     if (!parse_args_finds_byte(function_name, args,
     520             :                                &subobj, &byte, &start, &end))
     521          46 :         return -2;
     522             : 
     523     1232060 :     if (subobj) {
     524      765755 :         if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
     525           0 :             return -2;
     526             : 
     527      765755 :         sub = subbuf.buf;
     528      765755 :         sub_len = subbuf.len;
     529             :     }
     530             :     else {
     531      466300 :         sub = &byte;
     532      466300 :         sub_len = 1;
     533             :     }
     534             : 
     535     1232060 :     ADJUST_INDICES(start, end, len);
     536     1232060 :     if (end - start < sub_len)
     537      132416 :         res = -1;
     538     1099640 :     else if (sub_len == 1) {
     539      701860 :         if (dir > 0)
     540      693700 :             res = stringlib_find_char(
     541             :                 str + start, end - start,
     542      693700 :                 *sub);
     543             :         else
     544        8160 :             res = stringlib_rfind_char(
     545             :                 str + start, end - start,
     546        8160 :                 *sub);
     547      701860 :         if (res >= 0)
     548      566843 :             res += start;
     549             :     }
     550             :     else {
     551      397779 :         if (dir > 0)
     552      200476 :             res = stringlib_find_slice(
     553             :                 str, len,
     554             :                 sub, sub_len, start, end);
     555             :         else
     556      197303 :             res = stringlib_rfind_slice(
     557             :                 str, len,
     558             :                 sub, sub_len, start, end);
     559             :     }
     560             : 
     561     1232060 :     if (subobj)
     562      765755 :         PyBuffer_Release(&subbuf);
     563             : 
     564     1232060 :     return res;
     565             : }
     566             : 
     567             : PyDoc_STRVAR_shared(_Py_find__doc__,
     568             : "B.find(sub[, start[, end]]) -> int\n\
     569             : \n\
     570             : Return the lowest index in B where subsection sub is found,\n\
     571             : such that sub is contained within B[start,end].  Optional\n\
     572             : arguments start and end are interpreted as in slice notation.\n\
     573             : \n\
     574             : Return -1 on failure.");
     575             : 
     576             : PyObject *
     577      960389 : _Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args)
     578             : {
     579      960389 :     Py_ssize_t result = find_internal(str, len, "find", args, +1);
     580      960389 :     if (result == -2)
     581          16 :         return NULL;
     582      960373 :     return PyLong_FromSsize_t(result);
     583             : }
     584             : 
     585             : PyDoc_STRVAR_shared(_Py_index__doc__,
     586             : "B.index(sub[, start[, end]]) -> int\n\
     587             : \n\
     588             : Return the lowest index in B where subsection sub is found,\n\
     589             : such that sub is contained within B[start,end].  Optional\n\
     590             : arguments start and end are interpreted as in slice notation.\n\
     591             : \n\
     592             : Raises ValueError when the subsection is not found.");
     593             : 
     594             : PyObject *
     595         137 : _Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
     596             : {
     597         137 :     Py_ssize_t result = find_internal(str, len, "index", args, +1);
     598         137 :     if (result == -2)
     599          10 :         return NULL;
     600         127 :     if (result == -1) {
     601          33 :         PyErr_SetString(PyExc_ValueError,
     602             :                         "subsection not found");
     603          33 :         return NULL;
     604             :     }
     605          94 :     return PyLong_FromSsize_t(result);
     606             : }
     607             : 
     608             : PyDoc_STRVAR_shared(_Py_rfind__doc__,
     609             : "B.rfind(sub[, start[, end]]) -> int\n\
     610             : \n\
     611             : Return the highest index in B where subsection sub is found,\n\
     612             : such that sub is contained within B[start,end].  Optional\n\
     613             : arguments start and end are interpreted as in slice notation.\n\
     614             : \n\
     615             : Return -1 on failure.");
     616             : 
     617             : PyObject *
     618      271481 : _Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args)
     619             : {
     620      271481 :     Py_ssize_t result = find_internal(str, len, "rfind", args, -1);
     621      271481 :     if (result == -2)
     622          10 :         return NULL;
     623      271471 :     return PyLong_FromSsize_t(result);
     624             : }
     625             : 
     626             : PyDoc_STRVAR_shared(_Py_rindex__doc__,
     627             : "B.rindex(sub[, start[, end]]) -> int\n\
     628             : \n\
     629             : Return the highest index in B where subsection sub is found,\n\
     630             : such that sub is contained within B[start,end].  Optional\n\
     631             : arguments start and end are interpreted as in slice notation.\n\
     632             : \n\
     633             : Raise ValueError when the subsection is not found.");
     634             : 
     635             : PyObject *
     636          94 : _Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
     637             : {
     638          94 :     Py_ssize_t result = find_internal(str, len, "rindex", args, -1);
     639          94 :     if (result == -2)
     640          10 :         return NULL;
     641          84 :     if (result == -1) {
     642          24 :         PyErr_SetString(PyExc_ValueError,
     643             :                         "subsection not found");
     644          24 :         return NULL;
     645             :     }
     646          60 :     return PyLong_FromSsize_t(result);
     647             : }
     648             : 
     649             : PyDoc_STRVAR_shared(_Py_count__doc__,
     650             : "B.count(sub[, start[, end]]) -> int\n\
     651             : \n\
     652             : Return the number of non-overlapping occurrences of subsection sub in\n\
     653             : bytes B[start:end].  Optional arguments start and end are interpreted\n\
     654             : as in slice notation.");
     655             : 
     656             : PyObject *
     657      144921 : _Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args)
     658             : {
     659             :     PyObject *sub_obj;
     660             :     const char *sub;
     661             :     Py_ssize_t sub_len;
     662             :     char byte;
     663      144921 :     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
     664             : 
     665             :     Py_buffer vsub;
     666             :     PyObject *count_obj;
     667             : 
     668      144921 :     if (!parse_args_finds_byte("count", args,
     669             :                                &sub_obj, &byte, &start, &end))
     670          10 :         return NULL;
     671             : 
     672      144911 :     if (sub_obj) {
     673      144897 :         if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
     674           0 :             return NULL;
     675             : 
     676      144897 :         sub = vsub.buf;
     677      144897 :         sub_len = vsub.len;
     678             :     }
     679             :     else {
     680          14 :         sub = &byte;
     681          14 :         sub_len = 1;
     682             :     }
     683             : 
     684      144911 :     ADJUST_INDICES(start, end, len);
     685             : 
     686      144911 :     count_obj = PyLong_FromSsize_t(
     687             :         stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
     688             :         );
     689             : 
     690      144911 :     if (sub_obj)
     691      144897 :         PyBuffer_Release(&vsub);
     692             : 
     693      144911 :     return count_obj;
     694             : }
     695             : 
     696             : int
     697      760293 : _Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg)
     698             : {
     699      760293 :     Py_ssize_t ival = PyNumber_AsSsize_t(arg, NULL);
     700      760293 :     if (ival == -1 && PyErr_Occurred()) {
     701             :         Py_buffer varg;
     702             :         Py_ssize_t pos;
     703      753185 :         PyErr_Clear();
     704      753185 :         if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0)
     705           9 :             return -1;
     706      753176 :         pos = stringlib_find(str, len,
     707      753176 :                              varg.buf, varg.len, 0);
     708      753176 :         PyBuffer_Release(&varg);
     709      753176 :         return pos >= 0;
     710             :     }
     711        7108 :     if (ival < 0 || ival >= 256) {
     712           6 :         PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
     713           6 :         return -1;
     714             :     }
     715             : 
     716        7102 :     return memchr(str, (int) ival, len) != NULL;
     717             : }
     718             : 
     719             : 
     720             : /* Matches the end (direction >= 0) or start (direction < 0) of the buffer
     721             :  * against substr, using the start and end arguments. Returns
     722             :  * -1 on error, 0 if not found and 1 if found.
     723             :  */
     724             : static int
     725      203762 : tailmatch(const char *str, Py_ssize_t len, PyObject *substr,
     726             :           Py_ssize_t start, Py_ssize_t end, int direction)
     727             : {
     728      203762 :     Py_buffer sub_view = {NULL, NULL};
     729             :     const char *sub;
     730             :     Py_ssize_t slen;
     731             : 
     732      203762 :     if (PyBytes_Check(substr)) {
     733      203733 :         sub = PyBytes_AS_STRING(substr);
     734      203733 :         slen = PyBytes_GET_SIZE(substr);
     735             :     }
     736             :     else {
     737          29 :         if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0)
     738           5 :             return -1;
     739          24 :         sub = sub_view.buf;
     740          24 :         slen = sub_view.len;
     741             :     }
     742             : 
     743      203757 :     ADJUST_INDICES(start, end, len);
     744             : 
     745      203757 :     if (direction < 0) {
     746             :         /* startswith */
     747       60508 :         if (start > len - slen)
     748         892 :             goto notfound;
     749             :     } else {
     750             :         /* endswith */
     751      143249 :         if (end - start < slen || start > len)
     752         651 :             goto notfound;
     753             : 
     754      142598 :         if (end - slen > start)
     755      141287 :             start = end - slen;
     756             :     }
     757      202214 :     if (end - start < slen)
     758           0 :         goto notfound;
     759      202214 :     if (memcmp(str + start, sub, slen) != 0)
     760      116840 :         goto notfound;
     761             : 
     762       85374 :     PyBuffer_Release(&sub_view);
     763       85374 :     return 1;
     764             : 
     765      118383 : notfound:
     766      118383 :     PyBuffer_Release(&sub_view);
     767      118383 :     return 0;
     768             : }
     769             : 
     770             : static PyObject *
     771      203549 : _Py_bytes_tailmatch(const char *str, Py_ssize_t len,
     772             :                     const char *function_name, PyObject *args,
     773             :                     int direction)
     774             : {
     775      203549 :     Py_ssize_t start = 0;
     776      203549 :     Py_ssize_t end = PY_SSIZE_T_MAX;
     777             :     PyObject *subobj;
     778             :     int result;
     779             : 
     780      203549 :     if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end))
     781           4 :         return NULL;
     782      203545 :     if (PyTuple_Check(subobj)) {
     783             :         Py_ssize_t i;
     784         651 :         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
     785         441 :             result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i),
     786             :                                start, end, direction);
     787         441 :             if (result == -1)
     788           0 :                 return NULL;
     789         441 :             else if (result) {
     790          14 :                 Py_RETURN_TRUE;
     791             :             }
     792             :         }
     793         210 :         Py_RETURN_FALSE;
     794             :     }
     795      203321 :     result = tailmatch(str, len, subobj, start, end, direction);
     796      203321 :     if (result == -1) {
     797           5 :         if (PyErr_ExceptionMatches(PyExc_TypeError))
     798           5 :             PyErr_Format(PyExc_TypeError,
     799             :                          "%s first arg must be bytes or a tuple of bytes, "
     800             :                          "not %s",
     801           5 :                          function_name, Py_TYPE(subobj)->tp_name);
     802           5 :         return NULL;
     803             :     }
     804             :     else
     805      203316 :         return PyBool_FromLong(result);
     806             : }
     807             : 
     808             : PyDoc_STRVAR_shared(_Py_startswith__doc__,
     809             : "B.startswith(prefix[, start[, end]]) -> bool\n\
     810             : \n\
     811             : Return True if B starts with the specified prefix, False otherwise.\n\
     812             : With optional start, test B beginning at that position.\n\
     813             : With optional end, stop comparing B at that position.\n\
     814             : prefix can also be a tuple of bytes to try.");
     815             : 
     816             : PyObject *
     817       60296 : _Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args)
     818             : {
     819       60296 :     return _Py_bytes_tailmatch(str, len, "startswith", args, -1);
     820             : }
     821             : 
     822             : PyDoc_STRVAR_shared(_Py_endswith__doc__,
     823             : "B.endswith(suffix[, start[, end]]) -> bool\n\
     824             : \n\
     825             : Return True if B ends with the specified suffix, False otherwise.\n\
     826             : With optional start, test B beginning at that position.\n\
     827             : With optional end, stop comparing B at that position.\n\
     828             : suffix can also be a tuple of bytes to try.");
     829             : 
     830             : PyObject *
     831      143253 : _Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args)
     832             : {
     833      143253 :     return _Py_bytes_tailmatch(str, len, "endswith", args, +1);
     834             : }
 |