LCOV - code coverage report
Current view: top level - Objects/stringlib - fastsearch.h (source / functions) Hit Total Coverage
Test: CPython lcov report Lines: 278 335 83.0 %
Date: 2022-07-07 18:19:46 Functions: 45 65 69.2 %

          Line data    Source code
       1             : /* stringlib: fastsearch implementation */
       2             : 
       3             : #define STRINGLIB_FASTSEARCH_H
       4             : 
       5             : /* fast search/count implementation, based on a mix between boyer-
       6             :    moore and horspool, with a few more bells and whistles on the top.
       7             :    for some more background, see:
       8             :    https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
       9             : 
      10             : /* note: fastsearch may access s[n], which isn't a problem when using
      11             :    Python's ordinary string types, but may cause problems if you're
      12             :    using this code in other contexts.  also, the count mode returns -1
      13             :    if there cannot possibly be a match in the target string, and 0 if
      14             :    it has actually checked for matches, but didn't find any.  callers
      15             :    beware! */
      16             : 
      17             : /* If the strings are long enough, use Crochemore and Perrin's Two-Way
      18             :    algorithm, which has worst-case O(n) runtime and best-case O(n/k).
      19             :    Also compute a table of shifts to achieve O(n/k) in more cases,
      20             :    and often (data dependent) deduce larger shifts than pure C&P can
      21             :    deduce. */
      22             : 
      23             : #define FAST_COUNT 0
      24             : #define FAST_SEARCH 1
      25             : #define FAST_RSEARCH 2
      26             : 
      27             : #if LONG_BIT >= 128
      28             : #define STRINGLIB_BLOOM_WIDTH 128
      29             : #elif LONG_BIT >= 64
      30             : #define STRINGLIB_BLOOM_WIDTH 64
      31             : #elif LONG_BIT >= 32
      32             : #define STRINGLIB_BLOOM_WIDTH 32
      33             : #else
      34             : #error "LONG_BIT is smaller than 32"
      35             : #endif
      36             : 
      37             : #define STRINGLIB_BLOOM_ADD(mask, ch) \
      38             :     ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
      39             : #define STRINGLIB_BLOOM(mask, ch)     \
      40             :     ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
      41             : 
      42             : #ifdef STRINGLIB_FAST_MEMCHR
      43             : #  define MEMCHR_CUT_OFF 15
      44             : #else
      45             : #  define MEMCHR_CUT_OFF 40
      46             : #endif
      47             : 
      48             : Py_LOCAL_INLINE(Py_ssize_t)
      49    35726931 : STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
      50             : {
      51             :     const STRINGLIB_CHAR *p, *e;
      52             : 
      53    35726931 :     p = s;
      54    35726931 :     e = s + n;
      55    35726931 :     if (n > MEMCHR_CUT_OFF) {
      56             : #ifdef STRINGLIB_FAST_MEMCHR
      57     1953926 :         p = STRINGLIB_FAST_MEMCHR(s, ch, n);
      58     1953926 :         if (p != NULL)
      59     1159296 :             return (p - s);
      60      794630 :         return -1;
      61             : #else
      62             :         /* use memchr if we can choose a needle without too many likely
      63             :            false positives */
      64             :         const STRINGLIB_CHAR *s1, *e1;
      65       16809 :         unsigned char needle = ch & 0xff;
      66             :         /* If looking for a multiple of 256, we'd have too
      67             :            many false positives looking for the '\0' byte in UCS2
      68             :            and UCS4 representations. */
      69       16809 :         if (needle != 0) {
      70             :             do {
      71       16801 :                 void *candidate = memchr(p, needle,
      72       16801 :                                          (e - p) * sizeof(STRINGLIB_CHAR));
      73       16801 :                 if (candidate == NULL)
      74          21 :                     return -1;
      75       16780 :                 s1 = p;
      76       16780 :                 p = (const STRINGLIB_CHAR *)
      77       16780 :                         _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
      78       16780 :                 if (*p == ch)
      79       16776 :                     return (p - s);
      80             :                 /* False positive */
      81           4 :                 p++;
      82           4 :                 if (p - s1 > MEMCHR_CUT_OFF)
      83           2 :                     continue;
      84           2 :                 if (e - p <= MEMCHR_CUT_OFF)
      85           0 :                     break;
      86           2 :                 e1 = p + MEMCHR_CUT_OFF;
      87           6 :                 while (p != e1) {
      88           6 :                     if (*p == ch)
      89           2 :                         return (p - s);
      90           4 :                     p++;
      91             :                 }
      92             :             }
      93           2 :             while (e - p > MEMCHR_CUT_OFF);
      94             :         }
      95             : #endif
      96             :     }
      97   170655167 :     while (p < e) {
      98   142103331 :         if (*p == ch)
      99     5204509 :             return (p - s);
     100   136899137 :         p++;
     101             :     }
     102    28551622 :     return -1;
     103             : }
     104             : 
     105             : #undef MEMCHR_CUT_OFF
     106             : 
     107             : #if STRINGLIB_SIZEOF_CHAR == 1
     108             : #  define MEMRCHR_CUT_OFF 15
     109             : #else
     110             : #  define MEMRCHR_CUT_OFF 40
     111             : #endif
     112             : 
     113             : 
     114             : Py_LOCAL_INLINE(Py_ssize_t)
     115     2987253 : STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
     116             : {
     117             :     const STRINGLIB_CHAR *p;
     118             : #ifdef HAVE_MEMRCHR
     119             :     /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
     120             :        doesn't seem as optimized as memchr(), but is still quite
     121             :        faster than our hand-written loop below. There is no wmemrchr
     122             :        for 4-byte chars. */
     123             : 
     124     2987253 :     if (n > MEMRCHR_CUT_OFF) {
     125             : #if STRINGLIB_SIZEOF_CHAR == 1
     126     1208071 :         p = memrchr(s, ch, n);
     127     1208071 :         if (p != NULL)
     128     1156831 :             return (p - s);
     129       51240 :         return -1;
     130             : #else
     131             :         /* use memrchr if we can choose a needle without too many likely
     132             :            false positives */
     133             :         const STRINGLIB_CHAR *s1;
     134             :         Py_ssize_t n1;
     135          68 :         unsigned char needle = ch & 0xff;
     136             :         /* If looking for a multiple of 256, we'd have too
     137             :            many false positives looking for the '\0' byte in UCS2
     138             :            and UCS4 representations. */
     139          68 :         if (needle != 0) {
     140             :             do {
     141          64 :                 void *candidate = memrchr(s, needle,
     142             :                                           n * sizeof(STRINGLIB_CHAR));
     143          64 :                 if (candidate == NULL)
     144           2 :                     return -1;
     145          62 :                 n1 = n;
     146          62 :                 p = (const STRINGLIB_CHAR *)
     147          62 :                         _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
     148          62 :                 n = p - s;
     149          62 :                 if (*p == ch)
     150          59 :                     return n;
     151             :                 /* False positive */
     152           3 :                 if (n1 - n > MEMRCHR_CUT_OFF)
     153           3 :                     continue;
     154           0 :                 if (n <= MEMRCHR_CUT_OFF)
     155           0 :                     break;
     156           0 :                 s1 = p - MEMRCHR_CUT_OFF;
     157           0 :                 while (p > s1) {
     158           0 :                     p--;
     159           0 :                     if (*p == ch)
     160           0 :                         return (p - s);
     161             :                 }
     162           0 :                 n = p - s;
     163             :             }
     164           3 :             while (n > MEMRCHR_CUT_OFF);
     165             :         }
     166             : #endif
     167             :     }
     168             : #endif  /* HAVE_MEMRCHR */
     169     1779122 :     p = s + n;
     170    11017146 :     while (p > s) {
     171    10020229 :         p--;
     172    10020229 :         if (*p == ch)
     173      782207 :             return (p - s);
     174             :     }
     175      996910 :     return -1;
     176             : }
     177             : 
     178             : #undef MEMRCHR_CUT_OFF
     179             : 
     180             : /* Change to a 1 to see logging comments walk through the algorithm. */
     181             : #if 0 && STRINGLIB_SIZEOF_CHAR == 1
     182             : # define LOG(...) printf(__VA_ARGS__)
     183             : # define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
     184             : # define LOG_LINEUP() do {                                         \
     185             :     LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> ");    \
     186             :     LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
     187             :     LOG_STRING(needle, len_needle); LOG("\n");                     \
     188             : } while(0)
     189             : #else
     190             : # define LOG(...)
     191             : # define LOG_STRING(s, n)
     192             : # define LOG_LINEUP()
     193             : #endif
     194             : 
     195             : Py_LOCAL_INLINE(Py_ssize_t)
     196        1252 : STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
     197             :                        Py_ssize_t *return_period, int invert_alphabet)
     198             : {
     199             :     /* Do a lexicographic search. Essentially this:
     200             :            >>> max(needle[i:] for i in range(len(needle)+1))
     201             :        Also find the period of the right half.   */
     202        1252 :     Py_ssize_t max_suffix = 0;
     203        1252 :     Py_ssize_t candidate = 1;
     204        1252 :     Py_ssize_t k = 0;
     205             :     // The period of the right half.
     206        1252 :     Py_ssize_t period = 1;
     207             : 
     208      115686 :     while (candidate + k < len_needle) {
     209             :         // each loop increases candidate + k + max_suffix
     210      114434 :         STRINGLIB_CHAR a = needle[candidate + k];
     211      114434 :         STRINGLIB_CHAR b = needle[max_suffix + k];
     212             :         // check if the suffix at candidate is better than max_suffix
     213      114434 :         if (invert_alphabet ? (b < a) : (a < b)) {
     214             :             // Fell short of max_suffix.
     215             :             // The next k + 1 characters are non-increasing
     216             :             // from candidate, so they won't start a maximal suffix.
     217       31949 :             candidate += k + 1;
     218       31949 :             k = 0;
     219             :             // We've ruled out any period smaller than what's
     220             :             // been scanned since max_suffix.
     221       31949 :             period = candidate - max_suffix;
     222             :         }
     223       82485 :         else if (a == b) {
     224       80852 :             if (k + 1 != period) {
     225             :                 // Keep scanning the equal strings
     226       66816 :                 k++;
     227             :             }
     228             :             else {
     229             :                 // Matched a whole period.
     230             :                 // Start matching the next period.
     231       14036 :                 candidate += period;
     232       14036 :                 k = 0;
     233             :             }
     234             :         }
     235             :         else {
     236             :             // Did better than max_suffix, so replace it.
     237        1633 :             max_suffix = candidate;
     238        1633 :             candidate++;
     239        1633 :             k = 0;
     240        1633 :             period = 1;
     241             :         }
     242             :     }
     243        1252 :     *return_period = period;
     244        1252 :     return max_suffix;
     245             : }
     246             : 
     247             : Py_LOCAL_INLINE(Py_ssize_t)
     248         626 : STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
     249             :                       Py_ssize_t len_needle,
     250             :                       Py_ssize_t *return_period)
     251             : {
     252             :     /* Do a "critical factorization", making it so that:
     253             :        >>> needle = (left := needle[:cut]) + (right := needle[cut:])
     254             :        where the "local period" of the cut is maximal.
     255             : 
     256             :        The local period of the cut is the minimal length of a string w
     257             :        such that (left endswith w or w endswith left)
     258             :        and (right startswith w or w startswith left).
     259             : 
     260             :        The Critical Factorization Theorem says that this maximal local
     261             :        period is the global period of the string.
     262             : 
     263             :        Crochemore and Perrin (1991) show that this cut can be computed
     264             :        as the later of two cuts: one that gives a lexicographically
     265             :        maximal right half, and one that gives the same with the
     266             :        with respect to a reversed alphabet-ordering.
     267             : 
     268             :        This is what we want to happen:
     269             :            >>> x = "GCAGAGAG"
     270             :            >>> cut, period = factorize(x)
     271             :            >>> x[:cut], (right := x[cut:])
     272             :            ('GC', 'AGAGAG')
     273             :            >>> period  # right half period
     274             :            2
     275             :            >>> right[period:] == right[:-period]
     276             :            True
     277             : 
     278             :        This is how the local period lines up in the above example:
     279             :                 GC | AGAGAG
     280             :            AGAGAGC = AGAGAGC
     281             :        The length of this minimal repetition is 7, which is indeed the
     282             :        period of the original string. */
     283             : 
     284             :     Py_ssize_t cut1, period1, cut2, period2, cut, period;
     285         626 :     cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
     286         626 :     cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
     287             : 
     288             :     // Take the later cut.
     289         626 :     if (cut1 > cut2) {
     290         484 :         period = period1;
     291         484 :         cut = cut1;
     292             :     }
     293             :     else {
     294         142 :         period = period2;
     295         142 :         cut = cut2;
     296             :     }
     297             : 
     298             :     LOG("split: "); LOG_STRING(needle, cut);
     299             :     LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
     300             :     LOG("\n");
     301             : 
     302         626 :     *return_period = period;
     303         626 :     return cut;
     304             : }
     305             : 
     306             : 
     307             : #define SHIFT_TYPE uint8_t
     308             : #define MAX_SHIFT UINT8_MAX
     309             : 
     310             : #define TABLE_SIZE_BITS 6u
     311             : #define TABLE_SIZE (1U << TABLE_SIZE_BITS)
     312             : #define TABLE_MASK (TABLE_SIZE - 1U)
     313             : 
     314             : typedef struct STRINGLIB(_pre) {
     315             :     const STRINGLIB_CHAR *needle;
     316             :     Py_ssize_t len_needle;
     317             :     Py_ssize_t cut;
     318             :     Py_ssize_t period;
     319             :     Py_ssize_t gap;
     320             :     int is_periodic;
     321             :     SHIFT_TYPE table[TABLE_SIZE];
     322             : } STRINGLIB(prework);
     323             : 
     324             : 
     325             : static void
     326         626 : STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
     327             :                        STRINGLIB(prework) *p)
     328             : {
     329         626 :     p->needle = needle;
     330         626 :     p->len_needle = len_needle;
     331         626 :     p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
     332         626 :     assert(p->period + p->cut <= len_needle);
     333         626 :     p->is_periodic = (0 == memcmp(needle,
     334         626 :                                   needle + p->period,
     335         626 :                                   p->cut * STRINGLIB_SIZEOF_CHAR));
     336         626 :     if (p->is_periodic) {
     337         280 :         assert(p->cut <= len_needle/2);
     338         280 :         assert(p->cut < p->period);
     339         280 :         p->gap = 0; // unused
     340             :     }
     341             :     else {
     342             :         // A lower bound on the period
     343         346 :         p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
     344             :         // The gap between the last character and the previous
     345             :         // occurrence of an equivalent character (modulo TABLE_SIZE)
     346         346 :         p->gap = len_needle;
     347         346 :         STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
     348        9745 :         for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
     349        9732 :             STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
     350        9732 :             if (x == last) {
     351         333 :                 p->gap = len_needle - 1 - i;
     352         333 :                 break;
     353             :             }
     354             :         }
     355             :     }
     356             :     // Fill up a compressed Boyer-Moore "Bad Character" table
     357         626 :     Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
     358       40690 :     for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
     359       40064 :         p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
     360             :                                        Py_ssize_t, SHIFT_TYPE);
     361             :     }
     362       42833 :     for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
     363       42207 :         SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
     364             :                                             Py_ssize_t, SHIFT_TYPE);
     365       42207 :         p->table[needle[i] & TABLE_MASK] = shift;
     366             :     }
     367         626 : }
     368             : 
     369             : static Py_ssize_t
     370         682 : STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
     371             :                     STRINGLIB(prework) *p)
     372             : {
     373             :     // Crochemore and Perrin's (1991) Two-Way algorithm.
     374             :     // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
     375         682 :     const Py_ssize_t len_needle = p->len_needle;
     376         682 :     const Py_ssize_t cut = p->cut;
     377         682 :     Py_ssize_t period = p->period;
     378         682 :     const STRINGLIB_CHAR *const needle = p->needle;
     379         682 :     const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
     380         682 :     const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
     381         682 :     SHIFT_TYPE *table = p->table;
     382             :     const STRINGLIB_CHAR *window;
     383             :     LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
     384             : 
     385         682 :     if (p->is_periodic) {
     386             :         LOG("Needle is periodic.\n");
     387         295 :         Py_ssize_t memory = 0;
     388        7065 :       periodicwindowloop:
     389        7065 :         while (window_last < haystack_end) {
     390        7065 :             assert(memory == 0);
     391       27721 :             for (;;) {
     392             :                 LOG_LINEUP();
     393       34786 :                 Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
     394       34786 :                 window_last += shift;
     395       34786 :                 if (shift == 0) {
     396        7064 :                     break;
     397             :                 }
     398       27722 :                 if (window_last >= haystack_end) {
     399           1 :                     return -1;
     400             :                 }
     401             :                 LOG("Horspool skip");
     402             :             }
     403        7082 :           no_shift:
     404        7082 :             window = window_last - len_needle + 1;
     405        7082 :             assert((window[len_needle - 1] & TABLE_MASK) ==
     406             :                    (needle[len_needle - 1] & TABLE_MASK));
     407        7082 :             Py_ssize_t i = Py_MAX(cut, memory);
     408       43624 :             for (; i < len_needle; i++) {
     409       43312 :                 if (needle[i] != window[i]) {
     410             :                     LOG("Right half does not match.\n");
     411        6770 :                     window_last += i - cut + 1;
     412        6770 :                     memory = 0;
     413        6770 :                     goto periodicwindowloop;
     414             :                 }
     415             :             }
     416        3100 :             for (i = memory; i < cut; i++) {
     417        2806 :                 if (needle[i] != window[i]) {
     418             :                     LOG("Left half does not match.\n");
     419          18 :                     window_last += period;
     420          18 :                     memory = len_needle - period;
     421          18 :                     if (window_last >= haystack_end) {
     422           0 :                         return -1;
     423             :                     }
     424          18 :                     Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
     425          18 :                     if (shift) {
     426             :                         // A mismatch has been identified to the right
     427             :                         // of where i will next start, so we can jump
     428             :                         // at least as far as if the mismatch occurred
     429             :                         // on the first comparison.
     430           0 :                         Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
     431             :                         LOG("Skip with Memory.\n");
     432           0 :                         memory = 0;
     433           0 :                         window_last += Py_MAX(shift, mem_jump);
     434           0 :                         goto periodicwindowloop;
     435             :                     }
     436          18 :                     goto no_shift;
     437             :                 }
     438             :             }
     439             :             LOG("Found a match!\n");
     440         294 :             return window - haystack;
     441             :         }
     442             :     }
     443             :     else {
     444         387 :         Py_ssize_t gap = p->gap;
     445         387 :         period = Py_MAX(gap, period);
     446             :         LOG("Needle is not periodic.\n");
     447         387 :         Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
     448       37212 :       windowloop:
     449       37212 :         while (window_last < haystack_end) {
     450     2129497 :             for (;;) {
     451             :                 LOG_LINEUP();
     452     2166705 :                 Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
     453     2166705 :                 window_last += shift;
     454     2166705 :                 if (shift == 0) {
     455       37032 :                     break;
     456             :                 }
     457     2129679 :                 if (window_last >= haystack_end) {
     458         180 :                     return -1;
     459             :                 }
     460             :                 LOG("Horspool skip");
     461             :             }
     462       37032 :             window = window_last - len_needle + 1;
     463       37032 :             assert((window[len_needle - 1] & TABLE_MASK) ==
     464             :                    (needle[len_needle - 1] & TABLE_MASK));
     465       49697 :             for (Py_ssize_t i = cut; i < gap_jump_end; i++) {
     466       47210 :                 if (needle[i] != window[i]) {
     467             :                     LOG("Early right half mismatch: jump by gap.\n");
     468       34545 :                     assert(gap >= i - cut + 1);
     469       34545 :                     window_last += gap;
     470       34545 :                     goto windowloop;
     471             :                 }
     472             :             }
     473        6198 :             for (Py_ssize_t i = gap_jump_end; i < len_needle; i++) {
     474        5808 :                 if (needle[i] != window[i]) {
     475             :                     LOG("Late right half mismatch.\n");
     476        2097 :                     assert(i - cut + 1 > gap);
     477        2097 :                     window_last += i - cut + 1;
     478        2097 :                     goto windowloop;
     479             :                 }
     480             :             }
     481       11490 :             for (Py_ssize_t i = 0; i < cut; i++) {
     482       11283 :                 if (needle[i] != window[i]) {
     483             :                     LOG("Left half does not match.\n");
     484         183 :                     window_last += period;
     485         183 :                     goto windowloop;
     486             :                 }
     487             :             }
     488             :             LOG("Found a match!\n");
     489         207 :             return window - haystack;
     490             :         }
     491             :     }
     492             :     LOG("Not found. Returning -1.\n");
     493           0 :     return -1;
     494             : }
     495             : 
     496             : 
     497             : static Py_ssize_t
     498         623 : STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
     499             :                          Py_ssize_t len_haystack,
     500             :                          const STRINGLIB_CHAR *needle,
     501             :                          Py_ssize_t len_needle)
     502             : {
     503             :     LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
     504             :     STRINGLIB(prework) p;
     505         623 :     STRINGLIB(_preprocess)(needle, len_needle, &p);
     506         623 :     return STRINGLIB(_two_way)(haystack, len_haystack, &p);
     507             : }
     508             : 
     509             : 
     510             : static Py_ssize_t
     511           3 : STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
     512             :                           Py_ssize_t len_haystack,
     513             :                           const STRINGLIB_CHAR *needle,
     514             :                           Py_ssize_t len_needle,
     515             :                           Py_ssize_t maxcount)
     516             : {
     517             :     LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
     518             :     STRINGLIB(prework) p;
     519           3 :     STRINGLIB(_preprocess)(needle, len_needle, &p);
     520           3 :     Py_ssize_t index = 0, count = 0;
     521          56 :     while (1) {
     522             :         Py_ssize_t result;
     523          59 :         result = STRINGLIB(_two_way)(haystack + index,
     524             :                                      len_haystack - index, &p);
     525          59 :         if (result == -1) {
     526           3 :             return count;
     527             :         }
     528          56 :         count++;
     529          56 :         if (count == maxcount) {
     530           0 :             return maxcount;
     531             :         }
     532          56 :         index += result + len_needle;
     533             :     }
     534             :     return count;
     535             : }
     536             : 
     537             : #undef SHIFT_TYPE
     538             : #undef NOT_FOUND
     539             : #undef SHIFT_OVERFLOW
     540             : #undef TABLE_SIZE_BITS
     541             : #undef TABLE_SIZE
     542             : #undef TABLE_MASK
     543             : 
     544             : #undef LOG
     545             : #undef LOG_STRING
     546             : #undef LOG_LINEUP
     547             : 
     548             : static inline Py_ssize_t
     549     4919682 : STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
     550             :                         const STRINGLIB_CHAR* p, Py_ssize_t m,
     551             :                         Py_ssize_t maxcount, int mode)
     552             : {
     553     4919682 :     const Py_ssize_t w = n - m;
     554     4919682 :     Py_ssize_t mlast = m - 1, count = 0;
     555     4919682 :     Py_ssize_t gap = mlast;
     556     4919682 :     const STRINGLIB_CHAR last = p[mlast];
     557     4919682 :     const STRINGLIB_CHAR *const ss = &s[mlast];
     558             : 
     559     4919682 :     unsigned long mask = 0;
     560    18378323 :     for (Py_ssize_t i = 0; i < mlast; i++) {
     561    13458642 :         STRINGLIB_BLOOM_ADD(mask, p[i]);
     562    13458642 :         if (p[i] == last) {
     563     3492086 :             gap = mlast - i - 1;
     564             :         }
     565             :     }
     566     4919682 :     STRINGLIB_BLOOM_ADD(mask, last);
     567             : 
     568    51379195 :     for (Py_ssize_t i = 0; i <= w; i++) {
     569    47172052 :         if (ss[i] == last) {
     570             :             /* candidate match */
     571             :             Py_ssize_t j;
     572     5449830 :             for (j = 0; j < mlast; j++) {
     573     4674464 :                 if (s[i+j] != p[j]) {
     574     2433297 :                     break;
     575             :                 }
     576             :             }
     577     3208658 :             if (j == mlast) {
     578             :                 /* got a match! */
     579      775360 :                 if (mode != FAST_COUNT) {
     580      712470 :                     return i;
     581             :                 }
     582       62890 :                 count++;
     583       62890 :                 if (count == maxcount) {
     584          24 :                     return maxcount;
     585             :                 }
     586       62866 :                 i = i + mlast;
     587       62866 :                 continue;
     588             :             }
     589             :             /* miss: check if next character is part of pattern */
     590     2433297 :             if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
     591     1737259 :                 i = i + m;
     592             :             }
     593             :             else {
     594      696038 :                 i = i + gap;
     595             :             }
     596             :         }
     597             :         else {
     598             :             /* skip: check if next character is part of pattern */
     599    43963395 :             if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
     600    40763304 :                 i = i + m;
     601             :             }
     602             :         }
     603             :     }
     604     4207189 :     return mode == FAST_COUNT ? count : -1;
     605             : }
     606             : 
     607             : 
     608             : static Py_ssize_t
     609           0 : STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
     610             :                          const STRINGLIB_CHAR* p, Py_ssize_t m,
     611             :                          Py_ssize_t maxcount, int mode)
     612             : {
     613           0 :     const Py_ssize_t w = n - m;
     614           0 :     Py_ssize_t mlast = m - 1, count = 0;
     615           0 :     Py_ssize_t gap = mlast;
     616           0 :     Py_ssize_t hits = 0, res;
     617           0 :     const STRINGLIB_CHAR last = p[mlast];
     618           0 :     const STRINGLIB_CHAR *const ss = &s[mlast];
     619             : 
     620           0 :     unsigned long mask = 0;
     621           0 :     for (Py_ssize_t i = 0; i < mlast; i++) {
     622           0 :         STRINGLIB_BLOOM_ADD(mask, p[i]);
     623           0 :         if (p[i] == last) {
     624           0 :             gap = mlast - i - 1;
     625             :         }
     626             :     }
     627           0 :     STRINGLIB_BLOOM_ADD(mask, last);
     628             : 
     629           0 :     for (Py_ssize_t i = 0; i <= w; i++) {
     630           0 :         if (ss[i] == last) {
     631             :             /* candidate match */
     632             :             Py_ssize_t j;
     633           0 :             for (j = 0; j < mlast; j++) {
     634           0 :                 if (s[i+j] != p[j]) {
     635           0 :                     break;
     636             :                 }
     637             :             }
     638           0 :             if (j == mlast) {
     639             :                 /* got a match! */
     640           0 :                 if (mode != FAST_COUNT) {
     641           0 :                     return i;
     642             :                 }
     643           0 :                 count++;
     644           0 :                 if (count == maxcount) {
     645           0 :                     return maxcount;
     646             :                 }
     647           0 :                 i = i + mlast;
     648           0 :                 continue;
     649             :             }
     650           0 :             hits += j + 1;
     651           0 :             if (hits > m / 4 && w - i > 2000) {
     652           0 :                 if (mode == FAST_SEARCH) {
     653           0 :                     res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
     654           0 :                     return res == -1 ? -1 : res + i;
     655             :                 }
     656             :                 else {
     657           0 :                     res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
     658             :                                                     maxcount - count);
     659           0 :                     return res + count;
     660             :                 }
     661             :             }
     662             :             /* miss: check if next character is part of pattern */
     663           0 :             if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
     664           0 :                 i = i + m;
     665             :             }
     666             :             else {
     667           0 :                 i = i + gap;
     668             :             }
     669             :         }
     670             :         else {
     671             :             /* skip: check if next character is part of pattern */
     672           0 :             if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
     673           0 :                 i = i + m;
     674             :             }
     675             :         }
     676             :     }
     677           0 :     return mode == FAST_COUNT ? count : -1;
     678             : }
     679             : 
     680             : 
     681             : static Py_ssize_t
     682      393083 : STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
     683             :                          const STRINGLIB_CHAR* p, Py_ssize_t m,
     684             :                          Py_ssize_t maxcount, int mode)
     685             : {
     686             :     /* create compressed boyer-moore delta 1 table */
     687      393083 :     unsigned long mask = 0;
     688      393083 :     Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
     689             : 
     690             :     /* process pattern[0] outside the loop */
     691      393083 :     STRINGLIB_BLOOM_ADD(mask, p[0]);
     692             :     /* process pattern[:0:-1] */
     693     1744518 :     for (i = mlast; i > 0; i--) {
     694     1351435 :         STRINGLIB_BLOOM_ADD(mask, p[i]);
     695     1351435 :         if (p[i] == p[0]) {
     696      449740 :             skip = i - 1;
     697             :         }
     698             :     }
     699             : 
     700     1400925 :     for (i = w; i >= 0; i--) {
     701     1019577 :         if (s[i] == p[0]) {
     702             :             /* candidate match */
     703      255590 :             for (j = mlast; j > 0; j--) {
     704      243855 :                 if (s[i+j] != p[j]) {
     705      159994 :                     break;
     706             :                 }
     707             :             }
     708      171729 :             if (j == 0) {
     709             :                 /* got a match! */
     710       11735 :                 return i;
     711             :             }
     712             :             /* miss: check if previous character is part of pattern */
     713      159994 :             if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
     714       13471 :                 i = i - m;
     715             :             }
     716             :             else {
     717      146523 :                 i = i - skip;
     718             :             }
     719             :         }
     720             :         else {
     721             :             /* skip: check if previous character is part of pattern */
     722      847848 :             if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
     723      558279 :                 i = i - m;
     724             :             }
     725             :         }
     726             :     }
     727      381348 :     return -1;
     728             : }
     729             : 
     730             : 
     731             : static inline Py_ssize_t
     732     1600223 : STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
     733             :                       const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
     734             : {
     735     1600223 :     Py_ssize_t i, count = 0;
     736   559313874 :     for (i = 0; i < n; i++) {
     737   557713485 :         if (s[i] == p0) {
     738     8014718 :             count++;
     739     8014718 :             if (count == maxcount) {
     740          20 :                 return maxcount;
     741             :             }
     742             :         }
     743             :     }
     744     1600203 :     return count;
     745             : }
     746             : 
     747             : 
     748             : Py_LOCAL_INLINE(Py_ssize_t)
     749     9743076 : FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
     750             :            const STRINGLIB_CHAR* p, Py_ssize_t m,
     751             :            Py_ssize_t maxcount, int mode)
     752             : {
     753     9743076 :     if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
     754      181560 :         return -1;
     755             :     }
     756             : 
     757             :     /* look for special cases */
     758     9561516 :     if (m <= 1) {
     759     4248127 :         if (m <= 0) {
     760           0 :             return -1;
     761             :         }
     762             :         /* use special case for 1-character strings */
     763     4248127 :         if (mode == FAST_SEARCH)
     764      911996 :             return STRINGLIB(find_char)(s, n, p[0]);
     765     3336129 :         else if (mode == FAST_RSEARCH)
     766     1735906 :             return STRINGLIB(rfind_char)(s, n, p[0]);
     767             :         else {
     768     1600223 :             return STRINGLIB(count_char)(s, n, p[0], maxcount);
     769             :         }
     770             :     }
     771             : 
     772     5313392 :     if (mode != FAST_RSEARCH) {
     773     4920312 :         if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
     774     4919682 :             return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
     775             :         }
     776         626 :         else if ((m >> 2) * 3 < (n >> 2)) {
     777             :             /* 33% threshold, but don't overflow. */
     778             :             /* For larger problems where the needle isn't a huge
     779             :                percentage of the size of the haystack, the relatively
     780             :                expensive O(m) startup cost of the two-way algorithm
     781             :                will surely pay off. */
     782         626 :             if (mode == FAST_SEARCH) {
     783         623 :                 return STRINGLIB(_two_way_find)(s, n, p, m);
     784             :             }
     785             :             else {
     786           3 :                 return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
     787             :             }
     788             :         }
     789             :         else {
     790             :             /* To ensure that we have good worst-case behavior,
     791             :                here's an adaptive version of the algorithm, where if
     792             :                we match O(m) characters without any matches of the
     793             :                entire needle, then we predict that the startup cost of
     794             :                the two-way algorithm will probably be worth it. */
     795           0 :             return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
     796             :         }
     797             :     }
     798             :     else {
     799             :         /* FAST_RSEARCH */
     800      393083 :         return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
     801             :     }
     802             : }
     803             : 

Generated by: LCOV version 1.14