Line data Source code
1 : /* stringlib: fastsearch implementation */
2 :
3 : #define STRINGLIB_FASTSEARCH_H
4 :
5 : /* fast search/count implementation, based on a mix between boyer-
6 : moore and horspool, with a few more bells and whistles on the top.
7 : for some more background, see:
8 : https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
9 :
10 : /* note: fastsearch may access s[n], which isn't a problem when using
11 : Python's ordinary string types, but may cause problems if you're
12 : using this code in other contexts. also, the count mode returns -1
13 : if there cannot possibly be a match in the target string, and 0 if
14 : it has actually checked for matches, but didn't find any. callers
15 : beware! */
16 :
17 : /* If the strings are long enough, use Crochemore and Perrin's Two-Way
18 : algorithm, which has worst-case O(n) runtime and best-case O(n/k).
19 : Also compute a table of shifts to achieve O(n/k) in more cases,
20 : and often (data dependent) deduce larger shifts than pure C&P can
21 : deduce. */
22 :
23 : #define FAST_COUNT 0
24 : #define FAST_SEARCH 1
25 : #define FAST_RSEARCH 2
26 :
27 : #if LONG_BIT >= 128
28 : #define STRINGLIB_BLOOM_WIDTH 128
29 : #elif LONG_BIT >= 64
30 : #define STRINGLIB_BLOOM_WIDTH 64
31 : #elif LONG_BIT >= 32
32 : #define STRINGLIB_BLOOM_WIDTH 32
33 : #else
34 : #error "LONG_BIT is smaller than 32"
35 : #endif
36 :
37 : #define STRINGLIB_BLOOM_ADD(mask, ch) \
38 : ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
39 : #define STRINGLIB_BLOOM(mask, ch) \
40 : ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
41 :
42 : #ifdef STRINGLIB_FAST_MEMCHR
43 : # define MEMCHR_CUT_OFF 15
44 : #else
45 : # define MEMCHR_CUT_OFF 40
46 : #endif
47 :
48 : Py_LOCAL_INLINE(Py_ssize_t)
49 35726931 : STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
50 : {
51 : const STRINGLIB_CHAR *p, *e;
52 :
53 35726931 : p = s;
54 35726931 : e = s + n;
55 35726931 : if (n > MEMCHR_CUT_OFF) {
56 : #ifdef STRINGLIB_FAST_MEMCHR
57 1953926 : p = STRINGLIB_FAST_MEMCHR(s, ch, n);
58 1953926 : if (p != NULL)
59 1159296 : return (p - s);
60 794630 : return -1;
61 : #else
62 : /* use memchr if we can choose a needle without too many likely
63 : false positives */
64 : const STRINGLIB_CHAR *s1, *e1;
65 16809 : unsigned char needle = ch & 0xff;
66 : /* If looking for a multiple of 256, we'd have too
67 : many false positives looking for the '\0' byte in UCS2
68 : and UCS4 representations. */
69 16809 : if (needle != 0) {
70 : do {
71 16801 : void *candidate = memchr(p, needle,
72 16801 : (e - p) * sizeof(STRINGLIB_CHAR));
73 16801 : if (candidate == NULL)
74 21 : return -1;
75 16780 : s1 = p;
76 16780 : p = (const STRINGLIB_CHAR *)
77 16780 : _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
78 16780 : if (*p == ch)
79 16776 : return (p - s);
80 : /* False positive */
81 4 : p++;
82 4 : if (p - s1 > MEMCHR_CUT_OFF)
83 2 : continue;
84 2 : if (e - p <= MEMCHR_CUT_OFF)
85 0 : break;
86 2 : e1 = p + MEMCHR_CUT_OFF;
87 6 : while (p != e1) {
88 6 : if (*p == ch)
89 2 : return (p - s);
90 4 : p++;
91 : }
92 : }
93 2 : while (e - p > MEMCHR_CUT_OFF);
94 : }
95 : #endif
96 : }
97 170655167 : while (p < e) {
98 142103331 : if (*p == ch)
99 5204509 : return (p - s);
100 136899137 : p++;
101 : }
102 28551622 : return -1;
103 : }
104 :
105 : #undef MEMCHR_CUT_OFF
106 :
107 : #if STRINGLIB_SIZEOF_CHAR == 1
108 : # define MEMRCHR_CUT_OFF 15
109 : #else
110 : # define MEMRCHR_CUT_OFF 40
111 : #endif
112 :
113 :
114 : Py_LOCAL_INLINE(Py_ssize_t)
115 2987253 : STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
116 : {
117 : const STRINGLIB_CHAR *p;
118 : #ifdef HAVE_MEMRCHR
119 : /* memrchr() is a GNU extension, available since glibc 2.1.91. it
120 : doesn't seem as optimized as memchr(), but is still quite
121 : faster than our hand-written loop below. There is no wmemrchr
122 : for 4-byte chars. */
123 :
124 2987253 : if (n > MEMRCHR_CUT_OFF) {
125 : #if STRINGLIB_SIZEOF_CHAR == 1
126 1208071 : p = memrchr(s, ch, n);
127 1208071 : if (p != NULL)
128 1156831 : return (p - s);
129 51240 : return -1;
130 : #else
131 : /* use memrchr if we can choose a needle without too many likely
132 : false positives */
133 : const STRINGLIB_CHAR *s1;
134 : Py_ssize_t n1;
135 68 : unsigned char needle = ch & 0xff;
136 : /* If looking for a multiple of 256, we'd have too
137 : many false positives looking for the '\0' byte in UCS2
138 : and UCS4 representations. */
139 68 : if (needle != 0) {
140 : do {
141 64 : void *candidate = memrchr(s, needle,
142 : n * sizeof(STRINGLIB_CHAR));
143 64 : if (candidate == NULL)
144 2 : return -1;
145 62 : n1 = n;
146 62 : p = (const STRINGLIB_CHAR *)
147 62 : _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
148 62 : n = p - s;
149 62 : if (*p == ch)
150 59 : return n;
151 : /* False positive */
152 3 : if (n1 - n > MEMRCHR_CUT_OFF)
153 3 : continue;
154 0 : if (n <= MEMRCHR_CUT_OFF)
155 0 : break;
156 0 : s1 = p - MEMRCHR_CUT_OFF;
157 0 : while (p > s1) {
158 0 : p--;
159 0 : if (*p == ch)
160 0 : return (p - s);
161 : }
162 0 : n = p - s;
163 : }
164 3 : while (n > MEMRCHR_CUT_OFF);
165 : }
166 : #endif
167 : }
168 : #endif /* HAVE_MEMRCHR */
169 1779122 : p = s + n;
170 11017146 : while (p > s) {
171 10020229 : p--;
172 10020229 : if (*p == ch)
173 782207 : return (p - s);
174 : }
175 996910 : return -1;
176 : }
177 :
178 : #undef MEMRCHR_CUT_OFF
179 :
180 : /* Change to a 1 to see logging comments walk through the algorithm. */
181 : #if 0 && STRINGLIB_SIZEOF_CHAR == 1
182 : # define LOG(...) printf(__VA_ARGS__)
183 : # define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
184 : # define LOG_LINEUP() do { \
185 : LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> "); \
186 : LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
187 : LOG_STRING(needle, len_needle); LOG("\n"); \
188 : } while(0)
189 : #else
190 : # define LOG(...)
191 : # define LOG_STRING(s, n)
192 : # define LOG_LINEUP()
193 : #endif
194 :
195 : Py_LOCAL_INLINE(Py_ssize_t)
196 1252 : STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
197 : Py_ssize_t *return_period, int invert_alphabet)
198 : {
199 : /* Do a lexicographic search. Essentially this:
200 : >>> max(needle[i:] for i in range(len(needle)+1))
201 : Also find the period of the right half. */
202 1252 : Py_ssize_t max_suffix = 0;
203 1252 : Py_ssize_t candidate = 1;
204 1252 : Py_ssize_t k = 0;
205 : // The period of the right half.
206 1252 : Py_ssize_t period = 1;
207 :
208 115686 : while (candidate + k < len_needle) {
209 : // each loop increases candidate + k + max_suffix
210 114434 : STRINGLIB_CHAR a = needle[candidate + k];
211 114434 : STRINGLIB_CHAR b = needle[max_suffix + k];
212 : // check if the suffix at candidate is better than max_suffix
213 114434 : if (invert_alphabet ? (b < a) : (a < b)) {
214 : // Fell short of max_suffix.
215 : // The next k + 1 characters are non-increasing
216 : // from candidate, so they won't start a maximal suffix.
217 31949 : candidate += k + 1;
218 31949 : k = 0;
219 : // We've ruled out any period smaller than what's
220 : // been scanned since max_suffix.
221 31949 : period = candidate - max_suffix;
222 : }
223 82485 : else if (a == b) {
224 80852 : if (k + 1 != period) {
225 : // Keep scanning the equal strings
226 66816 : k++;
227 : }
228 : else {
229 : // Matched a whole period.
230 : // Start matching the next period.
231 14036 : candidate += period;
232 14036 : k = 0;
233 : }
234 : }
235 : else {
236 : // Did better than max_suffix, so replace it.
237 1633 : max_suffix = candidate;
238 1633 : candidate++;
239 1633 : k = 0;
240 1633 : period = 1;
241 : }
242 : }
243 1252 : *return_period = period;
244 1252 : return max_suffix;
245 : }
246 :
247 : Py_LOCAL_INLINE(Py_ssize_t)
248 626 : STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
249 : Py_ssize_t len_needle,
250 : Py_ssize_t *return_period)
251 : {
252 : /* Do a "critical factorization", making it so that:
253 : >>> needle = (left := needle[:cut]) + (right := needle[cut:])
254 : where the "local period" of the cut is maximal.
255 :
256 : The local period of the cut is the minimal length of a string w
257 : such that (left endswith w or w endswith left)
258 : and (right startswith w or w startswith left).
259 :
260 : The Critical Factorization Theorem says that this maximal local
261 : period is the global period of the string.
262 :
263 : Crochemore and Perrin (1991) show that this cut can be computed
264 : as the later of two cuts: one that gives a lexicographically
265 : maximal right half, and one that gives the same with the
266 : with respect to a reversed alphabet-ordering.
267 :
268 : This is what we want to happen:
269 : >>> x = "GCAGAGAG"
270 : >>> cut, period = factorize(x)
271 : >>> x[:cut], (right := x[cut:])
272 : ('GC', 'AGAGAG')
273 : >>> period # right half period
274 : 2
275 : >>> right[period:] == right[:-period]
276 : True
277 :
278 : This is how the local period lines up in the above example:
279 : GC | AGAGAG
280 : AGAGAGC = AGAGAGC
281 : The length of this minimal repetition is 7, which is indeed the
282 : period of the original string. */
283 :
284 : Py_ssize_t cut1, period1, cut2, period2, cut, period;
285 626 : cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
286 626 : cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
287 :
288 : // Take the later cut.
289 626 : if (cut1 > cut2) {
290 484 : period = period1;
291 484 : cut = cut1;
292 : }
293 : else {
294 142 : period = period2;
295 142 : cut = cut2;
296 : }
297 :
298 : LOG("split: "); LOG_STRING(needle, cut);
299 : LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
300 : LOG("\n");
301 :
302 626 : *return_period = period;
303 626 : return cut;
304 : }
305 :
306 :
307 : #define SHIFT_TYPE uint8_t
308 : #define MAX_SHIFT UINT8_MAX
309 :
310 : #define TABLE_SIZE_BITS 6u
311 : #define TABLE_SIZE (1U << TABLE_SIZE_BITS)
312 : #define TABLE_MASK (TABLE_SIZE - 1U)
313 :
314 : typedef struct STRINGLIB(_pre) {
315 : const STRINGLIB_CHAR *needle;
316 : Py_ssize_t len_needle;
317 : Py_ssize_t cut;
318 : Py_ssize_t period;
319 : Py_ssize_t gap;
320 : int is_periodic;
321 : SHIFT_TYPE table[TABLE_SIZE];
322 : } STRINGLIB(prework);
323 :
324 :
325 : static void
326 626 : STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
327 : STRINGLIB(prework) *p)
328 : {
329 626 : p->needle = needle;
330 626 : p->len_needle = len_needle;
331 626 : p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
332 626 : assert(p->period + p->cut <= len_needle);
333 626 : p->is_periodic = (0 == memcmp(needle,
334 626 : needle + p->period,
335 626 : p->cut * STRINGLIB_SIZEOF_CHAR));
336 626 : if (p->is_periodic) {
337 280 : assert(p->cut <= len_needle/2);
338 280 : assert(p->cut < p->period);
339 280 : p->gap = 0; // unused
340 : }
341 : else {
342 : // A lower bound on the period
343 346 : p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344 : // The gap between the last character and the previous
345 : // occurrence of an equivalent character (modulo TABLE_SIZE)
346 346 : p->gap = len_needle;
347 346 : STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
348 9745 : for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {
349 9732 : STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
350 9732 : if (x == last) {
351 333 : p->gap = len_needle - 1 - i;
352 333 : break;
353 : }
354 : }
355 : }
356 : // Fill up a compressed Boyer-Moore "Bad Character" table
357 626 : Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358 40690 : for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {
359 40064 : p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360 : Py_ssize_t, SHIFT_TYPE);
361 : }
362 42833 : for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
363 42207 : SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364 : Py_ssize_t, SHIFT_TYPE);
365 42207 : p->table[needle[i] & TABLE_MASK] = shift;
366 : }
367 626 : }
368 :
369 : static Py_ssize_t
370 682 : STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
371 : STRINGLIB(prework) *p)
372 : {
373 : // Crochemore and Perrin's (1991) Two-Way algorithm.
374 : // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375 682 : const Py_ssize_t len_needle = p->len_needle;
376 682 : const Py_ssize_t cut = p->cut;
377 682 : Py_ssize_t period = p->period;
378 682 : const STRINGLIB_CHAR *const needle = p->needle;
379 682 : const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380 682 : const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381 682 : SHIFT_TYPE *table = p->table;
382 : const STRINGLIB_CHAR *window;
383 : LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384 :
385 682 : if (p->is_periodic) {
386 : LOG("Needle is periodic.\n");
387 295 : Py_ssize_t memory = 0;
388 7065 : periodicwindowloop:
389 7065 : while (window_last < haystack_end) {
390 7065 : assert(memory == 0);
391 27721 : for (;;) {
392 : LOG_LINEUP();
393 34786 : Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
394 34786 : window_last += shift;
395 34786 : if (shift == 0) {
396 7064 : break;
397 : }
398 27722 : if (window_last >= haystack_end) {
399 1 : return -1;
400 : }
401 : LOG("Horspool skip");
402 : }
403 7082 : no_shift:
404 7082 : window = window_last - len_needle + 1;
405 7082 : assert((window[len_needle - 1] & TABLE_MASK) ==
406 : (needle[len_needle - 1] & TABLE_MASK));
407 7082 : Py_ssize_t i = Py_MAX(cut, memory);
408 43624 : for (; i < len_needle; i++) {
409 43312 : if (needle[i] != window[i]) {
410 : LOG("Right half does not match.\n");
411 6770 : window_last += i - cut + 1;
412 6770 : memory = 0;
413 6770 : goto periodicwindowloop;
414 : }
415 : }
416 3100 : for (i = memory; i < cut; i++) {
417 2806 : if (needle[i] != window[i]) {
418 : LOG("Left half does not match.\n");
419 18 : window_last += period;
420 18 : memory = len_needle - period;
421 18 : if (window_last >= haystack_end) {
422 0 : return -1;
423 : }
424 18 : Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
425 18 : if (shift) {
426 : // A mismatch has been identified to the right
427 : // of where i will next start, so we can jump
428 : // at least as far as if the mismatch occurred
429 : // on the first comparison.
430 0 : Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
431 : LOG("Skip with Memory.\n");
432 0 : memory = 0;
433 0 : window_last += Py_MAX(shift, mem_jump);
434 0 : goto periodicwindowloop;
435 : }
436 18 : goto no_shift;
437 : }
438 : }
439 : LOG("Found a match!\n");
440 294 : return window - haystack;
441 : }
442 : }
443 : else {
444 387 : Py_ssize_t gap = p->gap;
445 387 : period = Py_MAX(gap, period);
446 : LOG("Needle is not periodic.\n");
447 387 : Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
448 37212 : windowloop:
449 37212 : while (window_last < haystack_end) {
450 2129497 : for (;;) {
451 : LOG_LINEUP();
452 2166705 : Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
453 2166705 : window_last += shift;
454 2166705 : if (shift == 0) {
455 37032 : break;
456 : }
457 2129679 : if (window_last >= haystack_end) {
458 180 : return -1;
459 : }
460 : LOG("Horspool skip");
461 : }
462 37032 : window = window_last - len_needle + 1;
463 37032 : assert((window[len_needle - 1] & TABLE_MASK) ==
464 : (needle[len_needle - 1] & TABLE_MASK));
465 49697 : for (Py_ssize_t i = cut; i < gap_jump_end; i++) {
466 47210 : if (needle[i] != window[i]) {
467 : LOG("Early right half mismatch: jump by gap.\n");
468 34545 : assert(gap >= i - cut + 1);
469 34545 : window_last += gap;
470 34545 : goto windowloop;
471 : }
472 : }
473 6198 : for (Py_ssize_t i = gap_jump_end; i < len_needle; i++) {
474 5808 : if (needle[i] != window[i]) {
475 : LOG("Late right half mismatch.\n");
476 2097 : assert(i - cut + 1 > gap);
477 2097 : window_last += i - cut + 1;
478 2097 : goto windowloop;
479 : }
480 : }
481 11490 : for (Py_ssize_t i = 0; i < cut; i++) {
482 11283 : if (needle[i] != window[i]) {
483 : LOG("Left half does not match.\n");
484 183 : window_last += period;
485 183 : goto windowloop;
486 : }
487 : }
488 : LOG("Found a match!\n");
489 207 : return window - haystack;
490 : }
491 : }
492 : LOG("Not found. Returning -1.\n");
493 0 : return -1;
494 : }
495 :
496 :
497 : static Py_ssize_t
498 623 : STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
499 : Py_ssize_t len_haystack,
500 : const STRINGLIB_CHAR *needle,
501 : Py_ssize_t len_needle)
502 : {
503 : LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
504 : STRINGLIB(prework) p;
505 623 : STRINGLIB(_preprocess)(needle, len_needle, &p);
506 623 : return STRINGLIB(_two_way)(haystack, len_haystack, &p);
507 : }
508 :
509 :
510 : static Py_ssize_t
511 3 : STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
512 : Py_ssize_t len_haystack,
513 : const STRINGLIB_CHAR *needle,
514 : Py_ssize_t len_needle,
515 : Py_ssize_t maxcount)
516 : {
517 : LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
518 : STRINGLIB(prework) p;
519 3 : STRINGLIB(_preprocess)(needle, len_needle, &p);
520 3 : Py_ssize_t index = 0, count = 0;
521 56 : while (1) {
522 : Py_ssize_t result;
523 59 : result = STRINGLIB(_two_way)(haystack + index,
524 : len_haystack - index, &p);
525 59 : if (result == -1) {
526 3 : return count;
527 : }
528 56 : count++;
529 56 : if (count == maxcount) {
530 0 : return maxcount;
531 : }
532 56 : index += result + len_needle;
533 : }
534 : return count;
535 : }
536 :
537 : #undef SHIFT_TYPE
538 : #undef NOT_FOUND
539 : #undef SHIFT_OVERFLOW
540 : #undef TABLE_SIZE_BITS
541 : #undef TABLE_SIZE
542 : #undef TABLE_MASK
543 :
544 : #undef LOG
545 : #undef LOG_STRING
546 : #undef LOG_LINEUP
547 :
548 : static inline Py_ssize_t
549 4919682 : STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
550 : const STRINGLIB_CHAR* p, Py_ssize_t m,
551 : Py_ssize_t maxcount, int mode)
552 : {
553 4919682 : const Py_ssize_t w = n - m;
554 4919682 : Py_ssize_t mlast = m - 1, count = 0;
555 4919682 : Py_ssize_t gap = mlast;
556 4919682 : const STRINGLIB_CHAR last = p[mlast];
557 4919682 : const STRINGLIB_CHAR *const ss = &s[mlast];
558 :
559 4919682 : unsigned long mask = 0;
560 18378323 : for (Py_ssize_t i = 0; i < mlast; i++) {
561 13458642 : STRINGLIB_BLOOM_ADD(mask, p[i]);
562 13458642 : if (p[i] == last) {
563 3492086 : gap = mlast - i - 1;
564 : }
565 : }
566 4919682 : STRINGLIB_BLOOM_ADD(mask, last);
567 :
568 51379195 : for (Py_ssize_t i = 0; i <= w; i++) {
569 47172052 : if (ss[i] == last) {
570 : /* candidate match */
571 : Py_ssize_t j;
572 5449830 : for (j = 0; j < mlast; j++) {
573 4674464 : if (s[i+j] != p[j]) {
574 2433297 : break;
575 : }
576 : }
577 3208658 : if (j == mlast) {
578 : /* got a match! */
579 775360 : if (mode != FAST_COUNT) {
580 712470 : return i;
581 : }
582 62890 : count++;
583 62890 : if (count == maxcount) {
584 24 : return maxcount;
585 : }
586 62866 : i = i + mlast;
587 62866 : continue;
588 : }
589 : /* miss: check if next character is part of pattern */
590 2433297 : if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
591 1737259 : i = i + m;
592 : }
593 : else {
594 696038 : i = i + gap;
595 : }
596 : }
597 : else {
598 : /* skip: check if next character is part of pattern */
599 43963395 : if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
600 40763304 : i = i + m;
601 : }
602 : }
603 : }
604 4207189 : return mode == FAST_COUNT ? count : -1;
605 : }
606 :
607 :
608 : static Py_ssize_t
609 0 : STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
610 : const STRINGLIB_CHAR* p, Py_ssize_t m,
611 : Py_ssize_t maxcount, int mode)
612 : {
613 0 : const Py_ssize_t w = n - m;
614 0 : Py_ssize_t mlast = m - 1, count = 0;
615 0 : Py_ssize_t gap = mlast;
616 0 : Py_ssize_t hits = 0, res;
617 0 : const STRINGLIB_CHAR last = p[mlast];
618 0 : const STRINGLIB_CHAR *const ss = &s[mlast];
619 :
620 0 : unsigned long mask = 0;
621 0 : for (Py_ssize_t i = 0; i < mlast; i++) {
622 0 : STRINGLIB_BLOOM_ADD(mask, p[i]);
623 0 : if (p[i] == last) {
624 0 : gap = mlast - i - 1;
625 : }
626 : }
627 0 : STRINGLIB_BLOOM_ADD(mask, last);
628 :
629 0 : for (Py_ssize_t i = 0; i <= w; i++) {
630 0 : if (ss[i] == last) {
631 : /* candidate match */
632 : Py_ssize_t j;
633 0 : for (j = 0; j < mlast; j++) {
634 0 : if (s[i+j] != p[j]) {
635 0 : break;
636 : }
637 : }
638 0 : if (j == mlast) {
639 : /* got a match! */
640 0 : if (mode != FAST_COUNT) {
641 0 : return i;
642 : }
643 0 : count++;
644 0 : if (count == maxcount) {
645 0 : return maxcount;
646 : }
647 0 : i = i + mlast;
648 0 : continue;
649 : }
650 0 : hits += j + 1;
651 0 : if (hits > m / 4 && w - i > 2000) {
652 0 : if (mode == FAST_SEARCH) {
653 0 : res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
654 0 : return res == -1 ? -1 : res + i;
655 : }
656 : else {
657 0 : res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
658 : maxcount - count);
659 0 : return res + count;
660 : }
661 : }
662 : /* miss: check if next character is part of pattern */
663 0 : if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
664 0 : i = i + m;
665 : }
666 : else {
667 0 : i = i + gap;
668 : }
669 : }
670 : else {
671 : /* skip: check if next character is part of pattern */
672 0 : if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
673 0 : i = i + m;
674 : }
675 : }
676 : }
677 0 : return mode == FAST_COUNT ? count : -1;
678 : }
679 :
680 :
681 : static Py_ssize_t
682 393083 : STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
683 : const STRINGLIB_CHAR* p, Py_ssize_t m,
684 : Py_ssize_t maxcount, int mode)
685 : {
686 : /* create compressed boyer-moore delta 1 table */
687 393083 : unsigned long mask = 0;
688 393083 : Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
689 :
690 : /* process pattern[0] outside the loop */
691 393083 : STRINGLIB_BLOOM_ADD(mask, p[0]);
692 : /* process pattern[:0:-1] */
693 1744518 : for (i = mlast; i > 0; i--) {
694 1351435 : STRINGLIB_BLOOM_ADD(mask, p[i]);
695 1351435 : if (p[i] == p[0]) {
696 449740 : skip = i - 1;
697 : }
698 : }
699 :
700 1400925 : for (i = w; i >= 0; i--) {
701 1019577 : if (s[i] == p[0]) {
702 : /* candidate match */
703 255590 : for (j = mlast; j > 0; j--) {
704 243855 : if (s[i+j] != p[j]) {
705 159994 : break;
706 : }
707 : }
708 171729 : if (j == 0) {
709 : /* got a match! */
710 11735 : return i;
711 : }
712 : /* miss: check if previous character is part of pattern */
713 159994 : if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
714 13471 : i = i - m;
715 : }
716 : else {
717 146523 : i = i - skip;
718 : }
719 : }
720 : else {
721 : /* skip: check if previous character is part of pattern */
722 847848 : if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
723 558279 : i = i - m;
724 : }
725 : }
726 : }
727 381348 : return -1;
728 : }
729 :
730 :
731 : static inline Py_ssize_t
732 1600223 : STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
733 : const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
734 : {
735 1600223 : Py_ssize_t i, count = 0;
736 559313874 : for (i = 0; i < n; i++) {
737 557713485 : if (s[i] == p0) {
738 8014718 : count++;
739 8014718 : if (count == maxcount) {
740 20 : return maxcount;
741 : }
742 : }
743 : }
744 1600203 : return count;
745 : }
746 :
747 :
748 : Py_LOCAL_INLINE(Py_ssize_t)
749 9743076 : FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
750 : const STRINGLIB_CHAR* p, Py_ssize_t m,
751 : Py_ssize_t maxcount, int mode)
752 : {
753 9743076 : if (n < m || (mode == FAST_COUNT && maxcount == 0)) {
754 181560 : return -1;
755 : }
756 :
757 : /* look for special cases */
758 9561516 : if (m <= 1) {
759 4248127 : if (m <= 0) {
760 0 : return -1;
761 : }
762 : /* use special case for 1-character strings */
763 4248127 : if (mode == FAST_SEARCH)
764 911996 : return STRINGLIB(find_char)(s, n, p[0]);
765 3336129 : else if (mode == FAST_RSEARCH)
766 1735906 : return STRINGLIB(rfind_char)(s, n, p[0]);
767 : else {
768 1600223 : return STRINGLIB(count_char)(s, n, p[0], maxcount);
769 : }
770 : }
771 :
772 5313392 : if (mode != FAST_RSEARCH) {
773 4920312 : if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
774 4919682 : return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
775 : }
776 626 : else if ((m >> 2) * 3 < (n >> 2)) {
777 : /* 33% threshold, but don't overflow. */
778 : /* For larger problems where the needle isn't a huge
779 : percentage of the size of the haystack, the relatively
780 : expensive O(m) startup cost of the two-way algorithm
781 : will surely pay off. */
782 626 : if (mode == FAST_SEARCH) {
783 623 : return STRINGLIB(_two_way_find)(s, n, p, m);
784 : }
785 : else {
786 3 : return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
787 : }
788 : }
789 : else {
790 : /* To ensure that we have good worst-case behavior,
791 : here's an adaptive version of the algorithm, where if
792 : we match O(m) characters without any matches of the
793 : entire needle, then we predict that the startup cost of
794 : the two-way algorithm will probably be worth it. */
795 0 : return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
796 : }
797 : }
798 : else {
799 : /* FAST_RSEARCH */
800 393083 : return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
801 : }
802 : }
803 :
|