/home/mdboom/Work/builds/cpython/Objects/stringlib/fastsearch.h
Line | Count | Source (jump to first uncovered line) |
1 | /* stringlib: fastsearch implementation */ |
2 | |
3 | #define STRINGLIB_FASTSEARCH_H |
4 | |
5 | /* fast search/count implementation, based on a mix between boyer- |
6 | moore and horspool, with a few more bells and whistles on the top. |
7 | for some more background, see: |
8 | https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */ |
9 | |
10 | /* note: fastsearch may access s[n], which isn't a problem when using |
11 | Python's ordinary string types, but may cause problems if you're |
12 | using this code in other contexts. also, the count mode returns -1 |
13 | if there cannot possibly be a match in the target string, and 0 if |
14 | it has actually checked for matches, but didn't find any. callers |
15 | beware! */ |
16 | |
17 | /* If the strings are long enough, use Crochemore and Perrin's Two-Way |
18 | algorithm, which has worst-case O(n) runtime and best-case O(n/k). |
19 | Also compute a table of shifts to achieve O(n/k) in more cases, |
20 | and often (data dependent) deduce larger shifts than pure C&P can |
21 | deduce. */ |
22 | |
23 | #define FAST_COUNT 0 |
24 | #define FAST_SEARCH 1 |
25 | #define FAST_RSEARCH 2 |
26 | |
27 | #if LONG_BIT >= 128 |
28 | #define STRINGLIB_BLOOM_WIDTH 128 |
29 | #elif LONG_BIT >= 64 |
30 | #define STRINGLIB_BLOOM_WIDTH 64 |
31 | #elif LONG_BIT >= 32 |
32 | #define STRINGLIB_BLOOM_WIDTH 32 |
33 | #else |
34 | #error "LONG_BIT is smaller than 32" |
35 | #endif |
36 | |
37 | #define STRINGLIB_BLOOM_ADD(mask, ch) \ |
38 | ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) |
39 | #define STRINGLIB_BLOOM(mask, ch) \ |
40 | ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) |
41 | |
42 | #ifdef STRINGLIB_FAST_MEMCHR |
43 | # define MEMCHR_CUT_OFF 15 |
44 | #else |
45 | # define MEMCHR_CUT_OFF 40 |
46 | #endif |
47 | |
48 | Py_LOCAL_INLINE(Py_ssize_t) |
49 | STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) |
50 | { |
51 | const STRINGLIB_CHAR *p, *e; |
52 | |
53 | p = s; |
54 | e = s + n; |
55 | if (n > MEMCHR_CUT_OFF) { Branch (55:9): [True: 139k, False: 170k]
Branch (55:9): [True: 0, False: 13]
Branch (55:9): [True: 378, False: 500]
Branch (55:9): [True: 565k, False: 4.73M]
Branch (55:9): [True: 218, False: 73.2k]
Branch (55:9): [True: 48, False: 1.04M]
Branch (55:9): [True: 459k, False: 66.0k]
|
56 | #ifdef STRINGLIB_FAST_MEMCHR |
57 | p = STRINGLIB_FAST_MEMCHR(s, ch, n); |
58 | if (p != NULL) Branch (58:13): [True: 118k, False: 20.8k]
Branch (58:13): [True: 0, False: 0]
Branch (58:13): [True: 312, False: 66]
Branch (58:13): [True: 90.3k, False: 475k]
Branch (58:13): [True: 25, False: 23]
Branch (58:13): [True: 453k, False: 5.94k]
|
59 | return (p - s); |
60 | return -1; |
61 | #else |
62 | /* use memchr if we can choose a needle without too many likely |
63 | false positives */ |
64 | const STRINGLIB_CHAR *s1, *e1; |
65 | unsigned char needle = ch & 0xff; |
66 | /* If looking for a multiple of 256, we'd have too |
67 | many false positives looking for the '\0' byte in UCS2 |
68 | and UCS4 representations. */ |
69 | if (needle != 0) { Branch (69:13): [True: 217, False: 1]
|
70 | do { |
71 | void *candidate = memchr(p, needle, |
72 | (e - p) * sizeof(STRINGLIB_CHAR)); |
73 | if (candidate == NULL) Branch (73:21): [True: 19, False: 199]
|
74 | return -1; |
75 | s1 = p; |
76 | p = (const STRINGLIB_CHAR *) |
77 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); |
78 | if (*p == ch) Branch (78:21): [True: 195, False: 4]
|
79 | return (p - s); |
80 | /* False positive */ |
81 | p++; |
82 | if (p - s1 > MEMCHR_CUT_OFF) Branch (82:21): [True: 2, False: 2]
|
83 | continue; |
84 | if (e - p <= MEMCHR_CUT_OFF) Branch (84:21): [True: 0, False: 2]
|
85 | break; |
86 | e1 = p + MEMCHR_CUT_OFF; |
87 | while (p != e1) { Branch (87:24): [True: 6, False: 0]
|
88 | if (*p == ch) Branch (88:25): [True: 2, False: 4]
|
89 | return (p - s); |
90 | p++; |
91 | } |
92 | } |
93 | while (e - p > 2 MEMCHR_CUT_OFF2 ); Branch (93:20): [True: 1, False: 1]
|
94 | } |
95 | #endif |
96 | } |
97 | while (6.09M p < e) { Branch (97:12): [True: 1.30M, False: 82.7k]
Branch (97:12): [True: 38, False: 5]
Branch (97:12): [True: 4.13k, False: 342]
Branch (97:12): [True: 19.6M, False: 3.46M]
Branch (97:12): [True: 233k, False: 72.1k]
Branch (97:12): [True: 3.14M, False: 1.04M]
Branch (97:12): [True: 378k, False: 14.2k]
|
98 | if (*p == ch) Branch (98:13): [True: 87.8k, False: 1.21M]
Branch (98:13): [True: 8, False: 30]
Branch (98:13): [True: 158, False: 3.98k]
Branch (98:13): [True: 1.27M, False: 18.3M]
Branch (98:13): [True: 1.08k, False: 232k]
Branch (98:13): [True: 10, False: 3.14M]
Branch (98:13): [True: 51.8k, False: 326k]
|
99 | return (p - s); |
100 | p++; |
101 | } |
102 | return -1; |
103 | } bytes_methods.c:stringlib_find_char Line | Count | Source | 50 | { | 51 | const STRINGLIB_CHAR *p, *e; | 52 | | 53 | p = s; | 54 | e = s + n; | 55 | if (n > MEMCHR_CUT_OFF) { Branch (55:9): [True: 139k, False: 170k]
| 56 | #ifdef STRINGLIB_FAST_MEMCHR | 57 | p = STRINGLIB_FAST_MEMCHR(s, ch, n); | 58 | if (p != NULL) Branch (58:13): [True: 118k, False: 20.8k]
| 59 | return (p - s); | 60 | return -1; | 61 | #else | 62 | /* use memchr if we can choose a needle without too many likely | 63 | false positives */ | 64 | const STRINGLIB_CHAR *s1, *e1; | 65 | unsigned char needle = ch & 0xff; | 66 | /* If looking for a multiple of 256, we'd have too | 67 | many false positives looking for the '\0' byte in UCS2 | 68 | and UCS4 representations. */ | 69 | if (needle != 0) { | 70 | do { | 71 | void *candidate = memchr(p, needle, | 72 | (e - p) * sizeof(STRINGLIB_CHAR)); | 73 | if (candidate == NULL) | 74 | return -1; | 75 | s1 = p; | 76 | p = (const STRINGLIB_CHAR *) | 77 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 78 | if (*p == ch) | 79 | return (p - s); | 80 | /* False positive */ | 81 | p++; | 82 | if (p - s1 > MEMCHR_CUT_OFF) | 83 | continue; | 84 | if (e - p <= MEMCHR_CUT_OFF) | 85 | break; | 86 | e1 = p + MEMCHR_CUT_OFF; | 87 | while (p != e1) { | 88 | if (*p == ch) | 89 | return (p - s); | 90 | p++; | 91 | } | 92 | } | 93 | while (e - p > MEMCHR_CUT_OFF); | 94 | } | 95 | #endif | 96 | } | 97 | while (170k p < e) { Branch (97:12): [True: 1.30M, False: 82.7k]
| 98 | if (*p == ch) Branch (98:13): [True: 87.8k, False: 1.21M]
| 99 | return (p - s); | 100 | p++; | 101 | } | 102 | return -1; | 103 | } |
bytearrayobject.c:stringlib_find_char Line | Count | Source | 50 | { | 51 | const STRINGLIB_CHAR *p, *e; | 52 | | 53 | p = s; | 54 | e = s + n; | 55 | if (n > MEMCHR_CUT_OFF) { Branch (55:9): [True: 0, False: 13]
| 56 | #ifdef STRINGLIB_FAST_MEMCHR | 57 | p = STRINGLIB_FAST_MEMCHR(s, ch, n); | 58 | if (p != NULL) Branch (58:13): [True: 0, False: 0]
| 59 | return (p - s); | 60 | return -1; | 61 | #else | 62 | /* use memchr if we can choose a needle without too many likely | 63 | false positives */ | 64 | const STRINGLIB_CHAR *s1, *e1; | 65 | unsigned char needle = ch & 0xff; | 66 | /* If looking for a multiple of 256, we'd have too | 67 | many false positives looking for the '\0' byte in UCS2 | 68 | and UCS4 representations. */ | 69 | if (needle != 0) { | 70 | do { | 71 | void *candidate = memchr(p, needle, | 72 | (e - p) * sizeof(STRINGLIB_CHAR)); | 73 | if (candidate == NULL) | 74 | return -1; | 75 | s1 = p; | 76 | p = (const STRINGLIB_CHAR *) | 77 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 78 | if (*p == ch) | 79 | return (p - s); | 80 | /* False positive */ | 81 | p++; | 82 | if (p - s1 > MEMCHR_CUT_OFF) | 83 | continue; | 84 | if (e - p <= MEMCHR_CUT_OFF) | 85 | break; | 86 | e1 = p + MEMCHR_CUT_OFF; | 87 | while (p != e1) { | 88 | if (*p == ch) | 89 | return (p - s); | 90 | p++; | 91 | } | 92 | } | 93 | while (e - p > MEMCHR_CUT_OFF); | 94 | } | 95 | #endif | 96 | } | 97 | while (13 p < e) { Branch (97:12): [True: 38, False: 5]
| 98 | if (*p == ch) Branch (98:13): [True: 8, False: 30]
| 99 | return (p - s); | 100 | p++; | 101 | } | 102 | return -1; | 103 | } |
bytesobject.c:stringlib_find_char Line | Count | Source | 50 | { | 51 | const STRINGLIB_CHAR *p, *e; | 52 | | 53 | p = s; | 54 | e = s + n; | 55 | if (n > MEMCHR_CUT_OFF) { Branch (55:9): [True: 378, False: 500]
| 56 | #ifdef STRINGLIB_FAST_MEMCHR | 57 | p = STRINGLIB_FAST_MEMCHR(s, ch, n); | 58 | if (p != NULL) Branch (58:13): [True: 312, False: 66]
| 59 | return (p - s); | 60 | return -1; | 61 | #else | 62 | /* use memchr if we can choose a needle without too many likely | 63 | false positives */ | 64 | const STRINGLIB_CHAR *s1, *e1; | 65 | unsigned char needle = ch & 0xff; | 66 | /* If looking for a multiple of 256, we'd have too | 67 | many false positives looking for the '\0' byte in UCS2 | 68 | and UCS4 representations. */ | 69 | if (needle != 0) { | 70 | do { | 71 | void *candidate = memchr(p, needle, | 72 | (e - p) * sizeof(STRINGLIB_CHAR)); | 73 | if (candidate == NULL) | 74 | return -1; | 75 | s1 = p; | 76 | p = (const STRINGLIB_CHAR *) | 77 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 78 | if (*p == ch) | 79 | return (p - s); | 80 | /* False positive */ | 81 | p++; | 82 | if (p - s1 > MEMCHR_CUT_OFF) | 83 | continue; | 84 | if (e - p <= MEMCHR_CUT_OFF) | 85 | break; | 86 | e1 = p + MEMCHR_CUT_OFF; | 87 | while (p != e1) { | 88 | if (*p == ch) | 89 | return (p - s); | 90 | p++; | 91 | } | 92 | } | 93 | while (e - p > MEMCHR_CUT_OFF); | 94 | } | 95 | #endif | 96 | } | 97 | while (500 p < e) { Branch (97:12): [True: 4.13k, False: 342]
| 98 | if (*p == ch) Branch (98:13): [True: 158, False: 3.98k]
| 99 | return (p - s); | 100 | p++; | 101 | } | 102 | return -1; | 103 | } |
unicodeobject.c:ucs1lib_find_char Line | Count | Source | 50 | { | 51 | const STRINGLIB_CHAR *p, *e; | 52 | | 53 | p = s; | 54 | e = s + n; | 55 | if (n > MEMCHR_CUT_OFF) { Branch (55:9): [True: 565k, False: 4.73M]
| 56 | #ifdef STRINGLIB_FAST_MEMCHR | 57 | p = STRINGLIB_FAST_MEMCHR(s, ch, n); | 58 | if (p != NULL) Branch (58:13): [True: 90.3k, False: 475k]
| 59 | return (p - s); | 60 | return -1; | 61 | #else | 62 | /* use memchr if we can choose a needle without too many likely | 63 | false positives */ | 64 | const STRINGLIB_CHAR *s1, *e1; | 65 | unsigned char needle = ch & 0xff; | 66 | /* If looking for a multiple of 256, we'd have too | 67 | many false positives looking for the '\0' byte in UCS2 | 68 | and UCS4 representations. */ | 69 | if (needle != 0) { | 70 | do { | 71 | void *candidate = memchr(p, needle, | 72 | (e - p) * sizeof(STRINGLIB_CHAR)); | 73 | if (candidate == NULL) | 74 | return -1; | 75 | s1 = p; | 76 | p = (const STRINGLIB_CHAR *) | 77 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 78 | if (*p == ch) | 79 | return (p - s); | 80 | /* False positive */ | 81 | p++; | 82 | if (p - s1 > MEMCHR_CUT_OFF) | 83 | continue; | 84 | if (e - p <= MEMCHR_CUT_OFF) | 85 | break; | 86 | e1 = p + MEMCHR_CUT_OFF; | 87 | while (p != e1) { | 88 | if (*p == ch) | 89 | return (p - s); | 90 | p++; | 91 | } | 92 | } | 93 | while (e - p > MEMCHR_CUT_OFF); | 94 | } | 95 | #endif | 96 | } | 97 | while (4.73M p < e) { Branch (97:12): [True: 19.6M, False: 3.46M]
| 98 | if (*p == ch) Branch (98:13): [True: 1.27M, False: 18.3M]
| 99 | return (p - s); | 100 | p++; | 101 | } | 102 | return -1; | 103 | } |
unicodeobject.c:ucs2lib_find_char Line | Count | Source | 50 | { | 51 | const STRINGLIB_CHAR *p, *e; | 52 | | 53 | p = s; | 54 | e = s + n; | 55 | if (n > MEMCHR_CUT_OFF) { Branch (55:9): [True: 218, False: 73.2k]
| 56 | #ifdef STRINGLIB_FAST_MEMCHR | 57 | p = STRINGLIB_FAST_MEMCHR(s, ch, n); | 58 | if (p != NULL) | 59 | return (p - s); | 60 | return -1; | 61 | #else | 62 | /* use memchr if we can choose a needle without too many likely | 63 | false positives */ | 64 | const STRINGLIB_CHAR *s1, *e1; | 65 | unsigned char needle = ch & 0xff; | 66 | /* If looking for a multiple of 256, we'd have too | 67 | many false positives looking for the '\0' byte in UCS2 | 68 | and UCS4 representations. */ | 69 | if (needle != 0) { Branch (69:13): [True: 217, False: 1]
| 70 | do { | 71 | void *candidate = memchr(p, needle, | 72 | (e - p) * sizeof(STRINGLIB_CHAR)); | 73 | if (candidate == NULL) Branch (73:21): [True: 19, False: 199]
| 74 | return -1; | 75 | s1 = p; | 76 | p = (const STRINGLIB_CHAR *) | 77 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 78 | if (*p == ch) Branch (78:21): [True: 195, False: 4]
| 79 | return (p - s); | 80 | /* False positive */ | 81 | p++; | 82 | if (p - s1 > MEMCHR_CUT_OFF) Branch (82:21): [True: 2, False: 2]
| 83 | continue; | 84 | if (e - p <= MEMCHR_CUT_OFF) Branch (84:21): [True: 0, False: 2]
| 85 | break; | 86 | e1 = p + MEMCHR_CUT_OFF; | 87 | while (p != e1) { Branch (87:24): [True: 6, False: 0]
| 88 | if (*p == ch) Branch (88:25): [True: 2, False: 4]
| 89 | return (p - s); | 90 | p++; | 91 | } | 92 | } | 93 | while (e - p > 2 MEMCHR_CUT_OFF2 ); Branch (93:20): [True: 1, False: 1]
| 94 | } | 95 | #endif | 96 | } | 97 | while (73.2k p < e) { Branch (97:12): [True: 233k, False: 72.1k]
| 98 | if (*p == ch) Branch (98:13): [True: 1.08k, False: 232k]
| 99 | return (p - s); | 100 | p++; | 101 | } | 102 | return -1; | 103 | } |
unicodeobject.c:ucs4lib_find_char Line | Count | Source | 50 | { | 51 | const STRINGLIB_CHAR *p, *e; | 52 | | 53 | p = s; | 54 | e = s + n; | 55 | if (n > MEMCHR_CUT_OFF) { Branch (55:9): [True: 48, False: 1.04M]
| 56 | #ifdef STRINGLIB_FAST_MEMCHR | 57 | p = STRINGLIB_FAST_MEMCHR(s, ch, n); | 58 | if (p != NULL) Branch (58:13): [True: 25, False: 23]
| 59 | return (p - s); | 60 | return -1; | 61 | #else | 62 | /* use memchr if we can choose a needle without too many likely | 63 | false positives */ | 64 | const STRINGLIB_CHAR *s1, *e1; | 65 | unsigned char needle = ch & 0xff; | 66 | /* If looking for a multiple of 256, we'd have too | 67 | many false positives looking for the '\0' byte in UCS2 | 68 | and UCS4 representations. */ | 69 | if (needle != 0) { | 70 | do { | 71 | void *candidate = memchr(p, needle, | 72 | (e - p) * sizeof(STRINGLIB_CHAR)); | 73 | if (candidate == NULL) | 74 | return -1; | 75 | s1 = p; | 76 | p = (const STRINGLIB_CHAR *) | 77 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 78 | if (*p == ch) | 79 | return (p - s); | 80 | /* False positive */ | 81 | p++; | 82 | if (p - s1 > MEMCHR_CUT_OFF) | 83 | continue; | 84 | if (e - p <= MEMCHR_CUT_OFF) | 85 | break; | 86 | e1 = p + MEMCHR_CUT_OFF; | 87 | while (p != e1) { | 88 | if (*p == ch) | 89 | return (p - s); | 90 | p++; | 91 | } | 92 | } | 93 | while (e - p > MEMCHR_CUT_OFF); | 94 | } | 95 | #endif | 96 | } | 97 | while (1.04M p < e) { Branch (97:12): [True: 3.14M, False: 1.04M]
| 98 | if (*p == ch) Branch (98:13): [True: 10, False: 3.14M]
| 99 | return (p - s); | 100 | p++; | 101 | } | 102 | return -1; | 103 | } |
unicodeobject.c:asciilib_find_char Line | Count | Source | 50 | { | 51 | const STRINGLIB_CHAR *p, *e; | 52 | | 53 | p = s; | 54 | e = s + n; | 55 | if (n > MEMCHR_CUT_OFF) { Branch (55:9): [True: 459k, False: 66.0k]
| 56 | #ifdef STRINGLIB_FAST_MEMCHR | 57 | p = STRINGLIB_FAST_MEMCHR(s, ch, n); | 58 | if (p != NULL) Branch (58:13): [True: 453k, False: 5.94k]
| 59 | return (p - s); | 60 | return -1; | 61 | #else | 62 | /* use memchr if we can choose a needle without too many likely | 63 | false positives */ | 64 | const STRINGLIB_CHAR *s1, *e1; | 65 | unsigned char needle = ch & 0xff; | 66 | /* If looking for a multiple of 256, we'd have too | 67 | many false positives looking for the '\0' byte in UCS2 | 68 | and UCS4 representations. */ | 69 | if (needle != 0) { | 70 | do { | 71 | void *candidate = memchr(p, needle, | 72 | (e - p) * sizeof(STRINGLIB_CHAR)); | 73 | if (candidate == NULL) | 74 | return -1; | 75 | s1 = p; | 76 | p = (const STRINGLIB_CHAR *) | 77 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 78 | if (*p == ch) | 79 | return (p - s); | 80 | /* False positive */ | 81 | p++; | 82 | if (p - s1 > MEMCHR_CUT_OFF) | 83 | continue; | 84 | if (e - p <= MEMCHR_CUT_OFF) | 85 | break; | 86 | e1 = p + MEMCHR_CUT_OFF; | 87 | while (p != e1) { | 88 | if (*p == ch) | 89 | return (p - s); | 90 | p++; | 91 | } | 92 | } | 93 | while (e - p > MEMCHR_CUT_OFF); | 94 | } | 95 | #endif | 96 | } | 97 | while (66.0k p < e) { Branch (97:12): [True: 378k, False: 14.2k]
| 98 | if (*p == ch) Branch (98:13): [True: 51.8k, False: 326k]
| 99 | return (p - s); | 100 | p++; | 101 | } | 102 | return -1; | 103 | } |
|
104 | |
105 | #undef MEMCHR_CUT_OFF |
106 | |
107 | #if STRINGLIB_SIZEOF_CHAR == 1 |
108 | # define MEMRCHR_CUT_OFF 15 |
109 | #else |
110 | # define MEMRCHR_CUT_OFF 40 |
111 | #endif |
112 | |
113 | |
114 | Py_LOCAL_INLINE(Py_ssize_t) |
115 | STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) |
116 | { |
117 | const STRINGLIB_CHAR *p; |
118 | #ifdef HAVE_MEMRCHR |
119 | /* memrchr() is a GNU extension, available since glibc 2.1.91. it |
120 | doesn't seem as optimized as memchr(), but is still quite |
121 | faster than our hand-written loop below. There is no wmemrchr |
122 | for 4-byte chars. */ |
123 | |
124 | if (n > MEMRCHR_CUT_OFF) { Branch (124:9): [True: 4.31k, False: 3.07k]
Branch (124:9): [True: 0, False: 5]
Branch (124:9): [True: 60, False: 256]
Branch (124:9): [True: 375k, False: 329k]
Branch (124:9): [True: 23, False: 374]
Branch (124:9): [True: 7, False: 10]
Branch (124:9): [True: 33.9k, False: 106k]
|
125 | #if STRINGLIB_SIZEOF_CHAR == 1 |
126 | p = memrchr(s, ch, n); |
127 | if (p != NULL) Branch (127:13): [True: 2.23k, False: 2.07k]
Branch (127:13): [True: 0, False: 0]
Branch (127:13): [True: 8, False: 52]
Branch (127:13): [True: 367k, False: 8.24k]
Branch (127:13): [True: 30.7k, False: 3.15k]
|
128 | return (p - s); |
129 | return -1; |
130 | #else |
131 | /* use memrchr if we can choose a needle without too many likely |
132 | false positives */ |
133 | const STRINGLIB_CHAR *s1; |
134 | Py_ssize_t n1; |
135 | unsigned char needle = ch & 0xff; |
136 | /* If looking for a multiple of 256, we'd have too |
137 | many false positives looking for the '\0' byte in UCS2 |
138 | and UCS4 representations. */ |
139 | if (needle != 0) { Branch (139:13): [True: 19, False: 4]
Branch (139:13): [True: 7, False: 0]
|
140 | do { |
141 | void *candidate = memrchr(s, needle, |
142 | n * sizeof(STRINGLIB_CHAR)); |
143 | if (candidate == NULL) Branch (143:21): [True: 2, False: 17]
Branch (143:21): [True: 0, False: 7]
|
144 | return -1; |
145 | n1 = n; |
146 | p = (const STRINGLIB_CHAR *) |
147 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); |
148 | n = p - s; |
149 | if (*p == ch) Branch (149:21): [True: 16, False: 1]
Branch (149:21): [True: 5, False: 2]
|
150 | return n; |
151 | /* False positive */ |
152 | if (n1 - n > MEMRCHR_CUT_OFF) Branch (152:21): [True: 1, False: 0]
Branch (152:21): [True: 2, False: 0]
|
153 | continue; |
154 | if (n <= MEMRCHR_CUT_OFF) Branch (154:21): [True: 0, False: 0]
Branch (154:21): [True: 0, False: 0]
|
155 | break; |
156 | s1 = p - MEMRCHR_CUT_OFF; |
157 | while (p > s1) { Branch (157:24): [True: 0, False: 0]
Branch (157:24): [True: 0, False: 0]
|
158 | p--; |
159 | if (*p == ch) Branch (159:25): [True: 0, False: 0]
Branch (159:25): [True: 0, False: 0]
|
160 | return (p - s); |
161 | } |
162 | n = p - s; |
163 | } |
164 | while (n > 3 MEMRCHR_CUT_OFF3 ); Branch (164:20): [True: 0, False: 1]
Branch (164:20): [True: 0, False: 2]
|
165 | } |
166 | #endif |
167 | } |
168 | #endif /* HAVE_MEMRCHR */ |
169 | p = s + n; |
170 | while (p > s) { Branch (170:12): [True: 9.03k, False: 752]
Branch (170:12): [True: 18, False: 4]
Branch (170:12): [True: 2.91k, False: 239]
Branch (170:12): [True: 1.66M, False: 146k]
Branch (170:12): [True: 2.72k, False: 264]
Branch (170:12): [True: 107, False: 5]
Branch (170:12): [True: 753k, False: 86.1k]
|
171 | p--; |
172 | if (*p == ch) Branch (172:13): [True: 2.32k, False: 6.71k]
Branch (172:13): [True: 1, False: 17]
Branch (172:13): [True: 17, False: 2.89k]
Branch (172:13): [True: 182k, False: 1.47M]
Branch (172:13): [True: 115, False: 2.61k]
Branch (172:13): [True: 7, False: 100]
Branch (172:13): [True: 20.8k, False: 732k]
|
173 | return (p - s); |
174 | } |
175 | return -1; |
176 | } bytes_methods.c:stringlib_rfind_char Line | Count | Source | 116 | { | 117 | const STRINGLIB_CHAR *p; | 118 | #ifdef HAVE_MEMRCHR | 119 | /* memrchr() is a GNU extension, available since glibc 2.1.91. it | 120 | doesn't seem as optimized as memchr(), but is still quite | 121 | faster than our hand-written loop below. There is no wmemrchr | 122 | for 4-byte chars. */ | 123 | | 124 | if (n > MEMRCHR_CUT_OFF) { Branch (124:9): [True: 4.31k, False: 3.07k]
| 125 | #if STRINGLIB_SIZEOF_CHAR == 1 | 126 | p = memrchr(s, ch, n); | 127 | if (p != NULL) Branch (127:13): [True: 2.23k, False: 2.07k]
| 128 | return (p - s); | 129 | return -1; | 130 | #else | 131 | /* use memrchr if we can choose a needle without too many likely | 132 | false positives */ | 133 | const STRINGLIB_CHAR *s1; | 134 | Py_ssize_t n1; | 135 | unsigned char needle = ch & 0xff; | 136 | /* If looking for a multiple of 256, we'd have too | 137 | many false positives looking for the '\0' byte in UCS2 | 138 | and UCS4 representations. */ | 139 | if (needle != 0) { | 140 | do { | 141 | void *candidate = memrchr(s, needle, | 142 | n * sizeof(STRINGLIB_CHAR)); | 143 | if (candidate == NULL) | 144 | return -1; | 145 | n1 = n; | 146 | p = (const STRINGLIB_CHAR *) | 147 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 148 | n = p - s; | 149 | if (*p == ch) | 150 | return n; | 151 | /* False positive */ | 152 | if (n1 - n > MEMRCHR_CUT_OFF) | 153 | continue; | 154 | if (n <= MEMRCHR_CUT_OFF) | 155 | break; | 156 | s1 = p - MEMRCHR_CUT_OFF; | 157 | while (p > s1) { | 158 | p--; | 159 | if (*p == ch) | 160 | return (p - s); | 161 | } | 162 | n = p - s; | 163 | } | 164 | while (n > MEMRCHR_CUT_OFF); | 165 | } | 166 | #endif | 167 | } | 168 | #endif /* HAVE_MEMRCHR */ | 169 | p = s + n; | 170 | while (p > s) { Branch (170:12): [True: 9.03k, False: 752]
| 171 | p--; | 172 | if (*p == ch) Branch (172:13): [True: 2.32k, False: 6.71k]
| 173 | return (p - s); | 174 | } | 175 | return -1; | 176 | } |
bytearrayobject.c:stringlib_rfind_char Line | Count | Source | 116 | { | 117 | const STRINGLIB_CHAR *p; | 118 | #ifdef HAVE_MEMRCHR | 119 | /* memrchr() is a GNU extension, available since glibc 2.1.91. it | 120 | doesn't seem as optimized as memchr(), but is still quite | 121 | faster than our hand-written loop below. There is no wmemrchr | 122 | for 4-byte chars. */ | 123 | | 124 | if (n > MEMRCHR_CUT_OFF) { Branch (124:9): [True: 0, False: 5]
| 125 | #if STRINGLIB_SIZEOF_CHAR == 1 | 126 | p = memrchr(s, ch, n); | 127 | if (p != NULL) Branch (127:13): [True: 0, False: 0]
| 128 | return (p - s); | 129 | return -1; | 130 | #else | 131 | /* use memrchr if we can choose a needle without too many likely | 132 | false positives */ | 133 | const STRINGLIB_CHAR *s1; | 134 | Py_ssize_t n1; | 135 | unsigned char needle = ch & 0xff; | 136 | /* If looking for a multiple of 256, we'd have too | 137 | many false positives looking for the '\0' byte in UCS2 | 138 | and UCS4 representations. */ | 139 | if (needle != 0) { | 140 | do { | 141 | void *candidate = memrchr(s, needle, | 142 | n * sizeof(STRINGLIB_CHAR)); | 143 | if (candidate == NULL) | 144 | return -1; | 145 | n1 = n; | 146 | p = (const STRINGLIB_CHAR *) | 147 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 148 | n = p - s; | 149 | if (*p == ch) | 150 | return n; | 151 | /* False positive */ | 152 | if (n1 - n > MEMRCHR_CUT_OFF) | 153 | continue; | 154 | if (n <= MEMRCHR_CUT_OFF) | 155 | break; | 156 | s1 = p - MEMRCHR_CUT_OFF; | 157 | while (p > s1) { | 158 | p--; | 159 | if (*p == ch) | 160 | return (p - s); | 161 | } | 162 | n = p - s; | 163 | } | 164 | while (n > MEMRCHR_CUT_OFF); | 165 | } | 166 | #endif | 167 | } | 168 | #endif /* HAVE_MEMRCHR */ | 169 | p = s + n; | 170 | while (p > s) { Branch (170:12): [True: 18, False: 4]
| 171 | p--; | 172 | if (*p == ch) Branch (172:13): [True: 1, False: 17]
| 173 | return (p - s); | 174 | } | 175 | return -1; | 176 | } |
bytesobject.c:stringlib_rfind_char Line | Count | Source | 116 | { | 117 | const STRINGLIB_CHAR *p; | 118 | #ifdef HAVE_MEMRCHR | 119 | /* memrchr() is a GNU extension, available since glibc 2.1.91. it | 120 | doesn't seem as optimized as memchr(), but is still quite | 121 | faster than our hand-written loop below. There is no wmemrchr | 122 | for 4-byte chars. */ | 123 | | 124 | if (n > MEMRCHR_CUT_OFF) { Branch (124:9): [True: 60, False: 256]
| 125 | #if STRINGLIB_SIZEOF_CHAR == 1 | 126 | p = memrchr(s, ch, n); | 127 | if (p != NULL) Branch (127:13): [True: 8, False: 52]
| 128 | return (p - s); | 129 | return -1; | 130 | #else | 131 | /* use memrchr if we can choose a needle without too many likely | 132 | false positives */ | 133 | const STRINGLIB_CHAR *s1; | 134 | Py_ssize_t n1; | 135 | unsigned char needle = ch & 0xff; | 136 | /* If looking for a multiple of 256, we'd have too | 137 | many false positives looking for the '\0' byte in UCS2 | 138 | and UCS4 representations. */ | 139 | if (needle != 0) { | 140 | do { | 141 | void *candidate = memrchr(s, needle, | 142 | n * sizeof(STRINGLIB_CHAR)); | 143 | if (candidate == NULL) | 144 | return -1; | 145 | n1 = n; | 146 | p = (const STRINGLIB_CHAR *) | 147 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 148 | n = p - s; | 149 | if (*p == ch) | 150 | return n; | 151 | /* False positive */ | 152 | if (n1 - n > MEMRCHR_CUT_OFF) | 153 | continue; | 154 | if (n <= MEMRCHR_CUT_OFF) | 155 | break; | 156 | s1 = p - MEMRCHR_CUT_OFF; | 157 | while (p > s1) { | 158 | p--; | 159 | if (*p == ch) | 160 | return (p - s); | 161 | } | 162 | n = p - s; | 163 | } | 164 | while (n > MEMRCHR_CUT_OFF); | 165 | } | 166 | #endif | 167 | } | 168 | #endif /* HAVE_MEMRCHR */ | 169 | p = s + n; | 170 | while (p > s) { Branch (170:12): [True: 2.91k, False: 239]
| 171 | p--; | 172 | if (*p == ch) Branch (172:13): [True: 17, False: 2.89k]
| 173 | return (p - s); | 174 | } | 175 | return -1; | 176 | } |
unicodeobject.c:ucs1lib_rfind_char Line | Count | Source | 116 | { | 117 | const STRINGLIB_CHAR *p; | 118 | #ifdef HAVE_MEMRCHR | 119 | /* memrchr() is a GNU extension, available since glibc 2.1.91. it | 120 | doesn't seem as optimized as memchr(), but is still quite | 121 | faster than our hand-written loop below. There is no wmemrchr | 122 | for 4-byte chars. */ | 123 | | 124 | if (n > MEMRCHR_CUT_OFF) { Branch (124:9): [True: 375k, False: 329k]
| 125 | #if STRINGLIB_SIZEOF_CHAR == 1 | 126 | p = memrchr(s, ch, n); | 127 | if (p != NULL) Branch (127:13): [True: 367k, False: 8.24k]
| 128 | return (p - s); | 129 | return -1; | 130 | #else | 131 | /* use memrchr if we can choose a needle without too many likely | 132 | false positives */ | 133 | const STRINGLIB_CHAR *s1; | 134 | Py_ssize_t n1; | 135 | unsigned char needle = ch & 0xff; | 136 | /* If looking for a multiple of 256, we'd have too | 137 | many false positives looking for the '\0' byte in UCS2 | 138 | and UCS4 representations. */ | 139 | if (needle != 0) { | 140 | do { | 141 | void *candidate = memrchr(s, needle, | 142 | n * sizeof(STRINGLIB_CHAR)); | 143 | if (candidate == NULL) | 144 | return -1; | 145 | n1 = n; | 146 | p = (const STRINGLIB_CHAR *) | 147 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 148 | n = p - s; | 149 | if (*p == ch) | 150 | return n; | 151 | /* False positive */ | 152 | if (n1 - n > MEMRCHR_CUT_OFF) | 153 | continue; | 154 | if (n <= MEMRCHR_CUT_OFF) | 155 | break; | 156 | s1 = p - MEMRCHR_CUT_OFF; | 157 | while (p > s1) { | 158 | p--; | 159 | if (*p == ch) | 160 | return (p - s); | 161 | } | 162 | n = p - s; | 163 | } | 164 | while (n > MEMRCHR_CUT_OFF); | 165 | } | 166 | #endif | 167 | } | 168 | #endif /* HAVE_MEMRCHR */ | 169 | p = s + n; | 170 | while (p > s) { Branch (170:12): [True: 1.66M, False: 146k]
| 171 | p--; | 172 | if (*p == ch) Branch (172:13): [True: 182k, False: 1.47M]
| 173 | return (p - s); | 174 | } | 175 | return -1; | 176 | } |
unicodeobject.c:ucs2lib_rfind_char Line | Count | Source | 116 | { | 117 | const STRINGLIB_CHAR *p; | 118 | #ifdef HAVE_MEMRCHR | 119 | /* memrchr() is a GNU extension, available since glibc 2.1.91. it | 120 | doesn't seem as optimized as memchr(), but is still quite | 121 | faster than our hand-written loop below. There is no wmemrchr | 122 | for 4-byte chars. */ | 123 | | 124 | if (n > MEMRCHR_CUT_OFF) { Branch (124:9): [True: 23, False: 374]
| 125 | #if STRINGLIB_SIZEOF_CHAR == 1 | 126 | p = memrchr(s, ch, n); | 127 | if (p != NULL) | 128 | return (p - s); | 129 | return -1; | 130 | #else | 131 | /* use memrchr if we can choose a needle without too many likely | 132 | false positives */ | 133 | const STRINGLIB_CHAR *s1; | 134 | Py_ssize_t n1; | 135 | unsigned char needle = ch & 0xff; | 136 | /* If looking for a multiple of 256, we'd have too | 137 | many false positives looking for the '\0' byte in UCS2 | 138 | and UCS4 representations. */ | 139 | if (needle != 0) { Branch (139:13): [True: 19, False: 4]
| 140 | do { | 141 | void *candidate = memrchr(s, needle, | 142 | n * sizeof(STRINGLIB_CHAR)); | 143 | if (candidate == NULL) Branch (143:21): [True: 2, False: 17]
| 144 | return -1; | 145 | n1 = n; | 146 | p = (const STRINGLIB_CHAR *) | 147 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 148 | n = p - s; | 149 | if (*p == ch) Branch (149:21): [True: 16, False: 1]
| 150 | return n; | 151 | /* False positive */ | 152 | if (n1 - n > MEMRCHR_CUT_OFF) Branch (152:21): [True: 1, False: 0]
| 153 | continue; | 154 | if (n <= MEMRCHR_CUT_OFF) Branch (154:21): [True: 0, False: 0]
| 155 | break; | 156 | s1 = p - MEMRCHR_CUT_OFF; | 157 | while (p > s1) { Branch (157:24): [True: 0, False: 0]
| 158 | p--; | 159 | if (*p == ch) Branch (159:25): [True: 0, False: 0]
| 160 | return (p - s); | 161 | } | 162 | n = p - s; | 163 | } | 164 | while (n > 1 MEMRCHR_CUT_OFF1 ); Branch (164:20): [True: 0, False: 1]
| 165 | } | 166 | #endif | 167 | } | 168 | #endif /* HAVE_MEMRCHR */ | 169 | p = s + n; | 170 | while (p > s) { Branch (170:12): [True: 2.72k, False: 264]
| 171 | p--; | 172 | if (*p == ch) Branch (172:13): [True: 115, False: 2.61k]
| 173 | return (p - s); | 174 | } | 175 | return -1; | 176 | } |
unicodeobject.c:ucs4lib_rfind_char Line | Count | Source | 116 | { | 117 | const STRINGLIB_CHAR *p; | 118 | #ifdef HAVE_MEMRCHR | 119 | /* memrchr() is a GNU extension, available since glibc 2.1.91. it | 120 | doesn't seem as optimized as memchr(), but is still quite | 121 | faster than our hand-written loop below. There is no wmemrchr | 122 | for 4-byte chars. */ | 123 | | 124 | if (n > MEMRCHR_CUT_OFF) { Branch (124:9): [True: 7, False: 10]
| 125 | #if STRINGLIB_SIZEOF_CHAR == 1 | 126 | p = memrchr(s, ch, n); | 127 | if (p != NULL) | 128 | return (p - s); | 129 | return -1; | 130 | #else | 131 | /* use memrchr if we can choose a needle without too many likely | 132 | false positives */ | 133 | const STRINGLIB_CHAR *s1; | 134 | Py_ssize_t n1; | 135 | unsigned char needle = ch & 0xff; | 136 | /* If looking for a multiple of 256, we'd have too | 137 | many false positives looking for the '\0' byte in UCS2 | 138 | and UCS4 representations. */ | 139 | if (needle != 0) { Branch (139:13): [True: 7, False: 0]
| 140 | do { | 141 | void *candidate = memrchr(s, needle, | 142 | n * sizeof(STRINGLIB_CHAR)); | 143 | if (candidate == NULL) Branch (143:21): [True: 0, False: 7]
| 144 | return -1; | 145 | n1 = n; | 146 | p = (const STRINGLIB_CHAR *) | 147 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 148 | n = p - s; | 149 | if (*p == ch) Branch (149:21): [True: 5, False: 2]
| 150 | return n; | 151 | /* False positive */ | 152 | if (n1 - n > MEMRCHR_CUT_OFF) Branch (152:21): [True: 2, False: 0]
| 153 | continue; | 154 | if (n <= MEMRCHR_CUT_OFF) Branch (154:21): [True: 0, False: 0]
| 155 | break; | 156 | s1 = p - MEMRCHR_CUT_OFF; | 157 | while (p > s1) { Branch (157:24): [True: 0, False: 0]
| 158 | p--; | 159 | if (*p == ch) Branch (159:25): [True: 0, False: 0]
| 160 | return (p - s); | 161 | } | 162 | n = p - s; | 163 | } | 164 | while (n > 2 MEMRCHR_CUT_OFF2 ); Branch (164:20): [True: 0, False: 2]
| 165 | } | 166 | #endif | 167 | } | 168 | #endif /* HAVE_MEMRCHR */ | 169 | p = s + n; | 170 | while (p > s) { Branch (170:12): [True: 107, False: 5]
| 171 | p--; | 172 | if (*p == ch) Branch (172:13): [True: 7, False: 100]
| 173 | return (p - s); | 174 | } | 175 | return -1; | 176 | } |
unicodeobject.c:asciilib_rfind_char Line | Count | Source | 116 | { | 117 | const STRINGLIB_CHAR *p; | 118 | #ifdef HAVE_MEMRCHR | 119 | /* memrchr() is a GNU extension, available since glibc 2.1.91. it | 120 | doesn't seem as optimized as memchr(), but is still quite | 121 | faster than our hand-written loop below. There is no wmemrchr | 122 | for 4-byte chars. */ | 123 | | 124 | if (n > MEMRCHR_CUT_OFF) { Branch (124:9): [True: 33.9k, False: 106k]
| 125 | #if STRINGLIB_SIZEOF_CHAR == 1 | 126 | p = memrchr(s, ch, n); | 127 | if (p != NULL) Branch (127:13): [True: 30.7k, False: 3.15k]
| 128 | return (p - s); | 129 | return -1; | 130 | #else | 131 | /* use memrchr if we can choose a needle without too many likely | 132 | false positives */ | 133 | const STRINGLIB_CHAR *s1; | 134 | Py_ssize_t n1; | 135 | unsigned char needle = ch & 0xff; | 136 | /* If looking for a multiple of 256, we'd have too | 137 | many false positives looking for the '\0' byte in UCS2 | 138 | and UCS4 representations. */ | 139 | if (needle != 0) { | 140 | do { | 141 | void *candidate = memrchr(s, needle, | 142 | n * sizeof(STRINGLIB_CHAR)); | 143 | if (candidate == NULL) | 144 | return -1; | 145 | n1 = n; | 146 | p = (const STRINGLIB_CHAR *) | 147 | _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); | 148 | n = p - s; | 149 | if (*p == ch) | 150 | return n; | 151 | /* False positive */ | 152 | if (n1 - n > MEMRCHR_CUT_OFF) | 153 | continue; | 154 | if (n <= MEMRCHR_CUT_OFF) | 155 | break; | 156 | s1 = p - MEMRCHR_CUT_OFF; | 157 | while (p > s1) { | 158 | p--; | 159 | if (*p == ch) | 160 | return (p - s); | 161 | } | 162 | n = p - s; | 163 | } | 164 | while (n > MEMRCHR_CUT_OFF); | 165 | } | 166 | #endif | 167 | } | 168 | #endif /* HAVE_MEMRCHR */ | 169 | p = s + n; | 170 | while (p > s) { Branch (170:12): [True: 753k, False: 86.1k]
| 171 | p--; | 172 | if (*p == ch) Branch (172:13): [True: 20.8k, False: 732k]
| 173 | return (p - s); | 174 | } | 175 | return -1; | 176 | } |
|
177 | |
178 | #undef MEMRCHR_CUT_OFF |
179 | |
180 | /* Change to a 1 to see logging comments walk through the algorithm. */ |
181 | #if 0 && STRINGLIB_SIZEOF_CHAR == 1 |
182 | # define LOG(...) printf(__VA_ARGS__) |
183 | # define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s) |
184 | # define LOG_LINEUP() do { \ |
185 | LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> "); \ |
186 | LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \ |
187 | LOG_STRING(needle, len_needle); LOG("\n"); \ |
188 | } while(0) |
189 | #else |
190 | # define LOG(...) |
191 | # define LOG_STRING(s, n) |
192 | # define LOG_LINEUP() |
193 | #endif |
194 | |
195 | Py_LOCAL_INLINE(Py_ssize_t) |
196 | STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle, |
197 | Py_ssize_t *return_period, int invert_alphabet) |
198 | { |
199 | /* Do a lexicographic search. Essentially this: |
200 | >>> max(needle[i:] for i in range(len(needle)+1)) |
201 | Also find the period of the right half. */ |
202 | Py_ssize_t max_suffix = 0; |
203 | Py_ssize_t candidate = 1; |
204 | Py_ssize_t k = 0; |
205 | // The period of the right half. |
206 | Py_ssize_t period = 1; |
207 | |
208 | while (candidate + k < len_needle) { Branch (208:12): [True: 71.6k, False: 894]
Branch (208:12): [True: 0, False: 0]
Branch (208:12): [True: 0, False: 0]
Branch (208:12): [True: 50.2k, False: 332]
Branch (208:12): [True: 0, False: 0]
Branch (208:12): [True: 1.78k, False: 98]
Branch (208:12): [True: 0, False: 0]
|
209 | // each loop increases candidate + k + max_suffix |
210 | STRINGLIB_CHAR a = needle[candidate + k]; |
211 | STRINGLIB_CHAR b = needle[max_suffix + k]; |
212 | // check if the suffix at candidate is better than max_suffix |
213 | if (invert_alphabet ? (b < a)59.7k : (a < b)63.9k ) { Branch (213:13): [True: 33.8k, False: 37.8k]
Branch (213:13): [True: 23.7k, False: 47.8k]
Branch (213:13): [True: 0, False: 0]
Branch (213:13): [True: 0, False: 0]
Branch (213:13): [True: 0, False: 0]
Branch (213:13): [True: 0, False: 0]
Branch (213:13): [True: 25.1k, False: 25.1k]
Branch (213:13): [True: 7.50k, False: 42.7k]
Branch (213:13): [True: 0, False: 0]
Branch (213:13): [True: 0, False: 0]
Branch (213:13): [True: 827, False: 962]
Branch (213:13): [True: 1.23k, False: 552]
Branch (213:13): [True: 0, False: 0]
Branch (213:13): [True: 0, False: 0]
|
214 | // Fell short of max_suffix. |
215 | // The next k + 1 characters are non-increasing |
216 | // from candidate, so they won't start a maximal suffix. |
217 | candidate += k + 1; |
218 | k = 0; |
219 | // We've ruled out any period smaller than what's |
220 | // been scanned since max_suffix. |
221 | period = candidate - max_suffix; |
222 | } |
223 | else if (a == b) { Branch (223:18): [True: 46.7k, False: 1.11k]
Branch (223:18): [True: 0, False: 0]
Branch (223:18): [True: 0, False: 0]
Branch (223:18): [True: 42.2k, False: 507]
Branch (223:18): [True: 0, False: 0]
Branch (223:18): [True: 418, False: 134]
Branch (223:18): [True: 0, False: 0]
|
224 | if (k + 1 != period) { Branch (224:17): [True: 38.6k, False: 8.12k]
Branch (224:17): [True: 0, False: 0]
Branch (224:17): [True: 0, False: 0]
Branch (224:17): [True: 35.2k, False: 6.98k]
Branch (224:17): [True: 0, False: 0]
Branch (224:17): [True: 418, False: 0]
Branch (224:17): [True: 0, False: 0]
|
225 | // Keep scanning the equal strings |
226 | k++; |
227 | } |
228 | else { |
229 | // Matched a whole period. |
230 | // Start matching the next period. |
231 | candidate += period; |
232 | k = 0; |
233 | } |
234 | } |
235 | else { |
236 | // Did better than max_suffix, so replace it. |
237 | max_suffix = candidate; |
238 | candidate++; |
239 | k = 0; |
240 | period = 1; |
241 | } |
242 | } |
243 | *return_period = period; |
244 | return max_suffix; |
245 | } bytes_methods.c:stringlib__lex_search Line | Count | Source | 198 | { | 199 | /* Do a lexicographic search. Essentially this: | 200 | >>> max(needle[i:] for i in range(len(needle)+1)) | 201 | Also find the period of the right half. */ | 202 | Py_ssize_t max_suffix = 0; | 203 | Py_ssize_t candidate = 1; | 204 | Py_ssize_t k = 0; | 205 | // The period of the right half. | 206 | Py_ssize_t period = 1; | 207 | | 208 | while (candidate + k < len_needle) { Branch (208:12): [True: 71.6k, False: 894]
| 209 | // each loop increases candidate + k + max_suffix | 210 | STRINGLIB_CHAR a = needle[candidate + k]; | 211 | STRINGLIB_CHAR b = needle[max_suffix + k]; | 212 | // check if the suffix at candidate is better than max_suffix | 213 | if (invert_alphabet ? (b < a)33.8k : (a < b)37.8k ) { Branch (213:13): [True: 33.8k, False: 37.8k]
Branch (213:13): [True: 23.7k, False: 47.8k]
| 214 | // Fell short of max_suffix. | 215 | // The next k + 1 characters are non-increasing | 216 | // from candidate, so they won't start a maximal suffix. | 217 | candidate += k + 1; | 218 | k = 0; | 219 | // We've ruled out any period smaller than what's | 220 | // been scanned since max_suffix. | 221 | period = candidate - max_suffix; | 222 | } | 223 | else if (a == b) { Branch (223:18): [True: 46.7k, False: 1.11k]
| 224 | if (k + 1 != period) { Branch (224:17): [True: 38.6k, False: 8.12k]
| 225 | // Keep scanning the equal strings | 226 | k++; | 227 | } | 228 | else { | 229 | // Matched a whole period. | 230 | // Start matching the next period. | 231 | candidate += period; | 232 | k = 0; | 233 | } | 234 | } | 235 | else { | 236 | // Did better than max_suffix, so replace it. | 237 | max_suffix = candidate; | 238 | candidate++; | 239 | k = 0; | 240 | period = 1; | 241 | } | 242 | } | 243 | *return_period = period; | 244 | return max_suffix; | 245 | } |
Unexecuted instantiation: bytearrayobject.c:stringlib__lex_search Unexecuted instantiation: bytesobject.c:stringlib__lex_search unicodeobject.c:asciilib__lex_search Line | Count | Source | 198 | { | 199 | /* Do a lexicographic search. Essentially this: | 200 | >>> max(needle[i:] for i in range(len(needle)+1)) | 201 | Also find the period of the right half. */ | 202 | Py_ssize_t max_suffix = 0; | 203 | Py_ssize_t candidate = 1; | 204 | Py_ssize_t k = 0; | 205 | // The period of the right half. | 206 | Py_ssize_t period = 1; | 207 | | 208 | while (candidate + k < len_needle) { Branch (208:12): [True: 50.2k, False: 332]
| 209 | // each loop increases candidate + k + max_suffix | 210 | STRINGLIB_CHAR a = needle[candidate + k]; | 211 | STRINGLIB_CHAR b = needle[max_suffix + k]; | 212 | // check if the suffix at candidate is better than max_suffix | 213 | if (invert_alphabet ? (b < a)25.1k : (a < b)25.1k ) { Branch (213:13): [True: 25.1k, False: 25.1k]
Branch (213:13): [True: 7.50k, False: 42.7k]
| 214 | // Fell short of max_suffix. | 215 | // The next k + 1 characters are non-increasing | 216 | // from candidate, so they won't start a maximal suffix. | 217 | candidate += k + 1; | 218 | k = 0; | 219 | // We've ruled out any period smaller than what's | 220 | // been scanned since max_suffix. | 221 | period = candidate - max_suffix; | 222 | } | 223 | else if (a == b) { Branch (223:18): [True: 42.2k, False: 507]
| 224 | if (k + 1 != period) { Branch (224:17): [True: 35.2k, False: 6.98k]
| 225 | // Keep scanning the equal strings | 226 | k++; | 227 | } | 228 | else { | 229 | // Matched a whole period. | 230 | // Start matching the next period. | 231 | candidate += period; | 232 | k = 0; | 233 | } | 234 | } | 235 | else { | 236 | // Did better than max_suffix, so replace it. | 237 | max_suffix = candidate; | 238 | candidate++; | 239 | k = 0; | 240 | period = 1; | 241 | } | 242 | } | 243 | *return_period = period; | 244 | return max_suffix; | 245 | } |
Unexecuted instantiation: unicodeobject.c:ucs1lib__lex_search unicodeobject.c:ucs2lib__lex_search Line | Count | Source | 198 | { | 199 | /* Do a lexicographic search. Essentially this: | 200 | >>> max(needle[i:] for i in range(len(needle)+1)) | 201 | Also find the period of the right half. */ | 202 | Py_ssize_t max_suffix = 0; | 203 | Py_ssize_t candidate = 1; | 204 | Py_ssize_t k = 0; | 205 | // The period of the right half. | 206 | Py_ssize_t period = 1; | 207 | | 208 | while (candidate + k < len_needle) { Branch (208:12): [True: 1.78k, False: 98]
| 209 | // each loop increases candidate + k + max_suffix | 210 | STRINGLIB_CHAR a = needle[candidate + k]; | 211 | STRINGLIB_CHAR b = needle[max_suffix + k]; | 212 | // check if the suffix at candidate is better than max_suffix | 213 | if (invert_alphabet ? (b < a)827 : (a < b)962 ) { Branch (213:13): [True: 827, False: 962]
Branch (213:13): [True: 1.23k, False: 552]
| 214 | // Fell short of max_suffix. | 215 | // The next k + 1 characters are non-increasing | 216 | // from candidate, so they won't start a maximal suffix. | 217 | candidate += k + 1; | 218 | k = 0; | 219 | // We've ruled out any period smaller than what's | 220 | // been scanned since max_suffix. | 221 | period = candidate - max_suffix; | 222 | } | 223 | else if (a == b) { Branch (223:18): [True: 418, False: 134]
| 224 | if (k + 1 != period) { Branch (224:17): [True: 418, False: 0]
| 225 | // Keep scanning the equal strings | 226 | k++; | 227 | } | 228 | else { | 229 | // Matched a whole period. | 230 | // Start matching the next period. | 231 | candidate += period; | 232 | k = 0; | 233 | } | 234 | } | 235 | else { | 236 | // Did better than max_suffix, so replace it. | 237 | max_suffix = candidate; | 238 | candidate++; | 239 | k = 0; | 240 | period = 1; | 241 | } | 242 | } | 243 | *return_period = period; | 244 | return max_suffix; | 245 | } |
Unexecuted instantiation: unicodeobject.c:ucs4lib__lex_search |
246 | |
247 | Py_LOCAL_INLINE(Py_ssize_t) |
248 | STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle, |
249 | Py_ssize_t len_needle, |
250 | Py_ssize_t *return_period) |
251 | { |
252 | /* Do a "critical factorization", making it so that: |
253 | >>> needle = (left := needle[:cut]) + (right := needle[cut:]) |
254 | where the "local period" of the cut is maximal. |
255 | |
256 | The local period of the cut is the minimal length of a string w |
257 | such that (left endswith w or w endswith left) |
258 | and (right startswith w or w startswith left). |
259 | |
260 | The Critical Factorization Theorem says that this maximal local |
261 | period is the global period of the string. |
262 | |
263 | Crochemore and Perrin (1991) show that this cut can be computed |
264 | as the later of two cuts: one that gives a lexicographically |
265 | maximal right half, and one that gives the same with the |
266 | with respect to a reversed alphabet-ordering. |
267 | |
268 | This is what we want to happen: |
269 | >>> x = "GCAGAGAG" |
270 | >>> cut, period = factorize(x) |
271 | >>> x[:cut], (right := x[cut:]) |
272 | ('GC', 'AGAGAG') |
273 | >>> period # right half period |
274 | 2 |
275 | >>> right[period:] == right[:-period] |
276 | True |
277 | |
278 | This is how the local period lines up in the above example: |
279 | GC | AGAGAG |
280 | AGAGAGC = AGAGAGC |
281 | The length of this minimal repetition is 7, which is indeed the |
282 | period of the original string. */ |
283 | |
284 | Py_ssize_t cut1, period1, cut2, period2, cut, period; |
285 | cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0); |
286 | cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1); |
287 | |
288 | // Take the later cut. |
289 | if (cut1 > cut2) { Branch (289:9): [True: 372, False: 75]
Branch (289:9): [True: 0, False: 0]
Branch (289:9): [True: 0, False: 0]
Branch (289:9): [True: 82, False: 84]
Branch (289:9): [True: 0, False: 0]
Branch (289:9): [True: 34, False: 15]
Branch (289:9): [True: 0, False: 0]
|
290 | period = period1; |
291 | cut = cut1; |
292 | } |
293 | else { |
294 | period = period2; |
295 | cut = cut2; |
296 | } |
297 | |
298 | LOG("split: "); LOG_STRING(needle, cut); |
299 | LOG(" + "); LOG_STRING(needle + cut, len_needle - cut); |
300 | LOG("\n"); |
301 | |
302 | *return_period = period; |
303 | return cut; |
304 | } bytes_methods.c:stringlib__factorize Line | Count | Source | 251 | { | 252 | /* Do a "critical factorization", making it so that: | 253 | >>> needle = (left := needle[:cut]) + (right := needle[cut:]) | 254 | where the "local period" of the cut is maximal. | 255 | | 256 | The local period of the cut is the minimal length of a string w | 257 | such that (left endswith w or w endswith left) | 258 | and (right startswith w or w startswith left). | 259 | | 260 | The Critical Factorization Theorem says that this maximal local | 261 | period is the global period of the string. | 262 | | 263 | Crochemore and Perrin (1991) show that this cut can be computed | 264 | as the later of two cuts: one that gives a lexicographically | 265 | maximal right half, and one that gives the same with the | 266 | with respect to a reversed alphabet-ordering. | 267 | | 268 | This is what we want to happen: | 269 | >>> x = "GCAGAGAG" | 270 | >>> cut, period = factorize(x) | 271 | >>> x[:cut], (right := x[cut:]) | 272 | ('GC', 'AGAGAG') | 273 | >>> period # right half period | 274 | 2 | 275 | >>> right[period:] == right[:-period] | 276 | True | 277 | | 278 | This is how the local period lines up in the above example: | 279 | GC | AGAGAG | 280 | AGAGAGC = AGAGAGC | 281 | The length of this minimal repetition is 7, which is indeed the | 282 | period of the original string. */ | 283 | | 284 | Py_ssize_t cut1, period1, cut2, period2, cut, period; | 285 | cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0); | 286 | cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1); | 287 | | 288 | // Take the later cut. | 289 | if (cut1 > cut2) { Branch (289:9): [True: 372, False: 75]
| 290 | period = period1; | 291 | cut = cut1; | 292 | } | 293 | else { | 294 | period = period2; | 295 | cut = cut2; | 296 | } | 297 | | 298 | LOG("split: "); LOG_STRING(needle, cut); | 299 | LOG(" + "); LOG_STRING(needle + cut, len_needle - cut); | 300 | LOG("\n"); | 301 | | 302 | *return_period = period; | 303 | return cut; | 304 | } |
Unexecuted instantiation: bytearrayobject.c:stringlib__factorize Unexecuted instantiation: bytesobject.c:stringlib__factorize unicodeobject.c:asciilib__factorize Line | Count | Source | 251 | { | 252 | /* Do a "critical factorization", making it so that: | 253 | >>> needle = (left := needle[:cut]) + (right := needle[cut:]) | 254 | where the "local period" of the cut is maximal. | 255 | | 256 | The local period of the cut is the minimal length of a string w | 257 | such that (left endswith w or w endswith left) | 258 | and (right startswith w or w startswith left). | 259 | | 260 | The Critical Factorization Theorem says that this maximal local | 261 | period is the global period of the string. | 262 | | 263 | Crochemore and Perrin (1991) show that this cut can be computed | 264 | as the later of two cuts: one that gives a lexicographically | 265 | maximal right half, and one that gives the same with the | 266 | with respect to a reversed alphabet-ordering. | 267 | | 268 | This is what we want to happen: | 269 | >>> x = "GCAGAGAG" | 270 | >>> cut, period = factorize(x) | 271 | >>> x[:cut], (right := x[cut:]) | 272 | ('GC', 'AGAGAG') | 273 | >>> period # right half period | 274 | 2 | 275 | >>> right[period:] == right[:-period] | 276 | True | 277 | | 278 | This is how the local period lines up in the above example: | 279 | GC | AGAGAG | 280 | AGAGAGC = AGAGAGC | 281 | The length of this minimal repetition is 7, which is indeed the | 282 | period of the original string. */ | 283 | | 284 | Py_ssize_t cut1, period1, cut2, period2, cut, period; | 285 | cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0); | 286 | cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1); | 287 | | 288 | // Take the later cut. | 289 | if (cut1 > cut2) { Branch (289:9): [True: 82, False: 84]
| 290 | period = period1; | 291 | cut = cut1; | 292 | } | 293 | else { | 294 | period = period2; | 295 | cut = cut2; | 296 | } | 297 | | 298 | LOG("split: "); LOG_STRING(needle, cut); | 299 | LOG(" + "); LOG_STRING(needle + cut, len_needle - cut); | 300 | LOG("\n"); | 301 | | 302 | *return_period = period; | 303 | return cut; | 304 | } |
Unexecuted instantiation: unicodeobject.c:ucs1lib__factorize unicodeobject.c:ucs2lib__factorize Line | Count | Source | 251 | { | 252 | /* Do a "critical factorization", making it so that: | 253 | >>> needle = (left := needle[:cut]) + (right := needle[cut:]) | 254 | where the "local period" of the cut is maximal. | 255 | | 256 | The local period of the cut is the minimal length of a string w | 257 | such that (left endswith w or w endswith left) | 258 | and (right startswith w or w startswith left). | 259 | | 260 | The Critical Factorization Theorem says that this maximal local | 261 | period is the global period of the string. | 262 | | 263 | Crochemore and Perrin (1991) show that this cut can be computed | 264 | as the later of two cuts: one that gives a lexicographically | 265 | maximal right half, and one that gives the same with the | 266 | with respect to a reversed alphabet-ordering. | 267 | | 268 | This is what we want to happen: | 269 | >>> x = "GCAGAGAG" | 270 | >>> cut, period = factorize(x) | 271 | >>> x[:cut], (right := x[cut:]) | 272 | ('GC', 'AGAGAG') | 273 | >>> period # right half period | 274 | 2 | 275 | >>> right[period:] == right[:-period] | 276 | True | 277 | | 278 | This is how the local period lines up in the above example: | 279 | GC | AGAGAG | 280 | AGAGAGC = AGAGAGC | 281 | The length of this minimal repetition is 7, which is indeed the | 282 | period of the original string. */ | 283 | | 284 | Py_ssize_t cut1, period1, cut2, period2, cut, period; | 285 | cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0); | 286 | cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1); | 287 | | 288 | // Take the later cut. | 289 | if (cut1 > cut2) { Branch (289:9): [True: 34, False: 15]
| 290 | period = period1; | 291 | cut = cut1; | 292 | } | 293 | else { | 294 | period = period2; | 295 | cut = cut2; | 296 | } | 297 | | 298 | LOG("split: "); LOG_STRING(needle, cut); | 299 | LOG(" + "); LOG_STRING(needle + cut, len_needle - cut); | 300 | LOG("\n"); | 301 | | 302 | *return_period = period; | 303 | return cut; | 304 | } |
Unexecuted instantiation: unicodeobject.c:ucs4lib__factorize |
305 | |
306 | |
307 | #define SHIFT_TYPE uint8_t |
308 | #define MAX_SHIFT UINT8_MAX |
309 | |
310 | #define TABLE_SIZE_BITS 6u |
311 | #define TABLE_SIZE (1U << TABLE_SIZE_BITS) |
312 | #define TABLE_MASK (TABLE_SIZE - 1U) |
313 | |
314 | typedef struct STRINGLIB(_pre) { |
315 | const STRINGLIB_CHAR *needle; |
316 | Py_ssize_t len_needle; |
317 | Py_ssize_t cut; |
318 | Py_ssize_t period; |
319 | Py_ssize_t gap; |
320 | int is_periodic; |
321 | SHIFT_TYPE table[TABLE_SIZE]; |
322 | } STRINGLIB(prework); |
323 | |
324 | |
325 | static void |
326 | STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle, |
327 | STRINGLIB(prework) *p) |
328 | { |
329 | p->needle = needle; |
330 | p->len_needle = len_needle; |
331 | p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period)); |
332 | assert(p->period + p->cut <= len_needle); |
333 | p->is_periodic = (0 == memcmp(needle, |
334 | needle + p->period, |
335 | p->cut * STRINGLIB_SIZEOF_CHAR)); |
336 | if (p->is_periodic) { Branch (336:9): [True: 138, False: 309]
Branch (336:9): [True: 0, False: 0]
Branch (336:9): [True: 0, False: 0]
Branch (336:9): [True: 163, False: 3]
Branch (336:9): [True: 0, False: 0]
Branch (336:9): [True: 15, False: 34]
Branch (336:9): [True: 0, False: 0]
|
337 | assert(p->cut <= len_needle/2); |
338 | assert(p->cut < p->period); |
339 | p->gap = 0; // unused |
340 | } |
341 | else { |
342 | // A lower bound on the period |
343 | p->period = Py_MAX(p->cut, len_needle - p->cut) + 1; |
344 | // The gap between the last character and the previous |
345 | // occurrence of an equivalent character (modulo TABLE_SIZE) |
346 | p->gap = len_needle; |
347 | STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK; |
348 | for (Py_ssize_t i = len_needle - 2; i >= 0; i--9.39k ) { Branch (348:45): [True: 9.62k, False: 13]
Branch (348:45): [True: 0, False: 0]
Branch (348:45): [True: 0, False: 0]
Branch (348:45): [True: 6, False: 0]
Branch (348:45): [True: 0, False: 0]
Branch (348:45): [True: 102, False: 0]
Branch (348:45): [True: 0, False: 0]
|
349 | STRINGLIB_CHAR x = needle[i] & TABLE_MASK; |
350 | if (x == last) { Branch (350:17): [True: 296, False: 9.32k]
Branch (350:17): [True: 0, False: 0]
Branch (350:17): [True: 0, False: 0]
Branch (350:17): [True: 3, False: 3]
Branch (350:17): [True: 0, False: 0]
Branch (350:17): [True: 34, False: 68]
Branch (350:17): [True: 0, False: 0]
|
351 | p->gap = len_needle - 1 - i; |
352 | break; |
353 | } |
354 | } |
355 | } |
356 | // Fill up a compressed Boyer-Moore "Bad Character" table |
357 | Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT); |
358 | for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++42.3k ) { Branch (358:28): [True: 28.6k, False: 447]
Branch (358:28): [True: 0, False: 0]
Branch (358:28): [True: 0, False: 0]
Branch (358:28): [True: 10.6k, False: 166]
Branch (358:28): [True: 0, False: 0]
Branch (358:28): [True: 3.13k, False: 49]
Branch (358:28): [True: 0, False: 0]
|
359 | p->table[i] = Py_SAFE_DOWNCAST(not_found_shift, |
360 | Py_ssize_t, SHIFT_TYPE); |
361 | } |
362 | for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++46.8k ) { Branch (362:55): [True: 24.3k, False: 447]
Branch (362:55): [True: 0, False: 0]
Branch (362:55): [True: 0, False: 0]
Branch (362:55): [True: 21.7k, False: 166]
Branch (362:55): [True: 0, False: 0]
Branch (362:55): [True: 875, False: 49]
Branch (362:55): [True: 0, False: 0]
|
363 | SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i, |
364 | Py_ssize_t, SHIFT_TYPE); |
365 | p->table[needle[i] & TABLE_MASK] = shift; |
366 | } |
367 | } bytes_methods.c:stringlib__preprocess Line | Count | Source | 328 | { | 329 | p->needle = needle; | 330 | p->len_needle = len_needle; | 331 | p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period)); | 332 | assert(p->period + p->cut <= len_needle); | 333 | p->is_periodic = (0 == memcmp(needle, | 334 | needle + p->period, | 335 | p->cut * STRINGLIB_SIZEOF_CHAR)); | 336 | if (p->is_periodic) { Branch (336:9): [True: 138, False: 309]
| 337 | assert(p->cut <= len_needle/2); | 338 | assert(p->cut < p->period); | 339 | p->gap = 0; // unused | 340 | } | 341 | else { | 342 | // A lower bound on the period | 343 | p->period = Py_MAX(p->cut, len_needle - p->cut) + 1; | 344 | // The gap between the last character and the previous | 345 | // occurrence of an equivalent character (modulo TABLE_SIZE) | 346 | p->gap = len_needle; | 347 | STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK; | 348 | for (Py_ssize_t i = len_needle - 2; i >= 0; i--9.32k ) { Branch (348:45): [True: 9.62k, False: 13]
| 349 | STRINGLIB_CHAR x = needle[i] & TABLE_MASK; | 350 | if (x == last) { Branch (350:17): [True: 296, False: 9.32k]
| 351 | p->gap = len_needle - 1 - i; | 352 | break; | 353 | } | 354 | } | 355 | } | 356 | // Fill up a compressed Boyer-Moore "Bad Character" table | 357 | Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT); | 358 | for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++28.6k ) { Branch (358:28): [True: 28.6k, False: 447]
| 359 | p->table[i] = Py_SAFE_DOWNCAST(not_found_shift, | 360 | Py_ssize_t, SHIFT_TYPE); | 361 | } | 362 | for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++24.3k ) { Branch (362:55): [True: 24.3k, False: 447]
| 363 | SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i, | 364 | Py_ssize_t, SHIFT_TYPE); | 365 | p->table[needle[i] & TABLE_MASK] = shift; | 366 | } | 367 | } |
Unexecuted instantiation: bytearrayobject.c:stringlib__preprocess Unexecuted instantiation: bytesobject.c:stringlib__preprocess unicodeobject.c:asciilib__preprocess Line | Count | Source | 328 | { | 329 | p->needle = needle; | 330 | p->len_needle = len_needle; | 331 | p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period)); | 332 | assert(p->period + p->cut <= len_needle); | 333 | p->is_periodic = (0 == memcmp(needle, | 334 | needle + p->period, | 335 | p->cut * STRINGLIB_SIZEOF_CHAR)); | 336 | if (p->is_periodic) { Branch (336:9): [True: 163, False: 3]
| 337 | assert(p->cut <= len_needle/2); | 338 | assert(p->cut < p->period); | 339 | p->gap = 0; // unused | 340 | } | 341 | else { | 342 | // A lower bound on the period | 343 | p->period = Py_MAX(p->cut, len_needle - p->cut) + 1; | 344 | // The gap between the last character and the previous | 345 | // occurrence of an equivalent character (modulo TABLE_SIZE) | 346 | p->gap = len_needle; | 347 | STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK; | 348 | for (Py_ssize_t i = len_needle - 2; i >= 0; i--3 ) { Branch (348:45): [True: 6, False: 0]
| 349 | STRINGLIB_CHAR x = needle[i] & TABLE_MASK; | 350 | if (x == last) { Branch (350:17): [True: 3, False: 3]
| 351 | p->gap = len_needle - 1 - i; | 352 | break; | 353 | } | 354 | } | 355 | } | 356 | // Fill up a compressed Boyer-Moore "Bad Character" table | 357 | Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT); | 358 | for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++10.6k ) { Branch (358:28): [True: 10.6k, False: 166]
| 359 | p->table[i] = Py_SAFE_DOWNCAST(not_found_shift, | 360 | Py_ssize_t, SHIFT_TYPE); | 361 | } | 362 | for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++21.7k ) { Branch (362:55): [True: 21.7k, False: 166]
| 363 | SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i, | 364 | Py_ssize_t, SHIFT_TYPE); | 365 | p->table[needle[i] & TABLE_MASK] = shift; | 366 | } | 367 | } |
Unexecuted instantiation: unicodeobject.c:ucs1lib__preprocess unicodeobject.c:ucs2lib__preprocess Line | Count | Source | 328 | { | 329 | p->needle = needle; | 330 | p->len_needle = len_needle; | 331 | p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period)); | 332 | assert(p->period + p->cut <= len_needle); | 333 | p->is_periodic = (0 == memcmp(needle, | 334 | needle + p->period, | 335 | p->cut * STRINGLIB_SIZEOF_CHAR)); | 336 | if (p->is_periodic) { Branch (336:9): [True: 15, False: 34]
| 337 | assert(p->cut <= len_needle/2); | 338 | assert(p->cut < p->period); | 339 | p->gap = 0; // unused | 340 | } | 341 | else { | 342 | // A lower bound on the period | 343 | p->period = Py_MAX(p->cut, len_needle - p->cut) + 1; | 344 | // The gap between the last character and the previous | 345 | // occurrence of an equivalent character (modulo TABLE_SIZE) | 346 | p->gap = len_needle; | 347 | STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK; | 348 | for (Py_ssize_t i = len_needle - 2; i >= 0; i--68 ) { Branch (348:45): [True: 102, False: 0]
| 349 | STRINGLIB_CHAR x = needle[i] & TABLE_MASK; | 350 | if (x == last) { Branch (350:17): [True: 34, False: 68]
| 351 | p->gap = len_needle - 1 - i; | 352 | break; | 353 | } | 354 | } | 355 | } | 356 | // Fill up a compressed Boyer-Moore "Bad Character" table | 357 | Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT); | 358 | for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++3.13k ) { Branch (358:28): [True: 3.13k, False: 49]
| 359 | p->table[i] = Py_SAFE_DOWNCAST(not_found_shift, | 360 | Py_ssize_t, SHIFT_TYPE); | 361 | } | 362 | for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++875 ) { Branch (362:55): [True: 875, False: 49]
| 363 | SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i, | 364 | Py_ssize_t, SHIFT_TYPE); | 365 | p->table[needle[i] & TABLE_MASK] = shift; | 366 | } | 367 | } |
Unexecuted instantiation: unicodeobject.c:ucs4lib__preprocess |
368 | |
369 | static Py_ssize_t |
370 | STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack, |
371 | STRINGLIB(prework) *p) |
372 | { |
373 | // Crochemore and Perrin's (1991) Two-Way algorithm. |
374 | // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 |
375 | const Py_ssize_t len_needle = p->len_needle; |
376 | const Py_ssize_t cut = p->cut; |
377 | Py_ssize_t period = p->period; |
378 | const STRINGLIB_CHAR *const needle = p->needle; |
379 | const STRINGLIB_CHAR *window_last = haystack + len_needle - 1; |
380 | const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack; |
381 | SHIFT_TYPE *table = p->table; |
382 | const STRINGLIB_CHAR *window; |
383 | LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack); |
384 | |
385 | if (p->is_periodic) { Branch (385:9): [True: 138, False: 309]
Branch (385:9): [True: 0, False: 0]
Branch (385:9): [True: 0, False: 0]
Branch (385:9): [True: 163, False: 3]
Branch (385:9): [True: 0, False: 0]
Branch (385:9): [True: 30, False: 75]
Branch (385:9): [True: 0, False: 0]
|
386 | LOG("Needle is periodic.\n"); |
387 | Py_ssize_t memory = 0; |
388 | periodicwindowloop: |
389 | while (window_last < haystack_end) { Branch (389:16): [True: 2.83k, False: 0]
Branch (389:16): [True: 0, False: 0]
Branch (389:16): [True: 0, False: 0]
Branch (389:16): [True: 3.39k, False: 0]
Branch (389:16): [True: 0, False: 0]
Branch (389:16): [True: 1.73k, False: 0]
Branch (389:16): [True: 0, False: 0]
|
390 | assert(memory == 0); |
391 | for (;;) { |
392 | LOG_LINEUP(); |
393 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; |
394 | window_last += shift; |
395 | if (shift == 0) { Branch (395:21): [True: 2.83k, False: 6.62k]
Branch (395:21): [True: 0, False: 0]
Branch (395:21): [True: 0, False: 0]
Branch (395:21): [True: 3.39k, False: 8.07k]
Branch (395:21): [True: 0, False: 0]
Branch (395:21): [True: 1.73k, False: 14.4k]
Branch (395:21): [True: 0, False: 0]
|
396 | break; |
397 | } |
398 | if (window_last >= haystack_end) { Branch (398:21): [True: 0, False: 6.62k]
Branch (398:21): [True: 0, False: 0]
Branch (398:21): [True: 0, False: 0]
Branch (398:21): [True: 0, False: 8.07k]
Branch (398:21): [True: 0, False: 0]
Branch (398:21): [True: 1, False: 14.4k]
Branch (398:21): [True: 0, False: 0]
|
399 | return -1; |
400 | } |
401 | LOG("Horspool skip"); |
402 | } |
403 | no_shift: |
404 | window = window_last - len_needle + 1; |
405 | assert((window[len_needle - 1] & TABLE_MASK) == |
406 | (needle[len_needle - 1] & TABLE_MASK)); |
407 | Py_ssize_t i = Py_MAX(cut, memory); |
408 | for (; i < len_needle; i++41.2k ) { Branch (408:20): [True: 21.4k, False: 145]
Branch (408:20): [True: 0, False: 0]
Branch (408:20): [True: 0, False: 0]
Branch (408:20): [True: 25.1k, False: 176]
Branch (408:20): [True: 0, False: 0]
Branch (408:20): [True: 2.25k, False: 29]
Branch (408:20): [True: 0, False: 0]
|
409 | if (needle[i] != window[i]) { Branch (409:21): [True: 2.70k, False: 18.7k]
Branch (409:21): [True: 0, False: 0]
Branch (409:21): [True: 0, False: 0]
Branch (409:21): [True: 3.22k, False: 21.9k]
Branch (409:21): [True: 0, False: 0]
Branch (409:21): [True: 1.70k, False: 555]
Branch (409:21): [True: 0, False: 0]
|
410 | LOG("Right half does not match.\n"); |
411 | window_last += i - cut + 1; |
412 | memory = 0; |
413 | goto periodicwindowloop; |
414 | } |
415 | } |
416 | for (i = memory; 350 i < cut; i++2.95k ) { Branch (416:30): [True: 1.38k, False: 138]
Branch (416:30): [True: 0, False: 0]
Branch (416:30): [True: 0, False: 0]
Branch (416:30): [True: 1.47k, False: 163]
Branch (416:30): [True: 0, False: 0]
Branch (416:30): [True: 116, False: 29]
Branch (416:30): [True: 0, False: 0]
|
417 | if (needle[i] != window[i]) { Branch (417:21): [True: 7, False: 1.37k]
Branch (417:21): [True: 0, False: 0]
Branch (417:21): [True: 0, False: 0]
Branch (417:21): [True: 13, False: 1.46k]
Branch (417:21): [True: 0, False: 0]
Branch (417:21): [True: 0, False: 116]
Branch (417:21): [True: 0, False: 0]
|
418 | LOG("Left half does not match.\n"); |
419 | window_last += period; |
420 | memory = len_needle - period; |
421 | if (window_last >= haystack_end) { Branch (421:25): [True: 0, False: 7]
Branch (421:25): [True: 0, False: 0]
Branch (421:25): [True: 0, False: 0]
Branch (421:25): [True: 0, False: 13]
Branch (421:25): [True: 0, False: 0]
Branch (421:25): [True: 0, False: 0]
Branch (421:25): [True: 0, False: 0]
|
422 | return -1; |
423 | } |
424 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; |
425 | if (shift) { Branch (425:25): [True: 0, False: 7]
Branch (425:25): [True: 0, False: 0]
Branch (425:25): [True: 0, False: 0]
Branch (425:25): [True: 0, False: 13]
Branch (425:25): [True: 0, False: 0]
Branch (425:25): [True: 0, False: 0]
Branch (425:25): [True: 0, False: 0]
|
426 | // A mismatch has been identified to the right |
427 | // of where i will next start, so we can jump |
428 | // at least as far as if the mismatch occurred |
429 | // on the first comparison. |
430 | Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1; |
431 | LOG("Skip with Memory.\n"); |
432 | memory = 0; |
433 | window_last += Py_MAX(shift, mem_jump); |
434 | goto periodicwindowloop; |
435 | } |
436 | goto no_shift; |
437 | } |
438 | } |
439 | LOG("Found a match!\n"); |
440 | return window - haystack; |
441 | } |
442 | } |
443 | else { |
444 | Py_ssize_t gap = p->gap; |
445 | period = Py_MAX(gap, period); |
446 | LOG("Needle is not periodic.\n"); |
447 | Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap); |
448 | windowloop: |
449 | while (window_last < haystack_end) { Branch (449:16): [True: 34.6k, False: 0]
Branch (449:16): [True: 0, False: 0]
Branch (449:16): [True: 0, False: 0]
Branch (449:16): [True: 14, False: 0]
Branch (449:16): [True: 0, False: 0]
Branch (449:16): [True: 2.51k, False: 0]
Branch (449:16): [True: 0, False: 0]
|
450 | for (;;) { |
451 | LOG_LINEUP(); |
452 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; |
453 | window_last += shift; |
454 | if (shift == 0) { Branch (454:21): [True: 34.4k, False: 2.10M]
Branch (454:21): [True: 0, False: 0]
Branch (454:21): [True: 0, False: 0]
Branch (454:21): [True: 14, False: 770]
Branch (454:21): [True: 0, False: 0]
Branch (454:21): [True: 2.51k, False: 18.8k]
Branch (454:21): [True: 0, False: 0]
|
455 | break; |
456 | } |
457 | if (window_last >= haystack_end) { Branch (457:21): [True: 178, False: 2.10M]
Branch (457:21): [True: 0, False: 0]
Branch (457:21): [True: 0, False: 0]
Branch (457:21): [True: 0, False: 770]
Branch (457:21): [True: 0, False: 0]
Branch (457:21): [True: 2, False: 18.8k]
Branch (457:21): [True: 0, False: 0]
|
458 | return -1; |
459 | } |
460 | LOG("Horspool skip"); |
461 | } |
462 | window = window_last - len_needle + 1; |
463 | assert((window[len_needle - 1] & TABLE_MASK) == |
464 | (needle[len_needle - 1] & TABLE_MASK)); |
465 | for (Py_ssize_t i = cut; i < gap_jump_end; i++12.6k ) { Branch (465:38): [True: 43.6k, False: 2.03k]
Branch (465:38): [True: 0, False: 0]
Branch (465:38): [True: 0, False: 0]
Branch (465:38): [True: 17, False: 3]
Branch (465:38): [True: 0, False: 0]
Branch (465:38): [True: 3.50k, False: 431]
Branch (465:38): [True: 0, False: 0]
|
466 | if (needle[i] != window[i]) { Branch (466:21): [True: 32.4k, False: 11.2k]
Branch (466:21): [True: 0, False: 0]
Branch (466:21): [True: 0, False: 0]
Branch (466:21): [True: 11, False: 6]
Branch (466:21): [True: 0, False: 0]
Branch (466:21): [True: 2.08k, False: 1.42k]
Branch (466:21): [True: 0, False: 0]
|
467 | LOG("Early right half mismatch: jump by gap.\n"); |
468 | assert(gap >= i - cut + 1); |
469 | window_last += gap; |
470 | goto windowloop; |
471 | } |
472 | } |
473 | for (Py_ssize_t i = gap_jump_end; 2.46k i < len_needle; i++3.71k ) { Branch (473:47): [True: 2.90k, False: 188]
Branch (473:47): [True: 0, False: 0]
Branch (473:47): [True: 0, False: 0]
Branch (473:47): [True: 788, False: 3]
Branch (473:47): [True: 0, False: 0]
Branch (473:47): [True: 2.11k, False: 177]
Branch (473:47): [True: 0, False: 0]
|
474 | if (needle[i] != window[i]) { Branch (474:21): [True: 1.84k, False: 1.06k]
Branch (474:21): [True: 0, False: 0]
Branch (474:21): [True: 0, False: 0]
Branch (474:21): [True: 0, False: 788]
Branch (474:21): [True: 0, False: 0]
Branch (474:21): [True: 254, False: 1.86k]
Branch (474:21): [True: 0, False: 0]
|
475 | LOG("Late right half mismatch.\n"); |
476 | assert(i - cut + 1 > gap); |
477 | window_last += i - cut + 1; |
478 | goto windowloop; |
479 | } |
480 | } |
481 | for (Py_ssize_t i = 0; 368 i < cut; i++11.1k ) { Branch (481:36): [True: 8.22k, False: 131]
Branch (481:36): [True: 0, False: 0]
Branch (481:36): [True: 0, False: 0]
Branch (481:36): [True: 2.55k, False: 3]
Branch (481:36): [True: 0, False: 0]
Branch (481:36): [True: 478, False: 73]
Branch (481:36): [True: 0, False: 0]
|
482 | if (needle[i] != window[i]) { Branch (482:21): [True: 57, False: 8.17k]
Branch (482:21): [True: 0, False: 0]
Branch (482:21): [True: 0, False: 0]
Branch (482:21): [True: 0, False: 2.55k]
Branch (482:21): [True: 0, False: 0]
Branch (482:21): [True: 104, False: 374]
Branch (482:21): [True: 0, False: 0]
|
483 | LOG("Left half does not match.\n"); |
484 | window_last += period; |
485 | goto windowloop; |
486 | } |
487 | } |
488 | LOG("Found a match!\n"); |
489 | return window - haystack; |
490 | } |
491 | } |
492 | LOG("Not found. Returning -1.\n"); |
493 | return -1; |
494 | } bytes_methods.c:stringlib__two_way Line | Count | Source | 372 | { | 373 | // Crochemore and Perrin's (1991) Two-Way algorithm. | 374 | // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 | 375 | const Py_ssize_t len_needle = p->len_needle; | 376 | const Py_ssize_t cut = p->cut; | 377 | Py_ssize_t period = p->period; | 378 | const STRINGLIB_CHAR *const needle = p->needle; | 379 | const STRINGLIB_CHAR *window_last = haystack + len_needle - 1; | 380 | const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack; | 381 | SHIFT_TYPE *table = p->table; | 382 | const STRINGLIB_CHAR *window; | 383 | LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack); | 384 | | 385 | if (p->is_periodic) { Branch (385:9): [True: 138, False: 309]
| 386 | LOG("Needle is periodic.\n"); | 387 | Py_ssize_t memory = 0; | 388 | periodicwindowloop: | 389 | while (window_last < haystack_end) { Branch (389:16): [True: 2.83k, False: 0]
| 390 | assert(memory == 0); | 391 | for (;;) { | 392 | LOG_LINEUP(); | 393 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; | 394 | window_last += shift; | 395 | if (shift == 0) { Branch (395:21): [True: 2.83k, False: 6.62k]
| 396 | break; | 397 | } | 398 | if (window_last >= haystack_end) { Branch (398:21): [True: 0, False: 6.62k]
| 399 | return -1; | 400 | } | 401 | LOG("Horspool skip"); | 402 | } | 403 | no_shift: | 404 | window = window_last - len_needle + 1; | 405 | assert((window[len_needle - 1] & TABLE_MASK) == | 406 | (needle[len_needle - 1] & TABLE_MASK)); | 407 | Py_ssize_t i = Py_MAX(cut, memory); | 408 | for (; i < len_needle; i++18.7k ) { Branch (408:20): [True: 21.4k, False: 145]
| 409 | if (needle[i] != window[i]) { Branch (409:21): [True: 2.70k, False: 18.7k]
| 410 | LOG("Right half does not match.\n"); | 411 | window_last += i - cut + 1; | 412 | memory = 0; | 413 | goto periodicwindowloop; | 414 | } | 415 | } | 416 | for (i = memory; 145 i < cut; i++1.37k ) { Branch (416:30): [True: 1.38k, False: 138]
| 417 | if (needle[i] != window[i]) { Branch (417:21): [True: 7, False: 1.37k]
| 418 | LOG("Left half does not match.\n"); | 419 | window_last += period; | 420 | memory = len_needle - period; | 421 | if (window_last >= haystack_end) { Branch (421:25): [True: 0, False: 7]
| 422 | return -1; | 423 | } | 424 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; | 425 | if (shift) { Branch (425:25): [True: 0, False: 7]
| 426 | // A mismatch has been identified to the right | 427 | // of where i will next start, so we can jump | 428 | // at least as far as if the mismatch occurred | 429 | // on the first comparison. | 430 | Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1; | 431 | LOG("Skip with Memory.\n"); | 432 | memory = 0; | 433 | window_last += Py_MAX(shift, mem_jump); | 434 | goto periodicwindowloop; | 435 | } | 436 | goto no_shift; | 437 | } | 438 | } | 439 | LOG("Found a match!\n"); | 440 | return window - haystack; | 441 | } | 442 | } | 443 | else { | 444 | Py_ssize_t gap = p->gap; | 445 | period = Py_MAX(gap, period); | 446 | LOG("Needle is not periodic.\n"); | 447 | Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap); | 448 | windowloop: | 449 | while (window_last < haystack_end) { Branch (449:16): [True: 34.6k, False: 0]
| 450 | for (;;) { | 451 | LOG_LINEUP(); | 452 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; | 453 | window_last += shift; | 454 | if (shift == 0) { Branch (454:21): [True: 34.4k, False: 2.10M]
| 455 | break; | 456 | } | 457 | if (window_last >= haystack_end) { Branch (457:21): [True: 178, False: 2.10M]
| 458 | return -1; | 459 | } | 460 | LOG("Horspool skip"); | 461 | } | 462 | window = window_last - len_needle + 1; | 463 | assert((window[len_needle - 1] & TABLE_MASK) == | 464 | (needle[len_needle - 1] & TABLE_MASK)); | 465 | for (Py_ssize_t i = cut; i < gap_jump_end; i++11.2k ) { Branch (465:38): [True: 43.6k, False: 2.03k]
| 466 | if (needle[i] != window[i]) { Branch (466:21): [True: 32.4k, False: 11.2k]
| 467 | LOG("Early right half mismatch: jump by gap.\n"); | 468 | assert(gap >= i - cut + 1); | 469 | window_last += gap; | 470 | goto windowloop; | 471 | } | 472 | } | 473 | for (Py_ssize_t i = gap_jump_end; 2.03k i < len_needle; i++1.06k ) { Branch (473:47): [True: 2.90k, False: 188]
| 474 | if (needle[i] != window[i]) { Branch (474:21): [True: 1.84k, False: 1.06k]
| 475 | LOG("Late right half mismatch.\n"); | 476 | assert(i - cut + 1 > gap); | 477 | window_last += i - cut + 1; | 478 | goto windowloop; | 479 | } | 480 | } | 481 | for (Py_ssize_t i = 0; 188 i < cut; i++8.17k ) { Branch (481:36): [True: 8.22k, False: 131]
| 482 | if (needle[i] != window[i]) { Branch (482:21): [True: 57, False: 8.17k]
| 483 | LOG("Left half does not match.\n"); | 484 | window_last += period; | 485 | goto windowloop; | 486 | } | 487 | } | 488 | LOG("Found a match!\n"); | 489 | return window - haystack; | 490 | } | 491 | } | 492 | LOG("Not found. Returning -1.\n"); | 493 | return -1; | 494 | } |
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way Unexecuted instantiation: bytesobject.c:stringlib__two_way unicodeobject.c:asciilib__two_way Line | Count | Source | 372 | { | 373 | // Crochemore and Perrin's (1991) Two-Way algorithm. | 374 | // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 | 375 | const Py_ssize_t len_needle = p->len_needle; | 376 | const Py_ssize_t cut = p->cut; | 377 | Py_ssize_t period = p->period; | 378 | const STRINGLIB_CHAR *const needle = p->needle; | 379 | const STRINGLIB_CHAR *window_last = haystack + len_needle - 1; | 380 | const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack; | 381 | SHIFT_TYPE *table = p->table; | 382 | const STRINGLIB_CHAR *window; | 383 | LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack); | 384 | | 385 | if (p->is_periodic) { Branch (385:9): [True: 163, False: 3]
| 386 | LOG("Needle is periodic.\n"); | 387 | Py_ssize_t memory = 0; | 388 | periodicwindowloop: | 389 | while (window_last < haystack_end) { Branch (389:16): [True: 3.39k, False: 0]
| 390 | assert(memory == 0); | 391 | for (;;) { | 392 | LOG_LINEUP(); | 393 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; | 394 | window_last += shift; | 395 | if (shift == 0) { Branch (395:21): [True: 3.39k, False: 8.07k]
| 396 | break; | 397 | } | 398 | if (window_last >= haystack_end) { Branch (398:21): [True: 0, False: 8.07k]
| 399 | return -1; | 400 | } | 401 | LOG("Horspool skip"); | 402 | } | 403 | no_shift: | 404 | window = window_last - len_needle + 1; | 405 | assert((window[len_needle - 1] & TABLE_MASK) == | 406 | (needle[len_needle - 1] & TABLE_MASK)); | 407 | Py_ssize_t i = Py_MAX(cut, memory); | 408 | for (; i < len_needle; i++21.9k ) { Branch (408:20): [True: 25.1k, False: 176]
| 409 | if (needle[i] != window[i]) { Branch (409:21): [True: 3.22k, False: 21.9k]
| 410 | LOG("Right half does not match.\n"); | 411 | window_last += i - cut + 1; | 412 | memory = 0; | 413 | goto periodicwindowloop; | 414 | } | 415 | } | 416 | for (i = memory; 176 i < cut; i++1.46k ) { Branch (416:30): [True: 1.47k, False: 163]
| 417 | if (needle[i] != window[i]) { Branch (417:21): [True: 13, False: 1.46k]
| 418 | LOG("Left half does not match.\n"); | 419 | window_last += period; | 420 | memory = len_needle - period; | 421 | if (window_last >= haystack_end) { Branch (421:25): [True: 0, False: 13]
| 422 | return -1; | 423 | } | 424 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; | 425 | if (shift) { Branch (425:25): [True: 0, False: 13]
| 426 | // A mismatch has been identified to the right | 427 | // of where i will next start, so we can jump | 428 | // at least as far as if the mismatch occurred | 429 | // on the first comparison. | 430 | Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1; | 431 | LOG("Skip with Memory.\n"); | 432 | memory = 0; | 433 | window_last += Py_MAX(shift, mem_jump); | 434 | goto periodicwindowloop; | 435 | } | 436 | goto no_shift; | 437 | } | 438 | } | 439 | LOG("Found a match!\n"); | 440 | return window - haystack; | 441 | } | 442 | } | 443 | else { | 444 | Py_ssize_t gap = p->gap; | 445 | period = Py_MAX(gap, period); | 446 | LOG("Needle is not periodic.\n"); | 447 | Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap); | 448 | windowloop: | 449 | while (window_last < haystack_end) { Branch (449:16): [True: 14, False: 0]
| 450 | for (;;) { | 451 | LOG_LINEUP(); | 452 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; | 453 | window_last += shift; | 454 | if (shift == 0) { Branch (454:21): [True: 14, False: 770]
| 455 | break; | 456 | } | 457 | if (window_last >= haystack_end) { Branch (457:21): [True: 0, False: 770]
| 458 | return -1; | 459 | } | 460 | LOG("Horspool skip"); | 461 | } | 462 | window = window_last - len_needle + 1; | 463 | assert((window[len_needle - 1] & TABLE_MASK) == | 464 | (needle[len_needle - 1] & TABLE_MASK)); | 465 | for (Py_ssize_t i = cut; i < gap_jump_end; i++6 ) { Branch (465:38): [True: 17, False: 3]
| 466 | if (needle[i] != window[i]) { Branch (466:21): [True: 11, False: 6]
| 467 | LOG("Early right half mismatch: jump by gap.\n"); | 468 | assert(gap >= i - cut + 1); | 469 | window_last += gap; | 470 | goto windowloop; | 471 | } | 472 | } | 473 | for (Py_ssize_t i = gap_jump_end; 3 i < len_needle; i++788 ) { Branch (473:47): [True: 788, False: 3]
| 474 | if (needle[i] != window[i]) { Branch (474:21): [True: 0, False: 788]
| 475 | LOG("Late right half mismatch.\n"); | 476 | assert(i - cut + 1 > gap); | 477 | window_last += i - cut + 1; | 478 | goto windowloop; | 479 | } | 480 | } | 481 | for (Py_ssize_t i = 0; 3 i < cut; i++2.55k ) { Branch (481:36): [True: 2.55k, False: 3]
| 482 | if (needle[i] != window[i]) { Branch (482:21): [True: 0, False: 2.55k]
| 483 | LOG("Left half does not match.\n"); | 484 | window_last += period; | 485 | goto windowloop; | 486 | } | 487 | } | 488 | LOG("Found a match!\n"); | 489 | return window - haystack; | 490 | } | 491 | } | 492 | LOG("Not found. Returning -1.\n"); | 493 | return -1; | 494 | } |
Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way unicodeobject.c:ucs2lib__two_way Line | Count | Source | 372 | { | 373 | // Crochemore and Perrin's (1991) Two-Way algorithm. | 374 | // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 | 375 | const Py_ssize_t len_needle = p->len_needle; | 376 | const Py_ssize_t cut = p->cut; | 377 | Py_ssize_t period = p->period; | 378 | const STRINGLIB_CHAR *const needle = p->needle; | 379 | const STRINGLIB_CHAR *window_last = haystack + len_needle - 1; | 380 | const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack; | 381 | SHIFT_TYPE *table = p->table; | 382 | const STRINGLIB_CHAR *window; | 383 | LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack); | 384 | | 385 | if (p->is_periodic) { Branch (385:9): [True: 30, False: 75]
| 386 | LOG("Needle is periodic.\n"); | 387 | Py_ssize_t memory = 0; | 388 | periodicwindowloop: | 389 | while (window_last < haystack_end) { Branch (389:16): [True: 1.73k, False: 0]
| 390 | assert(memory == 0); | 391 | for (;;) { | 392 | LOG_LINEUP(); | 393 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; | 394 | window_last += shift; | 395 | if (shift == 0) { Branch (395:21): [True: 1.73k, False: 14.4k]
| 396 | break; | 397 | } | 398 | if (window_last >= haystack_end) { Branch (398:21): [True: 1, False: 14.4k]
| 399 | return -1; | 400 | } | 401 | LOG("Horspool skip"); | 402 | } | 403 | no_shift: | 404 | window = window_last - len_needle + 1; | 405 | assert((window[len_needle - 1] & TABLE_MASK) == | 406 | (needle[len_needle - 1] & TABLE_MASK)); | 407 | Py_ssize_t i = Py_MAX(cut, memory); | 408 | for (; i < len_needle; i++555 ) { Branch (408:20): [True: 2.25k, False: 29]
| 409 | if (needle[i] != window[i]) { Branch (409:21): [True: 1.70k, False: 555]
| 410 | LOG("Right half does not match.\n"); | 411 | window_last += i - cut + 1; | 412 | memory = 0; | 413 | goto periodicwindowloop; | 414 | } | 415 | } | 416 | for (i = memory; 29 i < cut; i++116 ) { Branch (416:30): [True: 116, False: 29]
| 417 | if (needle[i] != window[i]) { Branch (417:21): [True: 0, False: 116]
| 418 | LOG("Left half does not match.\n"); | 419 | window_last += period; | 420 | memory = len_needle - period; | 421 | if (window_last >= haystack_end) { Branch (421:25): [True: 0, False: 0]
| 422 | return -1; | 423 | } | 424 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; | 425 | if (shift) { Branch (425:25): [True: 0, False: 0]
| 426 | // A mismatch has been identified to the right | 427 | // of where i will next start, so we can jump | 428 | // at least as far as if the mismatch occurred | 429 | // on the first comparison. | 430 | Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1; | 431 | LOG("Skip with Memory.\n"); | 432 | memory = 0; | 433 | window_last += Py_MAX(shift, mem_jump); | 434 | goto periodicwindowloop; | 435 | } | 436 | goto no_shift; | 437 | } | 438 | } | 439 | LOG("Found a match!\n"); | 440 | return window - haystack; | 441 | } | 442 | } | 443 | else { | 444 | Py_ssize_t gap = p->gap; | 445 | period = Py_MAX(gap, period); | 446 | LOG("Needle is not periodic.\n"); | 447 | Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap); | 448 | windowloop: | 449 | while (window_last < haystack_end) { Branch (449:16): [True: 2.51k, False: 0]
| 450 | for (;;) { | 451 | LOG_LINEUP(); | 452 | Py_ssize_t shift = table[(*window_last) & TABLE_MASK]; | 453 | window_last += shift; | 454 | if (shift == 0) { Branch (454:21): [True: 2.51k, False: 18.8k]
| 455 | break; | 456 | } | 457 | if (window_last >= haystack_end) { Branch (457:21): [True: 2, False: 18.8k]
| 458 | return -1; | 459 | } | 460 | LOG("Horspool skip"); | 461 | } | 462 | window = window_last - len_needle + 1; | 463 | assert((window[len_needle - 1] & TABLE_MASK) == | 464 | (needle[len_needle - 1] & TABLE_MASK)); | 465 | for (Py_ssize_t i = cut; i < gap_jump_end; i++1.42k ) { Branch (465:38): [True: 3.50k, False: 431]
| 466 | if (needle[i] != window[i]) { Branch (466:21): [True: 2.08k, False: 1.42k]
| 467 | LOG("Early right half mismatch: jump by gap.\n"); | 468 | assert(gap >= i - cut + 1); | 469 | window_last += gap; | 470 | goto windowloop; | 471 | } | 472 | } | 473 | for (Py_ssize_t i = gap_jump_end; 431 i < len_needle; i++1.86k ) { Branch (473:47): [True: 2.11k, False: 177]
| 474 | if (needle[i] != window[i]) { Branch (474:21): [True: 254, False: 1.86k]
| 475 | LOG("Late right half mismatch.\n"); | 476 | assert(i - cut + 1 > gap); | 477 | window_last += i - cut + 1; | 478 | goto windowloop; | 479 | } | 480 | } | 481 | for (Py_ssize_t i = 0; 177 i < cut; i++374 ) { Branch (481:36): [True: 478, False: 73]
| 482 | if (needle[i] != window[i]) { Branch (482:21): [True: 104, False: 374]
| 483 | LOG("Left half does not match.\n"); | 484 | window_last += period; | 485 | goto windowloop; | 486 | } | 487 | } | 488 | LOG("Found a match!\n"); | 489 | return window - haystack; | 490 | } | 491 | } | 492 | LOG("Not found. Returning -1.\n"); | 493 | return -1; | 494 | } |
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way |
495 | |
496 | |
497 | static Py_ssize_t |
498 | STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack, |
499 | Py_ssize_t len_haystack, |
500 | const STRINGLIB_CHAR *needle, |
501 | Py_ssize_t len_needle) |
502 | { |
503 | LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack); |
504 | STRINGLIB(prework) p; |
505 | STRINGLIB(_preprocess)(needle, len_needle, &p); |
506 | return STRINGLIB(_two_way)(haystack, len_haystack, &p); |
507 | } bytes_methods.c:stringlib__two_way_find Line | Count | Source | 502 | { | 503 | LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack); | 504 | STRINGLIB(prework) p; | 505 | STRINGLIB(_preprocess)(needle, len_needle, &p); | 506 | return STRINGLIB(_two_way)(haystack, len_haystack, &p); | 507 | } |
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_find Unexecuted instantiation: bytesobject.c:stringlib__two_way_find unicodeobject.c:asciilib__two_way_find Line | Count | Source | 502 | { | 503 | LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack); | 504 | STRINGLIB(prework) p; | 505 | STRINGLIB(_preprocess)(needle, len_needle, &p); | 506 | return STRINGLIB(_two_way)(haystack, len_haystack, &p); | 507 | } |
Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way_find unicodeobject.c:ucs2lib__two_way_find Line | Count | Source | 502 | { | 503 | LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack); | 504 | STRINGLIB(prework) p; | 505 | STRINGLIB(_preprocess)(needle, len_needle, &p); | 506 | return STRINGLIB(_two_way)(haystack, len_haystack, &p); | 507 | } |
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_find |
508 | |
509 | |
510 | static Py_ssize_t |
511 | STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack, |
512 | Py_ssize_t len_haystack, |
513 | const STRINGLIB_CHAR *needle, |
514 | Py_ssize_t len_needle, |
515 | Py_ssize_t maxcount) |
516 | { |
517 | LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack); |
518 | STRINGLIB(prework) p; |
519 | STRINGLIB(_preprocess)(needle, len_needle, &p); |
520 | Py_ssize_t index = 0, count = 0; |
521 | while (1) { Branch (521:12): [Folded - Ignored]
Branch (521:12): [Folded - Ignored]
Branch (521:12): [Folded - Ignored]
Branch (521:12): [Folded - Ignored]
Branch (521:12): [Folded - Ignored]
Branch (521:12): [Folded - Ignored]
Branch (521:12): [Folded - Ignored]
|
522 | Py_ssize_t result; |
523 | result = STRINGLIB(_two_way)(haystack + index, |
524 | len_haystack - index, &p); |
525 | if (result == -1) { Branch (525:13): [True: 0, False: 0]
Branch (525:13): [True: 0, False: 0]
Branch (525:13): [True: 0, False: 0]
Branch (525:13): [True: 0, False: 0]
Branch (525:13): [True: 0, False: 0]
Branch (525:13): [True: 3, False: 56]
Branch (525:13): [True: 0, False: 0]
|
526 | return count; |
527 | } |
528 | count++; |
529 | if (count == maxcount) { Branch (529:13): [True: 0, False: 0]
Branch (529:13): [True: 0, False: 0]
Branch (529:13): [True: 0, False: 0]
Branch (529:13): [True: 0, False: 0]
Branch (529:13): [True: 0, False: 0]
Branch (529:13): [True: 0, False: 56]
Branch (529:13): [True: 0, False: 0]
|
530 | return maxcount; |
531 | } |
532 | index += result + len_needle; |
533 | } |
534 | return count; |
535 | } Unexecuted instantiation: bytes_methods.c:stringlib__two_way_count Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_count Unexecuted instantiation: bytesobject.c:stringlib__two_way_count Unexecuted instantiation: unicodeobject.c:asciilib__two_way_count Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way_count unicodeobject.c:ucs2lib__two_way_count Line | Count | Source | 516 | { | 517 | LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack); | 518 | STRINGLIB(prework) p; | 519 | STRINGLIB(_preprocess)(needle, len_needle, &p); | 520 | Py_ssize_t index = 0, count = 0; | 521 | while (1) { Branch (521:12): [Folded - Ignored]
| 522 | Py_ssize_t result; | 523 | result = STRINGLIB(_two_way)(haystack + index, | 524 | len_haystack - index, &p); | 525 | if (result == -1) { Branch (525:13): [True: 3, False: 56]
| 526 | return count; | 527 | } | 528 | count++; | 529 | if (count == maxcount) { Branch (529:13): [True: 0, False: 56]
| 530 | return maxcount; | 531 | } | 532 | index += result + len_needle; | 533 | } | 534 | return count; | 535 | } |
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_count |
536 | |
537 | #undef SHIFT_TYPE |
538 | #undef NOT_FOUND |
539 | #undef SHIFT_OVERFLOW |
540 | #undef TABLE_SIZE_BITS |
541 | #undef TABLE_SIZE |
542 | #undef TABLE_MASK |
543 | |
544 | #undef LOG |
545 | #undef LOG_STRING |
546 | #undef LOG_LINEUP |
547 | |
548 | static inline Py_ssize_t |
549 | STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n, |
550 | const STRINGLIB_CHAR* p, Py_ssize_t m, |
551 | Py_ssize_t maxcount, int mode) |
552 | { |
553 | const Py_ssize_t w = n - m; |
554 | Py_ssize_t mlast = m - 1, count = 0; |
555 | Py_ssize_t gap = mlast; |
556 | const STRINGLIB_CHAR last = p[mlast]; |
557 | const STRINGLIB_CHAR *const ss = &s[mlast]; |
558 | |
559 | unsigned long mask = 0; |
560 | for (Py_ssize_t i = 0; i < mlast; i++10.8M ) { Branch (560:28): [True: 2.58M, False: 685k]
Branch (560:28): [True: 214k, False: 46.5k]
Branch (560:28): [True: 219k, False: 48.6k]
Branch (560:28): [True: 3.01M, False: 1.53M]
Branch (560:28): [True: 4.75M, False: 1.50M]
Branch (560:28): [True: 3.12k, False: 2.59k]
Branch (560:28): [True: 182, False: 100]
|
561 | STRINGLIB_BLOOM_ADD(mask, p[i]); |
562 | if (p[i] == last) { Branch (562:13): [True: 921k, False: 1.66M]
Branch (562:13): [True: 107k, False: 107k]
Branch (562:13): [True: 107k, False: 112k]
Branch (562:13): [True: 938k, False: 2.07M]
Branch (562:13): [True: 665k, False: 4.09M]
Branch (562:13): [True: 145, False: 2.97k]
Branch (562:13): [True: 103, False: 79]
|
563 | gap = mlast - i - 1; |
564 | } |
565 | } |
566 | STRINGLIB_BLOOM_ADD(mask, last); |
567 | |
568 | for (Py_ssize_t i = 0; i <= w; i++35.3M ) { Branch (568:28): [True: 18.4M, False: 663k]
Branch (568:28): [True: 74.3k, False: 42.7k]
Branch (568:28): [True: 183k, False: 44.0k]
Branch (568:28): [True: 14.2M, False: 1.03M]
Branch (568:28): [True: 3.02M, False: 1.45M]
Branch (568:28): [True: 13.1k, False: 2.50k]
Branch (568:28): [True: 652, False: 61]
|
569 | if (ss[i] == last) { Branch (569:13): [True: 563k, False: 17.8M]
Branch (569:13): [True: 39.5k, False: 34.8k]
Branch (569:13): [True: 47.2k, False: 136k]
Branch (569:13): [True: 1.33M, False: 12.8M]
Branch (569:13): [True: 376k, False: 2.64M]
Branch (569:13): [True: 570, False: 12.5k]
Branch (569:13): [True: 76, False: 576]
|
570 | /* candidate match */ |
571 | Py_ssize_t j; |
572 | for (j = 0; j < mlast; j++2.00M ) { Branch (572:25): [True: 863k, False: 29.1k]
Branch (572:25): [True: 74.4k, False: 7.65k]
Branch (572:25): [True: 83.0k, False: 8.67k]
Branch (572:25): [True: 2.07M, False: 518k]
Branch (572:25): [True: 633k, False: 73.6k]
Branch (572:25): [True: 894, False: 139]
Branch (572:25): [True: 77, False: 74]
|
573 | if (s[i+j] != p[j]) { Branch (573:21): [True: 534k, False: 329k]
Branch (573:21): [True: 31.8k, False: 42.5k]
Branch (573:21): [True: 38.5k, False: 44.4k]
Branch (573:21): [True: 818k, False: 1.25M]
Branch (573:21): [True: 303k, False: 330k]
Branch (573:21): [True: 431, False: 463]
Branch (573:21): [True: 2, False: 75]
|
574 | break; |
575 | } |
576 | } |
577 | if (j == mlast) { Branch (577:17): [True: 29.1k, False: 534k]
Branch (577:17): [True: 7.65k, False: 31.8k]
Branch (577:17): [True: 8.67k, False: 38.5k]
Branch (577:17): [True: 518k, False: 818k]
Branch (577:17): [True: 73.6k, False: 303k]
Branch (577:17): [True: 139, False: 431]
Branch (577:17): [True: 74, False: 2]
|
578 | /* got a match! */ |
579 | if (mode != FAST_COUNT) { Branch (579:21): [True: 21.6k, False: 7.44k]
Branch (579:21): [True: 3.88k, False: 3.77k]
Branch (579:21): [True: 4.62k, False: 4.04k]
Branch (579:21): [True: 499k, False: 19.0k]
Branch (579:21): [True: 53.6k, False: 19.9k]
Branch (579:21): [True: 87, False: 52]
Branch (579:21): [True: 39, False: 35]
|
580 | return i; |
581 | } |
582 | count++; |
583 | if (count == maxcount) { Branch (583:21): [True: 0, False: 7.44k]
Branch (583:21): [True: 5, False: 3.77k]
Branch (583:21): [True: 5, False: 4.04k]
Branch (583:21): [True: 14, False: 19.0k]
Branch (583:21): [True: 0, False: 19.9k]
Branch (583:21): [True: 0, False: 52]
Branch (583:21): [True: 0, False: 35]
|
584 | return maxcount; |
585 | } |
586 | i = i + mlast; |
587 | continue; |
588 | } |
589 | /* miss: check if next character is part of pattern */ |
590 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (590:17): [True: 420k, False: 113k]
Branch (590:17): [True: 17.7k, False: 14.0k]
Branch (590:17): [True: 23.1k, False: 15.4k]
Branch (590:17): [True: 423k, False: 395k]
Branch (590:17): [True: 227k, False: 75.6k]
Branch (590:17): [True: 371, False: 60]
Branch (590:17): [True: 0, False: 2]
|
591 | i = i + m; |
592 | } |
593 | else { |
594 | i = i + gap; |
595 | } |
596 | } |
597 | else { |
598 | /* skip: check if next character is part of pattern */ |
599 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (599:17): [True: 16.5M, False: 1.29M]
Branch (599:17): [True: 18.3k, False: 16.5k]
Branch (599:17): [True: 113k, False: 23.4k]
Branch (599:17): [True: 11.7M, False: 1.07M]
Branch (599:17): [True: 2.26M, False: 379k]
Branch (599:17): [True: 12.0k, False: 565]
Branch (599:17): [True: 546, False: 30]
|
600 | i = i + m; |
601 | } |
602 | } |
603 | } |
604 | return mode == FAST_COUNT ? count654k : -12.58M ; Branch (604:12): [True: 85.3k, False: 578k]
Branch (604:12): [True: 42.7k, False: 10]
Branch (604:12): [True: 43.8k, False: 178]
Branch (604:12): [True: 379k, False: 655k]
Branch (604:12): [True: 101k, False: 1.35M]
Branch (604:12): [True: 2.45k, False: 47]
Branch (604:12): [True: 35, False: 26]
|
605 | } bytes_methods.c:stringlib_default_find Line | Count | Source | 552 | { | 553 | const Py_ssize_t w = n - m; | 554 | Py_ssize_t mlast = m - 1, count = 0; | 555 | Py_ssize_t gap = mlast; | 556 | const STRINGLIB_CHAR last = p[mlast]; | 557 | const STRINGLIB_CHAR *const ss = &s[mlast]; | 558 | | 559 | unsigned long mask = 0; | 560 | for (Py_ssize_t i = 0; i < mlast; i++2.58M ) { Branch (560:28): [True: 2.58M, False: 685k]
| 561 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 562 | if (p[i] == last) { Branch (562:13): [True: 921k, False: 1.66M]
| 563 | gap = mlast - i - 1; | 564 | } | 565 | } | 566 | STRINGLIB_BLOOM_ADD(mask, last); | 567 | | 568 | for (Py_ssize_t i = 0; i <= w; i++18.4M ) { Branch (568:28): [True: 18.4M, False: 663k]
| 569 | if (ss[i] == last) { Branch (569:13): [True: 563k, False: 17.8M]
| 570 | /* candidate match */ | 571 | Py_ssize_t j; | 572 | for (j = 0; j < mlast; j++329k ) { Branch (572:25): [True: 863k, False: 29.1k]
| 573 | if (s[i+j] != p[j]) { Branch (573:21): [True: 534k, False: 329k]
| 574 | break; | 575 | } | 576 | } | 577 | if (j == mlast) { Branch (577:17): [True: 29.1k, False: 534k]
| 578 | /* got a match! */ | 579 | if (mode != FAST_COUNT) { Branch (579:21): [True: 21.6k, False: 7.44k]
| 580 | return i; | 581 | } | 582 | count++; | 583 | if (count == maxcount) { Branch (583:21): [True: 0, False: 7.44k]
| 584 | return maxcount; | 585 | } | 586 | i = i + mlast; | 587 | continue; | 588 | } | 589 | /* miss: check if next character is part of pattern */ | 590 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (590:17): [True: 420k, False: 113k]
| 591 | i = i + m; | 592 | } | 593 | else { | 594 | i = i + gap; | 595 | } | 596 | } | 597 | else { | 598 | /* skip: check if next character is part of pattern */ | 599 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (599:17): [True: 16.5M, False: 1.29M]
| 600 | i = i + m; | 601 | } | 602 | } | 603 | } | 604 | return mode == FAST_COUNT ? count85.3k : -1578k ; Branch (604:12): [True: 85.3k, False: 578k]
| 605 | } |
bytearrayobject.c:stringlib_default_find Line | Count | Source | 552 | { | 553 | const Py_ssize_t w = n - m; | 554 | Py_ssize_t mlast = m - 1, count = 0; | 555 | Py_ssize_t gap = mlast; | 556 | const STRINGLIB_CHAR last = p[mlast]; | 557 | const STRINGLIB_CHAR *const ss = &s[mlast]; | 558 | | 559 | unsigned long mask = 0; | 560 | for (Py_ssize_t i = 0; i < mlast; i++214k ) { Branch (560:28): [True: 214k, False: 46.5k]
| 561 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 562 | if (p[i] == last) { Branch (562:13): [True: 107k, False: 107k]
| 563 | gap = mlast - i - 1; | 564 | } | 565 | } | 566 | STRINGLIB_BLOOM_ADD(mask, last); | 567 | | 568 | for (Py_ssize_t i = 0; i <= w; i++70.4k ) { Branch (568:28): [True: 74.3k, False: 42.7k]
| 569 | if (ss[i] == last) { Branch (569:13): [True: 39.5k, False: 34.8k]
| 570 | /* candidate match */ | 571 | Py_ssize_t j; | 572 | for (j = 0; j < mlast; j++42.5k ) { Branch (572:25): [True: 74.4k, False: 7.65k]
| 573 | if (s[i+j] != p[j]) { Branch (573:21): [True: 31.8k, False: 42.5k]
| 574 | break; | 575 | } | 576 | } | 577 | if (j == mlast) { Branch (577:17): [True: 7.65k, False: 31.8k]
| 578 | /* got a match! */ | 579 | if (mode != FAST_COUNT) { Branch (579:21): [True: 3.88k, False: 3.77k]
| 580 | return i; | 581 | } | 582 | count++; | 583 | if (count == maxcount) { Branch (583:21): [True: 5, False: 3.77k]
| 584 | return maxcount; | 585 | } | 586 | i = i + mlast; | 587 | continue; | 588 | } | 589 | /* miss: check if next character is part of pattern */ | 590 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (590:17): [True: 17.7k, False: 14.0k]
| 591 | i = i + m; | 592 | } | 593 | else { | 594 | i = i + gap; | 595 | } | 596 | } | 597 | else { | 598 | /* skip: check if next character is part of pattern */ | 599 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (599:17): [True: 18.3k, False: 16.5k]
| 600 | i = i + m; | 601 | } | 602 | } | 603 | } | 604 | return mode == FAST_COUNT ? count42.7k : -110 ; Branch (604:12): [True: 42.7k, False: 10]
| 605 | } |
bytesobject.c:stringlib_default_find Line | Count | Source | 552 | { | 553 | const Py_ssize_t w = n - m; | 554 | Py_ssize_t mlast = m - 1, count = 0; | 555 | Py_ssize_t gap = mlast; | 556 | const STRINGLIB_CHAR last = p[mlast]; | 557 | const STRINGLIB_CHAR *const ss = &s[mlast]; | 558 | | 559 | unsigned long mask = 0; | 560 | for (Py_ssize_t i = 0; i < mlast; i++219k ) { Branch (560:28): [True: 219k, False: 48.6k]
| 561 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 562 | if (p[i] == last) { Branch (562:13): [True: 107k, False: 112k]
| 563 | gap = mlast - i - 1; | 564 | } | 565 | } | 566 | STRINGLIB_BLOOM_ADD(mask, last); | 567 | | 568 | for (Py_ssize_t i = 0; i <= w; i++179k ) { Branch (568:28): [True: 183k, False: 44.0k]
| 569 | if (ss[i] == last) { Branch (569:13): [True: 47.2k, False: 136k]
| 570 | /* candidate match */ | 571 | Py_ssize_t j; | 572 | for (j = 0; j < mlast; j++44.4k ) { Branch (572:25): [True: 83.0k, False: 8.67k]
| 573 | if (s[i+j] != p[j]) { Branch (573:21): [True: 38.5k, False: 44.4k]
| 574 | break; | 575 | } | 576 | } | 577 | if (j == mlast) { Branch (577:17): [True: 8.67k, False: 38.5k]
| 578 | /* got a match! */ | 579 | if (mode != FAST_COUNT) { Branch (579:21): [True: 4.62k, False: 4.04k]
| 580 | return i; | 581 | } | 582 | count++; | 583 | if (count == maxcount) { Branch (583:21): [True: 5, False: 4.04k]
| 584 | return maxcount; | 585 | } | 586 | i = i + mlast; | 587 | continue; | 588 | } | 589 | /* miss: check if next character is part of pattern */ | 590 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (590:17): [True: 23.1k, False: 15.4k]
| 591 | i = i + m; | 592 | } | 593 | else { | 594 | i = i + gap; | 595 | } | 596 | } | 597 | else { | 598 | /* skip: check if next character is part of pattern */ | 599 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (599:17): [True: 113k, False: 23.4k]
| 600 | i = i + m; | 601 | } | 602 | } | 603 | } | 604 | return mode == FAST_COUNT ? count43.8k : -1178 ; Branch (604:12): [True: 43.8k, False: 178]
| 605 | } |
unicodeobject.c:asciilib_default_find Line | Count | Source | 552 | { | 553 | const Py_ssize_t w = n - m; | 554 | Py_ssize_t mlast = m - 1, count = 0; | 555 | Py_ssize_t gap = mlast; | 556 | const STRINGLIB_CHAR last = p[mlast]; | 557 | const STRINGLIB_CHAR *const ss = &s[mlast]; | 558 | | 559 | unsigned long mask = 0; | 560 | for (Py_ssize_t i = 0; i < mlast; i++3.01M ) { Branch (560:28): [True: 3.01M, False: 1.53M]
| 561 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 562 | if (p[i] == last) { Branch (562:13): [True: 938k, False: 2.07M]
| 563 | gap = mlast - i - 1; | 564 | } | 565 | } | 566 | STRINGLIB_BLOOM_ADD(mask, last); | 567 | | 568 | for (Py_ssize_t i = 0; i <= w; i++13.7M ) { Branch (568:28): [True: 14.2M, False: 1.03M]
| 569 | if (ss[i] == last) { Branch (569:13): [True: 1.33M, False: 12.8M]
| 570 | /* candidate match */ | 571 | Py_ssize_t j; | 572 | for (j = 0; j < mlast; j++1.25M ) { Branch (572:25): [True: 2.07M, False: 518k]
| 573 | if (s[i+j] != p[j]) { Branch (573:21): [True: 818k, False: 1.25M]
| 574 | break; | 575 | } | 576 | } | 577 | if (j == mlast) { Branch (577:17): [True: 518k, False: 818k]
| 578 | /* got a match! */ | 579 | if (mode != FAST_COUNT) { Branch (579:21): [True: 499k, False: 19.0k]
| 580 | return i; | 581 | } | 582 | count++; | 583 | if (count == maxcount) { Branch (583:21): [True: 14, False: 19.0k]
| 584 | return maxcount; | 585 | } | 586 | i = i + mlast; | 587 | continue; | 588 | } | 589 | /* miss: check if next character is part of pattern */ | 590 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (590:17): [True: 423k, False: 395k]
| 591 | i = i + m; | 592 | } | 593 | else { | 594 | i = i + gap; | 595 | } | 596 | } | 597 | else { | 598 | /* skip: check if next character is part of pattern */ | 599 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (599:17): [True: 11.7M, False: 1.07M]
| 600 | i = i + m; | 601 | } | 602 | } | 603 | } | 604 | return mode == FAST_COUNT ? count379k : -1655k ; Branch (604:12): [True: 379k, False: 655k]
| 605 | } |
unicodeobject.c:ucs1lib_default_find Line | Count | Source | 552 | { | 553 | const Py_ssize_t w = n - m; | 554 | Py_ssize_t mlast = m - 1, count = 0; | 555 | Py_ssize_t gap = mlast; | 556 | const STRINGLIB_CHAR last = p[mlast]; | 557 | const STRINGLIB_CHAR *const ss = &s[mlast]; | 558 | | 559 | unsigned long mask = 0; | 560 | for (Py_ssize_t i = 0; i < mlast; i++4.75M ) { Branch (560:28): [True: 4.75M, False: 1.50M]
| 561 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 562 | if (p[i] == last) { Branch (562:13): [True: 665k, False: 4.09M]
| 563 | gap = mlast - i - 1; | 564 | } | 565 | } | 566 | STRINGLIB_BLOOM_ADD(mask, last); | 567 | | 568 | for (Py_ssize_t i = 0; i <= w; i++2.96M ) { Branch (568:28): [True: 3.02M, False: 1.45M]
| 569 | if (ss[i] == last) { Branch (569:13): [True: 376k, False: 2.64M]
| 570 | /* candidate match */ | 571 | Py_ssize_t j; | 572 | for (j = 0; j < mlast; j++330k ) { Branch (572:25): [True: 633k, False: 73.6k]
| 573 | if (s[i+j] != p[j]) { Branch (573:21): [True: 303k, False: 330k]
| 574 | break; | 575 | } | 576 | } | 577 | if (j == mlast) { Branch (577:17): [True: 73.6k, False: 303k]
| 578 | /* got a match! */ | 579 | if (mode != FAST_COUNT) { Branch (579:21): [True: 53.6k, False: 19.9k]
| 580 | return i; | 581 | } | 582 | count++; | 583 | if (count == maxcount) { Branch (583:21): [True: 0, False: 19.9k]
| 584 | return maxcount; | 585 | } | 586 | i = i + mlast; | 587 | continue; | 588 | } | 589 | /* miss: check if next character is part of pattern */ | 590 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (590:17): [True: 227k, False: 75.6k]
| 591 | i = i + m; | 592 | } | 593 | else { | 594 | i = i + gap; | 595 | } | 596 | } | 597 | else { | 598 | /* skip: check if next character is part of pattern */ | 599 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (599:17): [True: 2.26M, False: 379k]
| 600 | i = i + m; | 601 | } | 602 | } | 603 | } | 604 | return mode == FAST_COUNT ? count101k : -11.35M ; Branch (604:12): [True: 101k, False: 1.35M]
| 605 | } |
unicodeobject.c:ucs2lib_default_find Line | Count | Source | 552 | { | 553 | const Py_ssize_t w = n - m; | 554 | Py_ssize_t mlast = m - 1, count = 0; | 555 | Py_ssize_t gap = mlast; | 556 | const STRINGLIB_CHAR last = p[mlast]; | 557 | const STRINGLIB_CHAR *const ss = &s[mlast]; | 558 | | 559 | unsigned long mask = 0; | 560 | for (Py_ssize_t i = 0; i < mlast; i++3.12k ) { Branch (560:28): [True: 3.12k, False: 2.59k]
| 561 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 562 | if (p[i] == last) { Branch (562:13): [True: 145, False: 2.97k]
| 563 | gap = mlast - i - 1; | 564 | } | 565 | } | 566 | STRINGLIB_BLOOM_ADD(mask, last); | 567 | | 568 | for (Py_ssize_t i = 0; i <= w; i++13.0k ) { Branch (568:28): [True: 13.1k, False: 2.50k]
| 569 | if (ss[i] == last) { Branch (569:13): [True: 570, False: 12.5k]
| 570 | /* candidate match */ | 571 | Py_ssize_t j; | 572 | for (j = 0; j < mlast; j++463 ) { Branch (572:25): [True: 894, False: 139]
| 573 | if (s[i+j] != p[j]) { Branch (573:21): [True: 431, False: 463]
| 574 | break; | 575 | } | 576 | } | 577 | if (j == mlast) { Branch (577:17): [True: 139, False: 431]
| 578 | /* got a match! */ | 579 | if (mode != FAST_COUNT) { Branch (579:21): [True: 87, False: 52]
| 580 | return i; | 581 | } | 582 | count++; | 583 | if (count == maxcount) { Branch (583:21): [True: 0, False: 52]
| 584 | return maxcount; | 585 | } | 586 | i = i + mlast; | 587 | continue; | 588 | } | 589 | /* miss: check if next character is part of pattern */ | 590 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (590:17): [True: 371, False: 60]
| 591 | i = i + m; | 592 | } | 593 | else { | 594 | i = i + gap; | 595 | } | 596 | } | 597 | else { | 598 | /* skip: check if next character is part of pattern */ | 599 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (599:17): [True: 12.0k, False: 565]
| 600 | i = i + m; | 601 | } | 602 | } | 603 | } | 604 | return mode == FAST_COUNT ? count2.45k : -147 ; Branch (604:12): [True: 2.45k, False: 47]
| 605 | } |
unicodeobject.c:ucs4lib_default_find Line | Count | Source | 552 | { | 553 | const Py_ssize_t w = n - m; | 554 | Py_ssize_t mlast = m - 1, count = 0; | 555 | Py_ssize_t gap = mlast; | 556 | const STRINGLIB_CHAR last = p[mlast]; | 557 | const STRINGLIB_CHAR *const ss = &s[mlast]; | 558 | | 559 | unsigned long mask = 0; | 560 | for (Py_ssize_t i = 0; i < mlast; i++182 ) { Branch (560:28): [True: 182, False: 100]
| 561 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 562 | if (p[i] == last) { Branch (562:13): [True: 103, False: 79]
| 563 | gap = mlast - i - 1; | 564 | } | 565 | } | 566 | STRINGLIB_BLOOM_ADD(mask, last); | 567 | | 568 | for (Py_ssize_t i = 0; i <= w; i++613 ) { Branch (568:28): [True: 652, False: 61]
| 569 | if (ss[i] == last) { Branch (569:13): [True: 76, False: 576]
| 570 | /* candidate match */ | 571 | Py_ssize_t j; | 572 | for (j = 0; j < mlast; j++75 ) { Branch (572:25): [True: 77, False: 74]
| 573 | if (s[i+j] != p[j]) { Branch (573:21): [True: 2, False: 75]
| 574 | break; | 575 | } | 576 | } | 577 | if (j == mlast) { Branch (577:17): [True: 74, False: 2]
| 578 | /* got a match! */ | 579 | if (mode != FAST_COUNT) { Branch (579:21): [True: 39, False: 35]
| 580 | return i; | 581 | } | 582 | count++; | 583 | if (count == maxcount) { Branch (583:21): [True: 0, False: 35]
| 584 | return maxcount; | 585 | } | 586 | i = i + mlast; | 587 | continue; | 588 | } | 589 | /* miss: check if next character is part of pattern */ | 590 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (590:17): [True: 0, False: 2]
| 591 | i = i + m; | 592 | } | 593 | else { | 594 | i = i + gap; | 595 | } | 596 | } | 597 | else { | 598 | /* skip: check if next character is part of pattern */ | 599 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (599:17): [True: 546, False: 30]
| 600 | i = i + m; | 601 | } | 602 | } | 603 | } | 604 | return mode == FAST_COUNT ? count35 : -126 ; Branch (604:12): [True: 35, False: 26]
| 605 | } |
|
606 | |
607 | |
608 | static Py_ssize_t |
609 | STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n, |
610 | const STRINGLIB_CHAR* p, Py_ssize_t m, |
611 | Py_ssize_t maxcount, int mode) |
612 | { |
613 | const Py_ssize_t w = n - m; |
614 | Py_ssize_t mlast = m - 1, count = 0; |
615 | Py_ssize_t gap = mlast; |
616 | Py_ssize_t hits = 0, res; |
617 | const STRINGLIB_CHAR last = p[mlast]; |
618 | const STRINGLIB_CHAR *const ss = &s[mlast]; |
619 |
|
620 | unsigned long mask = 0; |
621 | for (Py_ssize_t i = 0; i < mlast; i++) { Branch (621:28): [True: 0, False: 0]
Branch (621:28): [True: 0, False: 0]
Branch (621:28): [True: 0, False: 0]
Branch (621:28): [True: 0, False: 0]
Branch (621:28): [True: 0, False: 0]
Branch (621:28): [True: 0, False: 0]
Branch (621:28): [True: 0, False: 0]
|
622 | STRINGLIB_BLOOM_ADD(mask, p[i]); |
623 | if (p[i] == last) { Branch (623:13): [True: 0, False: 0]
Branch (623:13): [True: 0, False: 0]
Branch (623:13): [True: 0, False: 0]
Branch (623:13): [True: 0, False: 0]
Branch (623:13): [True: 0, False: 0]
Branch (623:13): [True: 0, False: 0]
Branch (623:13): [True: 0, False: 0]
|
624 | gap = mlast - i - 1; |
625 | } |
626 | } |
627 | STRINGLIB_BLOOM_ADD(mask, last); |
628 |
|
629 | for (Py_ssize_t i = 0; i <= w; i++) { Branch (629:28): [True: 0, False: 0]
Branch (629:28): [True: 0, False: 0]
Branch (629:28): [True: 0, False: 0]
Branch (629:28): [True: 0, False: 0]
Branch (629:28): [True: 0, False: 0]
Branch (629:28): [True: 0, False: 0]
Branch (629:28): [True: 0, False: 0]
|
630 | if (ss[i] == last) { Branch (630:13): [True: 0, False: 0]
Branch (630:13): [True: 0, False: 0]
Branch (630:13): [True: 0, False: 0]
Branch (630:13): [True: 0, False: 0]
Branch (630:13): [True: 0, False: 0]
Branch (630:13): [True: 0, False: 0]
Branch (630:13): [True: 0, False: 0]
|
631 | /* candidate match */ |
632 | Py_ssize_t j; |
633 | for (j = 0; j < mlast; j++) { Branch (633:25): [True: 0, False: 0]
Branch (633:25): [True: 0, False: 0]
Branch (633:25): [True: 0, False: 0]
Branch (633:25): [True: 0, False: 0]
Branch (633:25): [True: 0, False: 0]
Branch (633:25): [True: 0, False: 0]
Branch (633:25): [True: 0, False: 0]
|
634 | if (s[i+j] != p[j]) { Branch (634:21): [True: 0, False: 0]
Branch (634:21): [True: 0, False: 0]
Branch (634:21): [True: 0, False: 0]
Branch (634:21): [True: 0, False: 0]
Branch (634:21): [True: 0, False: 0]
Branch (634:21): [True: 0, False: 0]
Branch (634:21): [True: 0, False: 0]
|
635 | break; |
636 | } |
637 | } |
638 | if (j == mlast) { Branch (638:17): [True: 0, False: 0]
Branch (638:17): [True: 0, False: 0]
Branch (638:17): [True: 0, False: 0]
Branch (638:17): [True: 0, False: 0]
Branch (638:17): [True: 0, False: 0]
Branch (638:17): [True: 0, False: 0]
Branch (638:17): [True: 0, False: 0]
|
639 | /* got a match! */ |
640 | if (mode != FAST_COUNT) { Branch (640:21): [True: 0, False: 0]
Branch (640:21): [True: 0, False: 0]
Branch (640:21): [True: 0, False: 0]
Branch (640:21): [True: 0, False: 0]
Branch (640:21): [True: 0, False: 0]
Branch (640:21): [True: 0, False: 0]
Branch (640:21): [True: 0, False: 0]
|
641 | return i; |
642 | } |
643 | count++; |
644 | if (count == maxcount) { Branch (644:21): [True: 0, False: 0]
Branch (644:21): [True: 0, False: 0]
Branch (644:21): [True: 0, False: 0]
Branch (644:21): [True: 0, False: 0]
Branch (644:21): [True: 0, False: 0]
Branch (644:21): [True: 0, False: 0]
Branch (644:21): [True: 0, False: 0]
|
645 | return maxcount; |
646 | } |
647 | i = i + mlast; |
648 | continue; |
649 | } |
650 | hits += j + 1; |
651 | if (hits > m / 4 && w - i > 2000) { Branch (651:17): [True: 0, False: 0]
Branch (651:33): [True: 0, False: 0]
Branch (651:17): [True: 0, False: 0]
Branch (651:33): [True: 0, False: 0]
Branch (651:17): [True: 0, False: 0]
Branch (651:33): [True: 0, False: 0]
Branch (651:17): [True: 0, False: 0]
Branch (651:33): [True: 0, False: 0]
Branch (651:17): [True: 0, False: 0]
Branch (651:33): [True: 0, False: 0]
Branch (651:17): [True: 0, False: 0]
Branch (651:33): [True: 0, False: 0]
Branch (651:17): [True: 0, False: 0]
Branch (651:33): [True: 0, False: 0]
|
652 | if (mode == FAST_SEARCH) { Branch (652:21): [True: 0, False: 0]
Branch (652:21): [True: 0, False: 0]
Branch (652:21): [True: 0, False: 0]
Branch (652:21): [True: 0, False: 0]
Branch (652:21): [True: 0, False: 0]
Branch (652:21): [True: 0, False: 0]
Branch (652:21): [True: 0, False: 0]
|
653 | res = STRINGLIB(_two_way_find)(s + i, n - i, p, m); |
654 | return res == -1 ? -1 : res + i; Branch (654:28): [True: 0, False: 0]
Branch (654:28): [True: 0, False: 0]
Branch (654:28): [True: 0, False: 0]
Branch (654:28): [True: 0, False: 0]
Branch (654:28): [True: 0, False: 0]
Branch (654:28): [True: 0, False: 0]
Branch (654:28): [True: 0, False: 0]
|
655 | } |
656 | else { |
657 | res = STRINGLIB(_two_way_count)(s + i, n - i, p, m, |
658 | maxcount - count); |
659 | return res + count; |
660 | } |
661 | } |
662 | /* miss: check if next character is part of pattern */ |
663 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (663:17): [True: 0, False: 0]
Branch (663:17): [True: 0, False: 0]
Branch (663:17): [True: 0, False: 0]
Branch (663:17): [True: 0, False: 0]
Branch (663:17): [True: 0, False: 0]
Branch (663:17): [True: 0, False: 0]
Branch (663:17): [True: 0, False: 0]
|
664 | i = i + m; |
665 | } |
666 | else { |
667 | i = i + gap; |
668 | } |
669 | } |
670 | else { |
671 | /* skip: check if next character is part of pattern */ |
672 | if (!STRINGLIB_BLOOM(mask, ss[i+1])) { Branch (672:17): [True: 0, False: 0]
Branch (672:17): [True: 0, False: 0]
Branch (672:17): [True: 0, False: 0]
Branch (672:17): [True: 0, False: 0]
Branch (672:17): [True: 0, False: 0]
Branch (672:17): [True: 0, False: 0]
Branch (672:17): [True: 0, False: 0]
|
673 | i = i + m; |
674 | } |
675 | } |
676 | } |
677 | return mode == FAST_COUNT ? count : -1; Branch (677:12): [True: 0, False: 0]
Branch (677:12): [True: 0, False: 0]
Branch (677:12): [True: 0, False: 0]
Branch (677:12): [True: 0, False: 0]
Branch (677:12): [True: 0, False: 0]
Branch (677:12): [True: 0, False: 0]
Branch (677:12): [True: 0, False: 0]
|
678 | } Unexecuted instantiation: bytes_methods.c:stringlib_adaptive_find Unexecuted instantiation: bytearrayobject.c:stringlib_adaptive_find Unexecuted instantiation: bytesobject.c:stringlib_adaptive_find Unexecuted instantiation: unicodeobject.c:asciilib_adaptive_find Unexecuted instantiation: unicodeobject.c:ucs1lib_adaptive_find Unexecuted instantiation: unicodeobject.c:ucs2lib_adaptive_find Unexecuted instantiation: unicodeobject.c:ucs4lib_adaptive_find |
679 | |
680 | |
681 | static Py_ssize_t |
682 | STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n, |
683 | const STRINGLIB_CHAR* p, Py_ssize_t m, |
684 | Py_ssize_t maxcount, int mode) |
685 | { |
686 | /* create compressed boyer-moore delta 1 table */ |
687 | unsigned long mask = 0; |
688 | Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m; |
689 | |
690 | /* process pattern[0] outside the loop */ |
691 | STRINGLIB_BLOOM_ADD(mask, p[0]); |
692 | /* process pattern[:0:-1] */ |
693 | for (i = mlast; i > 0; i--1.35M ) { Branch (693:21): [True: 674k, False: 196k]
Branch (693:21): [True: 221, False: 88]
Branch (693:21): [True: 236, False: 95]
Branch (693:21): [True: 675k, False: 196k]
Branch (693:21): [True: 0, False: 0]
Branch (693:21): [True: 15, False: 15]
Branch (693:21): [True: 25, False: 25]
|
694 | STRINGLIB_BLOOM_ADD(mask, p[i]); |
695 | if (p[i] == p[0]) { Branch (695:13): [True: 224k, False: 450k]
Branch (695:13): [True: 37, False: 184]
Branch (695:13): [True: 37, False: 199]
Branch (695:13): [True: 224k, False: 450k]
Branch (695:13): [True: 0, False: 0]
Branch (695:13): [True: 13, False: 2]
Branch (695:13): [True: 21, False: 4]
|
696 | skip = i - 1; |
697 | } |
698 | } |
699 | |
700 | for (i = w; i >= 0; i--532k ) { Branch (700:17): [True: 303k, False: 190k]
Branch (700:17): [True: 173, False: 4]
Branch (700:17): [True: 194, False: 5]
Branch (700:17): [True: 240k, False: 190k]
Branch (700:17): [True: 0, False: 0]
Branch (700:17): [True: 127, False: 7]
Branch (700:17): [True: 231, False: 11]
|
701 | if (s[i] == p[0]) { Branch (701:13): [True: 84.9k, False: 218k]
Branch (701:13): [True: 107, False: 66]
Branch (701:13): [True: 116, False: 78]
Branch (701:13): [True: 85.0k, False: 155k]
Branch (701:13): [True: 0, False: 0]
Branch (701:13): [True: 8, False: 119]
Branch (701:13): [True: 14, False: 217]
|
702 | /* candidate match */ |
703 | for (j = mlast; j > 0; j--83.8k ) { Branch (703:29): [True: 120k, False: 5.68k]
Branch (703:29): [True: 237, False: 84]
Branch (703:29): [True: 253, False: 90]
Branch (703:29): [True: 121k, False: 5.85k]
Branch (703:29): [True: 0, False: 0]
Branch (703:29): [True: 8, False: 8]
Branch (703:29): [True: 14, False: 14]
|
704 | if (s[i+j] != p[j]) { Branch (704:21): [True: 79.2k, False: 41.4k]
Branch (704:21): [True: 23, False: 214]
Branch (704:21): [True: 26, False: 227]
Branch (704:21): [True: 79.1k, False: 41.9k]
Branch (704:21): [True: 0, False: 0]
Branch (704:21): [True: 0, False: 8]
Branch (704:21): [True: 0, False: 14]
|
705 | break; |
706 | } |
707 | } |
708 | if (j == 0) { Branch (708:17): [True: 5.68k, False: 79.2k]
Branch (708:17): [True: 84, False: 23]
Branch (708:17): [True: 90, False: 26]
Branch (708:17): [True: 5.85k, False: 79.1k]
Branch (708:17): [True: 0, False: 0]
Branch (708:17): [True: 8, False: 0]
Branch (708:17): [True: 14, False: 0]
|
709 | /* got a match! */ |
710 | return i; |
711 | } |
712 | /* miss: check if previous character is part of pattern */ |
713 | if (i > 0 && !46.2k STRINGLIB_BLOOM46.2k (mask, s[i-1])) { Branch (713:17): [True: 23.1k, False: 56.1k]
Branch (713:26): [True: 6.09k, False: 17.0k]
Branch (713:17): [True: 22, False: 1]
Branch (713:26): [True: 0, False: 22]
Branch (713:17): [True: 25, False: 1]
Branch (713:26): [True: 2, False: 23]
Branch (713:17): [True: 23.0k, False: 56.0k]
Branch (713:26): [True: 6.03k, False: 17.0k]
Branch (713:17): [True: 0, False: 0]
Branch (713:26): [True: 0, False: 0]
Branch (713:17): [True: 0, False: 0]
Branch (713:26): [True: 0, False: 0]
Branch (713:17): [True: 0, False: 0]
Branch (713:26): [True: 0, False: 0]
|
714 | i = i - m; |
715 | } |
716 | else { |
717 | i = i - skip; |
718 | } |
719 | } |
720 | else { |
721 | /* skip: check if previous character is part of pattern */ |
722 | if (i > 0 && !159k STRINGLIB_BLOOM159k (mask, s[i-1])) { Branch (722:17): [True: 110k, False: 107k]
Branch (722:26): [True: 75.0k, False: 35.5k]
Branch (722:17): [True: 64, False: 2]
Branch (722:26): [True: 6, False: 58]
Branch (722:17): [True: 75, False: 3]
Branch (722:26): [True: 6, False: 69]
Branch (722:17): [True: 48.1k, False: 107k]
Branch (722:26): [True: 12.6k, False: 35.4k]
Branch (722:17): [True: 0, False: 0]
Branch (722:26): [True: 0, False: 0]
Branch (722:17): [True: 119, False: 0]
Branch (722:26): [True: 117, False: 2]
Branch (722:17): [True: 217, False: 0]
Branch (722:26): [True: 213, False: 4]
|
723 | i = i - m; |
724 | } |
725 | } |
726 | } |
727 | return -1; |
728 | } bytes_methods.c:stringlib_default_rfind Line | Count | Source | 685 | { | 686 | /* create compressed boyer-moore delta 1 table */ | 687 | unsigned long mask = 0; | 688 | Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m; | 689 | | 690 | /* process pattern[0] outside the loop */ | 691 | STRINGLIB_BLOOM_ADD(mask, p[0]); | 692 | /* process pattern[:0:-1] */ | 693 | for (i = mlast; i > 0; i--674k ) { Branch (693:21): [True: 674k, False: 196k]
| 694 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 695 | if (p[i] == p[0]) { Branch (695:13): [True: 224k, False: 450k]
| 696 | skip = i - 1; | 697 | } | 698 | } | 699 | | 700 | for (i = w; i >= 0; i--297k ) { Branch (700:17): [True: 303k, False: 190k]
| 701 | if (s[i] == p[0]) { Branch (701:13): [True: 84.9k, False: 218k]
| 702 | /* candidate match */ | 703 | for (j = mlast; j > 0; j--41.4k ) { Branch (703:29): [True: 120k, False: 5.68k]
| 704 | if (s[i+j] != p[j]) { Branch (704:21): [True: 79.2k, False: 41.4k]
| 705 | break; | 706 | } | 707 | } | 708 | if (j == 0) { Branch (708:17): [True: 5.68k, False: 79.2k]
| 709 | /* got a match! */ | 710 | return i; | 711 | } | 712 | /* miss: check if previous character is part of pattern */ | 713 | if (i > 0 && !23.1k STRINGLIB_BLOOM23.1k (mask, s[i-1])) { Branch (713:17): [True: 23.1k, False: 56.1k]
Branch (713:26): [True: 6.09k, False: 17.0k]
| 714 | i = i - m; | 715 | } | 716 | else { | 717 | i = i - skip; | 718 | } | 719 | } | 720 | else { | 721 | /* skip: check if previous character is part of pattern */ | 722 | if (i > 0 && !110k STRINGLIB_BLOOM110k (mask, s[i-1])) { Branch (722:17): [True: 110k, False: 107k]
Branch (722:26): [True: 75.0k, False: 35.5k]
| 723 | i = i - m; | 724 | } | 725 | } | 726 | } | 727 | return -1; | 728 | } |
bytearrayobject.c:stringlib_default_rfind Line | Count | Source | 685 | { | 686 | /* create compressed boyer-moore delta 1 table */ | 687 | unsigned long mask = 0; | 688 | Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m; | 689 | | 690 | /* process pattern[0] outside the loop */ | 691 | STRINGLIB_BLOOM_ADD(mask, p[0]); | 692 | /* process pattern[:0:-1] */ | 693 | for (i = mlast; i > 0; i--221 ) { Branch (693:21): [True: 221, False: 88]
| 694 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 695 | if (p[i] == p[0]) { Branch (695:13): [True: 37, False: 184]
| 696 | skip = i - 1; | 697 | } | 698 | } | 699 | | 700 | for (i = w; i >= 0; i--89 ) { Branch (700:17): [True: 173, False: 4]
| 701 | if (s[i] == p[0]) { Branch (701:13): [True: 107, False: 66]
| 702 | /* candidate match */ | 703 | for (j = mlast; j > 0; j--214 ) { Branch (703:29): [True: 237, False: 84]
| 704 | if (s[i+j] != p[j]) { Branch (704:21): [True: 23, False: 214]
| 705 | break; | 706 | } | 707 | } | 708 | if (j == 0) { Branch (708:17): [True: 84, False: 23]
| 709 | /* got a match! */ | 710 | return i; | 711 | } | 712 | /* miss: check if previous character is part of pattern */ | 713 | if (i > 0 && !22 STRINGLIB_BLOOM22 (mask, s[i-1])) { Branch (713:17): [True: 22, False: 1]
Branch (713:26): [True: 0, False: 22]
| 714 | i = i - m; | 715 | } | 716 | else { | 717 | i = i - skip; | 718 | } | 719 | } | 720 | else { | 721 | /* skip: check if previous character is part of pattern */ | 722 | if (i > 0 && !64 STRINGLIB_BLOOM64 (mask, s[i-1])) { Branch (722:17): [True: 64, False: 2]
Branch (722:26): [True: 6, False: 58]
| 723 | i = i - m; | 724 | } | 725 | } | 726 | } | 727 | return -1; | 728 | } |
bytesobject.c:stringlib_default_rfind Line | Count | Source | 685 | { | 686 | /* create compressed boyer-moore delta 1 table */ | 687 | unsigned long mask = 0; | 688 | Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m; | 689 | | 690 | /* process pattern[0] outside the loop */ | 691 | STRINGLIB_BLOOM_ADD(mask, p[0]); | 692 | /* process pattern[:0:-1] */ | 693 | for (i = mlast; i > 0; i--236 ) { Branch (693:21): [True: 236, False: 95]
| 694 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 695 | if (p[i] == p[0]) { Branch (695:13): [True: 37, False: 199]
| 696 | skip = i - 1; | 697 | } | 698 | } | 699 | | 700 | for (i = w; i >= 0; i--104 ) { Branch (700:17): [True: 194, False: 5]
| 701 | if (s[i] == p[0]) { Branch (701:13): [True: 116, False: 78]
| 702 | /* candidate match */ | 703 | for (j = mlast; j > 0; j--227 ) { Branch (703:29): [True: 253, False: 90]
| 704 | if (s[i+j] != p[j]) { Branch (704:21): [True: 26, False: 227]
| 705 | break; | 706 | } | 707 | } | 708 | if (j == 0) { Branch (708:17): [True: 90, False: 26]
| 709 | /* got a match! */ | 710 | return i; | 711 | } | 712 | /* miss: check if previous character is part of pattern */ | 713 | if (i > 0 && !25 STRINGLIB_BLOOM25 (mask, s[i-1])) { Branch (713:17): [True: 25, False: 1]
Branch (713:26): [True: 2, False: 23]
| 714 | i = i - m; | 715 | } | 716 | else { | 717 | i = i - skip; | 718 | } | 719 | } | 720 | else { | 721 | /* skip: check if previous character is part of pattern */ | 722 | if (i > 0 && !75 STRINGLIB_BLOOM75 (mask, s[i-1])) { Branch (722:17): [True: 75, False: 3]
Branch (722:26): [True: 6, False: 69]
| 723 | i = i - m; | 724 | } | 725 | } | 726 | } | 727 | return -1; | 728 | } |
unicodeobject.c:asciilib_default_rfind Line | Count | Source | 685 | { | 686 | /* create compressed boyer-moore delta 1 table */ | 687 | unsigned long mask = 0; | 688 | Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m; | 689 | | 690 | /* process pattern[0] outside the loop */ | 691 | STRINGLIB_BLOOM_ADD(mask, p[0]); | 692 | /* process pattern[:0:-1] */ | 693 | for (i = mlast; i > 0; i--675k ) { Branch (693:21): [True: 675k, False: 196k]
| 694 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 695 | if (p[i] == p[0]) { Branch (695:13): [True: 224k, False: 450k]
| 696 | skip = i - 1; | 697 | } | 698 | } | 699 | | 700 | for (i = w; i >= 0; i--234k ) { Branch (700:17): [True: 240k, False: 190k]
| 701 | if (s[i] == p[0]) { Branch (701:13): [True: 85.0k, False: 155k]
| 702 | /* candidate match */ | 703 | for (j = mlast; j > 0; j--41.9k ) { Branch (703:29): [True: 121k, False: 5.85k]
| 704 | if (s[i+j] != p[j]) { Branch (704:21): [True: 79.1k, False: 41.9k]
| 705 | break; | 706 | } | 707 | } | 708 | if (j == 0) { Branch (708:17): [True: 5.85k, False: 79.1k]
| 709 | /* got a match! */ | 710 | return i; | 711 | } | 712 | /* miss: check if previous character is part of pattern */ | 713 | if (i > 0 && !23.0k STRINGLIB_BLOOM23.0k (mask, s[i-1])) { Branch (713:17): [True: 23.0k, False: 56.0k]
Branch (713:26): [True: 6.03k, False: 17.0k]
| 714 | i = i - m; | 715 | } | 716 | else { | 717 | i = i - skip; | 718 | } | 719 | } | 720 | else { | 721 | /* skip: check if previous character is part of pattern */ | 722 | if (i > 0 && !48.1k STRINGLIB_BLOOM48.1k (mask, s[i-1])) { Branch (722:17): [True: 48.1k, False: 107k]
Branch (722:26): [True: 12.6k, False: 35.4k]
| 723 | i = i - m; | 724 | } | 725 | } | 726 | } | 727 | return -1; | 728 | } |
Unexecuted instantiation: unicodeobject.c:ucs1lib_default_rfind unicodeobject.c:ucs2lib_default_rfind Line | Count | Source | 685 | { | 686 | /* create compressed boyer-moore delta 1 table */ | 687 | unsigned long mask = 0; | 688 | Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m; | 689 | | 690 | /* process pattern[0] outside the loop */ | 691 | STRINGLIB_BLOOM_ADD(mask, p[0]); | 692 | /* process pattern[:0:-1] */ | 693 | for (i = mlast; i > 0; i--15 ) { Branch (693:21): [True: 15, False: 15]
| 694 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 695 | if (p[i] == p[0]) { Branch (695:13): [True: 13, False: 2]
| 696 | skip = i - 1; | 697 | } | 698 | } | 699 | | 700 | for (i = w; i >= 0; i--119 ) { Branch (700:17): [True: 127, False: 7]
| 701 | if (s[i] == p[0]) { Branch (701:13): [True: 8, False: 119]
| 702 | /* candidate match */ | 703 | for (j = mlast; j > 0; j--8 ) { Branch (703:29): [True: 8, False: 8]
| 704 | if (s[i+j] != p[j]) { Branch (704:21): [True: 0, False: 8]
| 705 | break; | 706 | } | 707 | } | 708 | if (j == 0) { Branch (708:17): [True: 8, False: 0]
| 709 | /* got a match! */ | 710 | return i; | 711 | } | 712 | /* miss: check if previous character is part of pattern */ | 713 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) { Branch (713:17): [True: 0, False: 0]
Branch (713:26): [True: 0, False: 0]
| 714 | i = i - m; | 715 | } | 716 | else { | 717 | i = i - skip; | 718 | } | 719 | } | 720 | else { | 721 | /* skip: check if previous character is part of pattern */ | 722 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) { Branch (722:17): [True: 119, False: 0]
Branch (722:26): [True: 117, False: 2]
| 723 | i = i - m; | 724 | } | 725 | } | 726 | } | 727 | return -1; | 728 | } |
unicodeobject.c:ucs4lib_default_rfind Line | Count | Source | 685 | { | 686 | /* create compressed boyer-moore delta 1 table */ | 687 | unsigned long mask = 0; | 688 | Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m; | 689 | | 690 | /* process pattern[0] outside the loop */ | 691 | STRINGLIB_BLOOM_ADD(mask, p[0]); | 692 | /* process pattern[:0:-1] */ | 693 | for (i = mlast; i > 0; i--25 ) { Branch (693:21): [True: 25, False: 25]
| 694 | STRINGLIB_BLOOM_ADD(mask, p[i]); | 695 | if (p[i] == p[0]) { Branch (695:13): [True: 21, False: 4]
| 696 | skip = i - 1; | 697 | } | 698 | } | 699 | | 700 | for (i = w; i >= 0; i--217 ) { Branch (700:17): [True: 231, False: 11]
| 701 | if (s[i] == p[0]) { Branch (701:13): [True: 14, False: 217]
| 702 | /* candidate match */ | 703 | for (j = mlast; j > 0; j--14 ) { Branch (703:29): [True: 14, False: 14]
| 704 | if (s[i+j] != p[j]) { Branch (704:21): [True: 0, False: 14]
| 705 | break; | 706 | } | 707 | } | 708 | if (j == 0) { Branch (708:17): [True: 14, False: 0]
| 709 | /* got a match! */ | 710 | return i; | 711 | } | 712 | /* miss: check if previous character is part of pattern */ | 713 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) { Branch (713:17): [True: 0, False: 0]
Branch (713:26): [True: 0, False: 0]
| 714 | i = i - m; | 715 | } | 716 | else { | 717 | i = i - skip; | 718 | } | 719 | } | 720 | else { | 721 | /* skip: check if previous character is part of pattern */ | 722 | if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) { Branch (722:17): [True: 217, False: 0]
Branch (722:26): [True: 213, False: 4]
| 723 | i = i - m; | 724 | } | 725 | } | 726 | } | 727 | return -1; | 728 | } |
|
729 | |
730 | |
731 | static inline Py_ssize_t |
732 | STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n, |
733 | const STRINGLIB_CHAR p0, Py_ssize_t maxcount) |
734 | { |
735 | Py_ssize_t i, count = 0; |
736 | for (i = 0; i < n; i++23.6M ) { Branch (736:17): [True: 5.77M, False: 11.9k]
Branch (736:17): [True: 0, False: 0]
Branch (736:17): [True: 0, False: 0]
Branch (736:17): [True: 12.8M, False: 390k]
Branch (736:17): [True: 4.93M, False: 103k]
Branch (736:17): [True: 110k, False: 11.7k]
Branch (736:17): [True: 3.02k, False: 790]
|
737 | if (s[i] == p0) { Branch (737:13): [True: 4.49M, False: 1.28M]
Branch (737:13): [True: 0, False: 0]
Branch (737:13): [True: 0, False: 0]
Branch (737:13): [True: 464k, False: 12.3M]
Branch (737:13): [True: 164k, False: 4.76M]
Branch (737:13): [True: 2.43k, False: 108k]
Branch (737:13): [True: 22, False: 3.00k]
|
738 | count++; |
739 | if (count == maxcount) { Branch (739:17): [True: 0, False: 4.49M]
Branch (739:17): [True: 0, False: 0]
Branch (739:17): [True: 0, False: 0]
Branch (739:17): [True: 20, False: 464k]
Branch (739:17): [True: 0, False: 164k]
Branch (739:17): [True: 0, False: 2.43k]
Branch (739:17): [True: 0, False: 22]
|
740 | return maxcount; |
741 | } |
742 | } |
743 | } |
744 | return count; |
745 | } bytes_methods.c:stringlib_count_char Line | Count | Source | 734 | { | 735 | Py_ssize_t i, count = 0; | 736 | for (i = 0; i < n; i++5.77M ) { Branch (736:17): [True: 5.77M, False: 11.9k]
| 737 | if (s[i] == p0) { Branch (737:13): [True: 4.49M, False: 1.28M]
| 738 | count++; | 739 | if (count == maxcount) { Branch (739:17): [True: 0, False: 4.49M]
| 740 | return maxcount; | 741 | } | 742 | } | 743 | } | 744 | return count; | 745 | } |
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char Unexecuted instantiation: bytesobject.c:stringlib_count_char unicodeobject.c:asciilib_count_char Line | Count | Source | 734 | { | 735 | Py_ssize_t i, count = 0; | 736 | for (i = 0; i < n; i++12.8M ) { Branch (736:17): [True: 12.8M, False: 390k]
| 737 | if (s[i] == p0) { Branch (737:13): [True: 464k, False: 12.3M]
| 738 | count++; | 739 | if (count == maxcount) { Branch (739:17): [True: 20, False: 464k]
| 740 | return maxcount; | 741 | } | 742 | } | 743 | } | 744 | return count; | 745 | } |
unicodeobject.c:ucs1lib_count_char Line | Count | Source | 734 | { | 735 | Py_ssize_t i, count = 0; | 736 | for (i = 0; i < n; i++4.93M ) { Branch (736:17): [True: 4.93M, False: 103k]
| 737 | if (s[i] == p0) { Branch (737:13): [True: 164k, False: 4.76M]
| 738 | count++; | 739 | if (count == maxcount) { Branch (739:17): [True: 0, False: 164k]
| 740 | return maxcount; | 741 | } | 742 | } | 743 | } | 744 | return count; | 745 | } |
unicodeobject.c:ucs2lib_count_char Line | Count | Source | 734 | { | 735 | Py_ssize_t i, count = 0; | 736 | for (i = 0; i < n; i++110k ) { Branch (736:17): [True: 110k, False: 11.7k]
| 737 | if (s[i] == p0) { Branch (737:13): [True: 2.43k, False: 108k]
| 738 | count++; | 739 | if (count == maxcount) { Branch (739:17): [True: 0, False: 2.43k]
| 740 | return maxcount; | 741 | } | 742 | } | 743 | } | 744 | return count; | 745 | } |
unicodeobject.c:ucs4lib_count_char Line | Count | Source | 734 | { | 735 | Py_ssize_t i, count = 0; | 736 | for (i = 0; i < n; i++3.02k ) { Branch (736:17): [True: 3.02k, False: 790]
| 737 | if (s[i] == p0) { Branch (737:13): [True: 22, False: 3.00k]
| 738 | count++; | 739 | if (count == maxcount) { Branch (739:17): [True: 0, False: 22]
| 740 | return maxcount; | 741 | } | 742 | } | 743 | } | 744 | return count; | 745 | } |
|
746 | |
747 | |
748 | Py_LOCAL_INLINE(Py_ssize_t) |
749 | FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, |
750 | const STRINGLIB_CHAR* p, Py_ssize_t m, |
751 | Py_ssize_t maxcount, int mode) |
752 | { |
753 | if (n < m || (5.49M mode == 5.49M FAST_COUNT5.49M && maxcount == 01.17M )) { Branch (753:9): [True: 175k, False: 986k]
Branch (753:19): [True: 97.2k, False: 889k]
Branch (753:41): [True: 0, False: 97.2k]
Branch (753:9): [True: 24, False: 46.6k]
Branch (753:19): [True: 42.7k, False: 3.98k]
Branch (753:41): [True: 0, False: 42.7k]
Branch (753:9): [True: 126, False: 49.9k]
Branch (753:19): [True: 43.8k, False: 6.09k]
Branch (753:41): [True: 0, False: 43.8k]
Branch (753:9): [True: 5.81k, False: 2.78M]
Branch (753:19): [True: 769k, False: 2.01M]
Branch (753:41): [True: 0, False: 769k]
Branch (753:9): [True: 2, False: 1.61M]
Branch (753:19): [True: 205k, False: 1.40M]
Branch (753:41): [True: 0, False: 205k]
Branch (753:9): [True: 0, False: 14.7k]
Branch (753:19): [True: 14.2k, False: 565]
Branch (753:41): [True: 0, False: 14.2k]
Branch (753:9): [True: 0, False: 933]
Branch (753:19): [True: 825, False: 108]
Branch (753:41): [True: 0, False: 825]
|
754 | return -1; |
755 | } |
756 | |
757 | /* look for special cases */ |
758 | if (m <= 1) { Branch (758:9): [True: 104k, False: 882k]
Branch (758:9): [True: 9, False: 46.6k]
Branch (758:9): [True: 1.19k, False: 48.7k]
Branch (758:9): [True: 1.05M, False: 1.73M]
Branch (758:9): [True: 106k, False: 1.50M]
Branch (758:9): [True: 12.1k, False: 2.65k]
Branch (758:9): [True: 808, False: 125]
|
759 | if (m <= 0) { Branch (759:13): [True: 0, False: 104k]
Branch (759:13): [True: 0, False: 9]
Branch (759:13): [True: 0, False: 1.19k]
Branch (759:13): [True: 0, False: 1.05M]
Branch (759:13): [True: 0, False: 106k]
Branch (759:13): [True: 0, False: 12.1k]
Branch (759:13): [True: 0, False: 808]
|
760 | return -1; |
761 | } |
762 | /* use special case for 1-character strings */ |
763 | if (mode == FAST_SEARCH) Branch (763:13): [True: 92.3k, False: 11.9k]
Branch (763:13): [True: 4, False: 5]
Branch (763:13): [True: 878, False: 316]
Branch (763:13): [True: 525k, False: 531k]
Branch (763:13): [True: 2.24k, False: 104k]
Branch (763:13): [True: 305, False: 11.8k]
Branch (763:13): [True: 10, False: 798]
|
764 | return STRINGLIB(find_char)(s, n, p[0]); |
765 | else if (mode == FAST_RSEARCH) Branch (765:18): [True: 0, False: 11.9k]
Branch (765:18): [True: 5, False: 0]
Branch (765:18): [True: 316, False: 0]
Branch (765:18): [True: 140k, False: 390k]
Branch (765:18): [True: 490, False: 103k]
Branch (765:18): [True: 65, False: 11.7k]
Branch (765:18): [True: 8, False: 790]
|
766 | return STRINGLIB(rfind_char)(s, n, p[0]); |
767 | else { |
768 | return STRINGLIB(count_char)(s, n, p[0], maxcount); |
769 | } |
770 | } |
771 | |
772 | if (mode != FAST_RSEARCH) { Branch (772:9): [True: 685k, False: 196k]
Branch (772:9): [True: 46.5k, False: 88]
Branch (772:9): [True: 48.6k, False: 95]
Branch (772:9): [True: 1.53M, False: 196k]
Branch (772:9): [True: 1.50M, False: 0]
Branch (772:9): [True: 2.64k, False: 15]
Branch (772:9): [True: 100, False: 25]
|
773 | if (n < 2500 || (17.5k m < 10017.5k && n < 3000017.2k ) || m < 6679 ) { Branch (773:13): [True: 678k, False: 6.92k]
Branch (773:26): [True: 6.77k, False: 157]
Branch (773:37): [True: 6.47k, False: 298]
Branch (773:51): [True: 8, False: 447]
Branch (773:13): [True: 46.5k, False: 0]
Branch (773:26): [True: 0, False: 0]
Branch (773:37): [True: 0, False: 0]
Branch (773:51): [True: 0, False: 0]
Branch (773:13): [True: 48.6k, False: 3]
Branch (773:26): [True: 3, False: 0]
Branch (773:37): [True: 1, False: 2]
Branch (773:51): [True: 2, False: 0]
Branch (773:13): [True: 1.53M, False: 950]
Branch (773:26): [True: 784, False: 166]
Branch (773:37): [True: 779, False: 5]
Branch (773:51): [True: 5, False: 166]
Branch (773:13): [True: 1.49M, False: 9.64k]
Branch (773:26): [True: 9.64k, False: 0]
Branch (773:37): [True: 9.64k, False: 2]
Branch (773:51): [True: 2, False: 0]
Branch (773:13): [True: 2.58k, False: 60]
Branch (773:26): [True: 60, False: 0]
Branch (773:37): [True: 11, False: 49]
Branch (773:51): [True: 0, False: 49]
Branch (773:13): [True: 100, False: 0]
Branch (773:26): [True: 0, False: 0]
Branch (773:37): [True: 0, False: 0]
Branch (773:51): [True: 0, False: 0]
|
774 | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); |
775 | } |
776 | else if ((m >> 2) * 3 < (n >> 2)) { Branch (776:18): [True: 447, False: 0]
Branch (776:18): [True: 0, False: 0]
Branch (776:18): [True: 0, False: 0]
Branch (776:18): [True: 166, False: 0]
Branch (776:18): [True: 0, False: 0]
Branch (776:18): [True: 49, False: 0]
Branch (776:18): [True: 0, False: 0]
|
777 | /* 33% threshold, but don't overflow. */ |
778 | /* For larger problems where the needle isn't a huge |
779 | percentage of the size of the haystack, the relatively |
780 | expensive O(m) startup cost of the two-way algorithm |
781 | will surely pay off. */ |
782 | if (mode == FAST_SEARCH) { Branch (782:17): [True: 447, False: 0]
Branch (782:17): [True: 0, False: 0]
Branch (782:17): [True: 0, False: 0]
Branch (782:17): [True: 166, False: 0]
Branch (782:17): [True: 0, False: 0]
Branch (782:17): [True: 46, False: 3]
Branch (782:17): [True: 0, False: 0]
|
783 | return STRINGLIB(_two_way_find)(s, n, p, m); |
784 | } |
785 | else { |
786 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); |
787 | } |
788 | } |
789 | else { |
790 | /* To ensure that we have good worst-case behavior, |
791 | here's an adaptive version of the algorithm, where if |
792 | we match O(m) characters without any matches of the |
793 | entire needle, then we predict that the startup cost of |
794 | the two-way algorithm will probably be worth it. */ |
795 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); |
796 | } |
797 | } |
798 | else { |
799 | /* FAST_RSEARCH */ |
800 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); |
801 | } |
802 | } bytes_methods.c:fastsearch Line | Count | Source | 752 | { | 753 | if (n < m || (986k mode == 986k FAST_COUNT986k && maxcount == 097.2k )) { Branch (753:9): [True: 175k, False: 986k]
Branch (753:19): [True: 97.2k, False: 889k]
Branch (753:41): [True: 0, False: 97.2k]
| 754 | return -1; | 755 | } | 756 | | 757 | /* look for special cases */ | 758 | if (m <= 1) { Branch (758:9): [True: 104k, False: 882k]
| 759 | if (m <= 0) { Branch (759:13): [True: 0, False: 104k]
| 760 | return -1; | 761 | } | 762 | /* use special case for 1-character strings */ | 763 | if (mode == FAST_SEARCH) Branch (763:13): [True: 92.3k, False: 11.9k]
| 764 | return STRINGLIB(find_char)(s, n, p[0]); | 765 | else if (mode == FAST_RSEARCH) Branch (765:18): [True: 0, False: 11.9k]
| 766 | return STRINGLIB(rfind_char)(s, n, p[0]); | 767 | else { | 768 | return STRINGLIB(count_char)(s, n, p[0], maxcount); | 769 | } | 770 | } | 771 | | 772 | if (mode != FAST_RSEARCH) { Branch (772:9): [True: 685k, False: 196k]
| 773 | if (n < 2500 || (6.92k m < 1006.92k && n < 300006.77k ) || m < 6455 ) { Branch (773:13): [True: 678k, False: 6.92k]
Branch (773:26): [True: 6.77k, False: 157]
Branch (773:37): [True: 6.47k, False: 298]
Branch (773:51): [True: 8, False: 447]
| 774 | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); | 775 | } | 776 | else if ((m >> 2) * 3 < (n >> 2)) { Branch (776:18): [True: 447, False: 0]
| 777 | /* 33% threshold, but don't overflow. */ | 778 | /* For larger problems where the needle isn't a huge | 779 | percentage of the size of the haystack, the relatively | 780 | expensive O(m) startup cost of the two-way algorithm | 781 | will surely pay off. */ | 782 | if (mode == FAST_SEARCH) { Branch (782:17): [True: 447, False: 0]
| 783 | return STRINGLIB(_two_way_find)(s, n, p, m); | 784 | } | 785 | else { | 786 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); | 787 | } | 788 | } | 789 | else { | 790 | /* To ensure that we have good worst-case behavior, | 791 | here's an adaptive version of the algorithm, where if | 792 | we match O(m) characters without any matches of the | 793 | entire needle, then we predict that the startup cost of | 794 | the two-way algorithm will probably be worth it. */ | 795 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); | 796 | } | 797 | } | 798 | else { | 799 | /* FAST_RSEARCH */ | 800 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); | 801 | } | 802 | } |
bytearrayobject.c:fastsearch Line | Count | Source | 752 | { | 753 | if (n < m || (46.6k mode == 46.6k FAST_COUNT46.6k && maxcount == 042.7k )) { Branch (753:9): [True: 24, False: 46.6k]
Branch (753:19): [True: 42.7k, False: 3.98k]
Branch (753:41): [True: 0, False: 42.7k]
| 754 | return -1; | 755 | } | 756 | | 757 | /* look for special cases */ | 758 | if (m <= 1) { Branch (758:9): [True: 9, False: 46.6k]
| 759 | if (m <= 0) { Branch (759:13): [True: 0, False: 9]
| 760 | return -1; | 761 | } | 762 | /* use special case for 1-character strings */ | 763 | if (mode == FAST_SEARCH) Branch (763:13): [True: 4, False: 5]
| 764 | return STRINGLIB(find_char)(s, n, p[0]); | 765 | else if (mode == FAST_RSEARCH) Branch (765:18): [True: 5, False: 0]
| 766 | return STRINGLIB(rfind_char)(s, n, p[0]); | 767 | else { | 768 | return STRINGLIB(count_char)(s, n, p[0], maxcount); | 769 | } | 770 | } | 771 | | 772 | if (mode != FAST_RSEARCH) { Branch (772:9): [True: 46.5k, False: 88]
| 773 | if (n < 2500 || (0 m < 1000 && n < 300000 ) || m < 60 ) { Branch (773:13): [True: 46.5k, False: 0]
Branch (773:26): [True: 0, False: 0]
Branch (773:37): [True: 0, False: 0]
Branch (773:51): [True: 0, False: 0]
| 774 | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); | 775 | } | 776 | else if ((m >> 2) * 3 < (n >> 2)) { Branch (776:18): [True: 0, False: 0]
| 777 | /* 33% threshold, but don't overflow. */ | 778 | /* For larger problems where the needle isn't a huge | 779 | percentage of the size of the haystack, the relatively | 780 | expensive O(m) startup cost of the two-way algorithm | 781 | will surely pay off. */ | 782 | if (mode == FAST_SEARCH) { Branch (782:17): [True: 0, False: 0]
| 783 | return STRINGLIB(_two_way_find)(s, n, p, m); | 784 | } | 785 | else { | 786 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); | 787 | } | 788 | } | 789 | else { | 790 | /* To ensure that we have good worst-case behavior, | 791 | here's an adaptive version of the algorithm, where if | 792 | we match O(m) characters without any matches of the | 793 | entire needle, then we predict that the startup cost of | 794 | the two-way algorithm will probably be worth it. */ | 795 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); | 796 | } | 797 | } | 798 | else { | 799 | /* FAST_RSEARCH */ | 800 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); | 801 | } | 802 | } |
Line | Count | Source | 752 | { | 753 | if (n < m || (49.9k mode == 49.9k FAST_COUNT49.9k && maxcount == 043.8k )) { Branch (753:9): [True: 126, False: 49.9k]
Branch (753:19): [True: 43.8k, False: 6.09k]
Branch (753:41): [True: 0, False: 43.8k]
| 754 | return -1; | 755 | } | 756 | | 757 | /* look for special cases */ | 758 | if (m <= 1) { Branch (758:9): [True: 1.19k, False: 48.7k]
| 759 | if (m <= 0) { Branch (759:13): [True: 0, False: 1.19k]
| 760 | return -1; | 761 | } | 762 | /* use special case for 1-character strings */ | 763 | if (mode == FAST_SEARCH) Branch (763:13): [True: 878, False: 316]
| 764 | return STRINGLIB(find_char)(s, n, p[0]); | 765 | else if (mode == FAST_RSEARCH) Branch (765:18): [True: 316, False: 0]
| 766 | return STRINGLIB(rfind_char)(s, n, p[0]); | 767 | else { | 768 | return STRINGLIB(count_char)(s, n, p[0], maxcount); | 769 | } | 770 | } | 771 | | 772 | if (mode != FAST_RSEARCH) { Branch (772:9): [True: 48.6k, False: 95]
| 773 | if (n < 2500 || (3 m < 1003 && n < 300003 ) || m < 62 ) { Branch (773:13): [True: 48.6k, False: 3]
Branch (773:26): [True: 3, False: 0]
Branch (773:37): [True: 1, False: 2]
Branch (773:51): [True: 2, False: 0]
| 774 | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); | 775 | } | 776 | else if ((m >> 2) * 3 < (n >> 2)) { Branch (776:18): [True: 0, False: 0]
| 777 | /* 33% threshold, but don't overflow. */ | 778 | /* For larger problems where the needle isn't a huge | 779 | percentage of the size of the haystack, the relatively | 780 | expensive O(m) startup cost of the two-way algorithm | 781 | will surely pay off. */ | 782 | if (mode == FAST_SEARCH) { Branch (782:17): [True: 0, False: 0]
| 783 | return STRINGLIB(_two_way_find)(s, n, p, m); | 784 | } | 785 | else { | 786 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); | 787 | } | 788 | } | 789 | else { | 790 | /* To ensure that we have good worst-case behavior, | 791 | here's an adaptive version of the algorithm, where if | 792 | we match O(m) characters without any matches of the | 793 | entire needle, then we predict that the startup cost of | 794 | the two-way algorithm will probably be worth it. */ | 795 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); | 796 | } | 797 | } | 798 | else { | 799 | /* FAST_RSEARCH */ | 800 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); | 801 | } | 802 | } |
unicodeobject.c:asciilib_fastsearch Line | Count | Source | 752 | { | 753 | if (n < m || (2.78M mode == 2.78M FAST_COUNT2.78M && maxcount == 0769k )) { Branch (753:9): [True: 5.81k, False: 2.78M]
Branch (753:19): [True: 769k, False: 2.01M]
Branch (753:41): [True: 0, False: 769k]
| 754 | return -1; | 755 | } | 756 | | 757 | /* look for special cases */ | 758 | if (m <= 1) { Branch (758:9): [True: 1.05M, False: 1.73M]
| 759 | if (m <= 0) { Branch (759:13): [True: 0, False: 1.05M]
| 760 | return -1; | 761 | } | 762 | /* use special case for 1-character strings */ | 763 | if (mode == FAST_SEARCH) Branch (763:13): [True: 525k, False: 531k]
| 764 | return STRINGLIB(find_char)(s, n, p[0]); | 765 | else if (mode == FAST_RSEARCH) Branch (765:18): [True: 140k, False: 390k]
| 766 | return STRINGLIB(rfind_char)(s, n, p[0]); | 767 | else { | 768 | return STRINGLIB(count_char)(s, n, p[0], maxcount); | 769 | } | 770 | } | 771 | | 772 | if (mode != FAST_RSEARCH) { Branch (772:9): [True: 1.53M, False: 196k]
| 773 | if (n < 2500 || (950 m < 100950 && n < 30000784 ) || m < 6171 ) { Branch (773:13): [True: 1.53M, False: 950]
Branch (773:26): [True: 784, False: 166]
Branch (773:37): [True: 779, False: 5]
Branch (773:51): [True: 5, False: 166]
| 774 | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); | 775 | } | 776 | else if ((m >> 2) * 3 < (n >> 2)) { Branch (776:18): [True: 166, False: 0]
| 777 | /* 33% threshold, but don't overflow. */ | 778 | /* For larger problems where the needle isn't a huge | 779 | percentage of the size of the haystack, the relatively | 780 | expensive O(m) startup cost of the two-way algorithm | 781 | will surely pay off. */ | 782 | if (mode == FAST_SEARCH) { Branch (782:17): [True: 166, False: 0]
| 783 | return STRINGLIB(_two_way_find)(s, n, p, m); | 784 | } | 785 | else { | 786 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); | 787 | } | 788 | } | 789 | else { | 790 | /* To ensure that we have good worst-case behavior, | 791 | here's an adaptive version of the algorithm, where if | 792 | we match O(m) characters without any matches of the | 793 | entire needle, then we predict that the startup cost of | 794 | the two-way algorithm will probably be worth it. */ | 795 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); | 796 | } | 797 | } | 798 | else { | 799 | /* FAST_RSEARCH */ | 800 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); | 801 | } | 802 | } |
unicodeobject.c:ucs1lib_fastsearch Line | Count | Source | 752 | { | 753 | if (n < m || (1.61M mode == 1.61M FAST_COUNT1.61M && maxcount == 0205k )) { Branch (753:9): [True: 2, False: 1.61M]
Branch (753:19): [True: 205k, False: 1.40M]
Branch (753:41): [True: 0, False: 205k]
| 754 | return -1; | 755 | } | 756 | | 757 | /* look for special cases */ | 758 | if (m <= 1) { Branch (758:9): [True: 106k, False: 1.50M]
| 759 | if (m <= 0) { Branch (759:13): [True: 0, False: 106k]
| 760 | return -1; | 761 | } | 762 | /* use special case for 1-character strings */ | 763 | if (mode == FAST_SEARCH) Branch (763:13): [True: 2.24k, False: 104k]
| 764 | return STRINGLIB(find_char)(s, n, p[0]); | 765 | else if (mode == FAST_RSEARCH) Branch (765:18): [True: 490, False: 103k]
| 766 | return STRINGLIB(rfind_char)(s, n, p[0]); | 767 | else { | 768 | return STRINGLIB(count_char)(s, n, p[0], maxcount); | 769 | } | 770 | } | 771 | | 772 | if (mode != FAST_RSEARCH) { Branch (772:9): [True: 1.50M, False: 0]
| 773 | if (n < 2500 || (9.64k m < 1009.64k && n < 300009.64k ) || m < 62 ) { Branch (773:13): [True: 1.49M, False: 9.64k]
Branch (773:26): [True: 9.64k, False: 0]
Branch (773:37): [True: 9.64k, False: 2]
Branch (773:51): [True: 2, False: 0]
| 774 | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); | 775 | } | 776 | else if ((m >> 2) * 3 < (n >> 2)) { Branch (776:18): [True: 0, False: 0]
| 777 | /* 33% threshold, but don't overflow. */ | 778 | /* For larger problems where the needle isn't a huge | 779 | percentage of the size of the haystack, the relatively | 780 | expensive O(m) startup cost of the two-way algorithm | 781 | will surely pay off. */ | 782 | if (mode == FAST_SEARCH) { Branch (782:17): [True: 0, False: 0]
| 783 | return STRINGLIB(_two_way_find)(s, n, p, m); | 784 | } | 785 | else { | 786 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); | 787 | } | 788 | } | 789 | else { | 790 | /* To ensure that we have good worst-case behavior, | 791 | here's an adaptive version of the algorithm, where if | 792 | we match O(m) characters without any matches of the | 793 | entire needle, then we predict that the startup cost of | 794 | the two-way algorithm will probably be worth it. */ | 795 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); | 796 | } | 797 | } | 798 | else { | 799 | /* FAST_RSEARCH */ | 800 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); | 801 | } | 802 | } |
unicodeobject.c:ucs2lib_fastsearch Line | Count | Source | 752 | { | 753 | if (n < m || (mode == FAST_COUNT && maxcount == 014.2k )) { Branch (753:9): [True: 0, False: 14.7k]
Branch (753:19): [True: 14.2k, False: 565]
Branch (753:41): [True: 0, False: 14.2k]
| 754 | return -1; | 755 | } | 756 | | 757 | /* look for special cases */ | 758 | if (m <= 1) { Branch (758:9): [True: 12.1k, False: 2.65k]
| 759 | if (m <= 0) { Branch (759:13): [True: 0, False: 12.1k]
| 760 | return -1; | 761 | } | 762 | /* use special case for 1-character strings */ | 763 | if (mode == FAST_SEARCH) Branch (763:13): [True: 305, False: 11.8k]
| 764 | return STRINGLIB(find_char)(s, n, p[0]); | 765 | else if (mode == FAST_RSEARCH) Branch (765:18): [True: 65, False: 11.7k]
| 766 | return STRINGLIB(rfind_char)(s, n, p[0]); | 767 | else { | 768 | return STRINGLIB(count_char)(s, n, p[0], maxcount); | 769 | } | 770 | } | 771 | | 772 | if (mode != FAST_RSEARCH) { Branch (772:9): [True: 2.64k, False: 15]
| 773 | if (n < 2500 || (60 m < 10060 && n < 3000060 ) || m < 649 ) { Branch (773:13): [True: 2.58k, False: 60]
Branch (773:26): [True: 60, False: 0]
Branch (773:37): [True: 11, False: 49]
Branch (773:51): [True: 0, False: 49]
| 774 | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); | 775 | } | 776 | else if ((m >> 2) * 3 < (n >> 2)) { Branch (776:18): [True: 49, False: 0]
| 777 | /* 33% threshold, but don't overflow. */ | 778 | /* For larger problems where the needle isn't a huge | 779 | percentage of the size of the haystack, the relatively | 780 | expensive O(m) startup cost of the two-way algorithm | 781 | will surely pay off. */ | 782 | if (mode == FAST_SEARCH) { Branch (782:17): [True: 46, False: 3]
| 783 | return STRINGLIB(_two_way_find)(s, n, p, m); | 784 | } | 785 | else { | 786 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); | 787 | } | 788 | } | 789 | else { | 790 | /* To ensure that we have good worst-case behavior, | 791 | here's an adaptive version of the algorithm, where if | 792 | we match O(m) characters without any matches of the | 793 | entire needle, then we predict that the startup cost of | 794 | the two-way algorithm will probably be worth it. */ | 795 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); | 796 | } | 797 | } | 798 | else { | 799 | /* FAST_RSEARCH */ | 800 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); | 801 | } | 802 | } |
unicodeobject.c:ucs4lib_fastsearch Line | Count | Source | 752 | { | 753 | if (n < m || (mode == FAST_COUNT && maxcount == 0825 )) { Branch (753:9): [True: 0, False: 933]
Branch (753:19): [True: 825, False: 108]
Branch (753:41): [True: 0, False: 825]
| 754 | return -1; | 755 | } | 756 | | 757 | /* look for special cases */ | 758 | if (m <= 1) { Branch (758:9): [True: 808, False: 125]
| 759 | if (m <= 0) { Branch (759:13): [True: 0, False: 808]
| 760 | return -1; | 761 | } | 762 | /* use special case for 1-character strings */ | 763 | if (mode == FAST_SEARCH) Branch (763:13): [True: 10, False: 798]
| 764 | return STRINGLIB(find_char)(s, n, p[0]); | 765 | else if (mode == FAST_RSEARCH) Branch (765:18): [True: 8, False: 790]
| 766 | return STRINGLIB(rfind_char)(s, n, p[0]); | 767 | else { | 768 | return STRINGLIB(count_char)(s, n, p[0], maxcount); | 769 | } | 770 | } | 771 | | 772 | if (mode != FAST_RSEARCH) { Branch (772:9): [True: 100, False: 25]
| 773 | if (n < 2500 || (0 m < 1000 && n < 300000 ) || m < 60 ) { Branch (773:13): [True: 100, False: 0]
Branch (773:26): [True: 0, False: 0]
Branch (773:37): [True: 0, False: 0]
Branch (773:51): [True: 0, False: 0]
| 774 | return STRINGLIB(default_find)(s, n, p, m, maxcount, mode); | 775 | } | 776 | else if ((m >> 2) * 3 < (n >> 2)) { Branch (776:18): [True: 0, False: 0]
| 777 | /* 33% threshold, but don't overflow. */ | 778 | /* For larger problems where the needle isn't a huge | 779 | percentage of the size of the haystack, the relatively | 780 | expensive O(m) startup cost of the two-way algorithm | 781 | will surely pay off. */ | 782 | if (mode == FAST_SEARCH) { Branch (782:17): [True: 0, False: 0]
| 783 | return STRINGLIB(_two_way_find)(s, n, p, m); | 784 | } | 785 | else { | 786 | return STRINGLIB(_two_way_count)(s, n, p, m, maxcount); | 787 | } | 788 | } | 789 | else { | 790 | /* To ensure that we have good worst-case behavior, | 791 | here's an adaptive version of the algorithm, where if | 792 | we match O(m) characters without any matches of the | 793 | entire needle, then we predict that the startup cost of | 794 | the two-way algorithm will probably be worth it. */ | 795 | return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode); | 796 | } | 797 | } | 798 | else { | 799 | /* FAST_RSEARCH */ | 800 | return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode); | 801 | } | 802 | } |
|
803 | |