Coverage Report

Created: 2022-07-08 09:39

/home/mdboom/Work/builds/cpython/Objects/stringlib/fastsearch.h
Line
Count
Source (jump to first uncovered line)
1
/* stringlib: fastsearch implementation */
2
3
#define STRINGLIB_FASTSEARCH_H
4
5
/* fast search/count implementation, based on a mix between boyer-
6
   moore and horspool, with a few more bells and whistles on the top.
7
   for some more background, see:
8
   https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
9
10
/* note: fastsearch may access s[n], which isn't a problem when using
11
   Python's ordinary string types, but may cause problems if you're
12
   using this code in other contexts.  also, the count mode returns -1
13
   if there cannot possibly be a match in the target string, and 0 if
14
   it has actually checked for matches, but didn't find any.  callers
15
   beware! */
16
17
/* If the strings are long enough, use Crochemore and Perrin's Two-Way
18
   algorithm, which has worst-case O(n) runtime and best-case O(n/k).
19
   Also compute a table of shifts to achieve O(n/k) in more cases,
20
   and often (data dependent) deduce larger shifts than pure C&P can
21
   deduce. */
22
23
#define FAST_COUNT 0
24
#define FAST_SEARCH 1
25
#define FAST_RSEARCH 2
26
27
#if LONG_BIT >= 128
28
#define STRINGLIB_BLOOM_WIDTH 128
29
#elif LONG_BIT >= 64
30
#define STRINGLIB_BLOOM_WIDTH 64
31
#elif LONG_BIT >= 32
32
#define STRINGLIB_BLOOM_WIDTH 32
33
#else
34
#error "LONG_BIT is smaller than 32"
35
#endif
36
37
#define STRINGLIB_BLOOM_ADD(mask, ch) \
38
    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
39
#define STRINGLIB_BLOOM(mask, ch)     \
40
    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
41
42
#ifdef STRINGLIB_FAST_MEMCHR
43
#  define MEMCHR_CUT_OFF 15
44
#else
45
#  define MEMCHR_CUT_OFF 40
46
#endif
47
48
Py_LOCAL_INLINE(Py_ssize_t)
49
STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
50
{
51
    const STRINGLIB_CHAR *p, *e;
52
53
    p = s;
54
    e = s + n;
55
    if (n > MEMCHR_CUT_OFF) {
  Branch (55:9): [True: 139k, False: 170k]
  Branch (55:9): [True: 0, False: 13]
  Branch (55:9): [True: 378, False: 500]
  Branch (55:9): [True: 565k, False: 4.73M]
  Branch (55:9): [True: 218, False: 73.2k]
  Branch (55:9): [True: 48, False: 1.04M]
  Branch (55:9): [True: 459k, False: 66.0k]
56
#ifdef STRINGLIB_FAST_MEMCHR
57
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
58
        if (p != NULL)
  Branch (58:13): [True: 118k, False: 20.8k]
  Branch (58:13): [True: 0, False: 0]
  Branch (58:13): [True: 312, False: 66]
  Branch (58:13): [True: 90.3k, False: 475k]
  Branch (58:13): [True: 25, False: 23]
  Branch (58:13): [True: 453k, False: 5.94k]
59
            return (p - s);
60
        return -1;
61
#else
62
        /* use memchr if we can choose a needle without too many likely
63
           false positives */
64
        const STRINGLIB_CHAR *s1, *e1;
65
        unsigned char needle = ch & 0xff;
66
        /* If looking for a multiple of 256, we'd have too
67
           many false positives looking for the '\0' byte in UCS2
68
           and UCS4 representations. */
69
        if (needle != 0) {
  Branch (69:13): [True: 217, False: 1]
70
            do {
71
                void *candidate = memchr(p, needle,
72
                                         (e - p) * sizeof(STRINGLIB_CHAR));
73
                if (candidate == NULL)
  Branch (73:21): [True: 19, False: 199]
74
                    return -1;
75
                s1 = p;
76
                p = (const STRINGLIB_CHAR *)
77
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
78
                if (*p == ch)
  Branch (78:21): [True: 195, False: 4]
79
                    return (p - s);
80
                /* False positive */
81
                p++;
82
                if (p - s1 > MEMCHR_CUT_OFF)
  Branch (82:21): [True: 2, False: 2]
83
                    continue;
84
                if (e - p <= MEMCHR_CUT_OFF)
  Branch (84:21): [True: 0, False: 2]
85
                    break;
86
                e1 = p + MEMCHR_CUT_OFF;
87
                while (p != e1) {
  Branch (87:24): [True: 6, False: 0]
88
                    if (*p == ch)
  Branch (88:25): [True: 2, False: 4]
89
                        return (p - s);
90
                    p++;
91
                }
92
            }
93
            while (
e - p > 2
MEMCHR_CUT_OFF2
);
  Branch (93:20): [True: 1, False: 1]
94
        }
95
#endif
96
    }
97
    
while (6.09M
p < e) {
  Branch (97:12): [True: 1.30M, False: 82.7k]
  Branch (97:12): [True: 38, False: 5]
  Branch (97:12): [True: 4.13k, False: 342]
  Branch (97:12): [True: 19.6M, False: 3.46M]
  Branch (97:12): [True: 233k, False: 72.1k]
  Branch (97:12): [True: 3.14M, False: 1.04M]
  Branch (97:12): [True: 378k, False: 14.2k]
98
        if (*p == ch)
  Branch (98:13): [True: 87.8k, False: 1.21M]
  Branch (98:13): [True: 8, False: 30]
  Branch (98:13): [True: 158, False: 3.98k]
  Branch (98:13): [True: 1.27M, False: 18.3M]
  Branch (98:13): [True: 1.08k, False: 232k]
  Branch (98:13): [True: 10, False: 3.14M]
  Branch (98:13): [True: 51.8k, False: 326k]
99
            return (p - s);
100
        p++;
101
    }
102
    return -1;
103
}
bytes_methods.c:stringlib_find_char
Line
Count
Source
50
{
51
    const STRINGLIB_CHAR *p, *e;
52
53
    p = s;
54
    e = s + n;
55
    if (n > MEMCHR_CUT_OFF) {
  Branch (55:9): [True: 139k, False: 170k]
56
#ifdef STRINGLIB_FAST_MEMCHR
57
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
58
        if (p != NULL)
  Branch (58:13): [True: 118k, False: 20.8k]
59
            return (p - s);
60
        return -1;
61
#else
62
        /* use memchr if we can choose a needle without too many likely
63
           false positives */
64
        const STRINGLIB_CHAR *s1, *e1;
65
        unsigned char needle = ch & 0xff;
66
        /* If looking for a multiple of 256, we'd have too
67
           many false positives looking for the '\0' byte in UCS2
68
           and UCS4 representations. */
69
        if (needle != 0) {
70
            do {
71
                void *candidate = memchr(p, needle,
72
                                         (e - p) * sizeof(STRINGLIB_CHAR));
73
                if (candidate == NULL)
74
                    return -1;
75
                s1 = p;
76
                p = (const STRINGLIB_CHAR *)
77
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
78
                if (*p == ch)
79
                    return (p - s);
80
                /* False positive */
81
                p++;
82
                if (p - s1 > MEMCHR_CUT_OFF)
83
                    continue;
84
                if (e - p <= MEMCHR_CUT_OFF)
85
                    break;
86
                e1 = p + MEMCHR_CUT_OFF;
87
                while (p != e1) {
88
                    if (*p == ch)
89
                        return (p - s);
90
                    p++;
91
                }
92
            }
93
            while (e - p > MEMCHR_CUT_OFF);
94
        }
95
#endif
96
    }
97
    
while (170k
p < e) {
  Branch (97:12): [True: 1.30M, False: 82.7k]
98
        if (*p == ch)
  Branch (98:13): [True: 87.8k, False: 1.21M]
99
            return (p - s);
100
        p++;
101
    }
102
    return -1;
103
}
bytearrayobject.c:stringlib_find_char
Line
Count
Source
50
{
51
    const STRINGLIB_CHAR *p, *e;
52
53
    p = s;
54
    e = s + n;
55
    if (n > MEMCHR_CUT_OFF) {
  Branch (55:9): [True: 0, False: 13]
56
#ifdef STRINGLIB_FAST_MEMCHR
57
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
58
        if (p != NULL)
  Branch (58:13): [True: 0, False: 0]
59
            return (p - s);
60
        return -1;
61
#else
62
        /* use memchr if we can choose a needle without too many likely
63
           false positives */
64
        const STRINGLIB_CHAR *s1, *e1;
65
        unsigned char needle = ch & 0xff;
66
        /* If looking for a multiple of 256, we'd have too
67
           many false positives looking for the '\0' byte in UCS2
68
           and UCS4 representations. */
69
        if (needle != 0) {
70
            do {
71
                void *candidate = memchr(p, needle,
72
                                         (e - p) * sizeof(STRINGLIB_CHAR));
73
                if (candidate == NULL)
74
                    return -1;
75
                s1 = p;
76
                p = (const STRINGLIB_CHAR *)
77
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
78
                if (*p == ch)
79
                    return (p - s);
80
                /* False positive */
81
                p++;
82
                if (p - s1 > MEMCHR_CUT_OFF)
83
                    continue;
84
                if (e - p <= MEMCHR_CUT_OFF)
85
                    break;
86
                e1 = p + MEMCHR_CUT_OFF;
87
                while (p != e1) {
88
                    if (*p == ch)
89
                        return (p - s);
90
                    p++;
91
                }
92
            }
93
            while (e - p > MEMCHR_CUT_OFF);
94
        }
95
#endif
96
    }
97
    
while (13
p < e) {
  Branch (97:12): [True: 38, False: 5]
98
        if (*p == ch)
  Branch (98:13): [True: 8, False: 30]
99
            return (p - s);
100
        p++;
101
    }
102
    return -1;
103
}
bytesobject.c:stringlib_find_char
Line
Count
Source
50
{
51
    const STRINGLIB_CHAR *p, *e;
52
53
    p = s;
54
    e = s + n;
55
    if (n > MEMCHR_CUT_OFF) {
  Branch (55:9): [True: 378, False: 500]
56
#ifdef STRINGLIB_FAST_MEMCHR
57
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
58
        if (p != NULL)
  Branch (58:13): [True: 312, False: 66]
59
            return (p - s);
60
        return -1;
61
#else
62
        /* use memchr if we can choose a needle without too many likely
63
           false positives */
64
        const STRINGLIB_CHAR *s1, *e1;
65
        unsigned char needle = ch & 0xff;
66
        /* If looking for a multiple of 256, we'd have too
67
           many false positives looking for the '\0' byte in UCS2
68
           and UCS4 representations. */
69
        if (needle != 0) {
70
            do {
71
                void *candidate = memchr(p, needle,
72
                                         (e - p) * sizeof(STRINGLIB_CHAR));
73
                if (candidate == NULL)
74
                    return -1;
75
                s1 = p;
76
                p = (const STRINGLIB_CHAR *)
77
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
78
                if (*p == ch)
79
                    return (p - s);
80
                /* False positive */
81
                p++;
82
                if (p - s1 > MEMCHR_CUT_OFF)
83
                    continue;
84
                if (e - p <= MEMCHR_CUT_OFF)
85
                    break;
86
                e1 = p + MEMCHR_CUT_OFF;
87
                while (p != e1) {
88
                    if (*p == ch)
89
                        return (p - s);
90
                    p++;
91
                }
92
            }
93
            while (e - p > MEMCHR_CUT_OFF);
94
        }
95
#endif
96
    }
97
    
while (500
p < e) {
  Branch (97:12): [True: 4.13k, False: 342]
98
        if (*p == ch)
  Branch (98:13): [True: 158, False: 3.98k]
99
            return (p - s);
100
        p++;
101
    }
102
    return -1;
103
}
unicodeobject.c:ucs1lib_find_char
Line
Count
Source
50
{
51
    const STRINGLIB_CHAR *p, *e;
52
53
    p = s;
54
    e = s + n;
55
    if (n > MEMCHR_CUT_OFF) {
  Branch (55:9): [True: 565k, False: 4.73M]
56
#ifdef STRINGLIB_FAST_MEMCHR
57
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
58
        if (p != NULL)
  Branch (58:13): [True: 90.3k, False: 475k]
59
            return (p - s);
60
        return -1;
61
#else
62
        /* use memchr if we can choose a needle without too many likely
63
           false positives */
64
        const STRINGLIB_CHAR *s1, *e1;
65
        unsigned char needle = ch & 0xff;
66
        /* If looking for a multiple of 256, we'd have too
67
           many false positives looking for the '\0' byte in UCS2
68
           and UCS4 representations. */
69
        if (needle != 0) {
70
            do {
71
                void *candidate = memchr(p, needle,
72
                                         (e - p) * sizeof(STRINGLIB_CHAR));
73
                if (candidate == NULL)
74
                    return -1;
75
                s1 = p;
76
                p = (const STRINGLIB_CHAR *)
77
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
78
                if (*p == ch)
79
                    return (p - s);
80
                /* False positive */
81
                p++;
82
                if (p - s1 > MEMCHR_CUT_OFF)
83
                    continue;
84
                if (e - p <= MEMCHR_CUT_OFF)
85
                    break;
86
                e1 = p + MEMCHR_CUT_OFF;
87
                while (p != e1) {
88
                    if (*p == ch)
89
                        return (p - s);
90
                    p++;
91
                }
92
            }
93
            while (e - p > MEMCHR_CUT_OFF);
94
        }
95
#endif
96
    }
97
    
while (4.73M
p < e) {
  Branch (97:12): [True: 19.6M, False: 3.46M]
98
        if (*p == ch)
  Branch (98:13): [True: 1.27M, False: 18.3M]
99
            return (p - s);
100
        p++;
101
    }
102
    return -1;
103
}
unicodeobject.c:ucs2lib_find_char
Line
Count
Source
50
{
51
    const STRINGLIB_CHAR *p, *e;
52
53
    p = s;
54
    e = s + n;
55
    if (n > MEMCHR_CUT_OFF) {
  Branch (55:9): [True: 218, False: 73.2k]
56
#ifdef STRINGLIB_FAST_MEMCHR
57
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
58
        if (p != NULL)
59
            return (p - s);
60
        return -1;
61
#else
62
        /* use memchr if we can choose a needle without too many likely
63
           false positives */
64
        const STRINGLIB_CHAR *s1, *e1;
65
        unsigned char needle = ch & 0xff;
66
        /* If looking for a multiple of 256, we'd have too
67
           many false positives looking for the '\0' byte in UCS2
68
           and UCS4 representations. */
69
        if (needle != 0) {
  Branch (69:13): [True: 217, False: 1]
70
            do {
71
                void *candidate = memchr(p, needle,
72
                                         (e - p) * sizeof(STRINGLIB_CHAR));
73
                if (candidate == NULL)
  Branch (73:21): [True: 19, False: 199]
74
                    return -1;
75
                s1 = p;
76
                p = (const STRINGLIB_CHAR *)
77
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
78
                if (*p == ch)
  Branch (78:21): [True: 195, False: 4]
79
                    return (p - s);
80
                /* False positive */
81
                p++;
82
                if (p - s1 > MEMCHR_CUT_OFF)
  Branch (82:21): [True: 2, False: 2]
83
                    continue;
84
                if (e - p <= MEMCHR_CUT_OFF)
  Branch (84:21): [True: 0, False: 2]
85
                    break;
86
                e1 = p + MEMCHR_CUT_OFF;
87
                while (p != e1) {
  Branch (87:24): [True: 6, False: 0]
88
                    if (*p == ch)
  Branch (88:25): [True: 2, False: 4]
89
                        return (p - s);
90
                    p++;
91
                }
92
            }
93
            while (
e - p > 2
MEMCHR_CUT_OFF2
);
  Branch (93:20): [True: 1, False: 1]
94
        }
95
#endif
96
    }
97
    
while (73.2k
p < e) {
  Branch (97:12): [True: 233k, False: 72.1k]
98
        if (*p == ch)
  Branch (98:13): [True: 1.08k, False: 232k]
99
            return (p - s);
100
        p++;
101
    }
102
    return -1;
103
}
unicodeobject.c:ucs4lib_find_char
Line
Count
Source
50
{
51
    const STRINGLIB_CHAR *p, *e;
52
53
    p = s;
54
    e = s + n;
55
    if (n > MEMCHR_CUT_OFF) {
  Branch (55:9): [True: 48, False: 1.04M]
56
#ifdef STRINGLIB_FAST_MEMCHR
57
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
58
        if (p != NULL)
  Branch (58:13): [True: 25, False: 23]
59
            return (p - s);
60
        return -1;
61
#else
62
        /* use memchr if we can choose a needle without too many likely
63
           false positives */
64
        const STRINGLIB_CHAR *s1, *e1;
65
        unsigned char needle = ch & 0xff;
66
        /* If looking for a multiple of 256, we'd have too
67
           many false positives looking for the '\0' byte in UCS2
68
           and UCS4 representations. */
69
        if (needle != 0) {
70
            do {
71
                void *candidate = memchr(p, needle,
72
                                         (e - p) * sizeof(STRINGLIB_CHAR));
73
                if (candidate == NULL)
74
                    return -1;
75
                s1 = p;
76
                p = (const STRINGLIB_CHAR *)
77
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
78
                if (*p == ch)
79
                    return (p - s);
80
                /* False positive */
81
                p++;
82
                if (p - s1 > MEMCHR_CUT_OFF)
83
                    continue;
84
                if (e - p <= MEMCHR_CUT_OFF)
85
                    break;
86
                e1 = p + MEMCHR_CUT_OFF;
87
                while (p != e1) {
88
                    if (*p == ch)
89
                        return (p - s);
90
                    p++;
91
                }
92
            }
93
            while (e - p > MEMCHR_CUT_OFF);
94
        }
95
#endif
96
    }
97
    
while (1.04M
p < e) {
  Branch (97:12): [True: 3.14M, False: 1.04M]
98
        if (*p == ch)
  Branch (98:13): [True: 10, False: 3.14M]
99
            return (p - s);
100
        p++;
101
    }
102
    return -1;
103
}
unicodeobject.c:asciilib_find_char
Line
Count
Source
50
{
51
    const STRINGLIB_CHAR *p, *e;
52
53
    p = s;
54
    e = s + n;
55
    if (n > MEMCHR_CUT_OFF) {
  Branch (55:9): [True: 459k, False: 66.0k]
56
#ifdef STRINGLIB_FAST_MEMCHR
57
        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
58
        if (p != NULL)
  Branch (58:13): [True: 453k, False: 5.94k]
59
            return (p - s);
60
        return -1;
61
#else
62
        /* use memchr if we can choose a needle without too many likely
63
           false positives */
64
        const STRINGLIB_CHAR *s1, *e1;
65
        unsigned char needle = ch & 0xff;
66
        /* If looking for a multiple of 256, we'd have too
67
           many false positives looking for the '\0' byte in UCS2
68
           and UCS4 representations. */
69
        if (needle != 0) {
70
            do {
71
                void *candidate = memchr(p, needle,
72
                                         (e - p) * sizeof(STRINGLIB_CHAR));
73
                if (candidate == NULL)
74
                    return -1;
75
                s1 = p;
76
                p = (const STRINGLIB_CHAR *)
77
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
78
                if (*p == ch)
79
                    return (p - s);
80
                /* False positive */
81
                p++;
82
                if (p - s1 > MEMCHR_CUT_OFF)
83
                    continue;
84
                if (e - p <= MEMCHR_CUT_OFF)
85
                    break;
86
                e1 = p + MEMCHR_CUT_OFF;
87
                while (p != e1) {
88
                    if (*p == ch)
89
                        return (p - s);
90
                    p++;
91
                }
92
            }
93
            while (e - p > MEMCHR_CUT_OFF);
94
        }
95
#endif
96
    }
97
    
while (66.0k
p < e) {
  Branch (97:12): [True: 378k, False: 14.2k]
98
        if (*p == ch)
  Branch (98:13): [True: 51.8k, False: 326k]
99
            return (p - s);
100
        p++;
101
    }
102
    return -1;
103
}
104
105
#undef MEMCHR_CUT_OFF
106
107
#if STRINGLIB_SIZEOF_CHAR == 1
108
#  define MEMRCHR_CUT_OFF 15
109
#else
110
#  define MEMRCHR_CUT_OFF 40
111
#endif
112
113
114
Py_LOCAL_INLINE(Py_ssize_t)
115
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
116
{
117
    const STRINGLIB_CHAR *p;
118
#ifdef HAVE_MEMRCHR
119
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
120
       doesn't seem as optimized as memchr(), but is still quite
121
       faster than our hand-written loop below. There is no wmemrchr
122
       for 4-byte chars. */
123
124
    if (n > MEMRCHR_CUT_OFF) {
  Branch (124:9): [True: 4.31k, False: 3.07k]
  Branch (124:9): [True: 0, False: 5]
  Branch (124:9): [True: 60, False: 256]
  Branch (124:9): [True: 375k, False: 329k]
  Branch (124:9): [True: 23, False: 374]
  Branch (124:9): [True: 7, False: 10]
  Branch (124:9): [True: 33.9k, False: 106k]
125
#if STRINGLIB_SIZEOF_CHAR == 1
126
        p = memrchr(s, ch, n);
127
        if (p != NULL)
  Branch (127:13): [True: 2.23k, False: 2.07k]
  Branch (127:13): [True: 0, False: 0]
  Branch (127:13): [True: 8, False: 52]
  Branch (127:13): [True: 367k, False: 8.24k]
  Branch (127:13): [True: 30.7k, False: 3.15k]
128
            return (p - s);
129
        return -1;
130
#else
131
        /* use memrchr if we can choose a needle without too many likely
132
           false positives */
133
        const STRINGLIB_CHAR *s1;
134
        Py_ssize_t n1;
135
        unsigned char needle = ch & 0xff;
136
        /* If looking for a multiple of 256, we'd have too
137
           many false positives looking for the '\0' byte in UCS2
138
           and UCS4 representations. */
139
        if (needle != 0) {
  Branch (139:13): [True: 19, False: 4]
  Branch (139:13): [True: 7, False: 0]
140
            do {
141
                void *candidate = memrchr(s, needle,
142
                                          n * sizeof(STRINGLIB_CHAR));
143
                if (candidate == NULL)
  Branch (143:21): [True: 2, False: 17]
  Branch (143:21): [True: 0, False: 7]
144
                    return -1;
145
                n1 = n;
146
                p = (const STRINGLIB_CHAR *)
147
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
148
                n = p - s;
149
                if (*p == ch)
  Branch (149:21): [True: 16, False: 1]
  Branch (149:21): [True: 5, False: 2]
150
                    return n;
151
                /* False positive */
152
                if (n1 - n > MEMRCHR_CUT_OFF)
  Branch (152:21): [True: 1, False: 0]
  Branch (152:21): [True: 2, False: 0]
153
                    continue;
154
                if (n <= MEMRCHR_CUT_OFF)
  Branch (154:21): [True: 0, False: 0]
  Branch (154:21): [True: 0, False: 0]
155
                    break;
156
                s1 = p - MEMRCHR_CUT_OFF;
157
                while (p > s1) {
  Branch (157:24): [True: 0, False: 0]
  Branch (157:24): [True: 0, False: 0]
158
                    p--;
159
                    if (*p == ch)
  Branch (159:25): [True: 0, False: 0]
  Branch (159:25): [True: 0, False: 0]
160
                        return (p - s);
161
                }
162
                n = p - s;
163
            }
164
            while (
n > 3
MEMRCHR_CUT_OFF3
);
  Branch (164:20): [True: 0, False: 1]
  Branch (164:20): [True: 0, False: 2]
165
        }
166
#endif
167
    }
168
#endif  /* HAVE_MEMRCHR */
169
    p = s + n;
170
    while (p > s) {
  Branch (170:12): [True: 9.03k, False: 752]
  Branch (170:12): [True: 18, False: 4]
  Branch (170:12): [True: 2.91k, False: 239]
  Branch (170:12): [True: 1.66M, False: 146k]
  Branch (170:12): [True: 2.72k, False: 264]
  Branch (170:12): [True: 107, False: 5]
  Branch (170:12): [True: 753k, False: 86.1k]
171
        p--;
172
        if (*p == ch)
  Branch (172:13): [True: 2.32k, False: 6.71k]
  Branch (172:13): [True: 1, False: 17]
  Branch (172:13): [True: 17, False: 2.89k]
  Branch (172:13): [True: 182k, False: 1.47M]
  Branch (172:13): [True: 115, False: 2.61k]
  Branch (172:13): [True: 7, False: 100]
  Branch (172:13): [True: 20.8k, False: 732k]
173
            return (p - s);
174
    }
175
    return -1;
176
}
bytes_methods.c:stringlib_rfind_char
Line
Count
Source
116
{
117
    const STRINGLIB_CHAR *p;
118
#ifdef HAVE_MEMRCHR
119
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
120
       doesn't seem as optimized as memchr(), but is still quite
121
       faster than our hand-written loop below. There is no wmemrchr
122
       for 4-byte chars. */
123
124
    if (n > MEMRCHR_CUT_OFF) {
  Branch (124:9): [True: 4.31k, False: 3.07k]
125
#if STRINGLIB_SIZEOF_CHAR == 1
126
        p = memrchr(s, ch, n);
127
        if (p != NULL)
  Branch (127:13): [True: 2.23k, False: 2.07k]
128
            return (p - s);
129
        return -1;
130
#else
131
        /* use memrchr if we can choose a needle without too many likely
132
           false positives */
133
        const STRINGLIB_CHAR *s1;
134
        Py_ssize_t n1;
135
        unsigned char needle = ch & 0xff;
136
        /* If looking for a multiple of 256, we'd have too
137
           many false positives looking for the '\0' byte in UCS2
138
           and UCS4 representations. */
139
        if (needle != 0) {
140
            do {
141
                void *candidate = memrchr(s, needle,
142
                                          n * sizeof(STRINGLIB_CHAR));
143
                if (candidate == NULL)
144
                    return -1;
145
                n1 = n;
146
                p = (const STRINGLIB_CHAR *)
147
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
148
                n = p - s;
149
                if (*p == ch)
150
                    return n;
151
                /* False positive */
152
                if (n1 - n > MEMRCHR_CUT_OFF)
153
                    continue;
154
                if (n <= MEMRCHR_CUT_OFF)
155
                    break;
156
                s1 = p - MEMRCHR_CUT_OFF;
157
                while (p > s1) {
158
                    p--;
159
                    if (*p == ch)
160
                        return (p - s);
161
                }
162
                n = p - s;
163
            }
164
            while (n > MEMRCHR_CUT_OFF);
165
        }
166
#endif
167
    }
168
#endif  /* HAVE_MEMRCHR */
169
    p = s + n;
170
    while (p > s) {
  Branch (170:12): [True: 9.03k, False: 752]
171
        p--;
172
        if (*p == ch)
  Branch (172:13): [True: 2.32k, False: 6.71k]
173
            return (p - s);
174
    }
175
    return -1;
176
}
bytearrayobject.c:stringlib_rfind_char
Line
Count
Source
116
{
117
    const STRINGLIB_CHAR *p;
118
#ifdef HAVE_MEMRCHR
119
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
120
       doesn't seem as optimized as memchr(), but is still quite
121
       faster than our hand-written loop below. There is no wmemrchr
122
       for 4-byte chars. */
123
124
    if (n > MEMRCHR_CUT_OFF) {
  Branch (124:9): [True: 0, False: 5]
125
#if STRINGLIB_SIZEOF_CHAR == 1
126
        p = memrchr(s, ch, n);
127
        if (p != NULL)
  Branch (127:13): [True: 0, False: 0]
128
            return (p - s);
129
        return -1;
130
#else
131
        /* use memrchr if we can choose a needle without too many likely
132
           false positives */
133
        const STRINGLIB_CHAR *s1;
134
        Py_ssize_t n1;
135
        unsigned char needle = ch & 0xff;
136
        /* If looking for a multiple of 256, we'd have too
137
           many false positives looking for the '\0' byte in UCS2
138
           and UCS4 representations. */
139
        if (needle != 0) {
140
            do {
141
                void *candidate = memrchr(s, needle,
142
                                          n * sizeof(STRINGLIB_CHAR));
143
                if (candidate == NULL)
144
                    return -1;
145
                n1 = n;
146
                p = (const STRINGLIB_CHAR *)
147
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
148
                n = p - s;
149
                if (*p == ch)
150
                    return n;
151
                /* False positive */
152
                if (n1 - n > MEMRCHR_CUT_OFF)
153
                    continue;
154
                if (n <= MEMRCHR_CUT_OFF)
155
                    break;
156
                s1 = p - MEMRCHR_CUT_OFF;
157
                while (p > s1) {
158
                    p--;
159
                    if (*p == ch)
160
                        return (p - s);
161
                }
162
                n = p - s;
163
            }
164
            while (n > MEMRCHR_CUT_OFF);
165
        }
166
#endif
167
    }
168
#endif  /* HAVE_MEMRCHR */
169
    p = s + n;
170
    while (p > s) {
  Branch (170:12): [True: 18, False: 4]
171
        p--;
172
        if (*p == ch)
  Branch (172:13): [True: 1, False: 17]
173
            return (p - s);
174
    }
175
    return -1;
176
}
bytesobject.c:stringlib_rfind_char
Line
Count
Source
116
{
117
    const STRINGLIB_CHAR *p;
118
#ifdef HAVE_MEMRCHR
119
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
120
       doesn't seem as optimized as memchr(), but is still quite
121
       faster than our hand-written loop below. There is no wmemrchr
122
       for 4-byte chars. */
123
124
    if (n > MEMRCHR_CUT_OFF) {
  Branch (124:9): [True: 60, False: 256]
125
#if STRINGLIB_SIZEOF_CHAR == 1
126
        p = memrchr(s, ch, n);
127
        if (p != NULL)
  Branch (127:13): [True: 8, False: 52]
128
            return (p - s);
129
        return -1;
130
#else
131
        /* use memrchr if we can choose a needle without too many likely
132
           false positives */
133
        const STRINGLIB_CHAR *s1;
134
        Py_ssize_t n1;
135
        unsigned char needle = ch & 0xff;
136
        /* If looking for a multiple of 256, we'd have too
137
           many false positives looking for the '\0' byte in UCS2
138
           and UCS4 representations. */
139
        if (needle != 0) {
140
            do {
141
                void *candidate = memrchr(s, needle,
142
                                          n * sizeof(STRINGLIB_CHAR));
143
                if (candidate == NULL)
144
                    return -1;
145
                n1 = n;
146
                p = (const STRINGLIB_CHAR *)
147
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
148
                n = p - s;
149
                if (*p == ch)
150
                    return n;
151
                /* False positive */
152
                if (n1 - n > MEMRCHR_CUT_OFF)
153
                    continue;
154
                if (n <= MEMRCHR_CUT_OFF)
155
                    break;
156
                s1 = p - MEMRCHR_CUT_OFF;
157
                while (p > s1) {
158
                    p--;
159
                    if (*p == ch)
160
                        return (p - s);
161
                }
162
                n = p - s;
163
            }
164
            while (n > MEMRCHR_CUT_OFF);
165
        }
166
#endif
167
    }
168
#endif  /* HAVE_MEMRCHR */
169
    p = s + n;
170
    while (p > s) {
  Branch (170:12): [True: 2.91k, False: 239]
171
        p--;
172
        if (*p == ch)
  Branch (172:13): [True: 17, False: 2.89k]
173
            return (p - s);
174
    }
175
    return -1;
176
}
unicodeobject.c:ucs1lib_rfind_char
Line
Count
Source
116
{
117
    const STRINGLIB_CHAR *p;
118
#ifdef HAVE_MEMRCHR
119
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
120
       doesn't seem as optimized as memchr(), but is still quite
121
       faster than our hand-written loop below. There is no wmemrchr
122
       for 4-byte chars. */
123
124
    if (n > MEMRCHR_CUT_OFF) {
  Branch (124:9): [True: 375k, False: 329k]
125
#if STRINGLIB_SIZEOF_CHAR == 1
126
        p = memrchr(s, ch, n);
127
        if (p != NULL)
  Branch (127:13): [True: 367k, False: 8.24k]
128
            return (p - s);
129
        return -1;
130
#else
131
        /* use memrchr if we can choose a needle without too many likely
132
           false positives */
133
        const STRINGLIB_CHAR *s1;
134
        Py_ssize_t n1;
135
        unsigned char needle = ch & 0xff;
136
        /* If looking for a multiple of 256, we'd have too
137
           many false positives looking for the '\0' byte in UCS2
138
           and UCS4 representations. */
139
        if (needle != 0) {
140
            do {
141
                void *candidate = memrchr(s, needle,
142
                                          n * sizeof(STRINGLIB_CHAR));
143
                if (candidate == NULL)
144
                    return -1;
145
                n1 = n;
146
                p = (const STRINGLIB_CHAR *)
147
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
148
                n = p - s;
149
                if (*p == ch)
150
                    return n;
151
                /* False positive */
152
                if (n1 - n > MEMRCHR_CUT_OFF)
153
                    continue;
154
                if (n <= MEMRCHR_CUT_OFF)
155
                    break;
156
                s1 = p - MEMRCHR_CUT_OFF;
157
                while (p > s1) {
158
                    p--;
159
                    if (*p == ch)
160
                        return (p - s);
161
                }
162
                n = p - s;
163
            }
164
            while (n > MEMRCHR_CUT_OFF);
165
        }
166
#endif
167
    }
168
#endif  /* HAVE_MEMRCHR */
169
    p = s + n;
170
    while (p > s) {
  Branch (170:12): [True: 1.66M, False: 146k]
171
        p--;
172
        if (*p == ch)
  Branch (172:13): [True: 182k, False: 1.47M]
173
            return (p - s);
174
    }
175
    return -1;
176
}
unicodeobject.c:ucs2lib_rfind_char
Line
Count
Source
116
{
117
    const STRINGLIB_CHAR *p;
118
#ifdef HAVE_MEMRCHR
119
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
120
       doesn't seem as optimized as memchr(), but is still quite
121
       faster than our hand-written loop below. There is no wmemrchr
122
       for 4-byte chars. */
123
124
    if (n > MEMRCHR_CUT_OFF) {
  Branch (124:9): [True: 23, False: 374]
125
#if STRINGLIB_SIZEOF_CHAR == 1
126
        p = memrchr(s, ch, n);
127
        if (p != NULL)
128
            return (p - s);
129
        return -1;
130
#else
131
        /* use memrchr if we can choose a needle without too many likely
132
           false positives */
133
        const STRINGLIB_CHAR *s1;
134
        Py_ssize_t n1;
135
        unsigned char needle = ch & 0xff;
136
        /* If looking for a multiple of 256, we'd have too
137
           many false positives looking for the '\0' byte in UCS2
138
           and UCS4 representations. */
139
        if (needle != 0) {
  Branch (139:13): [True: 19, False: 4]
140
            do {
141
                void *candidate = memrchr(s, needle,
142
                                          n * sizeof(STRINGLIB_CHAR));
143
                if (candidate == NULL)
  Branch (143:21): [True: 2, False: 17]
144
                    return -1;
145
                n1 = n;
146
                p = (const STRINGLIB_CHAR *)
147
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
148
                n = p - s;
149
                if (*p == ch)
  Branch (149:21): [True: 16, False: 1]
150
                    return n;
151
                /* False positive */
152
                if (n1 - n > MEMRCHR_CUT_OFF)
  Branch (152:21): [True: 1, False: 0]
153
                    continue;
154
                if (n <= MEMRCHR_CUT_OFF)
  Branch (154:21): [True: 0, False: 0]
155
                    break;
156
                s1 = p - MEMRCHR_CUT_OFF;
157
                while (p > s1) {
  Branch (157:24): [True: 0, False: 0]
158
                    p--;
159
                    if (*p == ch)
  Branch (159:25): [True: 0, False: 0]
160
                        return (p - s);
161
                }
162
                n = p - s;
163
            }
164
            while (
n > 1
MEMRCHR_CUT_OFF1
);
  Branch (164:20): [True: 0, False: 1]
165
        }
166
#endif
167
    }
168
#endif  /* HAVE_MEMRCHR */
169
    p = s + n;
170
    while (p > s) {
  Branch (170:12): [True: 2.72k, False: 264]
171
        p--;
172
        if (*p == ch)
  Branch (172:13): [True: 115, False: 2.61k]
173
            return (p - s);
174
    }
175
    return -1;
176
}
unicodeobject.c:ucs4lib_rfind_char
Line
Count
Source
116
{
117
    const STRINGLIB_CHAR *p;
118
#ifdef HAVE_MEMRCHR
119
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
120
       doesn't seem as optimized as memchr(), but is still quite
121
       faster than our hand-written loop below. There is no wmemrchr
122
       for 4-byte chars. */
123
124
    if (n > MEMRCHR_CUT_OFF) {
  Branch (124:9): [True: 7, False: 10]
125
#if STRINGLIB_SIZEOF_CHAR == 1
126
        p = memrchr(s, ch, n);
127
        if (p != NULL)
128
            return (p - s);
129
        return -1;
130
#else
131
        /* use memrchr if we can choose a needle without too many likely
132
           false positives */
133
        const STRINGLIB_CHAR *s1;
134
        Py_ssize_t n1;
135
        unsigned char needle = ch & 0xff;
136
        /* If looking for a multiple of 256, we'd have too
137
           many false positives looking for the '\0' byte in UCS2
138
           and UCS4 representations. */
139
        if (needle != 0) {
  Branch (139:13): [True: 7, False: 0]
140
            do {
141
                void *candidate = memrchr(s, needle,
142
                                          n * sizeof(STRINGLIB_CHAR));
143
                if (candidate == NULL)
  Branch (143:21): [True: 0, False: 7]
144
                    return -1;
145
                n1 = n;
146
                p = (const STRINGLIB_CHAR *)
147
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
148
                n = p - s;
149
                if (*p == ch)
  Branch (149:21): [True: 5, False: 2]
150
                    return n;
151
                /* False positive */
152
                if (n1 - n > MEMRCHR_CUT_OFF)
  Branch (152:21): [True: 2, False: 0]
153
                    continue;
154
                if (n <= MEMRCHR_CUT_OFF)
  Branch (154:21): [True: 0, False: 0]
155
                    break;
156
                s1 = p - MEMRCHR_CUT_OFF;
157
                while (p > s1) {
  Branch (157:24): [True: 0, False: 0]
158
                    p--;
159
                    if (*p == ch)
  Branch (159:25): [True: 0, False: 0]
160
                        return (p - s);
161
                }
162
                n = p - s;
163
            }
164
            while (
n > 2
MEMRCHR_CUT_OFF2
);
  Branch (164:20): [True: 0, False: 2]
165
        }
166
#endif
167
    }
168
#endif  /* HAVE_MEMRCHR */
169
    p = s + n;
170
    while (p > s) {
  Branch (170:12): [True: 107, False: 5]
171
        p--;
172
        if (*p == ch)
  Branch (172:13): [True: 7, False: 100]
173
            return (p - s);
174
    }
175
    return -1;
176
}
unicodeobject.c:asciilib_rfind_char
Line
Count
Source
116
{
117
    const STRINGLIB_CHAR *p;
118
#ifdef HAVE_MEMRCHR
119
    /* memrchr() is a GNU extension, available since glibc 2.1.91.  it
120
       doesn't seem as optimized as memchr(), but is still quite
121
       faster than our hand-written loop below. There is no wmemrchr
122
       for 4-byte chars. */
123
124
    if (n > MEMRCHR_CUT_OFF) {
  Branch (124:9): [True: 33.9k, False: 106k]
125
#if STRINGLIB_SIZEOF_CHAR == 1
126
        p = memrchr(s, ch, n);
127
        if (p != NULL)
  Branch (127:13): [True: 30.7k, False: 3.15k]
128
            return (p - s);
129
        return -1;
130
#else
131
        /* use memrchr if we can choose a needle without too many likely
132
           false positives */
133
        const STRINGLIB_CHAR *s1;
134
        Py_ssize_t n1;
135
        unsigned char needle = ch & 0xff;
136
        /* If looking for a multiple of 256, we'd have too
137
           many false positives looking for the '\0' byte in UCS2
138
           and UCS4 representations. */
139
        if (needle != 0) {
140
            do {
141
                void *candidate = memrchr(s, needle,
142
                                          n * sizeof(STRINGLIB_CHAR));
143
                if (candidate == NULL)
144
                    return -1;
145
                n1 = n;
146
                p = (const STRINGLIB_CHAR *)
147
                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
148
                n = p - s;
149
                if (*p == ch)
150
                    return n;
151
                /* False positive */
152
                if (n1 - n > MEMRCHR_CUT_OFF)
153
                    continue;
154
                if (n <= MEMRCHR_CUT_OFF)
155
                    break;
156
                s1 = p - MEMRCHR_CUT_OFF;
157
                while (p > s1) {
158
                    p--;
159
                    if (*p == ch)
160
                        return (p - s);
161
                }
162
                n = p - s;
163
            }
164
            while (n > MEMRCHR_CUT_OFF);
165
        }
166
#endif
167
    }
168
#endif  /* HAVE_MEMRCHR */
169
    p = s + n;
170
    while (p > s) {
  Branch (170:12): [True: 753k, False: 86.1k]
171
        p--;
172
        if (*p == ch)
  Branch (172:13): [True: 20.8k, False: 732k]
173
            return (p - s);
174
    }
175
    return -1;
176
}
177
178
#undef MEMRCHR_CUT_OFF
179
180
/* Change to a 1 to see logging comments walk through the algorithm. */
181
#if 0 && STRINGLIB_SIZEOF_CHAR == 1
182
# define LOG(...) printf(__VA_ARGS__)
183
# define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
184
# define LOG_LINEUP() do {                                         \
185
    LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> ");    \
186
    LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
187
    LOG_STRING(needle, len_needle); LOG("\n");                     \
188
} while(0)
189
#else
190
# define LOG(...)
191
# define LOG_STRING(s, n)
192
# define LOG_LINEUP()
193
#endif
194
195
Py_LOCAL_INLINE(Py_ssize_t)
196
STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
197
                       Py_ssize_t *return_period, int invert_alphabet)
198
{
199
    /* Do a lexicographic search. Essentially this:
200
           >>> max(needle[i:] for i in range(len(needle)+1))
201
       Also find the period of the right half.   */
202
    Py_ssize_t max_suffix = 0;
203
    Py_ssize_t candidate = 1;
204
    Py_ssize_t k = 0;
205
    // The period of the right half.
206
    Py_ssize_t period = 1;
207
208
    while (candidate + k < len_needle) {
  Branch (208:12): [True: 71.6k, False: 894]
  Branch (208:12): [True: 0, False: 0]
  Branch (208:12): [True: 0, False: 0]
  Branch (208:12): [True: 50.2k, False: 332]
  Branch (208:12): [True: 0, False: 0]
  Branch (208:12): [True: 1.78k, False: 98]
  Branch (208:12): [True: 0, False: 0]
209
        // each loop increases candidate + k + max_suffix
210
        STRINGLIB_CHAR a = needle[candidate + k];
211
        STRINGLIB_CHAR b = needle[max_suffix + k];
212
        // check if the suffix at candidate is better than max_suffix
213
        if (invert_alphabet ? 
(b < a)59.7k
:
(a < b)63.9k
) {
  Branch (213:13): [True: 33.8k, False: 37.8k]
  Branch (213:13): [True: 23.7k, False: 47.8k]
  Branch (213:13): [True: 0, False: 0]
  Branch (213:13): [True: 0, False: 0]
  Branch (213:13): [True: 0, False: 0]
  Branch (213:13): [True: 0, False: 0]
  Branch (213:13): [True: 25.1k, False: 25.1k]
  Branch (213:13): [True: 7.50k, False: 42.7k]
  Branch (213:13): [True: 0, False: 0]
  Branch (213:13): [True: 0, False: 0]
  Branch (213:13): [True: 827, False: 962]
  Branch (213:13): [True: 1.23k, False: 552]
  Branch (213:13): [True: 0, False: 0]
  Branch (213:13): [True: 0, False: 0]
214
            // Fell short of max_suffix.
215
            // The next k + 1 characters are non-increasing
216
            // from candidate, so they won't start a maximal suffix.
217
            candidate += k + 1;
218
            k = 0;
219
            // We've ruled out any period smaller than what's
220
            // been scanned since max_suffix.
221
            period = candidate - max_suffix;
222
        }
223
        else if (a == b) {
  Branch (223:18): [True: 46.7k, False: 1.11k]
  Branch (223:18): [True: 0, False: 0]
  Branch (223:18): [True: 0, False: 0]
  Branch (223:18): [True: 42.2k, False: 507]
  Branch (223:18): [True: 0, False: 0]
  Branch (223:18): [True: 418, False: 134]
  Branch (223:18): [True: 0, False: 0]
224
            if (k + 1 != period) {
  Branch (224:17): [True: 38.6k, False: 8.12k]
  Branch (224:17): [True: 0, False: 0]
  Branch (224:17): [True: 0, False: 0]
  Branch (224:17): [True: 35.2k, False: 6.98k]
  Branch (224:17): [True: 0, False: 0]
  Branch (224:17): [True: 418, False: 0]
  Branch (224:17): [True: 0, False: 0]
225
                // Keep scanning the equal strings
226
                k++;
227
            }
228
            else {
229
                // Matched a whole period.
230
                // Start matching the next period.
231
                candidate += period;
232
                k = 0;
233
            }
234
        }
235
        else {
236
            // Did better than max_suffix, so replace it.
237
            max_suffix = candidate;
238
            candidate++;
239
            k = 0;
240
            period = 1;
241
        }
242
    }
243
    *return_period = period;
244
    return max_suffix;
245
}
bytes_methods.c:stringlib__lex_search
Line
Count
Source
198
{
199
    /* Do a lexicographic search. Essentially this:
200
           >>> max(needle[i:] for i in range(len(needle)+1))
201
       Also find the period of the right half.   */
202
    Py_ssize_t max_suffix = 0;
203
    Py_ssize_t candidate = 1;
204
    Py_ssize_t k = 0;
205
    // The period of the right half.
206
    Py_ssize_t period = 1;
207
208
    while (candidate + k < len_needle) {
  Branch (208:12): [True: 71.6k, False: 894]
209
        // each loop increases candidate + k + max_suffix
210
        STRINGLIB_CHAR a = needle[candidate + k];
211
        STRINGLIB_CHAR b = needle[max_suffix + k];
212
        // check if the suffix at candidate is better than max_suffix
213
        if (invert_alphabet ? 
(b < a)33.8k
:
(a < b)37.8k
) {
  Branch (213:13): [True: 33.8k, False: 37.8k]
  Branch (213:13): [True: 23.7k, False: 47.8k]
214
            // Fell short of max_suffix.
215
            // The next k + 1 characters are non-increasing
216
            // from candidate, so they won't start a maximal suffix.
217
            candidate += k + 1;
218
            k = 0;
219
            // We've ruled out any period smaller than what's
220
            // been scanned since max_suffix.
221
            period = candidate - max_suffix;
222
        }
223
        else if (a == b) {
  Branch (223:18): [True: 46.7k, False: 1.11k]
224
            if (k + 1 != period) {
  Branch (224:17): [True: 38.6k, False: 8.12k]
225
                // Keep scanning the equal strings
226
                k++;
227
            }
228
            else {
229
                // Matched a whole period.
230
                // Start matching the next period.
231
                candidate += period;
232
                k = 0;
233
            }
234
        }
235
        else {
236
            // Did better than max_suffix, so replace it.
237
            max_suffix = candidate;
238
            candidate++;
239
            k = 0;
240
            period = 1;
241
        }
242
    }
243
    *return_period = period;
244
    return max_suffix;
245
}
Unexecuted instantiation: bytearrayobject.c:stringlib__lex_search
Unexecuted instantiation: bytesobject.c:stringlib__lex_search
unicodeobject.c:asciilib__lex_search
Line
Count
Source
198
{
199
    /* Do a lexicographic search. Essentially this:
200
           >>> max(needle[i:] for i in range(len(needle)+1))
201
       Also find the period of the right half.   */
202
    Py_ssize_t max_suffix = 0;
203
    Py_ssize_t candidate = 1;
204
    Py_ssize_t k = 0;
205
    // The period of the right half.
206
    Py_ssize_t period = 1;
207
208
    while (candidate + k < len_needle) {
  Branch (208:12): [True: 50.2k, False: 332]
209
        // each loop increases candidate + k + max_suffix
210
        STRINGLIB_CHAR a = needle[candidate + k];
211
        STRINGLIB_CHAR b = needle[max_suffix + k];
212
        // check if the suffix at candidate is better than max_suffix
213
        if (invert_alphabet ? 
(b < a)25.1k
:
(a < b)25.1k
) {
  Branch (213:13): [True: 25.1k, False: 25.1k]
  Branch (213:13): [True: 7.50k, False: 42.7k]
214
            // Fell short of max_suffix.
215
            // The next k + 1 characters are non-increasing
216
            // from candidate, so they won't start a maximal suffix.
217
            candidate += k + 1;
218
            k = 0;
219
            // We've ruled out any period smaller than what's
220
            // been scanned since max_suffix.
221
            period = candidate - max_suffix;
222
        }
223
        else if (a == b) {
  Branch (223:18): [True: 42.2k, False: 507]
224
            if (k + 1 != period) {
  Branch (224:17): [True: 35.2k, False: 6.98k]
225
                // Keep scanning the equal strings
226
                k++;
227
            }
228
            else {
229
                // Matched a whole period.
230
                // Start matching the next period.
231
                candidate += period;
232
                k = 0;
233
            }
234
        }
235
        else {
236
            // Did better than max_suffix, so replace it.
237
            max_suffix = candidate;
238
            candidate++;
239
            k = 0;
240
            period = 1;
241
        }
242
    }
243
    *return_period = period;
244
    return max_suffix;
245
}
Unexecuted instantiation: unicodeobject.c:ucs1lib__lex_search
unicodeobject.c:ucs2lib__lex_search
Line
Count
Source
198
{
199
    /* Do a lexicographic search. Essentially this:
200
           >>> max(needle[i:] for i in range(len(needle)+1))
201
       Also find the period of the right half.   */
202
    Py_ssize_t max_suffix = 0;
203
    Py_ssize_t candidate = 1;
204
    Py_ssize_t k = 0;
205
    // The period of the right half.
206
    Py_ssize_t period = 1;
207
208
    while (candidate + k < len_needle) {
  Branch (208:12): [True: 1.78k, False: 98]
209
        // each loop increases candidate + k + max_suffix
210
        STRINGLIB_CHAR a = needle[candidate + k];
211
        STRINGLIB_CHAR b = needle[max_suffix + k];
212
        // check if the suffix at candidate is better than max_suffix
213
        if (invert_alphabet ? 
(b < a)827
:
(a < b)962
) {
  Branch (213:13): [True: 827, False: 962]
  Branch (213:13): [True: 1.23k, False: 552]
214
            // Fell short of max_suffix.
215
            // The next k + 1 characters are non-increasing
216
            // from candidate, so they won't start a maximal suffix.
217
            candidate += k + 1;
218
            k = 0;
219
            // We've ruled out any period smaller than what's
220
            // been scanned since max_suffix.
221
            period = candidate - max_suffix;
222
        }
223
        else if (a == b) {
  Branch (223:18): [True: 418, False: 134]
224
            if (k + 1 != period) {
  Branch (224:17): [True: 418, False: 0]
225
                // Keep scanning the equal strings
226
                k++;
227
            }
228
            else {
229
                // Matched a whole period.
230
                // Start matching the next period.
231
                candidate += period;
232
                k = 0;
233
            }
234
        }
235
        else {
236
            // Did better than max_suffix, so replace it.
237
            max_suffix = candidate;
238
            candidate++;
239
            k = 0;
240
            period = 1;
241
        }
242
    }
243
    *return_period = period;
244
    return max_suffix;
245
}
Unexecuted instantiation: unicodeobject.c:ucs4lib__lex_search
246
247
Py_LOCAL_INLINE(Py_ssize_t)
248
STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
249
                      Py_ssize_t len_needle,
250
                      Py_ssize_t *return_period)
251
{
252
    /* Do a "critical factorization", making it so that:
253
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
254
       where the "local period" of the cut is maximal.
255
256
       The local period of the cut is the minimal length of a string w
257
       such that (left endswith w or w endswith left)
258
       and (right startswith w or w startswith left).
259
260
       The Critical Factorization Theorem says that this maximal local
261
       period is the global period of the string.
262
263
       Crochemore and Perrin (1991) show that this cut can be computed
264
       as the later of two cuts: one that gives a lexicographically
265
       maximal right half, and one that gives the same with the
266
       with respect to a reversed alphabet-ordering.
267
268
       This is what we want to happen:
269
           >>> x = "GCAGAGAG"
270
           >>> cut, period = factorize(x)
271
           >>> x[:cut], (right := x[cut:])
272
           ('GC', 'AGAGAG')
273
           >>> period  # right half period
274
           2
275
           >>> right[period:] == right[:-period]
276
           True
277
278
       This is how the local period lines up in the above example:
279
                GC | AGAGAG
280
           AGAGAGC = AGAGAGC
281
       The length of this minimal repetition is 7, which is indeed the
282
       period of the original string. */
283
284
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
285
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
286
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
287
288
    // Take the later cut.
289
    if (cut1 > cut2) {
  Branch (289:9): [True: 372, False: 75]
  Branch (289:9): [True: 0, False: 0]
  Branch (289:9): [True: 0, False: 0]
  Branch (289:9): [True: 82, False: 84]
  Branch (289:9): [True: 0, False: 0]
  Branch (289:9): [True: 34, False: 15]
  Branch (289:9): [True: 0, False: 0]
290
        period = period1;
291
        cut = cut1;
292
    }
293
    else {
294
        period = period2;
295
        cut = cut2;
296
    }
297
298
    LOG("split: "); LOG_STRING(needle, cut);
299
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
300
    LOG("\n");
301
302
    *return_period = period;
303
    return cut;
304
}
bytes_methods.c:stringlib__factorize
Line
Count
Source
251
{
252
    /* Do a "critical factorization", making it so that:
253
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
254
       where the "local period" of the cut is maximal.
255
256
       The local period of the cut is the minimal length of a string w
257
       such that (left endswith w or w endswith left)
258
       and (right startswith w or w startswith left).
259
260
       The Critical Factorization Theorem says that this maximal local
261
       period is the global period of the string.
262
263
       Crochemore and Perrin (1991) show that this cut can be computed
264
       as the later of two cuts: one that gives a lexicographically
265
       maximal right half, and one that gives the same with the
266
       with respect to a reversed alphabet-ordering.
267
268
       This is what we want to happen:
269
           >>> x = "GCAGAGAG"
270
           >>> cut, period = factorize(x)
271
           >>> x[:cut], (right := x[cut:])
272
           ('GC', 'AGAGAG')
273
           >>> period  # right half period
274
           2
275
           >>> right[period:] == right[:-period]
276
           True
277
278
       This is how the local period lines up in the above example:
279
                GC | AGAGAG
280
           AGAGAGC = AGAGAGC
281
       The length of this minimal repetition is 7, which is indeed the
282
       period of the original string. */
283
284
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
285
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
286
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
287
288
    // Take the later cut.
289
    if (cut1 > cut2) {
  Branch (289:9): [True: 372, False: 75]
290
        period = period1;
291
        cut = cut1;
292
    }
293
    else {
294
        period = period2;
295
        cut = cut2;
296
    }
297
298
    LOG("split: "); LOG_STRING(needle, cut);
299
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
300
    LOG("\n");
301
302
    *return_period = period;
303
    return cut;
304
}
Unexecuted instantiation: bytearrayobject.c:stringlib__factorize
Unexecuted instantiation: bytesobject.c:stringlib__factorize
unicodeobject.c:asciilib__factorize
Line
Count
Source
251
{
252
    /* Do a "critical factorization", making it so that:
253
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
254
       where the "local period" of the cut is maximal.
255
256
       The local period of the cut is the minimal length of a string w
257
       such that (left endswith w or w endswith left)
258
       and (right startswith w or w startswith left).
259
260
       The Critical Factorization Theorem says that this maximal local
261
       period is the global period of the string.
262
263
       Crochemore and Perrin (1991) show that this cut can be computed
264
       as the later of two cuts: one that gives a lexicographically
265
       maximal right half, and one that gives the same with the
266
       with respect to a reversed alphabet-ordering.
267
268
       This is what we want to happen:
269
           >>> x = "GCAGAGAG"
270
           >>> cut, period = factorize(x)
271
           >>> x[:cut], (right := x[cut:])
272
           ('GC', 'AGAGAG')
273
           >>> period  # right half period
274
           2
275
           >>> right[period:] == right[:-period]
276
           True
277
278
       This is how the local period lines up in the above example:
279
                GC | AGAGAG
280
           AGAGAGC = AGAGAGC
281
       The length of this minimal repetition is 7, which is indeed the
282
       period of the original string. */
283
284
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
285
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
286
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
287
288
    // Take the later cut.
289
    if (cut1 > cut2) {
  Branch (289:9): [True: 82, False: 84]
290
        period = period1;
291
        cut = cut1;
292
    }
293
    else {
294
        period = period2;
295
        cut = cut2;
296
    }
297
298
    LOG("split: "); LOG_STRING(needle, cut);
299
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
300
    LOG("\n");
301
302
    *return_period = period;
303
    return cut;
304
}
Unexecuted instantiation: unicodeobject.c:ucs1lib__factorize
unicodeobject.c:ucs2lib__factorize
Line
Count
Source
251
{
252
    /* Do a "critical factorization", making it so that:
253
       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
254
       where the "local period" of the cut is maximal.
255
256
       The local period of the cut is the minimal length of a string w
257
       such that (left endswith w or w endswith left)
258
       and (right startswith w or w startswith left).
259
260
       The Critical Factorization Theorem says that this maximal local
261
       period is the global period of the string.
262
263
       Crochemore and Perrin (1991) show that this cut can be computed
264
       as the later of two cuts: one that gives a lexicographically
265
       maximal right half, and one that gives the same with the
266
       with respect to a reversed alphabet-ordering.
267
268
       This is what we want to happen:
269
           >>> x = "GCAGAGAG"
270
           >>> cut, period = factorize(x)
271
           >>> x[:cut], (right := x[cut:])
272
           ('GC', 'AGAGAG')
273
           >>> period  # right half period
274
           2
275
           >>> right[period:] == right[:-period]
276
           True
277
278
       This is how the local period lines up in the above example:
279
                GC | AGAGAG
280
           AGAGAGC = AGAGAGC
281
       The length of this minimal repetition is 7, which is indeed the
282
       period of the original string. */
283
284
    Py_ssize_t cut1, period1, cut2, period2, cut, period;
285
    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);
286
    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);
287
288
    // Take the later cut.
289
    if (cut1 > cut2) {
  Branch (289:9): [True: 34, False: 15]
290
        period = period1;
291
        cut = cut1;
292
    }
293
    else {
294
        period = period2;
295
        cut = cut2;
296
    }
297
298
    LOG("split: "); LOG_STRING(needle, cut);
299
    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
300
    LOG("\n");
301
302
    *return_period = period;
303
    return cut;
304
}
Unexecuted instantiation: unicodeobject.c:ucs4lib__factorize
305
306
307
#define SHIFT_TYPE uint8_t
308
#define MAX_SHIFT UINT8_MAX
309
310
#define TABLE_SIZE_BITS 6u
311
#define TABLE_SIZE (1U << TABLE_SIZE_BITS)
312
#define TABLE_MASK (TABLE_SIZE - 1U)
313
314
typedef struct STRINGLIB(_pre) {
315
    const STRINGLIB_CHAR *needle;
316
    Py_ssize_t len_needle;
317
    Py_ssize_t cut;
318
    Py_ssize_t period;
319
    Py_ssize_t gap;
320
    int is_periodic;
321
    SHIFT_TYPE table[TABLE_SIZE];
322
} STRINGLIB(prework);
323
324
325
static void
326
STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
327
                       STRINGLIB(prework) *p)
328
{
329
    p->needle = needle;
330
    p->len_needle = len_needle;
331
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
332
    assert(p->period + p->cut <= len_needle);
333
    p->is_periodic = (0 == memcmp(needle,
334
                                  needle + p->period,
335
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
336
    if (p->is_periodic) {
  Branch (336:9): [True: 138, False: 309]
  Branch (336:9): [True: 0, False: 0]
  Branch (336:9): [True: 0, False: 0]
  Branch (336:9): [True: 163, False: 3]
  Branch (336:9): [True: 0, False: 0]
  Branch (336:9): [True: 15, False: 34]
  Branch (336:9): [True: 0, False: 0]
337
        assert(p->cut <= len_needle/2);
338
        assert(p->cut < p->period);
339
        p->gap = 0; // unused
340
    }
341
    else {
342
        // A lower bound on the period
343
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
        // The gap between the last character and the previous
345
        // occurrence of an equivalent character (modulo TABLE_SIZE)
346
        p->gap = len_needle;
347
        STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
348
        for (Py_ssize_t i = len_needle - 2; i >= 0; 
i--9.39k
) {
  Branch (348:45): [True: 9.62k, False: 13]
  Branch (348:45): [True: 0, False: 0]
  Branch (348:45): [True: 0, False: 0]
  Branch (348:45): [True: 6, False: 0]
  Branch (348:45): [True: 0, False: 0]
  Branch (348:45): [True: 102, False: 0]
  Branch (348:45): [True: 0, False: 0]
349
            STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
350
            if (x == last) {
  Branch (350:17): [True: 296, False: 9.32k]
  Branch (350:17): [True: 0, False: 0]
  Branch (350:17): [True: 0, False: 0]
  Branch (350:17): [True: 3, False: 3]
  Branch (350:17): [True: 0, False: 0]
  Branch (350:17): [True: 34, False: 68]
  Branch (350:17): [True: 0, False: 0]
351
                p->gap = len_needle - 1 - i;
352
                break;
353
            }
354
        }
355
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; 
i++42.3k
) {
  Branch (358:28): [True: 28.6k, False: 447]
  Branch (358:28): [True: 0, False: 0]
  Branch (358:28): [True: 0, False: 0]
  Branch (358:28): [True: 10.6k, False: 166]
  Branch (358:28): [True: 0, False: 0]
  Branch (358:28): [True: 3.13k, False: 49]
  Branch (358:28): [True: 0, False: 0]
359
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
                                       Py_ssize_t, SHIFT_TYPE);
361
    }
362
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; 
i++46.8k
) {
  Branch (362:55): [True: 24.3k, False: 447]
  Branch (362:55): [True: 0, False: 0]
  Branch (362:55): [True: 0, False: 0]
  Branch (362:55): [True: 21.7k, False: 166]
  Branch (362:55): [True: 0, False: 0]
  Branch (362:55): [True: 875, False: 49]
  Branch (362:55): [True: 0, False: 0]
363
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
                                            Py_ssize_t, SHIFT_TYPE);
365
        p->table[needle[i] & TABLE_MASK] = shift;
366
    }
367
}
bytes_methods.c:stringlib__preprocess
Line
Count
Source
328
{
329
    p->needle = needle;
330
    p->len_needle = len_needle;
331
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
332
    assert(p->period + p->cut <= len_needle);
333
    p->is_periodic = (0 == memcmp(needle,
334
                                  needle + p->period,
335
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
336
    if (p->is_periodic) {
  Branch (336:9): [True: 138, False: 309]
337
        assert(p->cut <= len_needle/2);
338
        assert(p->cut < p->period);
339
        p->gap = 0; // unused
340
    }
341
    else {
342
        // A lower bound on the period
343
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
        // The gap between the last character and the previous
345
        // occurrence of an equivalent character (modulo TABLE_SIZE)
346
        p->gap = len_needle;
347
        STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
348
        for (Py_ssize_t i = len_needle - 2; i >= 0; 
i--9.32k
) {
  Branch (348:45): [True: 9.62k, False: 13]
349
            STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
350
            if (x == last) {
  Branch (350:17): [True: 296, False: 9.32k]
351
                p->gap = len_needle - 1 - i;
352
                break;
353
            }
354
        }
355
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; 
i++28.6k
) {
  Branch (358:28): [True: 28.6k, False: 447]
359
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
                                       Py_ssize_t, SHIFT_TYPE);
361
    }
362
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; 
i++24.3k
) {
  Branch (362:55): [True: 24.3k, False: 447]
363
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
                                            Py_ssize_t, SHIFT_TYPE);
365
        p->table[needle[i] & TABLE_MASK] = shift;
366
    }
367
}
Unexecuted instantiation: bytearrayobject.c:stringlib__preprocess
Unexecuted instantiation: bytesobject.c:stringlib__preprocess
unicodeobject.c:asciilib__preprocess
Line
Count
Source
328
{
329
    p->needle = needle;
330
    p->len_needle = len_needle;
331
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
332
    assert(p->period + p->cut <= len_needle);
333
    p->is_periodic = (0 == memcmp(needle,
334
                                  needle + p->period,
335
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
336
    if (p->is_periodic) {
  Branch (336:9): [True: 163, False: 3]
337
        assert(p->cut <= len_needle/2);
338
        assert(p->cut < p->period);
339
        p->gap = 0; // unused
340
    }
341
    else {
342
        // A lower bound on the period
343
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
        // The gap between the last character and the previous
345
        // occurrence of an equivalent character (modulo TABLE_SIZE)
346
        p->gap = len_needle;
347
        STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
348
        for (Py_ssize_t i = len_needle - 2; i >= 0; 
i--3
) {
  Branch (348:45): [True: 6, False: 0]
349
            STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
350
            if (x == last) {
  Branch (350:17): [True: 3, False: 3]
351
                p->gap = len_needle - 1 - i;
352
                break;
353
            }
354
        }
355
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; 
i++10.6k
) {
  Branch (358:28): [True: 10.6k, False: 166]
359
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
                                       Py_ssize_t, SHIFT_TYPE);
361
    }
362
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; 
i++21.7k
) {
  Branch (362:55): [True: 21.7k, False: 166]
363
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
                                            Py_ssize_t, SHIFT_TYPE);
365
        p->table[needle[i] & TABLE_MASK] = shift;
366
    }
367
}
Unexecuted instantiation: unicodeobject.c:ucs1lib__preprocess
unicodeobject.c:ucs2lib__preprocess
Line
Count
Source
328
{
329
    p->needle = needle;
330
    p->len_needle = len_needle;
331
    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
332
    assert(p->period + p->cut <= len_needle);
333
    p->is_periodic = (0 == memcmp(needle,
334
                                  needle + p->period,
335
                                  p->cut * STRINGLIB_SIZEOF_CHAR));
336
    if (p->is_periodic) {
  Branch (336:9): [True: 15, False: 34]
337
        assert(p->cut <= len_needle/2);
338
        assert(p->cut < p->period);
339
        p->gap = 0; // unused
340
    }
341
    else {
342
        // A lower bound on the period
343
        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
344
        // The gap between the last character and the previous
345
        // occurrence of an equivalent character (modulo TABLE_SIZE)
346
        p->gap = len_needle;
347
        STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
348
        for (Py_ssize_t i = len_needle - 2; i >= 0; 
i--68
) {
  Branch (348:45): [True: 102, False: 0]
349
            STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
350
            if (x == last) {
  Branch (350:17): [True: 34, False: 68]
351
                p->gap = len_needle - 1 - i;
352
                break;
353
            }
354
        }
355
    }
356
    // Fill up a compressed Boyer-Moore "Bad Character" table
357
    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
358
    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; 
i++3.13k
) {
  Branch (358:28): [True: 3.13k, False: 49]
359
        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
360
                                       Py_ssize_t, SHIFT_TYPE);
361
    }
362
    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; 
i++875
) {
  Branch (362:55): [True: 875, False: 49]
363
        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
364
                                            Py_ssize_t, SHIFT_TYPE);
365
        p->table[needle[i] & TABLE_MASK] = shift;
366
    }
367
}
Unexecuted instantiation: unicodeobject.c:ucs4lib__preprocess
368
369
static Py_ssize_t
370
STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
371
                    STRINGLIB(prework) *p)
372
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
    const Py_ssize_t len_needle = p->len_needle;
376
    const Py_ssize_t cut = p->cut;
377
    Py_ssize_t period = p->period;
378
    const STRINGLIB_CHAR *const needle = p->needle;
379
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
    SHIFT_TYPE *table = p->table;
382
    const STRINGLIB_CHAR *window;
383
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
    if (p->is_periodic) {
  Branch (385:9): [True: 138, False: 309]
  Branch (385:9): [True: 0, False: 0]
  Branch (385:9): [True: 0, False: 0]
  Branch (385:9): [True: 163, False: 3]
  Branch (385:9): [True: 0, False: 0]
  Branch (385:9): [True: 30, False: 75]
  Branch (385:9): [True: 0, False: 0]
386
        LOG("Needle is periodic.\n");
387
        Py_ssize_t memory = 0;
388
      periodicwindowloop:
389
        while (window_last < haystack_end) {
  Branch (389:16): [True: 2.83k, False: 0]
  Branch (389:16): [True: 0, False: 0]
  Branch (389:16): [True: 0, False: 0]
  Branch (389:16): [True: 3.39k, False: 0]
  Branch (389:16): [True: 0, False: 0]
  Branch (389:16): [True: 1.73k, False: 0]
  Branch (389:16): [True: 0, False: 0]
390
            assert(memory == 0);
391
            for (;;) {
392
                LOG_LINEUP();
393
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
394
                window_last += shift;
395
                if (shift == 0) {
  Branch (395:21): [True: 2.83k, False: 6.62k]
  Branch (395:21): [True: 0, False: 0]
  Branch (395:21): [True: 0, False: 0]
  Branch (395:21): [True: 3.39k, False: 8.07k]
  Branch (395:21): [True: 0, False: 0]
  Branch (395:21): [True: 1.73k, False: 14.4k]
  Branch (395:21): [True: 0, False: 0]
396
                    break;
397
                }
398
                if (window_last >= haystack_end) {
  Branch (398:21): [True: 0, False: 6.62k]
  Branch (398:21): [True: 0, False: 0]
  Branch (398:21): [True: 0, False: 0]
  Branch (398:21): [True: 0, False: 8.07k]
  Branch (398:21): [True: 0, False: 0]
  Branch (398:21): [True: 1, False: 14.4k]
  Branch (398:21): [True: 0, False: 0]
399
                    return -1;
400
                }
401
                LOG("Horspool skip");
402
            }
403
          no_shift:
404
            window = window_last - len_needle + 1;
405
            assert((window[len_needle - 1] & TABLE_MASK) ==
406
                   (needle[len_needle - 1] & TABLE_MASK));
407
            Py_ssize_t i = Py_MAX(cut, memory);
408
            for (; i < len_needle; 
i++41.2k
) {
  Branch (408:20): [True: 21.4k, False: 145]
  Branch (408:20): [True: 0, False: 0]
  Branch (408:20): [True: 0, False: 0]
  Branch (408:20): [True: 25.1k, False: 176]
  Branch (408:20): [True: 0, False: 0]
  Branch (408:20): [True: 2.25k, False: 29]
  Branch (408:20): [True: 0, False: 0]
409
                if (needle[i] != window[i]) {
  Branch (409:21): [True: 2.70k, False: 18.7k]
  Branch (409:21): [True: 0, False: 0]
  Branch (409:21): [True: 0, False: 0]
  Branch (409:21): [True: 3.22k, False: 21.9k]
  Branch (409:21): [True: 0, False: 0]
  Branch (409:21): [True: 1.70k, False: 555]
  Branch (409:21): [True: 0, False: 0]
410
                    LOG("Right half does not match.\n");
411
                    window_last += i - cut + 1;
412
                    memory = 0;
413
                    goto periodicwindowloop;
414
                }
415
            }
416
            
for (i = memory; 350
i < cut;
i++2.95k
) {
  Branch (416:30): [True: 1.38k, False: 138]
  Branch (416:30): [True: 0, False: 0]
  Branch (416:30): [True: 0, False: 0]
  Branch (416:30): [True: 1.47k, False: 163]
  Branch (416:30): [True: 0, False: 0]
  Branch (416:30): [True: 116, False: 29]
  Branch (416:30): [True: 0, False: 0]
417
                if (needle[i] != window[i]) {
  Branch (417:21): [True: 7, False: 1.37k]
  Branch (417:21): [True: 0, False: 0]
  Branch (417:21): [True: 0, False: 0]
  Branch (417:21): [True: 13, False: 1.46k]
  Branch (417:21): [True: 0, False: 0]
  Branch (417:21): [True: 0, False: 116]
  Branch (417:21): [True: 0, False: 0]
418
                    LOG("Left half does not match.\n");
419
                    window_last += period;
420
                    memory = len_needle - period;
421
                    if (window_last >= haystack_end) {
  Branch (421:25): [True: 0, False: 7]
  Branch (421:25): [True: 0, False: 0]
  Branch (421:25): [True: 0, False: 0]
  Branch (421:25): [True: 0, False: 13]
  Branch (421:25): [True: 0, False: 0]
  Branch (421:25): [True: 0, False: 0]
  Branch (421:25): [True: 0, False: 0]
422
                        return -1;
423
                    }
424
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
425
                    if (shift) {
  Branch (425:25): [True: 0, False: 7]
  Branch (425:25): [True: 0, False: 0]
  Branch (425:25): [True: 0, False: 0]
  Branch (425:25): [True: 0, False: 13]
  Branch (425:25): [True: 0, False: 0]
  Branch (425:25): [True: 0, False: 0]
  Branch (425:25): [True: 0, False: 0]
426
                        // A mismatch has been identified to the right
427
                        // of where i will next start, so we can jump
428
                        // at least as far as if the mismatch occurred
429
                        // on the first comparison.
430
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
431
                        LOG("Skip with Memory.\n");
432
                        memory = 0;
433
                        window_last += Py_MAX(shift, mem_jump);
434
                        goto periodicwindowloop;
435
                    }
436
                    goto no_shift;
437
                }
438
            }
439
            LOG("Found a match!\n");
440
            return window - haystack;
441
        }
442
    }
443
    else {
444
        Py_ssize_t gap = p->gap;
445
        period = Py_MAX(gap, period);
446
        LOG("Needle is not periodic.\n");
447
        Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
448
      windowloop:
449
        while (window_last < haystack_end) {
  Branch (449:16): [True: 34.6k, False: 0]
  Branch (449:16): [True: 0, False: 0]
  Branch (449:16): [True: 0, False: 0]
  Branch (449:16): [True: 14, False: 0]
  Branch (449:16): [True: 0, False: 0]
  Branch (449:16): [True: 2.51k, False: 0]
  Branch (449:16): [True: 0, False: 0]
450
            for (;;) {
451
                LOG_LINEUP();
452
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
453
                window_last += shift;
454
                if (shift == 0) {
  Branch (454:21): [True: 34.4k, False: 2.10M]
  Branch (454:21): [True: 0, False: 0]
  Branch (454:21): [True: 0, False: 0]
  Branch (454:21): [True: 14, False: 770]
  Branch (454:21): [True: 0, False: 0]
  Branch (454:21): [True: 2.51k, False: 18.8k]
  Branch (454:21): [True: 0, False: 0]
455
                    break;
456
                }
457
                if (window_last >= haystack_end) {
  Branch (457:21): [True: 178, False: 2.10M]
  Branch (457:21): [True: 0, False: 0]
  Branch (457:21): [True: 0, False: 0]
  Branch (457:21): [True: 0, False: 770]
  Branch (457:21): [True: 0, False: 0]
  Branch (457:21): [True: 2, False: 18.8k]
  Branch (457:21): [True: 0, False: 0]
458
                    return -1;
459
                }
460
                LOG("Horspool skip");
461
            }
462
            window = window_last - len_needle + 1;
463
            assert((window[len_needle - 1] & TABLE_MASK) ==
464
                   (needle[len_needle - 1] & TABLE_MASK));
465
            for (Py_ssize_t i = cut; i < gap_jump_end; 
i++12.6k
) {
  Branch (465:38): [True: 43.6k, False: 2.03k]
  Branch (465:38): [True: 0, False: 0]
  Branch (465:38): [True: 0, False: 0]
  Branch (465:38): [True: 17, False: 3]
  Branch (465:38): [True: 0, False: 0]
  Branch (465:38): [True: 3.50k, False: 431]
  Branch (465:38): [True: 0, False: 0]
466
                if (needle[i] != window[i]) {
  Branch (466:21): [True: 32.4k, False: 11.2k]
  Branch (466:21): [True: 0, False: 0]
  Branch (466:21): [True: 0, False: 0]
  Branch (466:21): [True: 11, False: 6]
  Branch (466:21): [True: 0, False: 0]
  Branch (466:21): [True: 2.08k, False: 1.42k]
  Branch (466:21): [True: 0, False: 0]
467
                    LOG("Early right half mismatch: jump by gap.\n");
468
                    assert(gap >= i - cut + 1);
469
                    window_last += gap;
470
                    goto windowloop;
471
                }
472
            }
473
            
for (Py_ssize_t i = gap_jump_end; 2.46k
i < len_needle;
i++3.71k
) {
  Branch (473:47): [True: 2.90k, False: 188]
  Branch (473:47): [True: 0, False: 0]
  Branch (473:47): [True: 0, False: 0]
  Branch (473:47): [True: 788, False: 3]
  Branch (473:47): [True: 0, False: 0]
  Branch (473:47): [True: 2.11k, False: 177]
  Branch (473:47): [True: 0, False: 0]
474
                if (needle[i] != window[i]) {
  Branch (474:21): [True: 1.84k, False: 1.06k]
  Branch (474:21): [True: 0, False: 0]
  Branch (474:21): [True: 0, False: 0]
  Branch (474:21): [True: 0, False: 788]
  Branch (474:21): [True: 0, False: 0]
  Branch (474:21): [True: 254, False: 1.86k]
  Branch (474:21): [True: 0, False: 0]
475
                    LOG("Late right half mismatch.\n");
476
                    assert(i - cut + 1 > gap);
477
                    window_last += i - cut + 1;
478
                    goto windowloop;
479
                }
480
            }
481
            
for (Py_ssize_t i = 0; 368
i < cut;
i++11.1k
) {
  Branch (481:36): [True: 8.22k, False: 131]
  Branch (481:36): [True: 0, False: 0]
  Branch (481:36): [True: 0, False: 0]
  Branch (481:36): [True: 2.55k, False: 3]
  Branch (481:36): [True: 0, False: 0]
  Branch (481:36): [True: 478, False: 73]
  Branch (481:36): [True: 0, False: 0]
482
                if (needle[i] != window[i]) {
  Branch (482:21): [True: 57, False: 8.17k]
  Branch (482:21): [True: 0, False: 0]
  Branch (482:21): [True: 0, False: 0]
  Branch (482:21): [True: 0, False: 2.55k]
  Branch (482:21): [True: 0, False: 0]
  Branch (482:21): [True: 104, False: 374]
  Branch (482:21): [True: 0, False: 0]
483
                    LOG("Left half does not match.\n");
484
                    window_last += period;
485
                    goto windowloop;
486
                }
487
            }
488
            LOG("Found a match!\n");
489
            return window - haystack;
490
        }
491
    }
492
    LOG("Not found. Returning -1.\n");
493
    return -1;
494
}
bytes_methods.c:stringlib__two_way
Line
Count
Source
372
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
    const Py_ssize_t len_needle = p->len_needle;
376
    const Py_ssize_t cut = p->cut;
377
    Py_ssize_t period = p->period;
378
    const STRINGLIB_CHAR *const needle = p->needle;
379
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
    SHIFT_TYPE *table = p->table;
382
    const STRINGLIB_CHAR *window;
383
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
    if (p->is_periodic) {
  Branch (385:9): [True: 138, False: 309]
386
        LOG("Needle is periodic.\n");
387
        Py_ssize_t memory = 0;
388
      periodicwindowloop:
389
        while (window_last < haystack_end) {
  Branch (389:16): [True: 2.83k, False: 0]
390
            assert(memory == 0);
391
            for (;;) {
392
                LOG_LINEUP();
393
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
394
                window_last += shift;
395
                if (shift == 0) {
  Branch (395:21): [True: 2.83k, False: 6.62k]
396
                    break;
397
                }
398
                if (window_last >= haystack_end) {
  Branch (398:21): [True: 0, False: 6.62k]
399
                    return -1;
400
                }
401
                LOG("Horspool skip");
402
            }
403
          no_shift:
404
            window = window_last - len_needle + 1;
405
            assert((window[len_needle - 1] & TABLE_MASK) ==
406
                   (needle[len_needle - 1] & TABLE_MASK));
407
            Py_ssize_t i = Py_MAX(cut, memory);
408
            for (; i < len_needle; 
i++18.7k
) {
  Branch (408:20): [True: 21.4k, False: 145]
409
                if (needle[i] != window[i]) {
  Branch (409:21): [True: 2.70k, False: 18.7k]
410
                    LOG("Right half does not match.\n");
411
                    window_last += i - cut + 1;
412
                    memory = 0;
413
                    goto periodicwindowloop;
414
                }
415
            }
416
            
for (i = memory; 145
i < cut;
i++1.37k
) {
  Branch (416:30): [True: 1.38k, False: 138]
417
                if (needle[i] != window[i]) {
  Branch (417:21): [True: 7, False: 1.37k]
418
                    LOG("Left half does not match.\n");
419
                    window_last += period;
420
                    memory = len_needle - period;
421
                    if (window_last >= haystack_end) {
  Branch (421:25): [True: 0, False: 7]
422
                        return -1;
423
                    }
424
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
425
                    if (shift) {
  Branch (425:25): [True: 0, False: 7]
426
                        // A mismatch has been identified to the right
427
                        // of where i will next start, so we can jump
428
                        // at least as far as if the mismatch occurred
429
                        // on the first comparison.
430
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
431
                        LOG("Skip with Memory.\n");
432
                        memory = 0;
433
                        window_last += Py_MAX(shift, mem_jump);
434
                        goto periodicwindowloop;
435
                    }
436
                    goto no_shift;
437
                }
438
            }
439
            LOG("Found a match!\n");
440
            return window - haystack;
441
        }
442
    }
443
    else {
444
        Py_ssize_t gap = p->gap;
445
        period = Py_MAX(gap, period);
446
        LOG("Needle is not periodic.\n");
447
        Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
448
      windowloop:
449
        while (window_last < haystack_end) {
  Branch (449:16): [True: 34.6k, False: 0]
450
            for (;;) {
451
                LOG_LINEUP();
452
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
453
                window_last += shift;
454
                if (shift == 0) {
  Branch (454:21): [True: 34.4k, False: 2.10M]
455
                    break;
456
                }
457
                if (window_last >= haystack_end) {
  Branch (457:21): [True: 178, False: 2.10M]
458
                    return -1;
459
                }
460
                LOG("Horspool skip");
461
            }
462
            window = window_last - len_needle + 1;
463
            assert((window[len_needle - 1] & TABLE_MASK) ==
464
                   (needle[len_needle - 1] & TABLE_MASK));
465
            for (Py_ssize_t i = cut; i < gap_jump_end; 
i++11.2k
) {
  Branch (465:38): [True: 43.6k, False: 2.03k]
466
                if (needle[i] != window[i]) {
  Branch (466:21): [True: 32.4k, False: 11.2k]
467
                    LOG("Early right half mismatch: jump by gap.\n");
468
                    assert(gap >= i - cut + 1);
469
                    window_last += gap;
470
                    goto windowloop;
471
                }
472
            }
473
            
for (Py_ssize_t i = gap_jump_end; 2.03k
i < len_needle;
i++1.06k
) {
  Branch (473:47): [True: 2.90k, False: 188]
474
                if (needle[i] != window[i]) {
  Branch (474:21): [True: 1.84k, False: 1.06k]
475
                    LOG("Late right half mismatch.\n");
476
                    assert(i - cut + 1 > gap);
477
                    window_last += i - cut + 1;
478
                    goto windowloop;
479
                }
480
            }
481
            
for (Py_ssize_t i = 0; 188
i < cut;
i++8.17k
) {
  Branch (481:36): [True: 8.22k, False: 131]
482
                if (needle[i] != window[i]) {
  Branch (482:21): [True: 57, False: 8.17k]
483
                    LOG("Left half does not match.\n");
484
                    window_last += period;
485
                    goto windowloop;
486
                }
487
            }
488
            LOG("Found a match!\n");
489
            return window - haystack;
490
        }
491
    }
492
    LOG("Not found. Returning -1.\n");
493
    return -1;
494
}
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way
Unexecuted instantiation: bytesobject.c:stringlib__two_way
unicodeobject.c:asciilib__two_way
Line
Count
Source
372
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
    const Py_ssize_t len_needle = p->len_needle;
376
    const Py_ssize_t cut = p->cut;
377
    Py_ssize_t period = p->period;
378
    const STRINGLIB_CHAR *const needle = p->needle;
379
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
    SHIFT_TYPE *table = p->table;
382
    const STRINGLIB_CHAR *window;
383
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
    if (p->is_periodic) {
  Branch (385:9): [True: 163, False: 3]
386
        LOG("Needle is periodic.\n");
387
        Py_ssize_t memory = 0;
388
      periodicwindowloop:
389
        while (window_last < haystack_end) {
  Branch (389:16): [True: 3.39k, False: 0]
390
            assert(memory == 0);
391
            for (;;) {
392
                LOG_LINEUP();
393
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
394
                window_last += shift;
395
                if (shift == 0) {
  Branch (395:21): [True: 3.39k, False: 8.07k]
396
                    break;
397
                }
398
                if (window_last >= haystack_end) {
  Branch (398:21): [True: 0, False: 8.07k]
399
                    return -1;
400
                }
401
                LOG("Horspool skip");
402
            }
403
          no_shift:
404
            window = window_last - len_needle + 1;
405
            assert((window[len_needle - 1] & TABLE_MASK) ==
406
                   (needle[len_needle - 1] & TABLE_MASK));
407
            Py_ssize_t i = Py_MAX(cut, memory);
408
            for (; i < len_needle; 
i++21.9k
) {
  Branch (408:20): [True: 25.1k, False: 176]
409
                if (needle[i] != window[i]) {
  Branch (409:21): [True: 3.22k, False: 21.9k]
410
                    LOG("Right half does not match.\n");
411
                    window_last += i - cut + 1;
412
                    memory = 0;
413
                    goto periodicwindowloop;
414
                }
415
            }
416
            
for (i = memory; 176
i < cut;
i++1.46k
) {
  Branch (416:30): [True: 1.47k, False: 163]
417
                if (needle[i] != window[i]) {
  Branch (417:21): [True: 13, False: 1.46k]
418
                    LOG("Left half does not match.\n");
419
                    window_last += period;
420
                    memory = len_needle - period;
421
                    if (window_last >= haystack_end) {
  Branch (421:25): [True: 0, False: 13]
422
                        return -1;
423
                    }
424
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
425
                    if (shift) {
  Branch (425:25): [True: 0, False: 13]
426
                        // A mismatch has been identified to the right
427
                        // of where i will next start, so we can jump
428
                        // at least as far as if the mismatch occurred
429
                        // on the first comparison.
430
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
431
                        LOG("Skip with Memory.\n");
432
                        memory = 0;
433
                        window_last += Py_MAX(shift, mem_jump);
434
                        goto periodicwindowloop;
435
                    }
436
                    goto no_shift;
437
                }
438
            }
439
            LOG("Found a match!\n");
440
            return window - haystack;
441
        }
442
    }
443
    else {
444
        Py_ssize_t gap = p->gap;
445
        period = Py_MAX(gap, period);
446
        LOG("Needle is not periodic.\n");
447
        Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
448
      windowloop:
449
        while (window_last < haystack_end) {
  Branch (449:16): [True: 14, False: 0]
450
            for (;;) {
451
                LOG_LINEUP();
452
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
453
                window_last += shift;
454
                if (shift == 0) {
  Branch (454:21): [True: 14, False: 770]
455
                    break;
456
                }
457
                if (window_last >= haystack_end) {
  Branch (457:21): [True: 0, False: 770]
458
                    return -1;
459
                }
460
                LOG("Horspool skip");
461
            }
462
            window = window_last - len_needle + 1;
463
            assert((window[len_needle - 1] & TABLE_MASK) ==
464
                   (needle[len_needle - 1] & TABLE_MASK));
465
            for (Py_ssize_t i = cut; i < gap_jump_end; 
i++6
) {
  Branch (465:38): [True: 17, False: 3]
466
                if (needle[i] != window[i]) {
  Branch (466:21): [True: 11, False: 6]
467
                    LOG("Early right half mismatch: jump by gap.\n");
468
                    assert(gap >= i - cut + 1);
469
                    window_last += gap;
470
                    goto windowloop;
471
                }
472
            }
473
            
for (Py_ssize_t i = gap_jump_end; 3
i < len_needle;
i++788
) {
  Branch (473:47): [True: 788, False: 3]
474
                if (needle[i] != window[i]) {
  Branch (474:21): [True: 0, False: 788]
475
                    LOG("Late right half mismatch.\n");
476
                    assert(i - cut + 1 > gap);
477
                    window_last += i - cut + 1;
478
                    goto windowloop;
479
                }
480
            }
481
            
for (Py_ssize_t i = 0; 3
i < cut;
i++2.55k
) {
  Branch (481:36): [True: 2.55k, False: 3]
482
                if (needle[i] != window[i]) {
  Branch (482:21): [True: 0, False: 2.55k]
483
                    LOG("Left half does not match.\n");
484
                    window_last += period;
485
                    goto windowloop;
486
                }
487
            }
488
            LOG("Found a match!\n");
489
            return window - haystack;
490
        }
491
    }
492
    LOG("Not found. Returning -1.\n");
493
    return -1;
494
}
Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way
unicodeobject.c:ucs2lib__two_way
Line
Count
Source
372
{
373
    // Crochemore and Perrin's (1991) Two-Way algorithm.
374
    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
375
    const Py_ssize_t len_needle = p->len_needle;
376
    const Py_ssize_t cut = p->cut;
377
    Py_ssize_t period = p->period;
378
    const STRINGLIB_CHAR *const needle = p->needle;
379
    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
380
    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
381
    SHIFT_TYPE *table = p->table;
382
    const STRINGLIB_CHAR *window;
383
    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
384
385
    if (p->is_periodic) {
  Branch (385:9): [True: 30, False: 75]
386
        LOG("Needle is periodic.\n");
387
        Py_ssize_t memory = 0;
388
      periodicwindowloop:
389
        while (window_last < haystack_end) {
  Branch (389:16): [True: 1.73k, False: 0]
390
            assert(memory == 0);
391
            for (;;) {
392
                LOG_LINEUP();
393
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
394
                window_last += shift;
395
                if (shift == 0) {
  Branch (395:21): [True: 1.73k, False: 14.4k]
396
                    break;
397
                }
398
                if (window_last >= haystack_end) {
  Branch (398:21): [True: 1, False: 14.4k]
399
                    return -1;
400
                }
401
                LOG("Horspool skip");
402
            }
403
          no_shift:
404
            window = window_last - len_needle + 1;
405
            assert((window[len_needle - 1] & TABLE_MASK) ==
406
                   (needle[len_needle - 1] & TABLE_MASK));
407
            Py_ssize_t i = Py_MAX(cut, memory);
408
            for (; i < len_needle; 
i++555
) {
  Branch (408:20): [True: 2.25k, False: 29]
409
                if (needle[i] != window[i]) {
  Branch (409:21): [True: 1.70k, False: 555]
410
                    LOG("Right half does not match.\n");
411
                    window_last += i - cut + 1;
412
                    memory = 0;
413
                    goto periodicwindowloop;
414
                }
415
            }
416
            
for (i = memory; 29
i < cut;
i++116
) {
  Branch (416:30): [True: 116, False: 29]
417
                if (needle[i] != window[i]) {
  Branch (417:21): [True: 0, False: 116]
418
                    LOG("Left half does not match.\n");
419
                    window_last += period;
420
                    memory = len_needle - period;
421
                    if (window_last >= haystack_end) {
  Branch (421:25): [True: 0, False: 0]
422
                        return -1;
423
                    }
424
                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
425
                    if (shift) {
  Branch (425:25): [True: 0, False: 0]
426
                        // A mismatch has been identified to the right
427
                        // of where i will next start, so we can jump
428
                        // at least as far as if the mismatch occurred
429
                        // on the first comparison.
430
                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
431
                        LOG("Skip with Memory.\n");
432
                        memory = 0;
433
                        window_last += Py_MAX(shift, mem_jump);
434
                        goto periodicwindowloop;
435
                    }
436
                    goto no_shift;
437
                }
438
            }
439
            LOG("Found a match!\n");
440
            return window - haystack;
441
        }
442
    }
443
    else {
444
        Py_ssize_t gap = p->gap;
445
        period = Py_MAX(gap, period);
446
        LOG("Needle is not periodic.\n");
447
        Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
448
      windowloop:
449
        while (window_last < haystack_end) {
  Branch (449:16): [True: 2.51k, False: 0]
450
            for (;;) {
451
                LOG_LINEUP();
452
                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
453
                window_last += shift;
454
                if (shift == 0) {
  Branch (454:21): [True: 2.51k, False: 18.8k]
455
                    break;
456
                }
457
                if (window_last >= haystack_end) {
  Branch (457:21): [True: 2, False: 18.8k]
458
                    return -1;
459
                }
460
                LOG("Horspool skip");
461
            }
462
            window = window_last - len_needle + 1;
463
            assert((window[len_needle - 1] & TABLE_MASK) ==
464
                   (needle[len_needle - 1] & TABLE_MASK));
465
            for (Py_ssize_t i = cut; i < gap_jump_end; 
i++1.42k
) {
  Branch (465:38): [True: 3.50k, False: 431]
466
                if (needle[i] != window[i]) {
  Branch (466:21): [True: 2.08k, False: 1.42k]
467
                    LOG("Early right half mismatch: jump by gap.\n");
468
                    assert(gap >= i - cut + 1);
469
                    window_last += gap;
470
                    goto windowloop;
471
                }
472
            }
473
            
for (Py_ssize_t i = gap_jump_end; 431
i < len_needle;
i++1.86k
) {
  Branch (473:47): [True: 2.11k, False: 177]
474
                if (needle[i] != window[i]) {
  Branch (474:21): [True: 254, False: 1.86k]
475
                    LOG("Late right half mismatch.\n");
476
                    assert(i - cut + 1 > gap);
477
                    window_last += i - cut + 1;
478
                    goto windowloop;
479
                }
480
            }
481
            
for (Py_ssize_t i = 0; 177
i < cut;
i++374
) {
  Branch (481:36): [True: 478, False: 73]
482
                if (needle[i] != window[i]) {
  Branch (482:21): [True: 104, False: 374]
483
                    LOG("Left half does not match.\n");
484
                    window_last += period;
485
                    goto windowloop;
486
                }
487
            }
488
            LOG("Found a match!\n");
489
            return window - haystack;
490
        }
491
    }
492
    LOG("Not found. Returning -1.\n");
493
    return -1;
494
}
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way
495
496
497
static Py_ssize_t
498
STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
499
                         Py_ssize_t len_haystack,
500
                         const STRINGLIB_CHAR *needle,
501
                         Py_ssize_t len_needle)
502
{
503
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
504
    STRINGLIB(prework) p;
505
    STRINGLIB(_preprocess)(needle, len_needle, &p);
506
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
507
}
bytes_methods.c:stringlib__two_way_find
Line
Count
Source
502
{
503
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
504
    STRINGLIB(prework) p;
505
    STRINGLIB(_preprocess)(needle, len_needle, &p);
506
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
507
}
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_find
Unexecuted instantiation: bytesobject.c:stringlib__two_way_find
unicodeobject.c:asciilib__two_way_find
Line
Count
Source
502
{
503
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
504
    STRINGLIB(prework) p;
505
    STRINGLIB(_preprocess)(needle, len_needle, &p);
506
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
507
}
Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way_find
unicodeobject.c:ucs2lib__two_way_find
Line
Count
Source
502
{
503
    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
504
    STRINGLIB(prework) p;
505
    STRINGLIB(_preprocess)(needle, len_needle, &p);
506
    return STRINGLIB(_two_way)(haystack, len_haystack, &p);
507
}
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_find
508
509
510
static Py_ssize_t
511
STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
512
                          Py_ssize_t len_haystack,
513
                          const STRINGLIB_CHAR *needle,
514
                          Py_ssize_t len_needle,
515
                          Py_ssize_t maxcount)
516
{
517
    LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
518
    STRINGLIB(prework) p;
519
    STRINGLIB(_preprocess)(needle, len_needle, &p);
520
    Py_ssize_t index = 0, count = 0;
521
    while (1) {
  Branch (521:12): [Folded - Ignored]
  Branch (521:12): [Folded - Ignored]
  Branch (521:12): [Folded - Ignored]
  Branch (521:12): [Folded - Ignored]
  Branch (521:12): [Folded - Ignored]
  Branch (521:12): [Folded - Ignored]
  Branch (521:12): [Folded - Ignored]
522
        Py_ssize_t result;
523
        result = STRINGLIB(_two_way)(haystack + index,
524
                                     len_haystack - index, &p);
525
        if (result == -1) {
  Branch (525:13): [True: 0, False: 0]
  Branch (525:13): [True: 0, False: 0]
  Branch (525:13): [True: 0, False: 0]
  Branch (525:13): [True: 0, False: 0]
  Branch (525:13): [True: 0, False: 0]
  Branch (525:13): [True: 3, False: 56]
  Branch (525:13): [True: 0, False: 0]
526
            return count;
527
        }
528
        count++;
529
        if (count == maxcount) {
  Branch (529:13): [True: 0, False: 0]
  Branch (529:13): [True: 0, False: 0]
  Branch (529:13): [True: 0, False: 0]
  Branch (529:13): [True: 0, False: 0]
  Branch (529:13): [True: 0, False: 0]
  Branch (529:13): [True: 0, False: 56]
  Branch (529:13): [True: 0, False: 0]
530
            return maxcount;
531
        }
532
        index += result + len_needle;
533
    }
534
    return count;
535
}
Unexecuted instantiation: bytes_methods.c:stringlib__two_way_count
Unexecuted instantiation: bytearrayobject.c:stringlib__two_way_count
Unexecuted instantiation: bytesobject.c:stringlib__two_way_count
Unexecuted instantiation: unicodeobject.c:asciilib__two_way_count
Unexecuted instantiation: unicodeobject.c:ucs1lib__two_way_count
unicodeobject.c:ucs2lib__two_way_count
Line
Count
Source
516
{
517
    LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
518
    STRINGLIB(prework) p;
519
    STRINGLIB(_preprocess)(needle, len_needle, &p);
520
    Py_ssize_t index = 0, count = 0;
521
    while (1) {
  Branch (521:12): [Folded - Ignored]
522
        Py_ssize_t result;
523
        result = STRINGLIB(_two_way)(haystack + index,
524
                                     len_haystack - index, &p);
525
        if (result == -1) {
  Branch (525:13): [True: 3, False: 56]
526
            return count;
527
        }
528
        count++;
529
        if (count == maxcount) {
  Branch (529:13): [True: 0, False: 56]
530
            return maxcount;
531
        }
532
        index += result + len_needle;
533
    }
534
    return count;
535
}
Unexecuted instantiation: unicodeobject.c:ucs4lib__two_way_count
536
537
#undef SHIFT_TYPE
538
#undef NOT_FOUND
539
#undef SHIFT_OVERFLOW
540
#undef TABLE_SIZE_BITS
541
#undef TABLE_SIZE
542
#undef TABLE_MASK
543
544
#undef LOG
545
#undef LOG_STRING
546
#undef LOG_LINEUP
547
548
static inline Py_ssize_t
549
STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
550
                        const STRINGLIB_CHAR* p, Py_ssize_t m,
551
                        Py_ssize_t maxcount, int mode)
552
{
553
    const Py_ssize_t w = n - m;
554
    Py_ssize_t mlast = m - 1, count = 0;
555
    Py_ssize_t gap = mlast;
556
    const STRINGLIB_CHAR last = p[mlast];
557
    const STRINGLIB_CHAR *const ss = &s[mlast];
558
559
    unsigned long mask = 0;
560
    for (Py_ssize_t i = 0; i < mlast; 
i++10.8M
) {
  Branch (560:28): [True: 2.58M, False: 685k]
  Branch (560:28): [True: 214k, False: 46.5k]
  Branch (560:28): [True: 219k, False: 48.6k]
  Branch (560:28): [True: 3.01M, False: 1.53M]
  Branch (560:28): [True: 4.75M, False: 1.50M]
  Branch (560:28): [True: 3.12k, False: 2.59k]
  Branch (560:28): [True: 182, False: 100]
561
        STRINGLIB_BLOOM_ADD(mask, p[i]);
562
        if (p[i] == last) {
  Branch (562:13): [True: 921k, False: 1.66M]
  Branch (562:13): [True: 107k, False: 107k]
  Branch (562:13): [True: 107k, False: 112k]
  Branch (562:13): [True: 938k, False: 2.07M]
  Branch (562:13): [True: 665k, False: 4.09M]
  Branch (562:13): [True: 145, False: 2.97k]
  Branch (562:13): [True: 103, False: 79]
563
            gap = mlast - i - 1;
564
        }
565
    }
566
    STRINGLIB_BLOOM_ADD(mask, last);
567
568
    for (Py_ssize_t i = 0; i <= w; 
i++35.3M
) {
  Branch (568:28): [True: 18.4M, False: 663k]
  Branch (568:28): [True: 74.3k, False: 42.7k]
  Branch (568:28): [True: 183k, False: 44.0k]
  Branch (568:28): [True: 14.2M, False: 1.03M]
  Branch (568:28): [True: 3.02M, False: 1.45M]
  Branch (568:28): [True: 13.1k, False: 2.50k]
  Branch (568:28): [True: 652, False: 61]
569
        if (ss[i] == last) {
  Branch (569:13): [True: 563k, False: 17.8M]
  Branch (569:13): [True: 39.5k, False: 34.8k]
  Branch (569:13): [True: 47.2k, False: 136k]
  Branch (569:13): [True: 1.33M, False: 12.8M]
  Branch (569:13): [True: 376k, False: 2.64M]
  Branch (569:13): [True: 570, False: 12.5k]
  Branch (569:13): [True: 76, False: 576]
570
            /* candidate match */
571
            Py_ssize_t j;
572
            for (j = 0; j < mlast; 
j++2.00M
) {
  Branch (572:25): [True: 863k, False: 29.1k]
  Branch (572:25): [True: 74.4k, False: 7.65k]
  Branch (572:25): [True: 83.0k, False: 8.67k]
  Branch (572:25): [True: 2.07M, False: 518k]
  Branch (572:25): [True: 633k, False: 73.6k]
  Branch (572:25): [True: 894, False: 139]
  Branch (572:25): [True: 77, False: 74]
573
                if (s[i+j] != p[j]) {
  Branch (573:21): [True: 534k, False: 329k]
  Branch (573:21): [True: 31.8k, False: 42.5k]
  Branch (573:21): [True: 38.5k, False: 44.4k]
  Branch (573:21): [True: 818k, False: 1.25M]
  Branch (573:21): [True: 303k, False: 330k]
  Branch (573:21): [True: 431, False: 463]
  Branch (573:21): [True: 2, False: 75]
574
                    break;
575
                }
576
            }
577
            if (j == mlast) {
  Branch (577:17): [True: 29.1k, False: 534k]
  Branch (577:17): [True: 7.65k, False: 31.8k]
  Branch (577:17): [True: 8.67k, False: 38.5k]
  Branch (577:17): [True: 518k, False: 818k]
  Branch (577:17): [True: 73.6k, False: 303k]
  Branch (577:17): [True: 139, False: 431]
  Branch (577:17): [True: 74, False: 2]
578
                /* got a match! */
579
                if (mode != FAST_COUNT) {
  Branch (579:21): [True: 21.6k, False: 7.44k]
  Branch (579:21): [True: 3.88k, False: 3.77k]
  Branch (579:21): [True: 4.62k, False: 4.04k]
  Branch (579:21): [True: 499k, False: 19.0k]
  Branch (579:21): [True: 53.6k, False: 19.9k]
  Branch (579:21): [True: 87, False: 52]
  Branch (579:21): [True: 39, False: 35]
580
                    return i;
581
                }
582
                count++;
583
                if (count == maxcount) {
  Branch (583:21): [True: 0, False: 7.44k]
  Branch (583:21): [True: 5, False: 3.77k]
  Branch (583:21): [True: 5, False: 4.04k]
  Branch (583:21): [True: 14, False: 19.0k]
  Branch (583:21): [True: 0, False: 19.9k]
  Branch (583:21): [True: 0, False: 52]
  Branch (583:21): [True: 0, False: 35]
584
                    return maxcount;
585
                }
586
                i = i + mlast;
587
                continue;
588
            }
589
            /* miss: check if next character is part of pattern */
590
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (590:17): [True: 420k, False: 113k]
  Branch (590:17): [True: 17.7k, False: 14.0k]
  Branch (590:17): [True: 23.1k, False: 15.4k]
  Branch (590:17): [True: 423k, False: 395k]
  Branch (590:17): [True: 227k, False: 75.6k]
  Branch (590:17): [True: 371, False: 60]
  Branch (590:17): [True: 0, False: 2]
591
                i = i + m;
592
            }
593
            else {
594
                i = i + gap;
595
            }
596
        }
597
        else {
598
            /* skip: check if next character is part of pattern */
599
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (599:17): [True: 16.5M, False: 1.29M]
  Branch (599:17): [True: 18.3k, False: 16.5k]
  Branch (599:17): [True: 113k, False: 23.4k]
  Branch (599:17): [True: 11.7M, False: 1.07M]
  Branch (599:17): [True: 2.26M, False: 379k]
  Branch (599:17): [True: 12.0k, False: 565]
  Branch (599:17): [True: 546, False: 30]
600
                i = i + m;
601
            }
602
        }
603
    }
604
    return mode == FAST_COUNT ? 
count654k
:
-12.58M
;
  Branch (604:12): [True: 85.3k, False: 578k]
  Branch (604:12): [True: 42.7k, False: 10]
  Branch (604:12): [True: 43.8k, False: 178]
  Branch (604:12): [True: 379k, False: 655k]
  Branch (604:12): [True: 101k, False: 1.35M]
  Branch (604:12): [True: 2.45k, False: 47]
  Branch (604:12): [True: 35, False: 26]
605
}
bytes_methods.c:stringlib_default_find
Line
Count
Source
552
{
553
    const Py_ssize_t w = n - m;
554
    Py_ssize_t mlast = m - 1, count = 0;
555
    Py_ssize_t gap = mlast;
556
    const STRINGLIB_CHAR last = p[mlast];
557
    const STRINGLIB_CHAR *const ss = &s[mlast];
558
559
    unsigned long mask = 0;
560
    for (Py_ssize_t i = 0; i < mlast; 
i++2.58M
) {
  Branch (560:28): [True: 2.58M, False: 685k]
561
        STRINGLIB_BLOOM_ADD(mask, p[i]);
562
        if (p[i] == last) {
  Branch (562:13): [True: 921k, False: 1.66M]
563
            gap = mlast - i - 1;
564
        }
565
    }
566
    STRINGLIB_BLOOM_ADD(mask, last);
567
568
    for (Py_ssize_t i = 0; i <= w; 
i++18.4M
) {
  Branch (568:28): [True: 18.4M, False: 663k]
569
        if (ss[i] == last) {
  Branch (569:13): [True: 563k, False: 17.8M]
570
            /* candidate match */
571
            Py_ssize_t j;
572
            for (j = 0; j < mlast; 
j++329k
) {
  Branch (572:25): [True: 863k, False: 29.1k]
573
                if (s[i+j] != p[j]) {
  Branch (573:21): [True: 534k, False: 329k]
574
                    break;
575
                }
576
            }
577
            if (j == mlast) {
  Branch (577:17): [True: 29.1k, False: 534k]
578
                /* got a match! */
579
                if (mode != FAST_COUNT) {
  Branch (579:21): [True: 21.6k, False: 7.44k]
580
                    return i;
581
                }
582
                count++;
583
                if (count == maxcount) {
  Branch (583:21): [True: 0, False: 7.44k]
584
                    return maxcount;
585
                }
586
                i = i + mlast;
587
                continue;
588
            }
589
            /* miss: check if next character is part of pattern */
590
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (590:17): [True: 420k, False: 113k]
591
                i = i + m;
592
            }
593
            else {
594
                i = i + gap;
595
            }
596
        }
597
        else {
598
            /* skip: check if next character is part of pattern */
599
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (599:17): [True: 16.5M, False: 1.29M]
600
                i = i + m;
601
            }
602
        }
603
    }
604
    return mode == FAST_COUNT ? 
count85.3k
:
-1578k
;
  Branch (604:12): [True: 85.3k, False: 578k]
605
}
bytearrayobject.c:stringlib_default_find
Line
Count
Source
552
{
553
    const Py_ssize_t w = n - m;
554
    Py_ssize_t mlast = m - 1, count = 0;
555
    Py_ssize_t gap = mlast;
556
    const STRINGLIB_CHAR last = p[mlast];
557
    const STRINGLIB_CHAR *const ss = &s[mlast];
558
559
    unsigned long mask = 0;
560
    for (Py_ssize_t i = 0; i < mlast; 
i++214k
) {
  Branch (560:28): [True: 214k, False: 46.5k]
561
        STRINGLIB_BLOOM_ADD(mask, p[i]);
562
        if (p[i] == last) {
  Branch (562:13): [True: 107k, False: 107k]
563
            gap = mlast - i - 1;
564
        }
565
    }
566
    STRINGLIB_BLOOM_ADD(mask, last);
567
568
    for (Py_ssize_t i = 0; i <= w; 
i++70.4k
) {
  Branch (568:28): [True: 74.3k, False: 42.7k]
569
        if (ss[i] == last) {
  Branch (569:13): [True: 39.5k, False: 34.8k]
570
            /* candidate match */
571
            Py_ssize_t j;
572
            for (j = 0; j < mlast; 
j++42.5k
) {
  Branch (572:25): [True: 74.4k, False: 7.65k]
573
                if (s[i+j] != p[j]) {
  Branch (573:21): [True: 31.8k, False: 42.5k]
574
                    break;
575
                }
576
            }
577
            if (j == mlast) {
  Branch (577:17): [True: 7.65k, False: 31.8k]
578
                /* got a match! */
579
                if (mode != FAST_COUNT) {
  Branch (579:21): [True: 3.88k, False: 3.77k]
580
                    return i;
581
                }
582
                count++;
583
                if (count == maxcount) {
  Branch (583:21): [True: 5, False: 3.77k]
584
                    return maxcount;
585
                }
586
                i = i + mlast;
587
                continue;
588
            }
589
            /* miss: check if next character is part of pattern */
590
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (590:17): [True: 17.7k, False: 14.0k]
591
                i = i + m;
592
            }
593
            else {
594
                i = i + gap;
595
            }
596
        }
597
        else {
598
            /* skip: check if next character is part of pattern */
599
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (599:17): [True: 18.3k, False: 16.5k]
600
                i = i + m;
601
            }
602
        }
603
    }
604
    return mode == FAST_COUNT ? 
count42.7k
:
-110
;
  Branch (604:12): [True: 42.7k, False: 10]
605
}
bytesobject.c:stringlib_default_find
Line
Count
Source
552
{
553
    const Py_ssize_t w = n - m;
554
    Py_ssize_t mlast = m - 1, count = 0;
555
    Py_ssize_t gap = mlast;
556
    const STRINGLIB_CHAR last = p[mlast];
557
    const STRINGLIB_CHAR *const ss = &s[mlast];
558
559
    unsigned long mask = 0;
560
    for (Py_ssize_t i = 0; i < mlast; 
i++219k
) {
  Branch (560:28): [True: 219k, False: 48.6k]
561
        STRINGLIB_BLOOM_ADD(mask, p[i]);
562
        if (p[i] == last) {
  Branch (562:13): [True: 107k, False: 112k]
563
            gap = mlast - i - 1;
564
        }
565
    }
566
    STRINGLIB_BLOOM_ADD(mask, last);
567
568
    for (Py_ssize_t i = 0; i <= w; 
i++179k
) {
  Branch (568:28): [True: 183k, False: 44.0k]
569
        if (ss[i] == last) {
  Branch (569:13): [True: 47.2k, False: 136k]
570
            /* candidate match */
571
            Py_ssize_t j;
572
            for (j = 0; j < mlast; 
j++44.4k
) {
  Branch (572:25): [True: 83.0k, False: 8.67k]
573
                if (s[i+j] != p[j]) {
  Branch (573:21): [True: 38.5k, False: 44.4k]
574
                    break;
575
                }
576
            }
577
            if (j == mlast) {
  Branch (577:17): [True: 8.67k, False: 38.5k]
578
                /* got a match! */
579
                if (mode != FAST_COUNT) {
  Branch (579:21): [True: 4.62k, False: 4.04k]
580
                    return i;
581
                }
582
                count++;
583
                if (count == maxcount) {
  Branch (583:21): [True: 5, False: 4.04k]
584
                    return maxcount;
585
                }
586
                i = i + mlast;
587
                continue;
588
            }
589
            /* miss: check if next character is part of pattern */
590
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (590:17): [True: 23.1k, False: 15.4k]
591
                i = i + m;
592
            }
593
            else {
594
                i = i + gap;
595
            }
596
        }
597
        else {
598
            /* skip: check if next character is part of pattern */
599
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (599:17): [True: 113k, False: 23.4k]
600
                i = i + m;
601
            }
602
        }
603
    }
604
    return mode == FAST_COUNT ? 
count43.8k
:
-1178
;
  Branch (604:12): [True: 43.8k, False: 178]
605
}
unicodeobject.c:asciilib_default_find
Line
Count
Source
552
{
553
    const Py_ssize_t w = n - m;
554
    Py_ssize_t mlast = m - 1, count = 0;
555
    Py_ssize_t gap = mlast;
556
    const STRINGLIB_CHAR last = p[mlast];
557
    const STRINGLIB_CHAR *const ss = &s[mlast];
558
559
    unsigned long mask = 0;
560
    for (Py_ssize_t i = 0; i < mlast; 
i++3.01M
) {
  Branch (560:28): [True: 3.01M, False: 1.53M]
561
        STRINGLIB_BLOOM_ADD(mask, p[i]);
562
        if (p[i] == last) {
  Branch (562:13): [True: 938k, False: 2.07M]
563
            gap = mlast - i - 1;
564
        }
565
    }
566
    STRINGLIB_BLOOM_ADD(mask, last);
567
568
    for (Py_ssize_t i = 0; i <= w; 
i++13.7M
) {
  Branch (568:28): [True: 14.2M, False: 1.03M]
569
        if (ss[i] == last) {
  Branch (569:13): [True: 1.33M, False: 12.8M]
570
            /* candidate match */
571
            Py_ssize_t j;
572
            for (j = 0; j < mlast; 
j++1.25M
) {
  Branch (572:25): [True: 2.07M, False: 518k]
573
                if (s[i+j] != p[j]) {
  Branch (573:21): [True: 818k, False: 1.25M]
574
                    break;
575
                }
576
            }
577
            if (j == mlast) {
  Branch (577:17): [True: 518k, False: 818k]
578
                /* got a match! */
579
                if (mode != FAST_COUNT) {
  Branch (579:21): [True: 499k, False: 19.0k]
580
                    return i;
581
                }
582
                count++;
583
                if (count == maxcount) {
  Branch (583:21): [True: 14, False: 19.0k]
584
                    return maxcount;
585
                }
586
                i = i + mlast;
587
                continue;
588
            }
589
            /* miss: check if next character is part of pattern */
590
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (590:17): [True: 423k, False: 395k]
591
                i = i + m;
592
            }
593
            else {
594
                i = i + gap;
595
            }
596
        }
597
        else {
598
            /* skip: check if next character is part of pattern */
599
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (599:17): [True: 11.7M, False: 1.07M]
600
                i = i + m;
601
            }
602
        }
603
    }
604
    return mode == FAST_COUNT ? 
count379k
:
-1655k
;
  Branch (604:12): [True: 379k, False: 655k]
605
}
unicodeobject.c:ucs1lib_default_find
Line
Count
Source
552
{
553
    const Py_ssize_t w = n - m;
554
    Py_ssize_t mlast = m - 1, count = 0;
555
    Py_ssize_t gap = mlast;
556
    const STRINGLIB_CHAR last = p[mlast];
557
    const STRINGLIB_CHAR *const ss = &s[mlast];
558
559
    unsigned long mask = 0;
560
    for (Py_ssize_t i = 0; i < mlast; 
i++4.75M
) {
  Branch (560:28): [True: 4.75M, False: 1.50M]
561
        STRINGLIB_BLOOM_ADD(mask, p[i]);
562
        if (p[i] == last) {
  Branch (562:13): [True: 665k, False: 4.09M]
563
            gap = mlast - i - 1;
564
        }
565
    }
566
    STRINGLIB_BLOOM_ADD(mask, last);
567
568
    for (Py_ssize_t i = 0; i <= w; 
i++2.96M
) {
  Branch (568:28): [True: 3.02M, False: 1.45M]
569
        if (ss[i] == last) {
  Branch (569:13): [True: 376k, False: 2.64M]
570
            /* candidate match */
571
            Py_ssize_t j;
572
            for (j = 0; j < mlast; 
j++330k
) {
  Branch (572:25): [True: 633k, False: 73.6k]
573
                if (s[i+j] != p[j]) {
  Branch (573:21): [True: 303k, False: 330k]
574
                    break;
575
                }
576
            }
577
            if (j == mlast) {
  Branch (577:17): [True: 73.6k, False: 303k]
578
                /* got a match! */
579
                if (mode != FAST_COUNT) {
  Branch (579:21): [True: 53.6k, False: 19.9k]
580
                    return i;
581
                }
582
                count++;
583
                if (count == maxcount) {
  Branch (583:21): [True: 0, False: 19.9k]
584
                    return maxcount;
585
                }
586
                i = i + mlast;
587
                continue;
588
            }
589
            /* miss: check if next character is part of pattern */
590
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (590:17): [True: 227k, False: 75.6k]
591
                i = i + m;
592
            }
593
            else {
594
                i = i + gap;
595
            }
596
        }
597
        else {
598
            /* skip: check if next character is part of pattern */
599
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (599:17): [True: 2.26M, False: 379k]
600
                i = i + m;
601
            }
602
        }
603
    }
604
    return mode == FAST_COUNT ? 
count101k
:
-11.35M
;
  Branch (604:12): [True: 101k, False: 1.35M]
605
}
unicodeobject.c:ucs2lib_default_find
Line
Count
Source
552
{
553
    const Py_ssize_t w = n - m;
554
    Py_ssize_t mlast = m - 1, count = 0;
555
    Py_ssize_t gap = mlast;
556
    const STRINGLIB_CHAR last = p[mlast];
557
    const STRINGLIB_CHAR *const ss = &s[mlast];
558
559
    unsigned long mask = 0;
560
    for (Py_ssize_t i = 0; i < mlast; 
i++3.12k
) {
  Branch (560:28): [True: 3.12k, False: 2.59k]
561
        STRINGLIB_BLOOM_ADD(mask, p[i]);
562
        if (p[i] == last) {
  Branch (562:13): [True: 145, False: 2.97k]
563
            gap = mlast - i - 1;
564
        }
565
    }
566
    STRINGLIB_BLOOM_ADD(mask, last);
567
568
    for (Py_ssize_t i = 0; i <= w; 
i++13.0k
) {
  Branch (568:28): [True: 13.1k, False: 2.50k]
569
        if (ss[i] == last) {
  Branch (569:13): [True: 570, False: 12.5k]
570
            /* candidate match */
571
            Py_ssize_t j;
572
            for (j = 0; j < mlast; 
j++463
) {
  Branch (572:25): [True: 894, False: 139]
573
                if (s[i+j] != p[j]) {
  Branch (573:21): [True: 431, False: 463]
574
                    break;
575
                }
576
            }
577
            if (j == mlast) {
  Branch (577:17): [True: 139, False: 431]
578
                /* got a match! */
579
                if (mode != FAST_COUNT) {
  Branch (579:21): [True: 87, False: 52]
580
                    return i;
581
                }
582
                count++;
583
                if (count == maxcount) {
  Branch (583:21): [True: 0, False: 52]
584
                    return maxcount;
585
                }
586
                i = i + mlast;
587
                continue;
588
            }
589
            /* miss: check if next character is part of pattern */
590
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (590:17): [True: 371, False: 60]
591
                i = i + m;
592
            }
593
            else {
594
                i = i + gap;
595
            }
596
        }
597
        else {
598
            /* skip: check if next character is part of pattern */
599
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (599:17): [True: 12.0k, False: 565]
600
                i = i + m;
601
            }
602
        }
603
    }
604
    return mode == FAST_COUNT ? 
count2.45k
:
-147
;
  Branch (604:12): [True: 2.45k, False: 47]
605
}
unicodeobject.c:ucs4lib_default_find
Line
Count
Source
552
{
553
    const Py_ssize_t w = n - m;
554
    Py_ssize_t mlast = m - 1, count = 0;
555
    Py_ssize_t gap = mlast;
556
    const STRINGLIB_CHAR last = p[mlast];
557
    const STRINGLIB_CHAR *const ss = &s[mlast];
558
559
    unsigned long mask = 0;
560
    for (Py_ssize_t i = 0; i < mlast; 
i++182
) {
  Branch (560:28): [True: 182, False: 100]
561
        STRINGLIB_BLOOM_ADD(mask, p[i]);
562
        if (p[i] == last) {
  Branch (562:13): [True: 103, False: 79]
563
            gap = mlast - i - 1;
564
        }
565
    }
566
    STRINGLIB_BLOOM_ADD(mask, last);
567
568
    for (Py_ssize_t i = 0; i <= w; 
i++613
) {
  Branch (568:28): [True: 652, False: 61]
569
        if (ss[i] == last) {
  Branch (569:13): [True: 76, False: 576]
570
            /* candidate match */
571
            Py_ssize_t j;
572
            for (j = 0; j < mlast; 
j++75
) {
  Branch (572:25): [True: 77, False: 74]
573
                if (s[i+j] != p[j]) {
  Branch (573:21): [True: 2, False: 75]
574
                    break;
575
                }
576
            }
577
            if (j == mlast) {
  Branch (577:17): [True: 74, False: 2]
578
                /* got a match! */
579
                if (mode != FAST_COUNT) {
  Branch (579:21): [True: 39, False: 35]
580
                    return i;
581
                }
582
                count++;
583
                if (count == maxcount) {
  Branch (583:21): [True: 0, False: 35]
584
                    return maxcount;
585
                }
586
                i = i + mlast;
587
                continue;
588
            }
589
            /* miss: check if next character is part of pattern */
590
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (590:17): [True: 0, False: 2]
591
                i = i + m;
592
            }
593
            else {
594
                i = i + gap;
595
            }
596
        }
597
        else {
598
            /* skip: check if next character is part of pattern */
599
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (599:17): [True: 546, False: 30]
600
                i = i + m;
601
            }
602
        }
603
    }
604
    return mode == FAST_COUNT ? 
count35
:
-126
;
  Branch (604:12): [True: 35, False: 26]
605
}
606
607
608
static Py_ssize_t
609
STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
610
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
611
                         Py_ssize_t maxcount, int mode)
612
{
613
    const Py_ssize_t w = n - m;
614
    Py_ssize_t mlast = m - 1, count = 0;
615
    Py_ssize_t gap = mlast;
616
    Py_ssize_t hits = 0, res;
617
    const STRINGLIB_CHAR last = p[mlast];
618
    const STRINGLIB_CHAR *const ss = &s[mlast];
619
620
    unsigned long mask = 0;
621
    for (Py_ssize_t i = 0; i < mlast; i++) {
  Branch (621:28): [True: 0, False: 0]
  Branch (621:28): [True: 0, False: 0]
  Branch (621:28): [True: 0, False: 0]
  Branch (621:28): [True: 0, False: 0]
  Branch (621:28): [True: 0, False: 0]
  Branch (621:28): [True: 0, False: 0]
  Branch (621:28): [True: 0, False: 0]
622
        STRINGLIB_BLOOM_ADD(mask, p[i]);
623
        if (p[i] == last) {
  Branch (623:13): [True: 0, False: 0]
  Branch (623:13): [True: 0, False: 0]
  Branch (623:13): [True: 0, False: 0]
  Branch (623:13): [True: 0, False: 0]
  Branch (623:13): [True: 0, False: 0]
  Branch (623:13): [True: 0, False: 0]
  Branch (623:13): [True: 0, False: 0]
624
            gap = mlast - i - 1;
625
        }
626
    }
627
    STRINGLIB_BLOOM_ADD(mask, last);
628
629
    for (Py_ssize_t i = 0; i <= w; i++) {
  Branch (629:28): [True: 0, False: 0]
  Branch (629:28): [True: 0, False: 0]
  Branch (629:28): [True: 0, False: 0]
  Branch (629:28): [True: 0, False: 0]
  Branch (629:28): [True: 0, False: 0]
  Branch (629:28): [True: 0, False: 0]
  Branch (629:28): [True: 0, False: 0]
630
        if (ss[i] == last) {
  Branch (630:13): [True: 0, False: 0]
  Branch (630:13): [True: 0, False: 0]
  Branch (630:13): [True: 0, False: 0]
  Branch (630:13): [True: 0, False: 0]
  Branch (630:13): [True: 0, False: 0]
  Branch (630:13): [True: 0, False: 0]
  Branch (630:13): [True: 0, False: 0]
631
            /* candidate match */
632
            Py_ssize_t j;
633
            for (j = 0; j < mlast; j++) {
  Branch (633:25): [True: 0, False: 0]
  Branch (633:25): [True: 0, False: 0]
  Branch (633:25): [True: 0, False: 0]
  Branch (633:25): [True: 0, False: 0]
  Branch (633:25): [True: 0, False: 0]
  Branch (633:25): [True: 0, False: 0]
  Branch (633:25): [True: 0, False: 0]
634
                if (s[i+j] != p[j]) {
  Branch (634:21): [True: 0, False: 0]
  Branch (634:21): [True: 0, False: 0]
  Branch (634:21): [True: 0, False: 0]
  Branch (634:21): [True: 0, False: 0]
  Branch (634:21): [True: 0, False: 0]
  Branch (634:21): [True: 0, False: 0]
  Branch (634:21): [True: 0, False: 0]
635
                    break;
636
                }
637
            }
638
            if (j == mlast) {
  Branch (638:17): [True: 0, False: 0]
  Branch (638:17): [True: 0, False: 0]
  Branch (638:17): [True: 0, False: 0]
  Branch (638:17): [True: 0, False: 0]
  Branch (638:17): [True: 0, False: 0]
  Branch (638:17): [True: 0, False: 0]
  Branch (638:17): [True: 0, False: 0]
639
                /* got a match! */
640
                if (mode != FAST_COUNT) {
  Branch (640:21): [True: 0, False: 0]
  Branch (640:21): [True: 0, False: 0]
  Branch (640:21): [True: 0, False: 0]
  Branch (640:21): [True: 0, False: 0]
  Branch (640:21): [True: 0, False: 0]
  Branch (640:21): [True: 0, False: 0]
  Branch (640:21): [True: 0, False: 0]
641
                    return i;
642
                }
643
                count++;
644
                if (count == maxcount) {
  Branch (644:21): [True: 0, False: 0]
  Branch (644:21): [True: 0, False: 0]
  Branch (644:21): [True: 0, False: 0]
  Branch (644:21): [True: 0, False: 0]
  Branch (644:21): [True: 0, False: 0]
  Branch (644:21): [True: 0, False: 0]
  Branch (644:21): [True: 0, False: 0]
645
                    return maxcount;
646
                }
647
                i = i + mlast;
648
                continue;
649
            }
650
            hits += j + 1;
651
            if (hits > m / 4 && w - i > 2000) {
  Branch (651:17): [True: 0, False: 0]
  Branch (651:33): [True: 0, False: 0]
  Branch (651:17): [True: 0, False: 0]
  Branch (651:33): [True: 0, False: 0]
  Branch (651:17): [True: 0, False: 0]
  Branch (651:33): [True: 0, False: 0]
  Branch (651:17): [True: 0, False: 0]
  Branch (651:33): [True: 0, False: 0]
  Branch (651:17): [True: 0, False: 0]
  Branch (651:33): [True: 0, False: 0]
  Branch (651:17): [True: 0, False: 0]
  Branch (651:33): [True: 0, False: 0]
  Branch (651:17): [True: 0, False: 0]
  Branch (651:33): [True: 0, False: 0]
652
                if (mode == FAST_SEARCH) {
  Branch (652:21): [True: 0, False: 0]
  Branch (652:21): [True: 0, False: 0]
  Branch (652:21): [True: 0, False: 0]
  Branch (652:21): [True: 0, False: 0]
  Branch (652:21): [True: 0, False: 0]
  Branch (652:21): [True: 0, False: 0]
  Branch (652:21): [True: 0, False: 0]
653
                    res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
654
                    return res == -1 ? -1 : res + i;
  Branch (654:28): [True: 0, False: 0]
  Branch (654:28): [True: 0, False: 0]
  Branch (654:28): [True: 0, False: 0]
  Branch (654:28): [True: 0, False: 0]
  Branch (654:28): [True: 0, False: 0]
  Branch (654:28): [True: 0, False: 0]
  Branch (654:28): [True: 0, False: 0]
655
                }
656
                else {
657
                    res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
658
                                                    maxcount - count);
659
                    return res + count;
660
                }
661
            }
662
            /* miss: check if next character is part of pattern */
663
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (663:17): [True: 0, False: 0]
  Branch (663:17): [True: 0, False: 0]
  Branch (663:17): [True: 0, False: 0]
  Branch (663:17): [True: 0, False: 0]
  Branch (663:17): [True: 0, False: 0]
  Branch (663:17): [True: 0, False: 0]
  Branch (663:17): [True: 0, False: 0]
664
                i = i + m;
665
            }
666
            else {
667
                i = i + gap;
668
            }
669
        }
670
        else {
671
            /* skip: check if next character is part of pattern */
672
            if (!STRINGLIB_BLOOM(mask, ss[i+1])) {
  Branch (672:17): [True: 0, False: 0]
  Branch (672:17): [True: 0, False: 0]
  Branch (672:17): [True: 0, False: 0]
  Branch (672:17): [True: 0, False: 0]
  Branch (672:17): [True: 0, False: 0]
  Branch (672:17): [True: 0, False: 0]
  Branch (672:17): [True: 0, False: 0]
673
                i = i + m;
674
            }
675
        }
676
    }
677
    return mode == FAST_COUNT ? count : -1;
  Branch (677:12): [True: 0, False: 0]
  Branch (677:12): [True: 0, False: 0]
  Branch (677:12): [True: 0, False: 0]
  Branch (677:12): [True: 0, False: 0]
  Branch (677:12): [True: 0, False: 0]
  Branch (677:12): [True: 0, False: 0]
  Branch (677:12): [True: 0, False: 0]
678
}
Unexecuted instantiation: bytes_methods.c:stringlib_adaptive_find
Unexecuted instantiation: bytearrayobject.c:stringlib_adaptive_find
Unexecuted instantiation: bytesobject.c:stringlib_adaptive_find
Unexecuted instantiation: unicodeobject.c:asciilib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs1lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs2lib_adaptive_find
Unexecuted instantiation: unicodeobject.c:ucs4lib_adaptive_find
679
680
681
static Py_ssize_t
682
STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
683
                         const STRINGLIB_CHAR* p, Py_ssize_t m,
684
                         Py_ssize_t maxcount, int mode)
685
{
686
    /* create compressed boyer-moore delta 1 table */
687
    unsigned long mask = 0;
688
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
689
690
    /* process pattern[0] outside the loop */
691
    STRINGLIB_BLOOM_ADD(mask, p[0]);
692
    /* process pattern[:0:-1] */
693
    for (i = mlast; i > 0; 
i--1.35M
) {
  Branch (693:21): [True: 674k, False: 196k]
  Branch (693:21): [True: 221, False: 88]
  Branch (693:21): [True: 236, False: 95]
  Branch (693:21): [True: 675k, False: 196k]
  Branch (693:21): [True: 0, False: 0]
  Branch (693:21): [True: 15, False: 15]
  Branch (693:21): [True: 25, False: 25]
694
        STRINGLIB_BLOOM_ADD(mask, p[i]);
695
        if (p[i] == p[0]) {
  Branch (695:13): [True: 224k, False: 450k]
  Branch (695:13): [True: 37, False: 184]
  Branch (695:13): [True: 37, False: 199]
  Branch (695:13): [True: 224k, False: 450k]
  Branch (695:13): [True: 0, False: 0]
  Branch (695:13): [True: 13, False: 2]
  Branch (695:13): [True: 21, False: 4]
696
            skip = i - 1;
697
        }
698
    }
699
700
    for (i = w; i >= 0; 
i--532k
) {
  Branch (700:17): [True: 303k, False: 190k]
  Branch (700:17): [True: 173, False: 4]
  Branch (700:17): [True: 194, False: 5]
  Branch (700:17): [True: 240k, False: 190k]
  Branch (700:17): [True: 0, False: 0]
  Branch (700:17): [True: 127, False: 7]
  Branch (700:17): [True: 231, False: 11]
701
        if (s[i] == p[0]) {
  Branch (701:13): [True: 84.9k, False: 218k]
  Branch (701:13): [True: 107, False: 66]
  Branch (701:13): [True: 116, False: 78]
  Branch (701:13): [True: 85.0k, False: 155k]
  Branch (701:13): [True: 0, False: 0]
  Branch (701:13): [True: 8, False: 119]
  Branch (701:13): [True: 14, False: 217]
702
            /* candidate match */
703
            for (j = mlast; j > 0; 
j--83.8k
) {
  Branch (703:29): [True: 120k, False: 5.68k]
  Branch (703:29): [True: 237, False: 84]
  Branch (703:29): [True: 253, False: 90]
  Branch (703:29): [True: 121k, False: 5.85k]
  Branch (703:29): [True: 0, False: 0]
  Branch (703:29): [True: 8, False: 8]
  Branch (703:29): [True: 14, False: 14]
704
                if (s[i+j] != p[j]) {
  Branch (704:21): [True: 79.2k, False: 41.4k]
  Branch (704:21): [True: 23, False: 214]
  Branch (704:21): [True: 26, False: 227]
  Branch (704:21): [True: 79.1k, False: 41.9k]
  Branch (704:21): [True: 0, False: 0]
  Branch (704:21): [True: 0, False: 8]
  Branch (704:21): [True: 0, False: 14]
705
                    break;
706
                }
707
            }
708
            if (j == 0) {
  Branch (708:17): [True: 5.68k, False: 79.2k]
  Branch (708:17): [True: 84, False: 23]
  Branch (708:17): [True: 90, False: 26]
  Branch (708:17): [True: 5.85k, False: 79.1k]
  Branch (708:17): [True: 0, False: 0]
  Branch (708:17): [True: 8, False: 0]
  Branch (708:17): [True: 14, False: 0]
709
                /* got a match! */
710
                return i;
711
            }
712
            /* miss: check if previous character is part of pattern */
713
            if (i > 0 && 
!46.2k
STRINGLIB_BLOOM46.2k
(mask, s[i-1])) {
  Branch (713:17): [True: 23.1k, False: 56.1k]
  Branch (713:26): [True: 6.09k, False: 17.0k]
  Branch (713:17): [True: 22, False: 1]
  Branch (713:26): [True: 0, False: 22]
  Branch (713:17): [True: 25, False: 1]
  Branch (713:26): [True: 2, False: 23]
  Branch (713:17): [True: 23.0k, False: 56.0k]
  Branch (713:26): [True: 6.03k, False: 17.0k]
  Branch (713:17): [True: 0, False: 0]
  Branch (713:26): [True: 0, False: 0]
  Branch (713:17): [True: 0, False: 0]
  Branch (713:26): [True: 0, False: 0]
  Branch (713:17): [True: 0, False: 0]
  Branch (713:26): [True: 0, False: 0]
714
                i = i - m;
715
            }
716
            else {
717
                i = i - skip;
718
            }
719
        }
720
        else {
721
            /* skip: check if previous character is part of pattern */
722
            if (i > 0 && 
!159k
STRINGLIB_BLOOM159k
(mask, s[i-1])) {
  Branch (722:17): [True: 110k, False: 107k]
  Branch (722:26): [True: 75.0k, False: 35.5k]
  Branch (722:17): [True: 64, False: 2]
  Branch (722:26): [True: 6, False: 58]
  Branch (722:17): [True: 75, False: 3]
  Branch (722:26): [True: 6, False: 69]
  Branch (722:17): [True: 48.1k, False: 107k]
  Branch (722:26): [True: 12.6k, False: 35.4k]
  Branch (722:17): [True: 0, False: 0]
  Branch (722:26): [True: 0, False: 0]
  Branch (722:17): [True: 119, False: 0]
  Branch (722:26): [True: 117, False: 2]
  Branch (722:17): [True: 217, False: 0]
  Branch (722:26): [True: 213, False: 4]
723
                i = i - m;
724
            }
725
        }
726
    }
727
    return -1;
728
}
bytes_methods.c:stringlib_default_rfind
Line
Count
Source
685
{
686
    /* create compressed boyer-moore delta 1 table */
687
    unsigned long mask = 0;
688
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
689
690
    /* process pattern[0] outside the loop */
691
    STRINGLIB_BLOOM_ADD(mask, p[0]);
692
    /* process pattern[:0:-1] */
693
    for (i = mlast; i > 0; 
i--674k
) {
  Branch (693:21): [True: 674k, False: 196k]
694
        STRINGLIB_BLOOM_ADD(mask, p[i]);
695
        if (p[i] == p[0]) {
  Branch (695:13): [True: 224k, False: 450k]
696
            skip = i - 1;
697
        }
698
    }
699
700
    for (i = w; i >= 0; 
i--297k
) {
  Branch (700:17): [True: 303k, False: 190k]
701
        if (s[i] == p[0]) {
  Branch (701:13): [True: 84.9k, False: 218k]
702
            /* candidate match */
703
            for (j = mlast; j > 0; 
j--41.4k
) {
  Branch (703:29): [True: 120k, False: 5.68k]
704
                if (s[i+j] != p[j]) {
  Branch (704:21): [True: 79.2k, False: 41.4k]
705
                    break;
706
                }
707
            }
708
            if (j == 0) {
  Branch (708:17): [True: 5.68k, False: 79.2k]
709
                /* got a match! */
710
                return i;
711
            }
712
            /* miss: check if previous character is part of pattern */
713
            if (i > 0 && 
!23.1k
STRINGLIB_BLOOM23.1k
(mask, s[i-1])) {
  Branch (713:17): [True: 23.1k, False: 56.1k]
  Branch (713:26): [True: 6.09k, False: 17.0k]
714
                i = i - m;
715
            }
716
            else {
717
                i = i - skip;
718
            }
719
        }
720
        else {
721
            /* skip: check if previous character is part of pattern */
722
            if (i > 0 && 
!110k
STRINGLIB_BLOOM110k
(mask, s[i-1])) {
  Branch (722:17): [True: 110k, False: 107k]
  Branch (722:26): [True: 75.0k, False: 35.5k]
723
                i = i - m;
724
            }
725
        }
726
    }
727
    return -1;
728
}
bytearrayobject.c:stringlib_default_rfind
Line
Count
Source
685
{
686
    /* create compressed boyer-moore delta 1 table */
687
    unsigned long mask = 0;
688
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
689
690
    /* process pattern[0] outside the loop */
691
    STRINGLIB_BLOOM_ADD(mask, p[0]);
692
    /* process pattern[:0:-1] */
693
    for (i = mlast; i > 0; 
i--221
) {
  Branch (693:21): [True: 221, False: 88]
694
        STRINGLIB_BLOOM_ADD(mask, p[i]);
695
        if (p[i] == p[0]) {
  Branch (695:13): [True: 37, False: 184]
696
            skip = i - 1;
697
        }
698
    }
699
700
    for (i = w; i >= 0; 
i--89
) {
  Branch (700:17): [True: 173, False: 4]
701
        if (s[i] == p[0]) {
  Branch (701:13): [True: 107, False: 66]
702
            /* candidate match */
703
            for (j = mlast; j > 0; 
j--214
) {
  Branch (703:29): [True: 237, False: 84]
704
                if (s[i+j] != p[j]) {
  Branch (704:21): [True: 23, False: 214]
705
                    break;
706
                }
707
            }
708
            if (j == 0) {
  Branch (708:17): [True: 84, False: 23]
709
                /* got a match! */
710
                return i;
711
            }
712
            /* miss: check if previous character is part of pattern */
713
            if (i > 0 && 
!22
STRINGLIB_BLOOM22
(mask, s[i-1])) {
  Branch (713:17): [True: 22, False: 1]
  Branch (713:26): [True: 0, False: 22]
714
                i = i - m;
715
            }
716
            else {
717
                i = i - skip;
718
            }
719
        }
720
        else {
721
            /* skip: check if previous character is part of pattern */
722
            if (i > 0 && 
!64
STRINGLIB_BLOOM64
(mask, s[i-1])) {
  Branch (722:17): [True: 64, False: 2]
  Branch (722:26): [True: 6, False: 58]
723
                i = i - m;
724
            }
725
        }
726
    }
727
    return -1;
728
}
bytesobject.c:stringlib_default_rfind
Line
Count
Source
685
{
686
    /* create compressed boyer-moore delta 1 table */
687
    unsigned long mask = 0;
688
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
689
690
    /* process pattern[0] outside the loop */
691
    STRINGLIB_BLOOM_ADD(mask, p[0]);
692
    /* process pattern[:0:-1] */
693
    for (i = mlast; i > 0; 
i--236
) {
  Branch (693:21): [True: 236, False: 95]
694
        STRINGLIB_BLOOM_ADD(mask, p[i]);
695
        if (p[i] == p[0]) {
  Branch (695:13): [True: 37, False: 199]
696
            skip = i - 1;
697
        }
698
    }
699
700
    for (i = w; i >= 0; 
i--104
) {
  Branch (700:17): [True: 194, False: 5]
701
        if (s[i] == p[0]) {
  Branch (701:13): [True: 116, False: 78]
702
            /* candidate match */
703
            for (j = mlast; j > 0; 
j--227
) {
  Branch (703:29): [True: 253, False: 90]
704
                if (s[i+j] != p[j]) {
  Branch (704:21): [True: 26, False: 227]
705
                    break;
706
                }
707
            }
708
            if (j == 0) {
  Branch (708:17): [True: 90, False: 26]
709
                /* got a match! */
710
                return i;
711
            }
712
            /* miss: check if previous character is part of pattern */
713
            if (i > 0 && 
!25
STRINGLIB_BLOOM25
(mask, s[i-1])) {
  Branch (713:17): [True: 25, False: 1]
  Branch (713:26): [True: 2, False: 23]
714
                i = i - m;
715
            }
716
            else {
717
                i = i - skip;
718
            }
719
        }
720
        else {
721
            /* skip: check if previous character is part of pattern */
722
            if (i > 0 && 
!75
STRINGLIB_BLOOM75
(mask, s[i-1])) {
  Branch (722:17): [True: 75, False: 3]
  Branch (722:26): [True: 6, False: 69]
723
                i = i - m;
724
            }
725
        }
726
    }
727
    return -1;
728
}
unicodeobject.c:asciilib_default_rfind
Line
Count
Source
685
{
686
    /* create compressed boyer-moore delta 1 table */
687
    unsigned long mask = 0;
688
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
689
690
    /* process pattern[0] outside the loop */
691
    STRINGLIB_BLOOM_ADD(mask, p[0]);
692
    /* process pattern[:0:-1] */
693
    for (i = mlast; i > 0; 
i--675k
) {
  Branch (693:21): [True: 675k, False: 196k]
694
        STRINGLIB_BLOOM_ADD(mask, p[i]);
695
        if (p[i] == p[0]) {
  Branch (695:13): [True: 224k, False: 450k]
696
            skip = i - 1;
697
        }
698
    }
699
700
    for (i = w; i >= 0; 
i--234k
) {
  Branch (700:17): [True: 240k, False: 190k]
701
        if (s[i] == p[0]) {
  Branch (701:13): [True: 85.0k, False: 155k]
702
            /* candidate match */
703
            for (j = mlast; j > 0; 
j--41.9k
) {
  Branch (703:29): [True: 121k, False: 5.85k]
704
                if (s[i+j] != p[j]) {
  Branch (704:21): [True: 79.1k, False: 41.9k]
705
                    break;
706
                }
707
            }
708
            if (j == 0) {
  Branch (708:17): [True: 5.85k, False: 79.1k]
709
                /* got a match! */
710
                return i;
711
            }
712
            /* miss: check if previous character is part of pattern */
713
            if (i > 0 && 
!23.0k
STRINGLIB_BLOOM23.0k
(mask, s[i-1])) {
  Branch (713:17): [True: 23.0k, False: 56.0k]
  Branch (713:26): [True: 6.03k, False: 17.0k]
714
                i = i - m;
715
            }
716
            else {
717
                i = i - skip;
718
            }
719
        }
720
        else {
721
            /* skip: check if previous character is part of pattern */
722
            if (i > 0 && 
!48.1k
STRINGLIB_BLOOM48.1k
(mask, s[i-1])) {
  Branch (722:17): [True: 48.1k, False: 107k]
  Branch (722:26): [True: 12.6k, False: 35.4k]
723
                i = i - m;
724
            }
725
        }
726
    }
727
    return -1;
728
}
Unexecuted instantiation: unicodeobject.c:ucs1lib_default_rfind
unicodeobject.c:ucs2lib_default_rfind
Line
Count
Source
685
{
686
    /* create compressed boyer-moore delta 1 table */
687
    unsigned long mask = 0;
688
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
689
690
    /* process pattern[0] outside the loop */
691
    STRINGLIB_BLOOM_ADD(mask, p[0]);
692
    /* process pattern[:0:-1] */
693
    for (i = mlast; i > 0; 
i--15
) {
  Branch (693:21): [True: 15, False: 15]
694
        STRINGLIB_BLOOM_ADD(mask, p[i]);
695
        if (p[i] == p[0]) {
  Branch (695:13): [True: 13, False: 2]
696
            skip = i - 1;
697
        }
698
    }
699
700
    for (i = w; i >= 0; 
i--119
) {
  Branch (700:17): [True: 127, False: 7]
701
        if (s[i] == p[0]) {
  Branch (701:13): [True: 8, False: 119]
702
            /* candidate match */
703
            for (j = mlast; j > 0; 
j--8
) {
  Branch (703:29): [True: 8, False: 8]
704
                if (s[i+j] != p[j]) {
  Branch (704:21): [True: 0, False: 8]
705
                    break;
706
                }
707
            }
708
            if (j == 0) {
  Branch (708:17): [True: 8, False: 0]
709
                /* got a match! */
710
                return i;
711
            }
712
            /* miss: check if previous character is part of pattern */
713
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
  Branch (713:17): [True: 0, False: 0]
  Branch (713:26): [True: 0, False: 0]
714
                i = i - m;
715
            }
716
            else {
717
                i = i - skip;
718
            }
719
        }
720
        else {
721
            /* skip: check if previous character is part of pattern */
722
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
  Branch (722:17): [True: 119, False: 0]
  Branch (722:26): [True: 117, False: 2]
723
                i = i - m;
724
            }
725
        }
726
    }
727
    return -1;
728
}
unicodeobject.c:ucs4lib_default_rfind
Line
Count
Source
685
{
686
    /* create compressed boyer-moore delta 1 table */
687
    unsigned long mask = 0;
688
    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
689
690
    /* process pattern[0] outside the loop */
691
    STRINGLIB_BLOOM_ADD(mask, p[0]);
692
    /* process pattern[:0:-1] */
693
    for (i = mlast; i > 0; 
i--25
) {
  Branch (693:21): [True: 25, False: 25]
694
        STRINGLIB_BLOOM_ADD(mask, p[i]);
695
        if (p[i] == p[0]) {
  Branch (695:13): [True: 21, False: 4]
696
            skip = i - 1;
697
        }
698
    }
699
700
    for (i = w; i >= 0; 
i--217
) {
  Branch (700:17): [True: 231, False: 11]
701
        if (s[i] == p[0]) {
  Branch (701:13): [True: 14, False: 217]
702
            /* candidate match */
703
            for (j = mlast; j > 0; 
j--14
) {
  Branch (703:29): [True: 14, False: 14]
704
                if (s[i+j] != p[j]) {
  Branch (704:21): [True: 0, False: 14]
705
                    break;
706
                }
707
            }
708
            if (j == 0) {
  Branch (708:17): [True: 14, False: 0]
709
                /* got a match! */
710
                return i;
711
            }
712
            /* miss: check if previous character is part of pattern */
713
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
  Branch (713:17): [True: 0, False: 0]
  Branch (713:26): [True: 0, False: 0]
714
                i = i - m;
715
            }
716
            else {
717
                i = i - skip;
718
            }
719
        }
720
        else {
721
            /* skip: check if previous character is part of pattern */
722
            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
  Branch (722:17): [True: 217, False: 0]
  Branch (722:26): [True: 213, False: 4]
723
                i = i - m;
724
            }
725
        }
726
    }
727
    return -1;
728
}
729
730
731
static inline Py_ssize_t
732
STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
733
                      const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
734
{
735
    Py_ssize_t i, count = 0;
736
    for (i = 0; i < n; 
i++23.6M
) {
  Branch (736:17): [True: 5.77M, False: 11.9k]
  Branch (736:17): [True: 0, False: 0]
  Branch (736:17): [True: 0, False: 0]
  Branch (736:17): [True: 12.8M, False: 390k]
  Branch (736:17): [True: 4.93M, False: 103k]
  Branch (736:17): [True: 110k, False: 11.7k]
  Branch (736:17): [True: 3.02k, False: 790]
737
        if (s[i] == p0) {
  Branch (737:13): [True: 4.49M, False: 1.28M]
  Branch (737:13): [True: 0, False: 0]
  Branch (737:13): [True: 0, False: 0]
  Branch (737:13): [True: 464k, False: 12.3M]
  Branch (737:13): [True: 164k, False: 4.76M]
  Branch (737:13): [True: 2.43k, False: 108k]
  Branch (737:13): [True: 22, False: 3.00k]
738
            count++;
739
            if (count == maxcount) {
  Branch (739:17): [True: 0, False: 4.49M]
  Branch (739:17): [True: 0, False: 0]
  Branch (739:17): [True: 0, False: 0]
  Branch (739:17): [True: 20, False: 464k]
  Branch (739:17): [True: 0, False: 164k]
  Branch (739:17): [True: 0, False: 2.43k]
  Branch (739:17): [True: 0, False: 22]
740
                return maxcount;
741
            }
742
        }
743
    }
744
    return count;
745
}
bytes_methods.c:stringlib_count_char
Line
Count
Source
734
{
735
    Py_ssize_t i, count = 0;
736
    for (i = 0; i < n; 
i++5.77M
) {
  Branch (736:17): [True: 5.77M, False: 11.9k]
737
        if (s[i] == p0) {
  Branch (737:13): [True: 4.49M, False: 1.28M]
738
            count++;
739
            if (count == maxcount) {
  Branch (739:17): [True: 0, False: 4.49M]
740
                return maxcount;
741
            }
742
        }
743
    }
744
    return count;
745
}
Unexecuted instantiation: bytearrayobject.c:stringlib_count_char
Unexecuted instantiation: bytesobject.c:stringlib_count_char
unicodeobject.c:asciilib_count_char
Line
Count
Source
734
{
735
    Py_ssize_t i, count = 0;
736
    for (i = 0; i < n; 
i++12.8M
) {
  Branch (736:17): [True: 12.8M, False: 390k]
737
        if (s[i] == p0) {
  Branch (737:13): [True: 464k, False: 12.3M]
738
            count++;
739
            if (count == maxcount) {
  Branch (739:17): [True: 20, False: 464k]
740
                return maxcount;
741
            }
742
        }
743
    }
744
    return count;
745
}
unicodeobject.c:ucs1lib_count_char
Line
Count
Source
734
{
735
    Py_ssize_t i, count = 0;
736
    for (i = 0; i < n; 
i++4.93M
) {
  Branch (736:17): [True: 4.93M, False: 103k]
737
        if (s[i] == p0) {
  Branch (737:13): [True: 164k, False: 4.76M]
738
            count++;
739
            if (count == maxcount) {
  Branch (739:17): [True: 0, False: 164k]
740
                return maxcount;
741
            }
742
        }
743
    }
744
    return count;
745
}
unicodeobject.c:ucs2lib_count_char
Line
Count
Source
734
{
735
    Py_ssize_t i, count = 0;
736
    for (i = 0; i < n; 
i++110k
) {
  Branch (736:17): [True: 110k, False: 11.7k]
737
        if (s[i] == p0) {
  Branch (737:13): [True: 2.43k, False: 108k]
738
            count++;
739
            if (count == maxcount) {
  Branch (739:17): [True: 0, False: 2.43k]
740
                return maxcount;
741
            }
742
        }
743
    }
744
    return count;
745
}
unicodeobject.c:ucs4lib_count_char
Line
Count
Source
734
{
735
    Py_ssize_t i, count = 0;
736
    for (i = 0; i < n; 
i++3.02k
) {
  Branch (736:17): [True: 3.02k, False: 790]
737
        if (s[i] == p0) {
  Branch (737:13): [True: 22, False: 3.00k]
738
            count++;
739
            if (count == maxcount) {
  Branch (739:17): [True: 0, False: 22]
740
                return maxcount;
741
            }
742
        }
743
    }
744
    return count;
745
}
746
747
748
Py_LOCAL_INLINE(Py_ssize_t)
749
FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
750
           const STRINGLIB_CHAR* p, Py_ssize_t m,
751
           Py_ssize_t maxcount, int mode)
752
{
753
    if (n < m || 
(5.49M
mode == 5.49M
FAST_COUNT5.49M
&&
maxcount == 01.17M
)) {
  Branch (753:9): [True: 175k, False: 986k]
  Branch (753:19): [True: 97.2k, False: 889k]
  Branch (753:41): [True: 0, False: 97.2k]
  Branch (753:9): [True: 24, False: 46.6k]
  Branch (753:19): [True: 42.7k, False: 3.98k]
  Branch (753:41): [True: 0, False: 42.7k]
  Branch (753:9): [True: 126, False: 49.9k]
  Branch (753:19): [True: 43.8k, False: 6.09k]
  Branch (753:41): [True: 0, False: 43.8k]
  Branch (753:9): [True: 5.81k, False: 2.78M]
  Branch (753:19): [True: 769k, False: 2.01M]
  Branch (753:41): [True: 0, False: 769k]
  Branch (753:9): [True: 2, False: 1.61M]
  Branch (753:19): [True: 205k, False: 1.40M]
  Branch (753:41): [True: 0, False: 205k]
  Branch (753:9): [True: 0, False: 14.7k]
  Branch (753:19): [True: 14.2k, False: 565]
  Branch (753:41): [True: 0, False: 14.2k]
  Branch (753:9): [True: 0, False: 933]
  Branch (753:19): [True: 825, False: 108]
  Branch (753:41): [True: 0, False: 825]
754
        return -1;
755
    }
756
757
    /* look for special cases */
758
    if (m <= 1) {
  Branch (758:9): [True: 104k, False: 882k]
  Branch (758:9): [True: 9, False: 46.6k]
  Branch (758:9): [True: 1.19k, False: 48.7k]
  Branch (758:9): [True: 1.05M, False: 1.73M]
  Branch (758:9): [True: 106k, False: 1.50M]
  Branch (758:9): [True: 12.1k, False: 2.65k]
  Branch (758:9): [True: 808, False: 125]
759
        if (m <= 0) {
  Branch (759:13): [True: 0, False: 104k]
  Branch (759:13): [True: 0, False: 9]
  Branch (759:13): [True: 0, False: 1.19k]
  Branch (759:13): [True: 0, False: 1.05M]
  Branch (759:13): [True: 0, False: 106k]
  Branch (759:13): [True: 0, False: 12.1k]
  Branch (759:13): [True: 0, False: 808]
760
            return -1;
761
        }
762
        /* use special case for 1-character strings */
763
        if (mode == FAST_SEARCH)
  Branch (763:13): [True: 92.3k, False: 11.9k]
  Branch (763:13): [True: 4, False: 5]
  Branch (763:13): [True: 878, False: 316]
  Branch (763:13): [True: 525k, False: 531k]
  Branch (763:13): [True: 2.24k, False: 104k]
  Branch (763:13): [True: 305, False: 11.8k]
  Branch (763:13): [True: 10, False: 798]
764
            return STRINGLIB(find_char)(s, n, p[0]);
765
        else if (mode == FAST_RSEARCH)
  Branch (765:18): [True: 0, False: 11.9k]
  Branch (765:18): [True: 5, False: 0]
  Branch (765:18): [True: 316, False: 0]
  Branch (765:18): [True: 140k, False: 390k]
  Branch (765:18): [True: 490, False: 103k]
  Branch (765:18): [True: 65, False: 11.7k]
  Branch (765:18): [True: 8, False: 790]
766
            return STRINGLIB(rfind_char)(s, n, p[0]);
767
        else {
768
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
769
        }
770
    }
771
772
    if (mode != FAST_RSEARCH) {
  Branch (772:9): [True: 685k, False: 196k]
  Branch (772:9): [True: 46.5k, False: 88]
  Branch (772:9): [True: 48.6k, False: 95]
  Branch (772:9): [True: 1.53M, False: 196k]
  Branch (772:9): [True: 1.50M, False: 0]
  Branch (772:9): [True: 2.64k, False: 15]
  Branch (772:9): [True: 100, False: 25]
773
        if (n < 2500 || 
(17.5k
m < 10017.5k
&&
n < 3000017.2k
) ||
m < 6679
) {
  Branch (773:13): [True: 678k, False: 6.92k]
  Branch (773:26): [True: 6.77k, False: 157]
  Branch (773:37): [True: 6.47k, False: 298]
  Branch (773:51): [True: 8, False: 447]
  Branch (773:13): [True: 46.5k, False: 0]
  Branch (773:26): [True: 0, False: 0]
  Branch (773:37): [True: 0, False: 0]
  Branch (773:51): [True: 0, False: 0]
  Branch (773:13): [True: 48.6k, False: 3]
  Branch (773:26): [True: 3, False: 0]
  Branch (773:37): [True: 1, False: 2]
  Branch (773:51): [True: 2, False: 0]
  Branch (773:13): [True: 1.53M, False: 950]
  Branch (773:26): [True: 784, False: 166]
  Branch (773:37): [True: 779, False: 5]
  Branch (773:51): [True: 5, False: 166]
  Branch (773:13): [True: 1.49M, False: 9.64k]
  Branch (773:26): [True: 9.64k, False: 0]
  Branch (773:37): [True: 9.64k, False: 2]
  Branch (773:51): [True: 2, False: 0]
  Branch (773:13): [True: 2.58k, False: 60]
  Branch (773:26): [True: 60, False: 0]
  Branch (773:37): [True: 11, False: 49]
  Branch (773:51): [True: 0, False: 49]
  Branch (773:13): [True: 100, False: 0]
  Branch (773:26): [True: 0, False: 0]
  Branch (773:37): [True: 0, False: 0]
  Branch (773:51): [True: 0, False: 0]
774
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
775
        }
776
        else if ((m >> 2) * 3 < (n >> 2)) {
  Branch (776:18): [True: 447, False: 0]
  Branch (776:18): [True: 0, False: 0]
  Branch (776:18): [True: 0, False: 0]
  Branch (776:18): [True: 166, False: 0]
  Branch (776:18): [True: 0, False: 0]
  Branch (776:18): [True: 49, False: 0]
  Branch (776:18): [True: 0, False: 0]
777
            /* 33% threshold, but don't overflow. */
778
            /* For larger problems where the needle isn't a huge
779
               percentage of the size of the haystack, the relatively
780
               expensive O(m) startup cost of the two-way algorithm
781
               will surely pay off. */
782
            if (mode == FAST_SEARCH) {
  Branch (782:17): [True: 447, False: 0]
  Branch (782:17): [True: 0, False: 0]
  Branch (782:17): [True: 0, False: 0]
  Branch (782:17): [True: 166, False: 0]
  Branch (782:17): [True: 0, False: 0]
  Branch (782:17): [True: 46, False: 3]
  Branch (782:17): [True: 0, False: 0]
783
                return STRINGLIB(_two_way_find)(s, n, p, m);
784
            }
785
            else {
786
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
787
            }
788
        }
789
        else {
790
            /* To ensure that we have good worst-case behavior,
791
               here's an adaptive version of the algorithm, where if
792
               we match O(m) characters without any matches of the
793
               entire needle, then we predict that the startup cost of
794
               the two-way algorithm will probably be worth it. */
795
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
796
        }
797
    }
798
    else {
799
        /* FAST_RSEARCH */
800
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
801
    }
802
}
bytes_methods.c:fastsearch
Line
Count
Source
752
{
753
    if (n < m || 
(986k
mode == 986k
FAST_COUNT986k
&&
maxcount == 097.2k
)) {
  Branch (753:9): [True: 175k, False: 986k]
  Branch (753:19): [True: 97.2k, False: 889k]
  Branch (753:41): [True: 0, False: 97.2k]
754
        return -1;
755
    }
756
757
    /* look for special cases */
758
    if (m <= 1) {
  Branch (758:9): [True: 104k, False: 882k]
759
        if (m <= 0) {
  Branch (759:13): [True: 0, False: 104k]
760
            return -1;
761
        }
762
        /* use special case for 1-character strings */
763
        if (mode == FAST_SEARCH)
  Branch (763:13): [True: 92.3k, False: 11.9k]
764
            return STRINGLIB(find_char)(s, n, p[0]);
765
        else if (mode == FAST_RSEARCH)
  Branch (765:18): [True: 0, False: 11.9k]
766
            return STRINGLIB(rfind_char)(s, n, p[0]);
767
        else {
768
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
769
        }
770
    }
771
772
    if (mode != FAST_RSEARCH) {
  Branch (772:9): [True: 685k, False: 196k]
773
        if (n < 2500 || 
(6.92k
m < 1006.92k
&&
n < 300006.77k
) ||
m < 6455
) {
  Branch (773:13): [True: 678k, False: 6.92k]
  Branch (773:26): [True: 6.77k, False: 157]
  Branch (773:37): [True: 6.47k, False: 298]
  Branch (773:51): [True: 8, False: 447]
774
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
775
        }
776
        else if ((m >> 2) * 3 < (n >> 2)) {
  Branch (776:18): [True: 447, False: 0]
777
            /* 33% threshold, but don't overflow. */
778
            /* For larger problems where the needle isn't a huge
779
               percentage of the size of the haystack, the relatively
780
               expensive O(m) startup cost of the two-way algorithm
781
               will surely pay off. */
782
            if (mode == FAST_SEARCH) {
  Branch (782:17): [True: 447, False: 0]
783
                return STRINGLIB(_two_way_find)(s, n, p, m);
784
            }
785
            else {
786
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
787
            }
788
        }
789
        else {
790
            /* To ensure that we have good worst-case behavior,
791
               here's an adaptive version of the algorithm, where if
792
               we match O(m) characters without any matches of the
793
               entire needle, then we predict that the startup cost of
794
               the two-way algorithm will probably be worth it. */
795
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
796
        }
797
    }
798
    else {
799
        /* FAST_RSEARCH */
800
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
801
    }
802
}
bytearrayobject.c:fastsearch
Line
Count
Source
752
{
753
    if (n < m || 
(46.6k
mode == 46.6k
FAST_COUNT46.6k
&&
maxcount == 042.7k
)) {
  Branch (753:9): [True: 24, False: 46.6k]
  Branch (753:19): [True: 42.7k, False: 3.98k]
  Branch (753:41): [True: 0, False: 42.7k]
754
        return -1;
755
    }
756
757
    /* look for special cases */
758
    if (m <= 1) {
  Branch (758:9): [True: 9, False: 46.6k]
759
        if (m <= 0) {
  Branch (759:13): [True: 0, False: 9]
760
            return -1;
761
        }
762
        /* use special case for 1-character strings */
763
        if (mode == FAST_SEARCH)
  Branch (763:13): [True: 4, False: 5]
764
            return STRINGLIB(find_char)(s, n, p[0]);
765
        else if (mode == FAST_RSEARCH)
  Branch (765:18): [True: 5, False: 0]
766
            return STRINGLIB(rfind_char)(s, n, p[0]);
767
        else {
768
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
769
        }
770
    }
771
772
    if (mode != FAST_RSEARCH) {
  Branch (772:9): [True: 46.5k, False: 88]
773
        if (n < 2500 || 
(0
m < 1000
&&
n < 300000
) ||
m < 60
) {
  Branch (773:13): [True: 46.5k, False: 0]
  Branch (773:26): [True: 0, False: 0]
  Branch (773:37): [True: 0, False: 0]
  Branch (773:51): [True: 0, False: 0]
774
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
775
        }
776
        else if ((m >> 2) * 3 < (n >> 2)) {
  Branch (776:18): [True: 0, False: 0]
777
            /* 33% threshold, but don't overflow. */
778
            /* For larger problems where the needle isn't a huge
779
               percentage of the size of the haystack, the relatively
780
               expensive O(m) startup cost of the two-way algorithm
781
               will surely pay off. */
782
            if (mode == FAST_SEARCH) {
  Branch (782:17): [True: 0, False: 0]
783
                return STRINGLIB(_two_way_find)(s, n, p, m);
784
            }
785
            else {
786
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
787
            }
788
        }
789
        else {
790
            /* To ensure that we have good worst-case behavior,
791
               here's an adaptive version of the algorithm, where if
792
               we match O(m) characters without any matches of the
793
               entire needle, then we predict that the startup cost of
794
               the two-way algorithm will probably be worth it. */
795
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
796
        }
797
    }
798
    else {
799
        /* FAST_RSEARCH */
800
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
801
    }
802
}
bytesobject.c:fastsearch
Line
Count
Source
752
{
753
    if (n < m || 
(49.9k
mode == 49.9k
FAST_COUNT49.9k
&&
maxcount == 043.8k
)) {
  Branch (753:9): [True: 126, False: 49.9k]
  Branch (753:19): [True: 43.8k, False: 6.09k]
  Branch (753:41): [True: 0, False: 43.8k]
754
        return -1;
755
    }
756
757
    /* look for special cases */
758
    if (m <= 1) {
  Branch (758:9): [True: 1.19k, False: 48.7k]
759
        if (m <= 0) {
  Branch (759:13): [True: 0, False: 1.19k]
760
            return -1;
761
        }
762
        /* use special case for 1-character strings */
763
        if (mode == FAST_SEARCH)
  Branch (763:13): [True: 878, False: 316]
764
            return STRINGLIB(find_char)(s, n, p[0]);
765
        else if (mode == FAST_RSEARCH)
  Branch (765:18): [True: 316, False: 0]
766
            return STRINGLIB(rfind_char)(s, n, p[0]);
767
        else {
768
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
769
        }
770
    }
771
772
    if (mode != FAST_RSEARCH) {
  Branch (772:9): [True: 48.6k, False: 95]
773
        if (n < 2500 || 
(3
m < 1003
&&
n < 300003
) ||
m < 62
) {
  Branch (773:13): [True: 48.6k, False: 3]
  Branch (773:26): [True: 3, False: 0]
  Branch (773:37): [True: 1, False: 2]
  Branch (773:51): [True: 2, False: 0]
774
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
775
        }
776
        else if ((m >> 2) * 3 < (n >> 2)) {
  Branch (776:18): [True: 0, False: 0]
777
            /* 33% threshold, but don't overflow. */
778
            /* For larger problems where the needle isn't a huge
779
               percentage of the size of the haystack, the relatively
780
               expensive O(m) startup cost of the two-way algorithm
781
               will surely pay off. */
782
            if (mode == FAST_SEARCH) {
  Branch (782:17): [True: 0, False: 0]
783
                return STRINGLIB(_two_way_find)(s, n, p, m);
784
            }
785
            else {
786
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
787
            }
788
        }
789
        else {
790
            /* To ensure that we have good worst-case behavior,
791
               here's an adaptive version of the algorithm, where if
792
               we match O(m) characters without any matches of the
793
               entire needle, then we predict that the startup cost of
794
               the two-way algorithm will probably be worth it. */
795
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
796
        }
797
    }
798
    else {
799
        /* FAST_RSEARCH */
800
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
801
    }
802
}
unicodeobject.c:asciilib_fastsearch
Line
Count
Source
752
{
753
    if (n < m || 
(2.78M
mode == 2.78M
FAST_COUNT2.78M
&&
maxcount == 0769k
)) {
  Branch (753:9): [True: 5.81k, False: 2.78M]
  Branch (753:19): [True: 769k, False: 2.01M]
  Branch (753:41): [True: 0, False: 769k]
754
        return -1;
755
    }
756
757
    /* look for special cases */
758
    if (m <= 1) {
  Branch (758:9): [True: 1.05M, False: 1.73M]
759
        if (m <= 0) {
  Branch (759:13): [True: 0, False: 1.05M]
760
            return -1;
761
        }
762
        /* use special case for 1-character strings */
763
        if (mode == FAST_SEARCH)
  Branch (763:13): [True: 525k, False: 531k]
764
            return STRINGLIB(find_char)(s, n, p[0]);
765
        else if (mode == FAST_RSEARCH)
  Branch (765:18): [True: 140k, False: 390k]
766
            return STRINGLIB(rfind_char)(s, n, p[0]);
767
        else {
768
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
769
        }
770
    }
771
772
    if (mode != FAST_RSEARCH) {
  Branch (772:9): [True: 1.53M, False: 196k]
773
        if (n < 2500 || 
(950
m < 100950
&&
n < 30000784
) ||
m < 6171
) {
  Branch (773:13): [True: 1.53M, False: 950]
  Branch (773:26): [True: 784, False: 166]
  Branch (773:37): [True: 779, False: 5]
  Branch (773:51): [True: 5, False: 166]
774
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
775
        }
776
        else if ((m >> 2) * 3 < (n >> 2)) {
  Branch (776:18): [True: 166, False: 0]
777
            /* 33% threshold, but don't overflow. */
778
            /* For larger problems where the needle isn't a huge
779
               percentage of the size of the haystack, the relatively
780
               expensive O(m) startup cost of the two-way algorithm
781
               will surely pay off. */
782
            if (mode == FAST_SEARCH) {
  Branch (782:17): [True: 166, False: 0]
783
                return STRINGLIB(_two_way_find)(s, n, p, m);
784
            }
785
            else {
786
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
787
            }
788
        }
789
        else {
790
            /* To ensure that we have good worst-case behavior,
791
               here's an adaptive version of the algorithm, where if
792
               we match O(m) characters without any matches of the
793
               entire needle, then we predict that the startup cost of
794
               the two-way algorithm will probably be worth it. */
795
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
796
        }
797
    }
798
    else {
799
        /* FAST_RSEARCH */
800
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
801
    }
802
}
unicodeobject.c:ucs1lib_fastsearch
Line
Count
Source
752
{
753
    if (n < m || 
(1.61M
mode == 1.61M
FAST_COUNT1.61M
&&
maxcount == 0205k
)) {
  Branch (753:9): [True: 2, False: 1.61M]
  Branch (753:19): [True: 205k, False: 1.40M]
  Branch (753:41): [True: 0, False: 205k]
754
        return -1;
755
    }
756
757
    /* look for special cases */
758
    if (m <= 1) {
  Branch (758:9): [True: 106k, False: 1.50M]
759
        if (m <= 0) {
  Branch (759:13): [True: 0, False: 106k]
760
            return -1;
761
        }
762
        /* use special case for 1-character strings */
763
        if (mode == FAST_SEARCH)
  Branch (763:13): [True: 2.24k, False: 104k]
764
            return STRINGLIB(find_char)(s, n, p[0]);
765
        else if (mode == FAST_RSEARCH)
  Branch (765:18): [True: 490, False: 103k]
766
            return STRINGLIB(rfind_char)(s, n, p[0]);
767
        else {
768
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
769
        }
770
    }
771
772
    if (mode != FAST_RSEARCH) {
  Branch (772:9): [True: 1.50M, False: 0]
773
        if (n < 2500 || 
(9.64k
m < 1009.64k
&&
n < 300009.64k
) ||
m < 62
) {
  Branch (773:13): [True: 1.49M, False: 9.64k]
  Branch (773:26): [True: 9.64k, False: 0]
  Branch (773:37): [True: 9.64k, False: 2]
  Branch (773:51): [True: 2, False: 0]
774
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
775
        }
776
        else if ((m >> 2) * 3 < (n >> 2)) {
  Branch (776:18): [True: 0, False: 0]
777
            /* 33% threshold, but don't overflow. */
778
            /* For larger problems where the needle isn't a huge
779
               percentage of the size of the haystack, the relatively
780
               expensive O(m) startup cost of the two-way algorithm
781
               will surely pay off. */
782
            if (mode == FAST_SEARCH) {
  Branch (782:17): [True: 0, False: 0]
783
                return STRINGLIB(_two_way_find)(s, n, p, m);
784
            }
785
            else {
786
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
787
            }
788
        }
789
        else {
790
            /* To ensure that we have good worst-case behavior,
791
               here's an adaptive version of the algorithm, where if
792
               we match O(m) characters without any matches of the
793
               entire needle, then we predict that the startup cost of
794
               the two-way algorithm will probably be worth it. */
795
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
796
        }
797
    }
798
    else {
799
        /* FAST_RSEARCH */
800
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
801
    }
802
}
unicodeobject.c:ucs2lib_fastsearch
Line
Count
Source
752
{
753
    if (n < m || (mode == FAST_COUNT && 
maxcount == 014.2k
)) {
  Branch (753:9): [True: 0, False: 14.7k]
  Branch (753:19): [True: 14.2k, False: 565]
  Branch (753:41): [True: 0, False: 14.2k]
754
        return -1;
755
    }
756
757
    /* look for special cases */
758
    if (m <= 1) {
  Branch (758:9): [True: 12.1k, False: 2.65k]
759
        if (m <= 0) {
  Branch (759:13): [True: 0, False: 12.1k]
760
            return -1;
761
        }
762
        /* use special case for 1-character strings */
763
        if (mode == FAST_SEARCH)
  Branch (763:13): [True: 305, False: 11.8k]
764
            return STRINGLIB(find_char)(s, n, p[0]);
765
        else if (mode == FAST_RSEARCH)
  Branch (765:18): [True: 65, False: 11.7k]
766
            return STRINGLIB(rfind_char)(s, n, p[0]);
767
        else {
768
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
769
        }
770
    }
771
772
    if (mode != FAST_RSEARCH) {
  Branch (772:9): [True: 2.64k, False: 15]
773
        if (n < 2500 || 
(60
m < 10060
&&
n < 3000060
) ||
m < 649
) {
  Branch (773:13): [True: 2.58k, False: 60]
  Branch (773:26): [True: 60, False: 0]
  Branch (773:37): [True: 11, False: 49]
  Branch (773:51): [True: 0, False: 49]
774
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
775
        }
776
        else if ((m >> 2) * 3 < (n >> 2)) {
  Branch (776:18): [True: 49, False: 0]
777
            /* 33% threshold, but don't overflow. */
778
            /* For larger problems where the needle isn't a huge
779
               percentage of the size of the haystack, the relatively
780
               expensive O(m) startup cost of the two-way algorithm
781
               will surely pay off. */
782
            if (mode == FAST_SEARCH) {
  Branch (782:17): [True: 46, False: 3]
783
                return STRINGLIB(_two_way_find)(s, n, p, m);
784
            }
785
            else {
786
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
787
            }
788
        }
789
        else {
790
            /* To ensure that we have good worst-case behavior,
791
               here's an adaptive version of the algorithm, where if
792
               we match O(m) characters without any matches of the
793
               entire needle, then we predict that the startup cost of
794
               the two-way algorithm will probably be worth it. */
795
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
796
        }
797
    }
798
    else {
799
        /* FAST_RSEARCH */
800
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
801
    }
802
}
unicodeobject.c:ucs4lib_fastsearch
Line
Count
Source
752
{
753
    if (n < m || (mode == FAST_COUNT && 
maxcount == 0825
)) {
  Branch (753:9): [True: 0, False: 933]
  Branch (753:19): [True: 825, False: 108]
  Branch (753:41): [True: 0, False: 825]
754
        return -1;
755
    }
756
757
    /* look for special cases */
758
    if (m <= 1) {
  Branch (758:9): [True: 808, False: 125]
759
        if (m <= 0) {
  Branch (759:13): [True: 0, False: 808]
760
            return -1;
761
        }
762
        /* use special case for 1-character strings */
763
        if (mode == FAST_SEARCH)
  Branch (763:13): [True: 10, False: 798]
764
            return STRINGLIB(find_char)(s, n, p[0]);
765
        else if (mode == FAST_RSEARCH)
  Branch (765:18): [True: 8, False: 790]
766
            return STRINGLIB(rfind_char)(s, n, p[0]);
767
        else {
768
            return STRINGLIB(count_char)(s, n, p[0], maxcount);
769
        }
770
    }
771
772
    if (mode != FAST_RSEARCH) {
  Branch (772:9): [True: 100, False: 25]
773
        if (n < 2500 || 
(0
m < 1000
&&
n < 300000
) ||
m < 60
) {
  Branch (773:13): [True: 100, False: 0]
  Branch (773:26): [True: 0, False: 0]
  Branch (773:37): [True: 0, False: 0]
  Branch (773:51): [True: 0, False: 0]
774
            return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
775
        }
776
        else if ((m >> 2) * 3 < (n >> 2)) {
  Branch (776:18): [True: 0, False: 0]
777
            /* 33% threshold, but don't overflow. */
778
            /* For larger problems where the needle isn't a huge
779
               percentage of the size of the haystack, the relatively
780
               expensive O(m) startup cost of the two-way algorithm
781
               will surely pay off. */
782
            if (mode == FAST_SEARCH) {
  Branch (782:17): [True: 0, False: 0]
783
                return STRINGLIB(_two_way_find)(s, n, p, m);
784
            }
785
            else {
786
                return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
787
            }
788
        }
789
        else {
790
            /* To ensure that we have good worst-case behavior,
791
               here's an adaptive version of the algorithm, where if
792
               we match O(m) characters without any matches of the
793
               entire needle, then we predict that the startup cost of
794
               the two-way algorithm will probably be worth it. */
795
            return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
796
        }
797
    }
798
    else {
799
        /* FAST_RSEARCH */
800
        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
801
    }
802
}
803