/home/mdboom/Work/builds/cpython/Objects/stringlib/find_max_char.h
Line | Count | Source |
1 | /* Finding the optimal width of unicode characters in a buffer */ |
2 | |
3 | #if !STRINGLIB_IS_UNICODE |
4 | # error "find_max_char.h is specific to Unicode" |
5 | #endif |
6 | |
7 | /* Mask to quickly check whether a C 'size_t' word holds at least one |
8 | byte with its high bit set, i.e. a non-ASCII one-byte char (> 0x7f). */ |
9 | #if (SIZEOF_SIZE_T == 8) |
10 | # define UCS1_ASCII_CHAR_MASK 0x8080808080808080ULL |
11 | #elif (SIZEOF_SIZE_T == 4) |
12 | # define UCS1_ASCII_CHAR_MASK 0x80808080U |
13 | #else |
14 | # error C 'size_t' size should be either 4 or 8! |
15 | #endif |
16 | |
17 | #if STRINGLIB_SIZEOF_CHAR == 1 |
18 | |
19 | Py_LOCAL_INLINE(Py_UCS4) |
20 | STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) |
21 | { |
22 | const unsigned char *p = (const unsigned char *) begin; |
23 | |
24 | while (p < end) { Branch (24:12): [True: 7.50M, False: 1.09M]
|
25 | if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) { |
26 | /* Help register allocation */ |
27 | const unsigned char *_p = p; |
28 | while (_p + SIZEOF_SIZE_T <= end) { Branch (28:20): [True: 3.49M, False: 1.03M]
|
29 | size_t value = *(const size_t *) _p; |
30 | if (value & UCS1_ASCII_CHAR_MASK) Branch (30:21): [True: 16.7k, False: 3.48M]
|
31 | return 255; |
32 | _p += SIZEOF_SIZE_T; |
33 | } |
34 | p = _p; |
35 | if (p == end) Branch (35:17): [True: 92.2k, False: 940k]
|
36 | break; |
37 | } |
38 | if (*p++ & 0x80) Branch (38:13): [True: 12.6k, False: 7.38M]
|
39 | return 255; |
40 | } |
41 | return 127; |
42 | } unicodeobject.c:ucs1lib_find_max_char Line | Count | Source | 21 | { | 22 | const unsigned char *p = (const unsigned char *) begin; | 23 | | 24 | while (p < end) { Branch (24:12): [True: 7.50M, False: 1.09M]
| 25 | if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) { | 26 | /* Help register allocation */ | 27 | const unsigned char *_p = p; | 28 | while (_p + SIZEOF_SIZE_T <= end) { Branch (28:20): [True: 3.49M, False: 1.03M]
| 29 | size_t value = *(const size_t *) _p; | 30 | if (value & UCS1_ASCII_CHAR_MASK) Branch (30:21): [True: 16.7k, False: 3.48M]
| 31 | return 255; | 32 | _p += SIZEOF_SIZE_T; | 33 | } | 34 | p = _p; | 35 | if (p == end) Branch (35:17): [True: 92.2k, False: 940k]
| 36 | break; | 37 | } | 38 | if (*p++ & 0x80) Branch (38:13): [True: 12.6k, False: 7.38M]
| 39 | return 255; | 40 | } | 41 | return 127; | 42 | } |
Unexecuted instantiation: unicodeobject.c:asciilib_find_max_char |
43 | |
/* Release the word mask used by the one-byte scanner.  The macro this file
   defines is UCS1_ASCII_CHAR_MASK, so that is the name to undefine. */
#undef UCS1_ASCII_CHAR_MASK
45 | |
46 | #else /* STRINGLIB_SIZEOF_CHAR == 1 */ |
47 | |
48 | #define MASK_ASCII 0xFFFFFF80 /* any bit set: char is above 0x7f */ |
49 | #define MASK_UCS1 0xFFFFFF00 /* any bit set: char is above 0xff */ |
50 | #define MASK_UCS2 0xFFFF0000 /* any bit set: char is above 0xffff */ |
51 | |
52 | #define MAX_CHAR_ASCII 0x7f /* result while no char exceeds 0x7f */ |
53 | #define MAX_CHAR_UCS1 0xff /* result once a char exceeds 0x7f */ |
54 | #define MAX_CHAR_UCS2 0xffff /* result once a char exceeds 0xff */ |
55 | #define MAX_CHAR_UCS4 0x10ffff /* result once a char exceeds 0xffff */ |
56 | |
57 | Py_LOCAL_INLINE(Py_UCS4) |
58 | STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) |
59 | { |
60 | #if STRINGLIB_SIZEOF_CHAR == 2 |
61 | const Py_UCS4 mask_limit = MASK_UCS1; |
62 | const Py_UCS4 max_char_limit = MAX_CHAR_UCS2; |
63 | #elif STRINGLIB_SIZEOF_CHAR == 4 |
64 | const Py_UCS4 mask_limit = MASK_UCS2; |
65 | const Py_UCS4 max_char_limit = MAX_CHAR_UCS4; |
66 | #else |
67 | #error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4) |
68 | #endif |
69 | Py_UCS4 mask; |
70 | Py_ssize_t n = end - begin; |
71 | const STRINGLIB_CHAR *p = begin; |
72 | const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4); |
73 | Py_UCS4 max_char; |
74 | |
75 | max_char = MAX_CHAR_ASCII; |
76 | mask = MASK_ASCII; |
77 | while (p < unrolled_end) { Branch (77:12): [True: 1.25M, False: 135k]
Branch (77:12): [True: 21.0M, False: 86.8k]
|
78 | STRINGLIB_CHAR bits = p[0] | p[1] | p[2] | p[3]; |
79 | if (bits & mask) { Branch (79:13): [True: 143k, False: 1.11M]
Branch (79:13): [True: 5.15k, False: 21.0M]
|
80 | if (mask == mask_limit) { Branch (80:17): [True: 71.6k, False: 71.7k]
Branch (80:17): [True: 569, False: 4.58k]
|
81 | /* Limit reached */ |
82 | return max_char_limit; |
83 | } |
84 | if (mask == MASK_ASCII) { Branch (84:17): [True: 71.7k, False: 0]
Branch (84:17): [True: 2.36k, False: 2.22k]
|
85 | max_char = MAX_CHAR_UCS1; |
86 | mask = MASK_UCS1; |
87 | } |
88 | else { |
89 | /* mask can't be MASK_UCS2 because of mask_limit above */ |
90 | assert(mask == MASK_UCS1); |
91 | max_char = MAX_CHAR_UCS2; |
92 | mask = MASK_UCS2; |
93 | } |
94 | /* We check the new mask on the same chars in the next iteration */ |
95 | continue; |
96 | } |
97 | p += 4; |
98 | } |
99 | while (222k p < end) { Branch (99:12): [True: 210k, False: 109k]
Branch (99:12): [True: 131k, False: 86.6k]
|
100 | if (p[0] & mask) { Branch (100:13): [True: 52.4k, False: 157k]
Branch (100:13): [True: 662, False: 131k]
|
101 | if (mask == mask_limit) { Branch (101:17): [True: 26.2k, False: 26.2k]
Branch (101:17): [True: 170, False: 492]
|
102 | /* Limit reached */ |
103 | return max_char_limit; |
104 | } |
105 | if (mask == MASK_ASCII) { Branch (105:17): [True: 26.2k, False: 0]
Branch (105:17): [True: 236, False: 256]
|
106 | max_char = MAX_CHAR_UCS1; |
107 | mask = MASK_UCS1; |
108 | } |
109 | else { |
110 | /* mask can't be MASK_UCS2 because of mask_limit above */ |
111 | assert(mask == MASK_UCS1); |
112 | max_char = MAX_CHAR_UCS2; |
113 | mask = MASK_UCS2; |
114 | } |
115 | /* We check the new mask on the same chars in the next iteration */ |
116 | continue; |
117 | } |
118 | p++; |
119 | } |
120 | return max_char; |
121 | } unicodeobject.c:ucs2lib_find_max_char Line | Count | Source | 59 | { | 60 | #if STRINGLIB_SIZEOF_CHAR == 2 | 61 | const Py_UCS4 mask_limit = MASK_UCS1; | 62 | const Py_UCS4 max_char_limit = MAX_CHAR_UCS2; | 63 | #elif STRINGLIB_SIZEOF_CHAR == 4 | 64 | const Py_UCS4 mask_limit = MASK_UCS2; | 65 | const Py_UCS4 max_char_limit = MAX_CHAR_UCS4; | 66 | #else | 67 | #error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4) | 68 | #endif | 69 | Py_UCS4 mask; | 70 | Py_ssize_t n = end - begin; | 71 | const STRINGLIB_CHAR *p = begin; | 72 | const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4); | 73 | Py_UCS4 max_char; | 74 | | 75 | max_char = MAX_CHAR_ASCII; | 76 | mask = MASK_ASCII; | 77 | while (p < unrolled_end) { Branch (77:12): [True: 1.25M, False: 135k]
| 78 | STRINGLIB_CHAR bits = p[0] | p[1] | p[2] | p[3]; | 79 | if (bits & mask) { Branch (79:13): [True: 143k, False: 1.11M]
| 80 | if (mask == mask_limit) { Branch (80:17): [True: 71.6k, False: 71.7k]
| 81 | /* Limit reached */ | 82 | return max_char_limit; | 83 | } | 84 | if (mask == MASK_ASCII) { Branch (84:17): [True: 71.7k, False: 0]
| 85 | max_char = MAX_CHAR_UCS1; | 86 | mask = MASK_UCS1; | 87 | } | 88 | else { | 89 | /* mask can't be MASK_UCS2 because of mask_limit above */ | 90 | assert(mask == MASK_UCS1); | 91 | max_char = MAX_CHAR_UCS2; | 92 | mask = MASK_UCS2; | 93 | } | 94 | /* We check the new mask on the same chars in the next iteration */ | 95 | continue; | 96 | } | 97 | p += 4; | 98 | } | 99 | while (135k p < end) { Branch (99:12): [True: 210k, False: 109k]
| 100 | if (p[0] & mask) { Branch (100:13): [True: 52.4k, False: 157k]
| 101 | if (mask == mask_limit) { Branch (101:17): [True: 26.2k, False: 26.2k]
| 102 | /* Limit reached */ | 103 | return max_char_limit; | 104 | } | 105 | if (mask == MASK_ASCII) { Branch (105:17): [True: 26.2k, False: 0]
| 106 | max_char = MAX_CHAR_UCS1; | 107 | mask = MASK_UCS1; | 108 | } | 109 | else { | 110 | /* mask can't be MASK_UCS2 because of mask_limit above */ | 111 | assert(mask == MASK_UCS1); | 112 | max_char = MAX_CHAR_UCS2; | 113 | mask = MASK_UCS2; | 114 | } | 115 | /* We check the new mask on the same chars in the next iteration */ | 116 | continue; | 117 | } | 118 | p++; | 119 | } | 120 | return max_char; | 121 | } |
unicodeobject.c:ucs4lib_find_max_char Line | Count | Source | 59 | { | 60 | #if STRINGLIB_SIZEOF_CHAR == 2 | 61 | const Py_UCS4 mask_limit = MASK_UCS1; | 62 | const Py_UCS4 max_char_limit = MAX_CHAR_UCS2; | 63 | #elif STRINGLIB_SIZEOF_CHAR == 4 | 64 | const Py_UCS4 mask_limit = MASK_UCS2; | 65 | const Py_UCS4 max_char_limit = MAX_CHAR_UCS4; | 66 | #else | 67 | #error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4) | 68 | #endif | 69 | Py_UCS4 mask; | 70 | Py_ssize_t n = end - begin; | 71 | const STRINGLIB_CHAR *p = begin; | 72 | const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4); | 73 | Py_UCS4 max_char; | 74 | | 75 | max_char = MAX_CHAR_ASCII; | 76 | mask = MASK_ASCII; | 77 | while (p < unrolled_end) { Branch (77:12): [True: 21.0M, False: 86.8k]
| 78 | STRINGLIB_CHAR bits = p[0] | p[1] | p[2] | p[3]; | 79 | if (bits & mask) { Branch (79:13): [True: 5.15k, False: 21.0M]
| 80 | if (mask == mask_limit) { Branch (80:17): [True: 569, False: 4.58k]
| 81 | /* Limit reached */ | 82 | return max_char_limit; | 83 | } | 84 | if (mask == MASK_ASCII) { Branch (84:17): [True: 2.36k, False: 2.22k]
| 85 | max_char = MAX_CHAR_UCS1; | 86 | mask = MASK_UCS1; | 87 | } | 88 | else { | 89 | /* mask can't be MASK_UCS2 because of mask_limit above */ | 90 | assert(mask == MASK_UCS1); | 91 | max_char = MAX_CHAR_UCS2; | 92 | mask = MASK_UCS2; | 93 | } | 94 | /* We check the new mask on the same chars in the next iteration */ | 95 | continue; | 96 | } | 97 | p += 4; | 98 | } | 99 | while (86.8k p < end) { Branch (99:12): [True: 131k, False: 86.6k]
| 100 | if (p[0] & mask) { Branch (100:13): [True: 662, False: 131k]
| 101 | if (mask == mask_limit) { Branch (101:17): [True: 170, False: 492]
| 102 | /* Limit reached */ | 103 | return max_char_limit; | 104 | } | 105 | if (mask == MASK_ASCII) { Branch (105:17): [True: 236, False: 256]
| 106 | max_char = MAX_CHAR_UCS1; | 107 | mask = MASK_UCS1; | 108 | } | 109 | else { | 110 | /* mask can't be MASK_UCS2 because of mask_limit above */ | 111 | assert(mask == MASK_UCS1); | 112 | max_char = MAX_CHAR_UCS2; | 113 | mask = MASK_UCS2; | 114 | } | 115 | /* We check the new mask on the same chars in the next iteration */ | 116 | continue; | 117 | } | 118 | p++; | 119 | } | 120 | return max_char; | 121 | } |
|
122 | |
123 | #undef MASK_ASCII |
124 | #undef MASK_UCS1 |
125 | #undef MASK_UCS2 |
126 | #undef MAX_CHAR_ASCII |
127 | #undef MAX_CHAR_UCS1 |
128 | #undef MAX_CHAR_UCS2 |
129 | #undef MAX_CHAR_UCS4 |
130 | |
131 | #endif /* STRINGLIB_SIZEOF_CHAR == 1 */ |
132 | |