/home/mdboom/Work/builds/cpython/Include/internal/pycore_bitutils.h
Line | Count | Source (jump to first uncovered line) |
1 | /* Bit and bytes utilities. |
2 | |
3 | Bytes swap functions, reverse order of bytes: |
4 | |
5 | - _Py_bswap16(uint16_t) |
6 | - _Py_bswap32(uint32_t) |
7 | - _Py_bswap64(uint64_t) |
8 | */ |
9 | |
10 | #ifndef Py_INTERNAL_BITUTILS_H |
11 | #define Py_INTERNAL_BITUTILS_H |
12 | #ifdef __cplusplus |
13 | extern "C" { |
14 | #endif |
15 | |
16 | #ifndef Py_BUILD_CORE |
17 | # error "this header requires Py_BUILD_CORE define" |
18 | #endif |
19 | |
20 | #if defined(__GNUC__) \ |
21 | && ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 8)) |
22 | /* __builtin_bswap16() is available since GCC 4.8, |
23 | __builtin_bswap32() is available since GCC 4.3, |
24 | __builtin_bswap64() is available since GCC 4.3. */ |
25 | # define _PY_HAVE_BUILTIN_BSWAP |
26 | #endif |
27 | |
28 | #ifdef _MSC_VER |
29 | /* Get _byteswap_ushort(), _byteswap_ulong(), _byteswap_uint64() */ |
30 | # include <intrin.h> |
31 | #endif |
32 | |
33 | static inline uint16_t |
34 | _Py_bswap16(uint16_t word) |
35 | { |
36 | #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap16) |
37 | return __builtin_bswap16(word); |
38 | #elif defined(_MSC_VER) |
39 | Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned short)); |
40 | return _byteswap_ushort(word); |
41 | #else |
42 | // Portable implementation which doesn't rely on circular bit shift |
43 | return ( ((word & UINT16_C(0x00FF)) << 8) |
44 | | ((word & UINT16_C(0xFF00)) >> 8)); |
45 | #endif |
46 | } Unexecuted instantiation: longobject.c:_Py_bswap16 Unexecuted instantiation: dictobject.c:_Py_bswap16 Unexecuted instantiation: unicodeobject.c:_Py_bswap16 Unexecuted instantiation: hamt.c:_Py_bswap16 |
47 | |
48 | static inline uint32_t |
49 | _Py_bswap32(uint32_t word) |
50 | { |
51 | #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap32) |
52 | return __builtin_bswap32(word); |
53 | #elif defined(_MSC_VER) |
54 | Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned long)); |
55 | return _byteswap_ulong(word); |
56 | #else |
57 | // Portable implementation which doesn't rely on circular bit shift |
58 | return ( ((word & UINT32_C(0x000000FF)) << 24) |
59 | | ((word & UINT32_C(0x0000FF00)) << 8) |
60 | | ((word & UINT32_C(0x00FF0000)) >> 8) |
61 | | ((word & UINT32_C(0xFF000000)) >> 24)); |
62 | #endif |
63 | } Unexecuted instantiation: longobject.c:_Py_bswap32 Unexecuted instantiation: dictobject.c:_Py_bswap32 unicodeobject.c:_Py_bswap32 Line | Count | Source | 50 | { | 51 | #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap32) | 52 | return __builtin_bswap32(word); | 53 | #elif defined(_MSC_VER) | 54 | Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned long)); | 55 | return _byteswap_ulong(word); | 56 | #else | 57 | // Portable implementation which doesn't rely on circular bit shift | 58 | return ( ((word & UINT32_C(0x000000FF)) << 24) | 59 | | ((word & UINT32_C(0x0000FF00)) << 8) | 60 | | ((word & UINT32_C(0x00FF0000)) >> 8) | 61 | | ((word & UINT32_C(0xFF000000)) >> 24)); | 62 | #endif | 63 | } |
Unexecuted instantiation: hamt.c:_Py_bswap32 |
64 | |
65 | static inline uint64_t |
66 | _Py_bswap64(uint64_t word) |
67 | { |
68 | #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap64) |
69 | return __builtin_bswap64(word); |
70 | #elif defined(_MSC_VER) |
71 | return _byteswap_uint64(word); |
72 | #else |
73 | // Portable implementation which doesn't rely on circular bit shift |
74 | return ( ((word & UINT64_C(0x00000000000000FF)) << 56) |
75 | | ((word & UINT64_C(0x000000000000FF00)) << 40) |
76 | | ((word & UINT64_C(0x0000000000FF0000)) << 24) |
77 | | ((word & UINT64_C(0x00000000FF000000)) << 8) |
78 | | ((word & UINT64_C(0x000000FF00000000)) >> 8) |
79 | | ((word & UINT64_C(0x0000FF0000000000)) >> 24) |
80 | | ((word & UINT64_C(0x00FF000000000000)) >> 40) |
81 | | ((word & UINT64_C(0xFF00000000000000)) >> 56)); |
82 | #endif |
83 | } Unexecuted instantiation: longobject.c:_Py_bswap64 Unexecuted instantiation: dictobject.c:_Py_bswap64 Unexecuted instantiation: unicodeobject.c:_Py_bswap64 Unexecuted instantiation: hamt.c:_Py_bswap64 |
84 | |
85 | |
86 | // Population count: count the number of 1's in 'x' |
87 | // (number of bits set to 1), also known as the hamming weight. |
88 | // |
89 | // Implementation note. CPUID is not used, to test if x86 POPCNT instruction |
90 | // can be used, to keep the implementation simple. For example, Visual Studio |
91 | // __popcnt() is not used this reason. The clang and GCC builtin function can |
92 | // use the x86 POPCNT instruction if the target architecture has SSE4a or |
93 | // newer. |
94 | static inline int |
95 | _Py_popcount32(uint32_t x) |
96 | { |
97 | #if (defined(__clang__) || defined(__GNUC__)) |
98 | |
99 | #if SIZEOF_INT >= 4 |
100 | Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int)); |
101 | return __builtin_popcount(x); |
102 | #else |
103 | // The C standard guarantees that unsigned long will always be big enough |
104 | // to hold a uint32_t value without losing information. |
105 | Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long)); |
106 | return __builtin_popcountl(x); |
107 | #endif |
108 | |
109 | #else |
110 | // 32-bit SWAR (SIMD Within A Register) popcount |
111 | |
112 | // Binary: 0 1 0 1 ... |
113 | const uint32_t M1 = 0x55555555; |
114 | // Binary: 00 11 00 11. .. |
115 | const uint32_t M2 = 0x33333333; |
116 | // Binary: 0000 1111 0000 1111 ... |
117 | const uint32_t M4 = 0x0F0F0F0F; |
118 | |
119 | // Put count of each 2 bits into those 2 bits |
120 | x = x - ((x >> 1) & M1); |
121 | // Put count of each 4 bits into those 4 bits |
122 | x = (x & M2) + ((x >> 2) & M2); |
123 | // Put count of each 8 bits into those 8 bits |
124 | x = (x + (x >> 4)) & M4; |
125 | // Sum of the 4 byte counts. |
126 | // Take care when considering changes to the next line. Portability and |
127 | // correctness are delicate here, thanks to C's "integer promotions" (C99 |
128 | // §6.3.1.1p2). On machines where the `int` type has width greater than 32 |
129 | // bits, `x` will be promoted to an `int`, and following C's "usual |
130 | // arithmetic conversions" (C99 §6.3.1.8), the multiplication will be |
131 | // performed as a multiplication of two `unsigned int` operands. In this |
132 | // case it's critical that we cast back to `uint32_t` in order to keep only |
133 | // the least significant 32 bits. On machines where the `int` type has |
134 | // width no greater than 32, the multiplication is of two 32-bit unsigned |
135 | // integer types, and the (uint32_t) cast is a no-op. In both cases, we |
136 | // avoid the risk of undefined behaviour due to overflow of a |
137 | // multiplication of signed integer types. |
138 | return (uint32_t)(x * 0x01010101U) >> 24; |
139 | #endif |
140 | } longobject.c:_Py_popcount32 Line | Count | Source | 96 | { | 97 | #if (defined(__clang__) || defined(__GNUC__)) | 98 | | 99 | #if SIZEOF_INT >= 4 | 100 | Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int)); | 101 | return __builtin_popcount(x); | 102 | #else | 103 | // The C standard guarantees that unsigned long will always be big enough | 104 | // to hold a uint32_t value without losing information. | 105 | Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long)); | 106 | return __builtin_popcountl(x); | 107 | #endif | 108 | | 109 | #else | 110 | // 32-bit SWAR (SIMD Within A Register) popcount | 111 | | 112 | // Binary: 0 1 0 1 ... | 113 | const uint32_t M1 = 0x55555555; | 114 | // Binary: 00 11 00 11. .. | 115 | const uint32_t M2 = 0x33333333; | 116 | // Binary: 0000 1111 0000 1111 ... | 117 | const uint32_t M4 = 0x0F0F0F0F; | 118 | | 119 | // Put count of each 2 bits into those 2 bits | 120 | x = x - ((x >> 1) & M1); | 121 | // Put count of each 4 bits into those 4 bits | 122 | x = (x & M2) + ((x >> 2) & M2); | 123 | // Put count of each 8 bits into those 8 bits | 124 | x = (x + (x >> 4)) & M4; | 125 | // Sum of the 4 byte counts. | 126 | // Take care when considering changes to the next line. Portability and | 127 | // correctness are delicate here, thanks to C's "integer promotions" (C99 | 128 | // §6.3.1.1p2). On machines where the `int` type has width greater than 32 | 129 | // bits, `x` will be promoted to an `int`, and following C's "usual | 130 | // arithmetic conversions" (C99 §6.3.1.8), the multiplication will be | 131 | // performed as a multiplication of two `unsigned int` operands. In this | 132 | // case it's critical that we cast back to `uint32_t` in order to keep only | 133 | // the least significant 32 bits. On machines where the `int` type has | 134 | // width no greater than 32, the multiplication is of two 32-bit unsigned | 135 | // integer types, and the (uint32_t) cast is a no-op. In both cases, we | 136 | // avoid the risk of undefined behaviour due to overflow of a | 137 | // multiplication of signed integer types. | 138 | return (uint32_t)(x * 0x01010101U) >> 24; | 139 | #endif | 140 | } |
Unexecuted instantiation: dictobject.c:_Py_popcount32 Unexecuted instantiation: unicodeobject.c:_Py_popcount32 Line | Count | Source | 96 | { | 97 | #if (defined(__clang__) || defined(__GNUC__)) | 98 | | 99 | #if SIZEOF_INT >= 4 | 100 | Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int)); | 101 | return __builtin_popcount(x); | 102 | #else | 103 | // The C standard guarantees that unsigned long will always be big enough | 104 | // to hold a uint32_t value without losing information. | 105 | Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long)); | 106 | return __builtin_popcountl(x); | 107 | #endif | 108 | | 109 | #else | 110 | // 32-bit SWAR (SIMD Within A Register) popcount | 111 | | 112 | // Binary: 0 1 0 1 ... | 113 | const uint32_t M1 = 0x55555555; | 114 | // Binary: 00 11 00 11. .. | 115 | const uint32_t M2 = 0x33333333; | 116 | // Binary: 0000 1111 0000 1111 ... | 117 | const uint32_t M4 = 0x0F0F0F0F; | 118 | | 119 | // Put count of each 2 bits into those 2 bits | 120 | x = x - ((x >> 1) & M1); | 121 | // Put count of each 4 bits into those 4 bits | 122 | x = (x & M2) + ((x >> 2) & M2); | 123 | // Put count of each 8 bits into those 8 bits | 124 | x = (x + (x >> 4)) & M4; | 125 | // Sum of the 4 byte counts. | 126 | // Take care when considering changes to the next line. Portability and | 127 | // correctness are delicate here, thanks to C's "integer promotions" (C99 | 128 | // §6.3.1.1p2). On machines where the `int` type has width greater than 32 | 129 | // bits, `x` will be promoted to an `int`, and following C's "usual | 130 | // arithmetic conversions" (C99 §6.3.1.8), the multiplication will be | 131 | // performed as a multiplication of two `unsigned int` operands. In this | 132 | // case it's critical that we cast back to `uint32_t` in order to keep only | 133 | // the least significant 32 bits. On machines where the `int` type has | 134 | // width no greater than 32, the multiplication is of two 32-bit unsigned | 135 | // integer types, and the (uint32_t) cast is a no-op. In both cases, we | 136 | // avoid the risk of undefined behaviour due to overflow of a | 137 | // multiplication of signed integer types. | 138 | return (uint32_t)(x * 0x01010101U) >> 24; | 139 | #endif | 140 | } |
|
141 | |
142 | |
143 | // Return the index of the most significant 1 bit in 'x'. This is the smallest |
144 | // integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0. |
145 | static inline int |
146 | _Py_bit_length(unsigned long x) |
147 | { |
148 | #if (defined(__clang__) || defined(__GNUC__)) |
149 | if (x != 0) { Branch (149:9): [True: 10.5M, False: 0]
Branch (149:9): [True: 2.34M, False: 0]
|
150 | // __builtin_clzl() is available since GCC 3.4. |
151 | // Undefined behavior for x == 0. |
152 | return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x); |
153 | } |
154 | else { |
155 | return 0; |
156 | } |
157 | #elif defined(_MSC_VER) |
158 | // _BitScanReverse() is documented to search 32 bits. |
159 | Py_BUILD_ASSERT(sizeof(unsigned long) <= 4); |
160 | unsigned long msb; |
161 | if (_BitScanReverse(&msb, x)) { |
162 | return (int)msb + 1; |
163 | } |
164 | else { |
165 | return 0; |
166 | } |
167 | #else |
168 | const int BIT_LENGTH_TABLE[32] = { |
169 | 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, |
170 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 |
171 | }; |
172 | int msb = 0; |
173 | while (x >= 32) { |
174 | msb += 6; |
175 | x >>= 6; |
176 | } |
177 | msb += BIT_LENGTH_TABLE[x]; |
178 | return msb; |
179 | #endif |
180 | } longobject.c:_Py_bit_length Line | Count | Source | 147 | { | 148 | #if (defined(__clang__) || defined(__GNUC__)) | 149 | if (x != 0) { Branch (149:9): [True: 10.5M, False: 0]
| 150 | // __builtin_clzl() is available since GCC 3.4. | 151 | // Undefined behavior for x == 0. | 152 | return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x); | 153 | } | 154 | else { | 155 | return 0; | 156 | } | 157 | #elif defined(_MSC_VER) | 158 | // _BitScanReverse() is documented to search 32 bits. | 159 | Py_BUILD_ASSERT(sizeof(unsigned long) <= 4); | 160 | unsigned long msb; | 161 | if (_BitScanReverse(&msb, x)) { | 162 | return (int)msb + 1; | 163 | } | 164 | else { | 165 | return 0; | 166 | } | 167 | #else | 168 | const int BIT_LENGTH_TABLE[32] = { | 169 | 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, | 170 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 | 171 | }; | 172 | int msb = 0; | 173 | while (x >= 32) { | 174 | msb += 6; | 175 | x >>= 6; | 176 | } | 177 | msb += BIT_LENGTH_TABLE[x]; | 178 | return msb; | 179 | #endif | 180 | } |
dictobject.c:_Py_bit_length Line | Count | Source | 147 | { | 148 | #if (defined(__clang__) || defined(__GNUC__)) | 149 | if (x != 0) { Branch (149:9): [True: 2.34M, False: 0]
| 150 | // __builtin_clzl() is available since GCC 3.4. | 151 | // Undefined behavior for x == 0. | 152 | return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x); | 153 | } | 154 | else { | 155 | return 0; | 156 | } | 157 | #elif defined(_MSC_VER) | 158 | // _BitScanReverse() is documented to search 32 bits. | 159 | Py_BUILD_ASSERT(sizeof(unsigned long) <= 4); | 160 | unsigned long msb; | 161 | if (_BitScanReverse(&msb, x)) { | 162 | return (int)msb + 1; | 163 | } | 164 | else { | 165 | return 0; | 166 | } | 167 | #else | 168 | const int BIT_LENGTH_TABLE[32] = { | 169 | 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, | 170 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 | 171 | }; | 172 | int msb = 0; | 173 | while (x >= 32) { | 174 | msb += 6; | 175 | x >>= 6; | 176 | } | 177 | msb += BIT_LENGTH_TABLE[x]; | 178 | return msb; | 179 | #endif | 180 | } |
Unexecuted instantiation: unicodeobject.c:_Py_bit_length Unexecuted instantiation: hamt.c:_Py_bit_length |
181 | |
182 | |
183 | #ifdef __cplusplus |
184 | } |
185 | #endif |
186 | #endif /* !Py_INTERNAL_BITUTILS_H */ |