Line data Source code
1 : /* Finding the optimal width of unicode characters in a buffer */ 2 : 3 : #if !STRINGLIB_IS_UNICODE 4 : # error "find_max_char.h is specific to Unicode" 5 : #endif 6 : 7 : /* Mask to quickly check whether a C 'size_t' contains a 8 : non-ASCII, UTF8-encoded char. */ 9 : #if (SIZEOF_SIZE_T == 8) 10 : # define UCS1_ASCII_CHAR_MASK 0x8080808080808080ULL 11 : #elif (SIZEOF_SIZE_T == 4) 12 : # define UCS1_ASCII_CHAR_MASK 0x80808080U 13 : #else 14 : # error C 'size_t' size should be either 4 or 8! 15 : #endif 16 : 17 : #if STRINGLIB_SIZEOF_CHAR == 1 18 : 19 : Py_LOCAL_INLINE(Py_UCS4) 20 78306100 : STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) 21 : { 22 78306100 : const unsigned char *p = (const unsigned char *) begin; 23 : 24 466650000 : while (p < end) { 25 394584000 : if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) { 26 : /* Help register allocation */ 27 65001700 : const unsigned char *_p = p; 28 220062000 : while (_p + SIZEOF_SIZE_T <= end) { 29 155096000 : size_t value = *(const size_t *) _p; 30 155096000 : if (value & UCS1_ASCII_CHAR_MASK) 31 34795 : return 255; 32 155061000 : _p += SIZEOF_SIZE_T; 33 : } 34 64966900 : p = _p; 35 64966900 : if (p == end) 36 6178700 : break; 37 : } 38 388371000 : if (*p++ & 0x80) 39 27068 : return 255; 40 : } 41 78244200 : return 127; 42 : } 43 : 44 : #undef ASCII_CHAR_MASK 45 : 46 : #else /* STRINGLIB_SIZEOF_CHAR == 1 */ 47 : 48 : #define MASK_ASCII 0xFFFFFF80 49 : #define MASK_UCS1 0xFFFFFF00 50 : #define MASK_UCS2 0xFFFF0000 51 : 52 : #define MAX_CHAR_ASCII 0x7f 53 : #define MAX_CHAR_UCS1 0xff 54 : #define MAX_CHAR_UCS2 0xffff 55 : #define MAX_CHAR_UCS4 0x10ffff 56 : 57 : Py_LOCAL_INLINE(Py_UCS4) 58 585231 : STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) 59 : { 60 : #if STRINGLIB_SIZEOF_CHAR == 2 61 287186 : const Py_UCS4 mask_limit = MASK_UCS1; 62 287186 : const Py_UCS4 max_char_limit = MAX_CHAR_UCS2; 63 : #elif STRINGLIB_SIZEOF_CHAR == 4 64 298045 : const Py_UCS4 mask_limit = MASK_UCS2; 65 298045 : const Py_UCS4 max_char_limit = MAX_CHAR_UCS4; 66 : #else 67 : #error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4) 68 : #endif 69 : Py_UCS4 mask; 70 585231 : Py_ssize_t n = end - begin; 71 585231 : const STRINGLIB_CHAR *p = begin; 72 585231 : const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4); 73 : Py_UCS4 max_char; 74 : 75 585231 : max_char = MAX_CHAR_ASCII; 76 585231 : mask = MASK_ASCII; 77 24275200 : while (p < unrolled_end) { 78 23806840 : STRINGLIB_CHAR bits = p[0] | p[1] | p[2] | p[3]; 79 23806840 : if (bits & mask) { 80 292011 : if (mask == mask_limit) { 81 : /* Limit reached */ 82 116846 : return max_char_limit; 83 : } 84 175165 : if (mask == MASK_ASCII) { 85 140704 : max_char = MAX_CHAR_UCS1; 86 140704 : mask = MASK_UCS1; 87 : } 88 : else { 89 : /* mask can't be MASK_UCS2 because of mask_limit above */ 90 34461 : assert(mask == MASK_UCS1); 91 34461 : max_char = MAX_CHAR_UCS2; 92 34461 : mask = MASK_UCS2; 93 : } 94 : /* We check the new mask on the same chars in the next iteration */ 95 175165 : continue; 96 : } 97 23514810 : p += 4; 98 : } 99 1594723 : while (p < end) { 100 1154326 : if (p[0] & mask) { 101 314358 : if (mask == mask_limit) { 102 : /* Limit reached */ 103 27984 : return max_char_limit; 104 : } 105 286374 : if (mask == MASK_ASCII) { 106 156582 : max_char = MAX_CHAR_UCS1; 107 156582 : mask = MASK_UCS1; 108 : } 109 : else { 110 : /* mask can't be MASK_UCS2 because of mask_limit above */ 111 129792 : assert(mask == MASK_UCS1); 112 129792 : max_char = MAX_CHAR_UCS2; 113 129792 : mask = MASK_UCS2; 114 : } 115 : /* We check the new mask on the same chars in the next iteration */ 116 286374 : continue; 117 : } 118 839968 : p++; 119 : } 120 440401 : return max_char; 121 : } 122 : 123 : #undef MASK_ASCII 124 : #undef MASK_UCS1 125 : #undef MASK_UCS2 126 : #undef MAX_CHAR_ASCII 127 : #undef MAX_CHAR_UCS1 128 : #undef MAX_CHAR_UCS2 129 : #undef MAX_CHAR_UCS4 130 : 131 : #endif /* STRINGLIB_SIZEOF_CHAR == 1 */ 132 :