LCOV - code coverage report
Current view: top level - Objects/stringlib - find_max_char.h (source / functions) Hit Total Coverage
Test: CPython lcov report Lines: 52 52 100.0 %
Date: 2022-07-07 18:19:46 Functions: 3 3 100.0 %

          Line data    Source code
       1             : /* Finding the optimal width of unicode characters in a buffer */
       2             : 
       3             : #if !STRINGLIB_IS_UNICODE
       4             : # error "find_max_char.h is specific to Unicode"
       5             : #endif
       6             : 
       7             : /* Mask to quickly check whether a C 'size_t' word contains a
       8             :    non-ASCII byte, i.e. any byte with its high bit (0x80) set. */
       9             : #if (SIZEOF_SIZE_T == 8)
      10             : # define UCS1_ASCII_CHAR_MASK 0x8080808080808080ULL
      11             : #elif (SIZEOF_SIZE_T == 4)
      12             : # define UCS1_ASCII_CHAR_MASK 0x80808080U
      13             : #else
      14             : # error C 'size_t' size should be either 4 or 8!
      15             : #endif
      16             : 
      17             : #if STRINGLIB_SIZEOF_CHAR == 1
      18             : /* 1-byte variant: return 255 as soon as a byte in [begin, end) has bit 0x80 set, else 127. */
      19             : Py_LOCAL_INLINE(Py_UCS4)
      20    78306100 : STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
      21             : {
      22    78306100 :     const unsigned char *p = (const unsigned char *) begin;
      23             : 
      24   466650000 :     while (p < end) {
      25   394584000 :         if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {  /* presumably true when p is aligned for size_t reads -- TODO confirm macro */
      26             :             /* Help register allocation */
      27    65001700 :             const unsigned char *_p = p;
      28   220062000 :             while (_p + SIZEOF_SIZE_T <= end) {  /* fast path: a whole word still fits before end */
      29   155096000 :                 size_t value = *(const size_t *) _p;
      30   155096000 :                 if (value & UCS1_ASCII_CHAR_MASK)  /* some byte of the word has its high bit set */
      31       34795 :                     return 255;
      32   155061000 :                 _p += SIZEOF_SIZE_T;
      33             :             }
      34    64966900 :             p = _p;  /* continue byte by byte from where the word loop stopped */
      35    64966900 :             if (p == end)  /* the word loop consumed the whole buffer */
      36     6178700 :                 break;
      37             :         }
      38   388371000 :         if (*p++ & 0x80)  /* byte-wise check for unaligned leading bytes and the tail shorter than a word */
      39       27068 :             return 255;
      40             :     }
      41    78244200 :     return 127;  /* no byte had bit 0x80 set */
      42             : }
      43             : 
      44             : #undef ASCII_CHAR_MASK  /* NOTE(review): the macro defined earlier in this file is UCS1_ASCII_CHAR_MASK; no ASCII_CHAR_MASK definition is visible here -- confirm which name is intended */
      45             : 
      46             : #else /* STRINGLIB_SIZEOF_CHAR == 1 */
      47             : 
      48             : #define MASK_ASCII 0xFFFFFF80  /* bits above the low 7: set in any value > 0x7f */
      49             : #define MASK_UCS1 0xFFFFFF00   /* bits above the low 8: set in any value > 0xff */
      50             : #define MASK_UCS2 0xFFFF0000   /* bits above the low 16: set in any value > 0xffff */
      51             : 
      52             : #define MAX_CHAR_ASCII 0x7f    /* result while no character matches MASK_ASCII */
      53             : #define MAX_CHAR_UCS1  0xff    /* result once MASK_ASCII matched but MASK_UCS1 did not */
      54             : #define MAX_CHAR_UCS2  0xffff  /* result once MASK_UCS1 matched (final tier for 2-byte chars) */
      55             : #define MAX_CHAR_UCS4  0x10ffff  /* result once MASK_UCS2 matched (final tier for 4-byte chars) */
      56             : /* 2/4-byte variant: return MAX_CHAR_ASCII, MAX_CHAR_UCS1, MAX_CHAR_UCS2 or MAX_CHAR_UCS4 according to the widest mask any character in [begin, end) matches. */
      57             : Py_LOCAL_INLINE(Py_UCS4)
      58      585231 : STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
      59             : {
      60             : #if STRINGLIB_SIZEOF_CHAR == 2
      61      287186 :     const Py_UCS4 mask_limit = MASK_UCS1;  /* last mask worth testing for 2-byte characters */
      62      287186 :     const Py_UCS4 max_char_limit = MAX_CHAR_UCS2;  /* returned when that last mask matches */
      63             : #elif STRINGLIB_SIZEOF_CHAR == 4
      64      298045 :     const Py_UCS4 mask_limit = MASK_UCS2;  /* last mask worth testing for 4-byte characters */
      65      298045 :     const Py_UCS4 max_char_limit = MAX_CHAR_UCS4;  /* returned when that last mask matches */
      66             : #else
      67             : #error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4)
      68             : #endif
      69             :     Py_UCS4 mask;
      70      585231 :     Py_ssize_t n = end - begin;
      71      585231 :     const STRINGLIB_CHAR *p = begin;
      72      585231 :     const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4);  /* presumably n rounded down to a multiple of 4 -- TODO confirm macro */
      73             :     Py_UCS4 max_char;
      74             : 
      75      585231 :     max_char = MAX_CHAR_ASCII;  /* start at the narrowest tier and widen only when a character forces it */
      76      585231 :     mask = MASK_ASCII;
      77    24275200 :     while (p < unrolled_end) {
      78    23806840 :         STRINGLIB_CHAR bits = p[0] | p[1] | p[2] | p[3];  /* OR four characters so one test covers all of them */
      79    23806840 :         if (bits & mask) {  /* at least one of the four exceeds the current tier */
      80      292011 :             if (mask == mask_limit) {
      81             :                 /* Limit reached */
      82      116846 :                 return max_char_limit;
      83             :             }
      84      175165 :             if (mask == MASK_ASCII) {
      85      140704 :                 max_char = MAX_CHAR_UCS1;
      86      140704 :                 mask = MASK_UCS1;
      87             :             }
      88             :             else {
      89             :                 /* mask can't be MASK_UCS2 because of mask_limit above */
      90       34461 :                 assert(mask == MASK_UCS1);
      91       34461 :                 max_char = MAX_CHAR_UCS2;
      92       34461 :                 mask = MASK_UCS2;
      93             :             }
      94             :             /* We check the new mask on the same chars in the next iteration */
      95      175165 :             continue;  /* p is deliberately not advanced here */
      96             :         }
      97    23514810 :         p += 4;
      98             :     }
      99     1594723 :     while (p < end) {  /* characters left over after the 4-at-a-time loop, tested one by one */
     100     1154326 :         if (p[0] & mask) {
     101      314358 :             if (mask == mask_limit) {
     102             :                 /* Limit reached */
     103       27984 :                 return max_char_limit;
     104             :             }
     105      286374 :             if (mask == MASK_ASCII) {
     106      156582 :                 max_char = MAX_CHAR_UCS1;
     107      156582 :                 mask = MASK_UCS1;
     108             :             }
     109             :             else {
     110             :                 /* mask can't be MASK_UCS2 because of mask_limit above */
     111      129792 :                 assert(mask == MASK_UCS1);
     112      129792 :                 max_char = MAX_CHAR_UCS2;
     113      129792 :                 mask = MASK_UCS2;
     114             :             }
     115             :             /* We check the new mask on the same chars in the next iteration */
     116      286374 :             continue;  /* p is deliberately not advanced here */
     117             :         }
     118      839968 :         p++;
     119             :     }
     120      440401 :     return max_char;  /* widest tier reached without hitting mask_limit */
     121             : }
     122             : 
     123             : #undef MASK_ASCII  /* remove the helper macros defined for the 2/4-byte variant in this branch */
     124             : #undef MASK_UCS1
     125             : #undef MASK_UCS2
     126             : #undef MAX_CHAR_ASCII
     127             : #undef MAX_CHAR_UCS1
     128             : #undef MAX_CHAR_UCS2
     129             : #undef MAX_CHAR_UCS4
     130             : 
     131             : #endif /* STRINGLIB_SIZEOF_CHAR == 1 */
     132             : 

Generated by: LCOV version 1.14