/home/mdboom/Work/builds/cpython/Objects/stringlib/codecs.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  | /* stringlib: codec implementations */  | 
2  |  | 
3  | #if !STRINGLIB_IS_UNICODE  | 
4  | # error "codecs.h is specific to Unicode"  | 
5  | #endif  | 
6  |  | 
7  | #include "pycore_bitutils.h"      // _Py_bswap32()  | 
8  |  | 
9  | /* Mask to quickly check whether a C 'size_t' contains a  | 
10  |    non-ASCII, UTF8-encoded char. */  | 
11  | #if (SIZEOF_SIZE_T == 8)  | 
12  | # define ASCII_CHAR_MASK 0x8080808080808080ULL  | 
13  | #elif (SIZEOF_SIZE_T == 4)  | 
14  | # define ASCII_CHAR_MASK 0x80808080U  | 
15  | #else  | 
16  | # error C 'size_t' size should be either 4 or 8!  | 
17  | #endif  | 
18  |  | 
19  | /* 10xxxxxx */  | 
20  | #define IS_CONTINUATION_BYTE(ch) ((ch) >= 0x804.94M  && (ch) < 0xC04.94M )  | 
21  |  | 
22  | Py_LOCAL_INLINE(Py_UCS4)  | 
23  | STRINGLIB(utf8_decode)(const char **inptr, const char *end,  | 
24  |                        STRINGLIB_CHAR *dest,  | 
25  |                        Py_ssize_t *outpos)  | 
26  | { | 
27  |     Py_UCS4 ch;  | 
28  |     const char *s = *inptr;  | 
29  |     STRINGLIB_CHAR *p = dest + *outpos;  | 
30  |  | 
31  |     while (s < end) {  Branch (31:12): [True: 264k, False: 380]
   Branch (31:12): [True: 473k, False: 35.9k]
   Branch (31:12): [True: 959k, False: 46.8k]
   Branch (31:12): [True: 1.07M, False: 2.60k]
  | 
32  |         ch = (unsigned char)*s;  | 
33  |  | 
34  |         if (ch < 0x80) {  Branch (34:13): [True: 1.50k, False: 263k]
   Branch (34:13): [True: 440k, False: 32.2k]
   Branch (34:13): [True: 214k, False: 744k]
   Branch (34:13): [True: 17.4k, False: 1.05M]
  | 
35  |             /* Fast path for runs of ASCII characters. Given that common UTF-8  | 
36  |                input will consist of an overwhelming majority of ASCII  | 
37  |                characters, we try to optimize for this case by checking  | 
38  |                as many characters as a C 'size_t' can contain.  | 
39  |                First, check if we can do an aligned read, as most CPUs have  | 
40  |                a penalty for unaligned reads.  | 
41  |             */  | 
42  |             if (_Py_IS_ALIGNED(s, ALIGNOF_SIZE_T)) { | 
43  |                 /* Help register allocation */  | 
44  |                 const char *_s = s;  | 
45  |                 STRINGLIB_CHAR *_p = p;  | 
46  |                 while (_s + SIZEOF_SIZE_T <= end) {  Branch (46:24): [True: 24, False: 302]
   Branch (46:24): [True: 325k, False: 34.6k]
   Branch (46:24): [True: 523k, False: 12.1k]
   Branch (46:24): [True: 87.1k, False: 1.99k]
  | 
47  |                     /* Read a whole size_t at a time (either 4 or 8 bytes),  | 
48  |                        and do a fast unrolled copy if it only contains ASCII  | 
49  |                        characters. */  | 
50  |                     size_t value = *(const size_t *) _s;  | 
51  |                     if (value & ASCII_CHAR_MASK)   Branch (51:25): [True: 0, False: 24]
   Branch (51:25): [True: 13.5k, False: 312k]
   Branch (51:25): [True: 14.8k, False: 508k]
   Branch (51:25): [True: 179, False: 86.9k]
  | 
52  |                         break;  | 
53  | #if PY_LITTLE_ENDIAN  | 
54  |                     _p[0] = (STRINGLIB_CHAR)(value & 0xFFu);  | 
55  |                     _p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  | 
56  |                     _p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  | 
57  |                     _p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  | 
58  | # if SIZEOF_SIZE_T == 8  | 
59  |                     _p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);  | 
60  |                     _p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);  | 
61  |                     _p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);  | 
62  |                     _p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);  | 
63  | # endif  | 
64  | #else  | 
65  | # if SIZEOF_SIZE_T == 8  | 
66  |                     _p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);  | 
67  |                     _p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);  | 
68  |                     _p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);  | 
69  |                     _p[3] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);  | 
70  |                     _p[4] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  | 
71  |                     _p[5] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  | 
72  |                     _p[6] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  | 
73  |                     _p[7] = (STRINGLIB_CHAR)(value & 0xFFu);  | 
74  | # else  | 
75  |                     _p[0] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  | 
76  |                     _p[1] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  | 
77  |                     _p[2] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  | 
78  |                     _p[3] = (STRINGLIB_CHAR)(value & 0xFFu);  | 
79  | # endif  | 
80  | #endif  | 
81  |                     _s += SIZEOF_SIZE_T;  | 
82  |                     _p += SIZEOF_SIZE_T;  | 
83  |                 }  | 
84  |                 s = _s;  | 
85  |                 p = _p;  | 
86  |                 if (s == end)   Branch (86:21): [True: 0, False: 302]
   Branch (86:21): [True: 4.43k, False: 43.7k]
   Branch (86:21): [True: 1.49k, False: 25.4k]
   Branch (86:21): [True: 128, False: 2.04k]
  | 
87  |                     break;  | 
88  |                 ch = (unsigned char)*s;  | 
89  |             }  | 
90  |             if (ch < 0x80) {  Branch (90:17): [True: 1.50k, False: 0]
   Branch (90:17): [True: 436k, False: 83]
   Branch (90:17): [True: 212k, False: 551]
   Branch (90:17): [True: 17.2k, False: 10]
  | 
91  |                 s++;  | 
92  |                 *p++ = ch;  | 
93  |                 continue;  | 
94  |             }  | 
95  |         }  | 
96  |  | 
97  |         if (ch < 0xE0) {  Branch (97:13): [True: 59.8k, False: 203k]
   Branch (97:13): [True: 29.5k, False: 2.78k]
   Branch (97:13): [True: 21.2k, False: 724k]
   Branch (97:13): [True: 857, False: 1.05M]
  | 
98  |             /* \xC2\x80-\xDF\xBF -- 0080-07FF */  | 
99  |             Py_UCS4 ch2;  | 
100  |             if (ch < 0xC2) {  Branch (100:17): [True: 561, False: 59.2k]
   Branch (100:17): [True: 2, False: 29.5k]
   Branch (100:17): [True: 368, False: 20.8k]
   Branch (100:17): [True: 12, False: 845]
  | 
101  |                 /* invalid sequence  | 
102  |                 \x80-\xBF -- continuation byte  | 
103  |                 \xC0-\xC1 -- fake 0000-007F */  | 
104  |                 goto InvalidStart;  | 
105  |             }  | 
106  |             if (end - s < 2) {  Branch (106:17): [True: 1.20k, False: 58.0k]
   Branch (106:17): [True: 49, False: 29.4k]
   Branch (106:17): [True: 778, False: 20.0k]
   Branch (106:17): [True: 24, False: 821]
  | 
107  |                 /* unexpected end of data: the caller will decide whether  | 
108  |                    it's an error or not */  | 
109  |                 break;  | 
110  |             }  | 
111  |             ch2 = (unsigned char)s[1];  | 
112  |             if (!IS_CONTINUATION_BYTE(ch2))  | 
113  |                 /* invalid continuation byte */  | 
114  |                 goto InvalidContinuation1;  | 
115  |             ch = (ch << 6) + ch2 -  | 
116  |                  ((0xC0 << 6) + 0x80);  | 
117  |             assert ((ch > 0x007F) && (ch <= 0x07FF));  | 
118  |             s += 2;  | 
119  |             if (STRINGLIB_MAX_CHAR <= 0x007F ||   Branch (119:17): [Folded - Ignored]
   Branch (119:17): [Folded - Ignored]
   Branch (119:17): [Folded - Ignored]
   Branch (119:17): [Folded - Ignored]
  | 
120  |                 (29.4k STRINGLIB_MAX_CHAR29.4k  < 0x07FF29.4k  && ch > 29.4k STRINGLIB_MAX_CHAR29.4k ))   Branch (120:18): [Folded - Ignored]
  Branch (120:49): [True: 0, False: 0]
   Branch (120:18): [Folded - Ignored]
  Branch (120:49): [True: 304, False: 29.1k]
   Branch (120:18): [Folded - Ignored]
  Branch (120:49): [True: 0, False: 0]
   Branch (120:18): [Folded - Ignored]
  Branch (120:49): [True: 0, False: 0]
  | 
121  |                 /* Out-of-range */  | 
122  |                 goto Return;  | 
123  |             *p++ = ch;  | 
124  |             continue;  | 
125  |         }  | 
126  |  | 
127  |         if (ch < 0xF0) {  Branch (127:13): [True: 200k, False: 2.47k]
   Branch (127:13): [True: 2.77k, False: 10]
   Branch (127:13): [True: 718k, False: 5.56k]
   Branch (127:13): [True: 5.30k, False: 1.05M]
  | 
128  |             /* \xE0\xA0\x80-\xEF\xBF\xBF -- 0800-FFFF */  | 
129  |             Py_UCS4 ch2, ch3;  | 
130  |             if (end - s < 3) {  Branch (130:17): [True: 40.2k, False: 160k]
   Branch (130:17): [True: 1.02k, False: 1.75k]
   Branch (130:17): [True: 78.5k, False: 640k]
   Branch (130:17): [True: 99, False: 5.20k]
  | 
131  |                 /* unexpected end of data: the caller will decide whether  | 
132  |                    it's an error or not */  | 
133  |                 if (end - s < 2)   Branch (133:21): [True: 16.4k, False: 23.8k]
   Branch (133:21): [True: 617, False: 403]
   Branch (133:21): [True: 41.0k, False: 37.5k]
   Branch (133:21): [True: 81, False: 18]
  | 
134  |                     break;  | 
135  |                 ch2 = (unsigned char)s[1];  | 
136  |                 if (!IS_CONTINUATION_BYTE(ch2) ||  | 
137  |                     (61.7k ch2 < 0xA061.7k  ? ch == 0xE044.6k  : ch == 0xED17.0k ))   Branch (137:21): [True: 22, False: 23.7k]
  Branch (137:22): [True: 16.4k, False: 7.32k]
   Branch (137:21): [True: 0, False: 403]
  Branch (137:22): [True: 3, False: 400]
   Branch (137:21): [True: 0, False: 37.4k]
  Branch (137:22): [True: 28.1k, False: 9.32k]
   Branch (137:21): [True: 0, False: 18]
  Branch (137:22): [True: 0, False: 18]
  | 
138  |                     /* for clarification see comments below */  | 
139  |                     goto InvalidContinuation1;  | 
140  |                 break;  | 
141  |             }  | 
142  |             ch2 = (unsigned char)s[1];  | 
143  |             ch3 = (unsigned char)s[2];  | 
144  |             if (!IS_CONTINUATION_BYTE(ch2)) { | 
145  |                 /* invalid continuation byte */  | 
146  |                 goto InvalidContinuation1;  | 
147  |             }  | 
148  |             if (ch == 0xE0) {  Branch (148:17): [True: 3.96k, False: 152k]
   Branch (148:17): [True: 1.39k, False: 351]
   Branch (148:17): [True: 7.79k, False: 632k]
   Branch (148:17): [True: 4, False: 5.19k]
  | 
149  |                 if (ch2 < 0xA0)   Branch (149:21): [True: 68, False: 3.89k]
   Branch (149:21): [True: 0, False: 1.39k]
   Branch (149:21): [True: 0, False: 7.79k]
   Branch (149:21): [True: 0, False: 4]
  | 
150  |                     /* invalid sequence  | 
151  |                        \xE0\x80\x80-\xE0\x9F\xBF -- fake 0000-0800 */  | 
152  |                     goto InvalidContinuation1;  | 
153  |             } else if (ch == 0xED && ch2 >= 0xA016.8k ) {   Branch (153:24): [True: 3.38k, False: 148k]
  Branch (153:38): [True: 399, False: 2.98k]
   Branch (153:24): [True: 1, False: 350]
  Branch (153:38): [True: 1, False: 0]
   Branch (153:24): [True: 13.4k, False: 618k]
  Branch (153:38): [True: 234, False: 13.1k]
   Branch (153:24): [True: 17, False: 5.18k]
  Branch (153:38): [True: 17, False: 0]
  | 
154  |                 /* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF  | 
155  |                    will result in surrogates in range D800-DFFF. Surrogates are  | 
156  |                    not valid UTF-8 so they are rejected.  | 
157  |                    See https://www.unicode.org/versions/Unicode5.2.0/ch03.pdf  | 
158  |                    (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */  | 
159  |                 goto InvalidContinuation1;  | 
160  |             }  | 
161  |             if (!IS_CONTINUATION_BYTE(ch3)) { | 
162  |                 /* invalid continuation byte */  | 
163  |                 goto InvalidContinuation2;  | 
164  |             }  | 
165  |             ch = (ch << 12) + (ch2 << 6) + ch3 -  | 
166  |                  ((0xE0 << 12) + (0x80 << 6) + 0x80);  | 
167  |             assert ((ch > 0x07FF) && (ch <= 0xFFFF));  | 
168  |             s += 3;  | 
169  |             if (STRINGLIB_MAX_CHAR <= 0x07FF ||   Branch (169:17): [Folded - Ignored]
   Branch (169:17): [Folded - Ignored]
   Branch (169:17): [Folded - Ignored]
   Branch (169:17): [Folded - Ignored]
  | 
170  |                 (0 STRINGLIB_MAX_CHAR0  < 0xFFFF0  && ch > 0 STRINGLIB_MAX_CHAR0 ))   Branch (170:18): [Folded - Ignored]
  Branch (170:49): [True: 0, False: 0]
   Branch (170:18): [Folded - Ignored]
  Branch (170:49): [True: 0, False: 0]
   Branch (170:18): [Folded - Ignored]
  Branch (170:49): [True: 0, False: 0]
   Branch (170:18): [Folded - Ignored]
  Branch (170:49): [True: 0, False: 0]
  | 
171  |                 /* Out-of-range */  | 
172  |                 goto Return;  | 
173  |             *p++ = ch;  | 
174  |             continue;  | 
175  |         }  | 
176  |  | 
177  |         if (ch < 0xF5) {  Branch (177:13): [True: 2.10k, False: 368]
   Branch (177:13): [True: 5, False: 5]
   Branch (177:13): [True: 4.72k, False: 833]
   Branch (177:13): [True: 1.05M, False: 0]
  | 
178  |             /* \xF0\x90\x80\x80-\xF4\x8F\xBF\xBF -- 10000-10FFFF */  | 
179  |             Py_UCS4 ch2, ch3, ch4;  | 
180  |             if (end - s < 4) {  Branch (180:17): [True: 465, False: 1.64k]
   Branch (180:17): [True: 1, False: 4]
   Branch (180:17): [True: 3.98k, False: 748]
   Branch (180:17): [True: 90, False: 1.05M]
  | 
181  |                 /* unexpected end of data: the caller will decide whether  | 
182  |                    it's an error or not */  | 
183  |                 if (end - s < 2)   Branch (183:21): [True: 56, False: 409]
   Branch (183:21): [True: 0, False: 1]
   Branch (183:21): [True: 3.91k, False: 68]
   Branch (183:21): [True: 54, False: 36]
  | 
184  |                     break;  | 
185  |                 ch2 = (unsigned char)s[1];  | 
186  |                 if (!IS_CONTINUATION_BYTE(ch2) ||  | 
187  |                     (405 ch2 < 0x90405  ? ch == 0xF0121  : ch == 0xF4284 ))   Branch (187:21): [True: 18, False: 317]
  Branch (187:22): [True: 114, False: 221]
   Branch (187:21): [True: 0, False: 0]
  Branch (187:22): [True: 0, False: 0]
   Branch (187:21): [True: 0, False: 34]
  Branch (187:22): [True: 7, False: 27]
   Branch (187:21): [True: 0, False: 36]
  Branch (187:22): [True: 0, False: 36]
  | 
188  |                     /* for clarification see comments below */  | 
189  |                     goto InvalidContinuation1;  | 
190  |                 if (end - s < 3)   Branch (190:21): [True: 94, False: 223]
   Branch (190:21): [True: 0, False: 0]
   Branch (190:21): [True: 12, False: 22]
   Branch (190:21): [True: 18, False: 18]
  | 
191  |                     break;  | 
192  |                 ch3 = (unsigned char)s[2];  | 
193  |                 if (!IS_CONTINUATION_BYTE(ch3))  | 
194  |                     goto InvalidContinuation2;  | 
195  |                 break;  | 
196  |             }  | 
197  |             ch2 = (unsigned char)s[1];  | 
198  |             ch3 = (unsigned char)s[2];  | 
199  |             ch4 = (unsigned char)s[3];  | 
200  |             if (!IS_CONTINUATION_BYTE(ch2)) { | 
201  |                 /* invalid continuation byte */  | 
202  |                 goto InvalidContinuation1;  | 
203  |             }  | 
204  |             if (ch == 0xF0) {  Branch (204:17): [True: 1.05k, False: 519]
   Branch (204:17): [True: 4, False: 0]
   Branch (204:17): [True: 664, False: 62]
   Branch (204:17): [True: 199k, False: 852k]
  | 
205  |                 if (ch2 < 0x90)   Branch (205:21): [True: 36, False: 1.01k]
   Branch (205:21): [True: 0, False: 4]
   Branch (205:21): [True: 0, False: 664]
   Branch (205:21): [True: 0, False: 199k]
  | 
206  |                     /* invalid sequence  | 
207  |                        \xF0\x80\x80\x80-\xF0\x8F\xBF\xBF -- fake 0000-FFFF */  | 
208  |                     goto InvalidContinuation1;  | 
209  |             } else if (ch == 0xF4 && ch2 >= 0x9066.7k ) {   Branch (209:24): [True: 265, False: 254]
  Branch (209:38): [True: 100, False: 165]
   Branch (209:24): [True: 0, False: 0]
  Branch (209:38): [True: 0, False: 0]
   Branch (209:24): [True: 55, False: 7]
  Branch (209:38): [True: 0, False: 55]
   Branch (209:24): [True: 66.4k, False: 786k]
  Branch (209:38): [True: 0, False: 66.4k]
  | 
210  |                 /* invalid sequence  | 
211  |                    \xF4\x90\x80\x80- -- 110000- overflow */  | 
212  |                 goto InvalidContinuation1;  | 
213  |             }  | 
214  |             if (!IS_CONTINUATION_BYTE(ch3)) { | 
215  |                 /* invalid continuation byte */  | 
216  |                 goto InvalidContinuation2;  | 
217  |             }  | 
218  |             if (!IS_CONTINUATION_BYTE(ch4)) { | 
219  |                 /* invalid continuation byte */  | 
220  |                 goto InvalidContinuation3;  | 
221  |             }  | 
222  |             ch = (ch << 18) + (ch2 << 12) + (ch3 << 6) + ch4 -  | 
223  |                  ((0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80);  | 
224  |             assert ((ch > 0xFFFF) && (ch <= 0x10FFFF));  | 
225  |             s += 4;  | 
226  |             if (STRINGLIB_MAX_CHAR <= 0xFFFF ||   Branch (226:17): [Folded - Ignored]
   Branch (226:17): [Folded - Ignored]
   Branch (226:17): [Folded - Ignored]
   Branch (226:17): [Folded - Ignored]
  | 
227  |                 (0 STRINGLIB_MAX_CHAR0  < 0x10FFFF0  && ch > 0 STRINGLIB_MAX_CHAR0 ))   Branch (227:18): [Folded - Ignored]
  Branch (227:51): [True: 0, False: 0]
   Branch (227:18): [Folded - Ignored]
  Branch (227:51): [True: 0, False: 0]
   Branch (227:18): [Folded - Ignored]
  Branch (227:51): [True: 0, False: 0]
   Branch (227:18): [Folded - Ignored]
  Branch (227:51): [True: 0, False: 0]
  | 
228  |                 /* Out-of-range */  | 
229  |                 goto Return;  | 
230  |             *p++ = ch;  | 
231  |             continue;  | 
232  |         }  | 
233  |         goto InvalidStart;  | 
234  |     }  | 
235  |     ch = 0;  | 
236  | Return:  | 
237  |     *inptr = s;  | 
238  |     *outpos = p - dest;  | 
239  |     return ch;  | 
240  | InvalidStart:  | 
241  |     ch = 1;  | 
242  |     goto Return;  | 
243  | InvalidContinuation1:  | 
244  |     ch = 2;  | 
245  |     goto Return;  | 
246  | InvalidContinuation2:  | 
247  |     ch = 3;  | 
248  |     goto Return;  | 
249  | InvalidContinuation3:  | 
250  |     ch = 4;  | 
251  |     goto Return;  | 
252  | } unicodeobject.c:asciilib_utf8_decode Line  | Count  | Source  |  26  | { |  27  |     Py_UCS4 ch;  |  28  |     const char *s = *inptr;  |  29  |     STRINGLIB_CHAR *p = dest + *outpos;  |  30  |  |  31  |     while (s < end) {  Branch (31:12): [True: 264k, False: 380]
  |  32  |         ch = (unsigned char)*s;  |  33  |  |  34  |         if (ch < 0x80) {  Branch (34:13): [True: 1.50k, False: 263k]
  |  35  |             /* Fast path for runs of ASCII characters. Given that common UTF-8  |  36  |                input will consist of an overwhelming majority of ASCII  |  37  |                characters, we try to optimize for this case by checking  |  38  |                as many characters as a C 'size_t' can contain.  |  39  |                First, check if we can do an aligned read, as most CPUs have  |  40  |                a penalty for unaligned reads.  |  41  |             */  |  42  |             if (_Py_IS_ALIGNED(s, ALIGNOF_SIZE_T)) { |  43  |                 /* Help register allocation */  |  44  |                 const char *_s = s;  |  45  |                 STRINGLIB_CHAR *_p = p;  |  46  |                 while (_s + SIZEOF_SIZE_T <= end) {  Branch (46:24): [True: 24, False: 302]
  |  47  |                     /* Read a whole size_t at a time (either 4 or 8 bytes),  |  48  |                        and do a fast unrolled copy if it only contains ASCII  |  49  |                        characters. */  |  50  |                     size_t value = *(const size_t *) _s;  |  51  |                     if (value & ASCII_CHAR_MASK)   Branch (51:25): [True: 0, False: 24]
  |  52  |                         break;  |  53  | #if PY_LITTLE_ENDIAN  |  54  |                     _p[0] = (STRINGLIB_CHAR)(value & 0xFFu);  |  55  |                     _p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  |  56  |                     _p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  |  57  |                     _p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  |  58  | # if SIZEOF_SIZE_T == 8  |  59  |                     _p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);  |  60  |                     _p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);  |  61  |                     _p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);  |  62  |                     _p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);  |  63  | # endif  |  64  | #else  |  65  | # if SIZEOF_SIZE_T == 8  |  66  |                     _p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);  |  67  |                     _p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);  |  68  |                     _p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);  |  69  |                     _p[3] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);  |  70  |                     _p[4] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  |  71  |                     _p[5] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  |  72  |                     _p[6] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  |  73  |                     _p[7] = (STRINGLIB_CHAR)(value & 0xFFu);  |  74  | # else  |  75  |                     _p[0] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  |  76  |                     _p[1] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  |  77  |                     _p[2] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  |  78  |                     _p[3] = (STRINGLIB_CHAR)(value & 0xFFu);  |  79  | # endif  |  80  | #endif  |  81  |                     _s += SIZEOF_SIZE_T;  |  82  |                     _p += SIZEOF_SIZE_T;  |  83  |                 }  |  84  |                 s = _s;  |  85  |                 p = _p;  |  86  |             
    if (s == end)   Branch (86:21): [True: 0, False: 302]
  |  87  |                     break;  |  88  |                 ch = (unsigned char)*s;  |  89  |             }  |  90  |             if (ch < 0x80) {  Branch (90:17): [True: 1.50k, False: 0]
  |  91  |                 s++;  |  92  |                 *p++ = ch;  |  93  |                 continue;  |  94  |             }  |  95  |         }  |  96  |  |  97  |         if (ch < 0xE0) {  Branch (97:13): [True: 59.8k, False: 203k]
  |  98  |             /* \xC2\x80-\xDF\xBF -- 0080-07FF */  |  99  |             Py_UCS4 ch2;  |  100  |             if (ch < 0xC2) {  Branch (100:17): [True: 561, False: 59.2k]
  |  101  |                 /* invalid sequence  |  102  |                 \x80-\xBF -- continuation byte  |  103  |                 \xC0-\xC1 -- fake 0000-007F */  |  104  |                 goto InvalidStart;  |  105  |             }  |  106  |             if (end - s < 2) {  Branch (106:17): [True: 1.20k, False: 58.0k]
  |  107  |                 /* unexpected end of data: the caller will decide whether  |  108  |                    it's an error or not */  |  109  |                 break;  |  110  |             }  |  111  |             ch2 = (unsigned char)s[1];  |  112  |             if (!IS_CONTINUATION_BYTE(ch2))  |  113  |                 /* invalid continuation byte */  |  114  |                 goto InvalidContinuation1;  |  115  |             ch = (ch << 6) + ch2 -  |  116  |                  ((0xC0 << 6) + 0x80);  |  117  |             assert ((ch > 0x007F) && (ch <= 0x07FF));  |  118  |             s += 2;  |  119  |             if (STRINGLIB_MAX_CHAR <= 0x007F ||   Branch (119:17): [Folded - Ignored]
  |  120  |                 (0 STRINGLIB_MAX_CHAR0  < 0x07FF0  && ch > 0 STRINGLIB_MAX_CHAR0 ))   Branch (120:18): [Folded - Ignored]
  Branch (120:49): [True: 0, False: 0]
  |  121  |                 /* Out-of-range */  |  122  |                 goto Return;  |  123  |             *p++ = ch;  |  124  |             continue;  |  125  |         }  |  126  |  |  127  |         if (ch < 0xF0) {  Branch (127:13): [True: 200k, False: 2.47k]
  |  128  |             /* \xE0\xA0\x80-\xEF\xBF\xBF -- 0800-FFFF */  |  129  |             Py_UCS4 ch2, ch3;  |  130  |             if (end - s < 3) {  Branch (130:17): [True: 40.2k, False: 160k]
  |  131  |                 /* unexpected end of data: the caller will decide whether  |  132  |                    it's an error or not */  |  133  |                 if (end - s < 2)   Branch (133:21): [True: 16.4k, False: 23.8k]
  |  134  |                     break;  |  135  |                 ch2 = (unsigned char)s[1];  |  136  |                 if (!IS_CONTINUATION_BYTE(ch2) ||  |  137  |                     (23.7k ch2 < 0xA023.7k  ? ch == 0xE016.4k  : ch == 0xED7.32k ))   Branch (137:21): [True: 22, False: 23.7k]
  Branch (137:22): [True: 16.4k, False: 7.32k]
  |  138  |                     /* for clarification see comments below */  |  139  |                     goto InvalidContinuation1;  |  140  |                 break;  |  141  |             }  |  142  |             ch2 = (unsigned char)s[1];  |  143  |             ch3 = (unsigned char)s[2];  |  144  |             if (!IS_CONTINUATION_BYTE(ch2)) { |  145  |                 /* invalid continuation byte */  |  146  |                 goto InvalidContinuation1;  |  147  |             }  |  148  |             if (ch == 0xE0) {  Branch (148:17): [True: 3.96k, False: 152k]
  |  149  |                 if (ch2 < 0xA0)   Branch (149:21): [True: 68, False: 3.89k]
  |  150  |                     /* invalid sequence  |  151  |                        \xE0\x80\x80-\xE0\x9F\xBF -- fake 0000-0800 */  |  152  |                     goto InvalidContinuation1;  |  153  |             } else if (ch == 0xED && ch2 >= 0xA03.38k ) {   Branch (153:24): [True: 3.38k, False: 148k]
  Branch (153:38): [True: 399, False: 2.98k]
  |  154  |                 /* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF  |  155  |                    will result in surrogates in range D800-DFFF. Surrogates are  |  156  |                    not valid UTF-8 so they are rejected.  |  157  |                    See https://www.unicode.org/versions/Unicode5.2.0/ch03.pdf  |  158  |                    (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */  |  159  |                 goto InvalidContinuation1;  |  160  |             }  |  161  |             if (!IS_CONTINUATION_BYTE(ch3)) { |  162  |                 /* invalid continuation byte */  |  163  |                 goto InvalidContinuation2;  |  164  |             }  |  165  |             ch = (ch << 12) + (ch2 << 6) + ch3 -  |  166  |                  ((0xE0 << 12) + (0x80 << 6) + 0x80);  |  167  |             assert ((ch > 0x07FF) && (ch <= 0xFFFF));  |  168  |             s += 3;  |  169  |             if (STRINGLIB_MAX_CHAR <= 0x07FF ||   Branch (169:17): [Folded - Ignored]
  |  170  |                 (0 STRINGLIB_MAX_CHAR0  < 0xFFFF0  && ch > 0 STRINGLIB_MAX_CHAR0 ))   Branch (170:18): [Folded - Ignored]
  Branch (170:49): [True: 0, False: 0]
  |  171  |                 /* Out-of-range */  |  172  |                 goto Return;  |  173  |             *p++ = ch;  |  174  |             continue;  |  175  |         }  |  176  |  |  177  |         if (ch < 0xF5) {  Branch (177:13): [True: 2.10k, False: 368]
  |  178  |             /* \xF0\x90\x80\x80-\xF4\x8F\xBF\xBF -- 10000-10FFFF */  |  179  |             Py_UCS4 ch2, ch3, ch4;  |  180  |             if (end - s < 4) {  Branch (180:17): [True: 465, False: 1.64k]
  |  181  |                 /* unexpected end of data: the caller will decide whether  |  182  |                    it's an error or not */  |  183  |                 if (end - s < 2)   Branch (183:21): [True: 56, False: 409]
  |  184  |                     break;  |  185  |                 ch2 = (unsigned char)s[1];  |  186  |                 if (!IS_CONTINUATION_BYTE(ch2) ||  |  187  |                     (335 ch2 < 0x90335  ? ch == 0xF0114  : ch == 0xF4221 ))   Branch (187:21): [True: 18, False: 317]
  Branch (187:22): [True: 114, False: 221]
  |  188  |                     /* for clarification see comments below */  |  189  |                     goto InvalidContinuation1;  |  190  |                 if (end - s < 3)   Branch (190:21): [True: 94, False: 223]
  |  191  |                     break;  |  192  |                 ch3 = (unsigned char)s[2];  |  193  |                 if (!IS_CONTINUATION_BYTE(ch3))  |  194  |                     goto InvalidContinuation2;  |  195  |                 break;  |  196  |             }  |  197  |             ch2 = (unsigned char)s[1];  |  198  |             ch3 = (unsigned char)s[2];  |  199  |             ch4 = (unsigned char)s[3];  |  200  |             if (!IS_CONTINUATION_BYTE(ch2)) { |  201  |                 /* invalid continuation byte */  |  202  |                 goto InvalidContinuation1;  |  203  |             }  |  204  |             if (ch == 0xF0) {  Branch (204:17): [True: 1.05k, False: 519]
  |  205  |                 if (ch2 < 0x90)   Branch (205:21): [True: 36, False: 1.01k]
  |  206  |                     /* invalid sequence  |  207  |                        \xF0\x80\x80\x80-\xF0\x8F\xBF\xBF -- fake 0000-FFFF */  |  208  |                     goto InvalidContinuation1;  |  209  |             } else if (519 ch == 0xF4519  && ch2 >= 0x90265 ) {   Branch (209:24): [True: 265, False: 254]
  Branch (209:38): [True: 100, False: 165]
  |  210  |                 /* invalid sequence  |  211  |                    \xF4\x90\x80\x80- -- 110000- overflow */  |  212  |                 goto InvalidContinuation1;  |  213  |             }  |  214  |             if (!IS_CONTINUATION_BYTE(ch3)) { |  215  |                 /* invalid continuation byte */  |  216  |                 goto InvalidContinuation2;  |  217  |             }  |  218  |             if (!IS_CONTINUATION_BYTE(ch4)) { |  219  |                 /* invalid continuation byte */  |  220  |                 goto InvalidContinuation3;  |  221  |             }  |  222  |             ch = (ch << 18) + (ch2 << 12) + (ch3 << 6) + ch4 -  |  223  |                  ((0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80);  |  224  |             assert ((ch > 0xFFFF) && (ch <= 0x10FFFF));  |  225  |             s += 4;  |  226  |             if (STRINGLIB_MAX_CHAR <= 0xFFFF ||   Branch (226:17): [Folded - Ignored]
  |  227  |                 (0 STRINGLIB_MAX_CHAR0  < 0x10FFFF0  && ch > 0 STRINGLIB_MAX_CHAR0 ))   Branch (227:18): [Folded - Ignored]
  Branch (227:51): [True: 0, False: 0]
  |  228  |                 /* Out-of-range */  |  229  |                 goto Return;  |  230  |             *p++ = ch;  |  231  |             continue;  |  232  |         }  |  233  |         goto InvalidStart;  |  234  |     }  |  235  |     ch = 0;  |  236  | Return:  |  237  |     *inptr = s;  |  238  |     *outpos = p - dest;  |  239  |     return ch;  |  240  | InvalidStart:  |  241  |     ch = 1;  |  242  |     goto Return;  |  243  | InvalidContinuation1:  |  244  |     ch = 2;  |  245  |     goto Return;  |  246  | InvalidContinuation2:  |  247  |     ch = 3;  |  248  |     goto Return;  |  249  | InvalidContinuation3:  |  250  |     ch = 4;  |  251  |     goto Return;  |  252  | }  |  
 unicodeobject.c:ucs1lib_utf8_decode Line  | Count  | Source  |  26  | { |  27  |     Py_UCS4 ch;  |  28  |     const char *s = *inptr;  |  29  |     STRINGLIB_CHAR *p = dest + *outpos;  |  30  |  |  31  |     while (s < end) {  Branch (31:12): [True: 473k, False: 35.9k]
  |  32  |         ch = (unsigned char)*s;  |  33  |  |  34  |         if (ch < 0x80) {  Branch (34:13): [True: 440k, False: 32.2k]
  |  35  |             /* Fast path for runs of ASCII characters. Given that common UTF-8  |  36  |                input will consist of an overwhelming majority of ASCII  |  37  |                characters, we try to optimize for this case by checking  |  38  |                as many characters as a C 'size_t' can contain.  |  39  |                First, check if we can do an aligned read, as most CPUs have  |  40  |                a penalty for unaligned reads.  |  41  |             */  |  42  |             if (_Py_IS_ALIGNED(s, ALIGNOF_SIZE_T)) { |  43  |                 /* Help register allocation */  |  44  |                 const char *_s = s;  |  45  |                 STRINGLIB_CHAR *_p = p;  |  46  |                 while (_s + SIZEOF_SIZE_T <= end) {  Branch (46:24): [True: 325k, False: 34.6k]
  |  47  |                     /* Read a whole size_t at a time (either 4 or 8 bytes),  |  48  |                        and do a fast unrolled copy if it only contains ASCII  |  49  |                        characters. */  |  50  |                     size_t value = *(const size_t *) _s;  |  51  |                     if (value & ASCII_CHAR_MASK)   Branch (51:25): [True: 13.5k, False: 312k]
  |  52  |                         break;  |  53  | #if PY_LITTLE_ENDIAN  |  54  |                     _p[0] = (STRINGLIB_CHAR)(value & 0xFFu);  |  55  |                     _p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  |  56  |                     _p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  |  57  |                     _p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  |  58  | # if SIZEOF_SIZE_T == 8  |  59  |                     _p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);  |  60  |                     _p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);  |  61  |                     _p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);  |  62  |                     _p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);  |  63  | # endif  |  64  | #else  |  65  | # if SIZEOF_SIZE_T == 8  |  66  |                     _p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);  |  67  |                     _p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);  |  68  |                     _p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);  |  69  |                     _p[3] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);  |  70  |                     _p[4] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  |  71  |                     _p[5] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  |  72  |                     _p[6] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  |  73  |                     _p[7] = (STRINGLIB_CHAR)(value & 0xFFu);  |  74  | # else  |  75  |                     _p[0] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  |  76  |                     _p[1] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  |  77  |                     _p[2] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  |  78  |                     _p[3] = (STRINGLIB_CHAR)(value & 0xFFu);  |  79  | # endif  |  80  | #endif  |  81  |                     _s += SIZEOF_SIZE_T;  |  82  |                     _p += SIZEOF_SIZE_T;  |  83  |                 }  |  84  |                 s = _s;  |  85  |                 p = _p;  |  86  |             
    if (s == end)   Branch (86:21): [True: 4.43k, False: 43.7k]
  |  87  |                     break;  |  88  |                 ch = (unsigned char)*s;  |  89  |             }  |  90  |             if (ch < 0x80) {  Branch (90:17): [True: 436k, False: 83]
  |  91  |                 s++;  |  92  |                 *p++ = ch;  |  93  |                 continue;  |  94  |             }  |  95  |         }  |  96  |  |  97  |         if (ch < 0xE0) {  Branch (97:13): [True: 29.5k, False: 2.78k]
  |  98  |             /* \xC2\x80-\xDF\xBF -- 0080-07FF */  |  99  |             Py_UCS4 ch2;  |  100  |             if (ch < 0xC2) {  Branch (100:17): [True: 2, False: 29.5k]
  |  101  |                 /* invalid sequence  |  102  |                 \x80-\xBF -- continuation byte  |  103  |                 \xC0-\xC1 -- fake 0000-007F */  |  104  |                 goto InvalidStart;  |  105  |             }  |  106  |             if (end - s < 2) {  Branch (106:17): [True: 49, False: 29.4k]
  |  107  |                 /* unexpected end of data: the caller will decide whether  |  108  |                    it's an error or not */  |  109  |                 break;  |  110  |             }  |  111  |             ch2 = (unsigned char)s[1];  |  112  |             if (!IS_CONTINUATION_BYTE(ch2))  |  113  |                 /* invalid continuation byte */  |  114  |                 goto InvalidContinuation1;  |  115  |             ch = (ch << 6) + ch2 -  |  116  |                  ((0xC0 << 6) + 0x80);  |  117  |             assert ((ch > 0x007F) && (ch <= 0x07FF));  |  118  |             s += 2;  |  119  |             if (STRINGLIB_MAX_CHAR <= 0x007F ||   Branch (119:17): [Folded - Ignored]
  |  120  |                 (STRINGLIB_MAX_CHAR < 0x07FF && ch > STRINGLIB_MAX_CHAR))   Branch (120:18): [Folded - Ignored]
  Branch (120:49): [True: 304, False: 29.1k]
  |  121  |                 /* Out-of-range */  |  122  |                 goto Return;  |  123  |             *p++ = ch;  |  124  |             continue;  |  125  |         }  |  126  |  |  127  |         if (ch < 0xF0) {  Branch (127:13): [True: 2.77k, False: 10]
  |  128  |             /* \xE0\xA0\x80-\xEF\xBF\xBF -- 0800-FFFF */  |  129  |             Py_UCS4 ch2, ch3;  |  130  |             if (end - s < 3) {  Branch (130:17): [True: 1.02k, False: 1.75k]
  |  131  |                 /* unexpected end of data: the caller will decide whether  |  132  |                    it's an error or not */  |  133  |                 if (end - s < 2)   Branch (133:21): [True: 617, False: 403]
  |  134  |                     break;  |  135  |                 ch2 = (unsigned char)s[1];  |  136  |                 if (!IS_CONTINUATION_BYTE(ch2) ||  |  137  |                     (ch2 < 0xA0 ? ch == 0xE03  : ch == 0xED400 ))   Branch (137:21): [True: 0, False: 403]
  Branch (137:22): [True: 3, False: 400]
  |  138  |                     /* for clarification see comments below */  |  139  |                     goto InvalidContinuation1;  |  140  |                 break;  |  141  |             }  |  142  |             ch2 = (unsigned char)s[1];  |  143  |             ch3 = (unsigned char)s[2];  |  144  |             if (!IS_CONTINUATION_BYTE(ch2)) { |  145  |                 /* invalid continuation byte */  |  146  |                 goto InvalidContinuation1;  |  147  |             }  |  148  |             if (ch == 0xE0) {  Branch (148:17): [True: 1.39k, False: 351]
  |  149  |                 if (ch2 < 0xA0)   Branch (149:21): [True: 0, False: 1.39k]
  |  150  |                     /* invalid sequence  |  151  |                        \xE0\x80\x80-\xE0\x9F\xBF -- fake 0000-0800 */  |  152  |                     goto InvalidContinuation1;  |  153  |             } else if (351 ch == 0xED351  && ch2 >= 0xA01 ) {   Branch (153:24): [True: 1, False: 350]
  Branch (153:38): [True: 1, False: 0]
  |  154  |                 /* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF  |  155  |                    will result in surrogates in range D800-DFFF. Surrogates are  |  156  |                    not valid UTF-8 so they are rejected.  |  157  |                    See https://www.unicode.org/versions/Unicode5.2.0/ch03.pdf  |  158  |                    (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */  |  159  |                 goto InvalidContinuation1;  |  160  |             }  |  161  |             if (!IS_CONTINUATION_BYTE(ch3)) { |  162  |                 /* invalid continuation byte */  |  163  |                 goto InvalidContinuation2;  |  164  |             }  |  165  |             ch = (ch << 12) + (ch2 << 6) + ch3 -  |  166  |                  ((0xE0 << 12) + (0x80 << 6) + 0x80);  |  167  |             assert ((ch > 0x07FF) && (ch <= 0xFFFF));  |  168  |             s += 3;  |  169  |             if (STRINGLIB_MAX_CHAR <= 0x07FF ||   Branch (169:17): [Folded - Ignored]
  |  170  |                 (0 STRINGLIB_MAX_CHAR0  < 0xFFFF0  && ch > 0 STRINGLIB_MAX_CHAR0 ))   Branch (170:18): [Folded - Ignored]
  Branch (170:49): [True: 0, False: 0]
  |  171  |                 /* Out-of-range */  |  172  |                 goto Return;  |  173  |             *p++ = ch;  |  174  |             continue;  |  175  |         }  |  176  |  |  177  |         if (ch < 0xF5) {  Branch (177:13): [True: 5, False: 5]
  |  178  |             /* \xF0\x90\x80\x80-\xF4\x8F\xBF\xBF -- 10000-10FFFF */  |  179  |             Py_UCS4 ch2, ch3, ch4;  |  180  |             if (end - s < 4) {  Branch (180:17): [True: 1, False: 4]
  |  181  |                 /* unexpected end of data: the caller will decide whether  |  182  |                    it's an error or not */  |  183  |                 if (end - s < 2)   Branch (183:21): [True: 0, False: 1]
  |  184  |                     break;  |  185  |                 ch2 = (unsigned char)s[1];  |  186  |                 if (!IS_CONTINUATION_BYTE(ch2) ||  |  187  |                     (0 ch2 < 0x900  ? ch == 0xF00  : ch == 0xF40 ))   Branch (187:21): [True: 0, False: 0]
  Branch (187:22): [True: 0, False: 0]
  |  188  |                     /* for clarification see comments below */  |  189  |                     goto InvalidContinuation1;  |  190  |                 if (end - s < 3)   Branch (190:21): [True: 0, False: 0]
  |  191  |                     break;  |  192  |                 ch3 = (unsigned char)s[2];  |  193  |                 if (!IS_CONTINUATION_BYTE(ch3))  |  194  |                     goto InvalidContinuation2;  |  195  |                 break;  |  196  |             }  |  197  |             ch2 = (unsigned char)s[1];  |  198  |             ch3 = (unsigned char)s[2];  |  199  |             ch4 = (unsigned char)s[3];  |  200  |             if (!IS_CONTINUATION_BYTE(ch2)) { |  201  |                 /* invalid continuation byte */  |  202  |                 goto InvalidContinuation1;  |  203  |             }  |  204  |             if (ch == 0xF0) {  Branch (204:17): [True: 4, False: 0]
  |  205  |                 if (ch2 < 0x90)   Branch (205:21): [True: 0, False: 4]
  |  206  |                     /* invalid sequence  |  207  |                        \xF0\x80\x80\x80-\xF0\x8F\xBF\xBF -- fake 0000-FFFF */  |  208  |                     goto InvalidContinuation1;  |  209  |             } else if (0 ch == 0xF40  && ch2 >= 0x900 ) {   Branch (209:24): [True: 0, False: 0]
  Branch (209:38): [True: 0, False: 0]
  |  210  |                 /* invalid sequence  |  211  |                    \xF4\x90\x80\x80- -- 110000- overflow */  |  212  |                 goto InvalidContinuation1;  |  213  |             }  |  214  |             if (!IS_CONTINUATION_BYTE(ch3)) { |  215  |                 /* invalid continuation byte */  |  216  |                 goto InvalidContinuation2;  |  217  |             }  |  218  |             if (!IS_CONTINUATION_BYTE(ch4)) { |  219  |                 /* invalid continuation byte */  |  220  |                 goto InvalidContinuation3;  |  221  |             }  |  222  |             ch = (ch << 18) + (ch2 << 12) + (ch3 << 6) + ch4 -  |  223  |                  ((0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80);  |  224  |             assert ((ch > 0xFFFF) && (ch <= 0x10FFFF));  |  225  |             s += 4;  |  226  |             if (STRINGLIB_MAX_CHAR <= 0xFFFF ||   Branch (226:17): [Folded - Ignored]
  |  227  |                 (0 STRINGLIB_MAX_CHAR0  < 0x10FFFF0  && ch > 0 STRINGLIB_MAX_CHAR0 ))   Branch (227:18): [Folded - Ignored]
  Branch (227:51): [True: 0, False: 0]
  |  228  |                 /* Out-of-range */  |  229  |                 goto Return;  |  230  |             *p++ = ch;  |  231  |             continue;  |  232  |         }  |  233  |         goto InvalidStart;  |  234  |     }  |  235  |     ch = 0;  |  236  | Return:  |  237  |     *inptr = s;  |  238  |     *outpos = p - dest;  |  239  |     return ch;  |  240  | InvalidStart:  |  241  |     ch = 1;  |  242  |     goto Return;  |  243  | InvalidContinuation1:  |  244  |     ch = 2;  |  245  |     goto Return;  |  246  | InvalidContinuation2:  |  247  |     ch = 3;  |  248  |     goto Return;  |  249  | InvalidContinuation3:  |  250  |     ch = 4;  |  251  |     goto Return;  |  252  | }  |  
 unicodeobject.c:ucs2lib_utf8_decode Line  | Count  | Source  |  26  | { |  27  |     Py_UCS4 ch;  |  28  |     const char *s = *inptr;  |  29  |     STRINGLIB_CHAR *p = dest + *outpos;  |  30  |  |  31  |     while (s < end) {  Branch (31:12): [True: 959k, False: 46.8k]
  |  32  |         ch = (unsigned char)*s;  |  33  |  |  34  |         if (ch < 0x80) {  Branch (34:13): [True: 214k, False: 744k]
  |  35  |             /* Fast path for runs of ASCII characters. Given that common UTF-8  |  36  |                input will consist of an overwhelming majority of ASCII  |  37  |                characters, we try to optimize for this case by checking  |  38  |                as many characters as a C 'size_t' can contain.  |  39  |                First, check if we can do an aligned read, as most CPUs have  |  40  |                a penalty for unaligned reads.  |  41  |             */  |  42  |             if (_Py_IS_ALIGNED(s, ALIGNOF_SIZE_T)) { |  43  |                 /* Help register allocation */  |  44  |                 const char *_s = s;  |  45  |                 STRINGLIB_CHAR *_p = p;  |  46  |                 while (_s + SIZEOF_SIZE_T <= end) {  Branch (46:24): [True: 523k, False: 12.1k]
  |  47  |                     /* Read a whole size_t at a time (either 4 or 8 bytes),  |  48  |                        and do a fast unrolled copy if it only contains ASCII  |  49  |                        characters. */  |  50  |                     size_t value = *(const size_t *) _s;  |  51  |                     if (value & ASCII_CHAR_MASK)   Branch (51:25): [True: 14.8k, False: 508k]
  |  52  |                         break;  |  53  | #if PY_LITTLE_ENDIAN  |  54  |                     _p[0] = (STRINGLIB_CHAR)(value & 0xFFu);  |  55  |                     _p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  |  56  |                     _p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  |  57  |                     _p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  |  58  | # if SIZEOF_SIZE_T == 8  |  59  |                     _p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);  |  60  |                     _p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);  |  61  |                     _p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);  |  62  |                     _p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);  |  63  | # endif  |  64  | #else  |  65  | # if SIZEOF_SIZE_T == 8  |  66  |                     _p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);  |  67  |                     _p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);  |  68  |                     _p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);  |  69  |                     _p[3] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);  |  70  |                     _p[4] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  |  71  |                     _p[5] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  |  72  |                     _p[6] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  |  73  |                     _p[7] = (STRINGLIB_CHAR)(value & 0xFFu);  |  74  | # else  |  75  |                     _p[0] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  |  76  |                     _p[1] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  |  77  |                     _p[2] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  |  78  |                     _p[3] = (STRINGLIB_CHAR)(value & 0xFFu);  |  79  | # endif  |  80  | #endif  |  81  |                     _s += SIZEOF_SIZE_T;  |  82  |                     _p += SIZEOF_SIZE_T;  |  83  |                 }  |  84  |                 s = _s;  |  85  |                 p = _p;  |  86  |             
    if (s == end)   Branch (86:21): [True: 1.49k, False: 25.4k]
  |  87  |                     break;  |  88  |                 ch = (unsigned char)*s;  |  89  |             }  |  90  |             if (ch < 0x80) {  Branch (90:17): [True: 212k, False: 551]
  |  91  |                 s++;  |  92  |                 *p++ = ch;  |  93  |                 continue;  |  94  |             }  |  95  |         }  |  96  |  |  97  |         if (ch < 0xE0) {  Branch (97:13): [True: 21.2k, False: 724k]
  |  98  |             /* \xC2\x80-\xDF\xBF -- 0080-07FF */  |  99  |             Py_UCS4 ch2;  |  100  |             if (ch < 0xC2) {  Branch (100:17): [True: 368, False: 20.8k]
  |  101  |                 /* invalid sequence  |  102  |                 \x80-\xBF -- continuation byte  |  103  |                 \xC0-\xC1 -- fake 0000-007F */  |  104  |                 goto InvalidStart;  |  105  |             }  |  106  |             if (end - s < 2) {  Branch (106:17): [True: 778, False: 20.0k]
  |  107  |                 /* unexpected end of data: the caller will decide whether  |  108  |                    it's an error or not */  |  109  |                 break;  |  110  |             }  |  111  |             ch2 = (unsigned char)s[1];  |  112  |             if (!IS_CONTINUATION_BYTE(ch2))  |  113  |                 /* invalid continuation byte */  |  114  |                 goto InvalidContinuation1;  |  115  |             ch = (ch << 6) + ch2 -  |  116  |                  ((0xC0 << 6) + 0x80);  |  117  |             assert ((ch > 0x007F) && (ch <= 0x07FF));  |  118  |             s += 2;  |  119  |             if (STRINGLIB_MAX_CHAR <= 0x007F ||   Branch (119:17): [Folded - Ignored]
  |  120  |                 (0 STRINGLIB_MAX_CHAR0  < 0x07FF0  && ch > 0 STRINGLIB_MAX_CHAR0 ))   Branch (120:18): [Folded - Ignored]
  Branch (120:49): [True: 0, False: 0]
  |  121  |                 /* Out-of-range */  |  122  |                 goto Return;  |  123  |             *p++ = ch;  |  124  |             continue;  |  125  |         }  |  126  |  |  127  |         if (ch < 0xF0) {  Branch (127:13): [True: 718k, False: 5.56k]
  |  128  |             /* \xE0\xA0\x80-\xEF\xBF\xBF -- 0800-FFFF */  |  129  |             Py_UCS4 ch2, ch3;  |  130  |             if (end - s < 3) {  Branch (130:17): [True: 78.5k, False: 640k]
  |  131  |                 /* unexpected end of data: the caller will decide whether  |  132  |                    it's an error or not */  |  133  |                 if (end - s < 2)   Branch (133:21): [True: 41.0k, False: 37.5k]
  |  134  |                     break;  |  135  |                 ch2 = (unsigned char)s[1];  |  136  |                 if (!IS_CONTINUATION_BYTE(ch2) ||  |  137  |                     (37.4k ch2 < 0xA037.4k  ? ch == 0xE028.1k  : ch == 0xED9.32k ))   Branch (137:21): [True: 0, False: 37.4k]
  Branch (137:22): [True: 28.1k, False: 9.32k]
  |  138  |                     /* for clarification see comments below */  |  139  |                     goto InvalidContinuation1;  |  140  |                 break;  |  141  |             }  |  142  |             ch2 = (unsigned char)s[1];  |  143  |             ch3 = (unsigned char)s[2];  |  144  |             if (!IS_CONTINUATION_BYTE(ch2)) { |  145  |                 /* invalid continuation byte */  |  146  |                 goto InvalidContinuation1;  |  147  |             }  |  148  |             if (ch == 0xE0) {  Branch (148:17): [True: 7.79k, False: 632k]
  |  149  |                 if (ch2 < 0xA0)   Branch (149:21): [True: 0, False: 7.79k]
  |  150  |                     /* invalid sequence  |  151  |                        \xE0\x80\x80-\xE0\x9F\xBF -- fake 0000-0800 */  |  152  |                     goto InvalidContinuation1;  |  153  |             } else if (ch == 0xED && ch2 >= 0xA013.4k ) {   Branch (153:24): [True: 13.4k, False: 618k]
  Branch (153:38): [True: 234, False: 13.1k]
  |  154  |                 /* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF  |  155  |                    will result in surrogates in range D800-DFFF. Surrogates are  |  156  |                    not valid UTF-8 so they are rejected.  |  157  |                    See https://www.unicode.org/versions/Unicode5.2.0/ch03.pdf  |  158  |                    (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */  |  159  |                 goto InvalidContinuation1;  |  160  |             }  |  161  |             if (!IS_CONTINUATION_BYTE(ch3)) { |  162  |                 /* invalid continuation byte */  |  163  |                 goto InvalidContinuation2;  |  164  |             }  |  165  |             ch = (ch << 12) + (ch2 << 6) + ch3 -  |  166  |                  ((0xE0 << 12) + (0x80 << 6) + 0x80);  |  167  |             assert ((ch > 0x07FF) && (ch <= 0xFFFF));  |  168  |             s += 3;  |  169  |             if (STRINGLIB_MAX_CHAR <= 0x07FF ||   Branch (169:17): [Folded - Ignored]
  |  170  |                 (0 STRINGLIB_MAX_CHAR0  < 0xFFFF0  && ch > 0 STRINGLIB_MAX_CHAR0 ))   Branch (170:18): [Folded - Ignored]
  Branch (170:49): [True: 0, False: 0]
  |  171  |                 /* Out-of-range */  |  172  |                 goto Return;  |  173  |             *p++ = ch;  |  174  |             continue;  |  175  |         }  |  176  |  |  177  |         if (ch < 0xF5) {  Branch (177:13): [True: 4.72k, False: 833]
  |  178  |             /* \xF0\x90\x80\x80-\xF4\x8F\xBF\xBF -- 10000-10FFFF */  |  179  |             Py_UCS4 ch2, ch3, ch4;  |  180  |             if (end - s < 4) {  Branch (180:17): [True: 3.98k, False: 748]
  |  181  |                 /* unexpected end of data: the caller will decide whether  |  182  |                    it's an error or not */  |  183  |                 if (end - s < 2)   Branch (183:21): [True: 3.91k, False: 68]
  |  184  |                     break;  |  185  |                 ch2 = (unsigned char)s[1];  |  186  |                 if (!IS_CONTINUATION_BYTE(ch2) ||  |  187  |                     (34 ch2 < 0x9034  ? ch == 0xF07  : ch == 0xF427 ))   Branch (187:21): [True: 0, False: 34]
  Branch (187:22): [True: 7, False: 27]
  |  188  |                     /* for clarification see comments below */  |  189  |                     goto InvalidContinuation1;  |  190  |                 if (end - s < 3)   Branch (190:21): [True: 12, False: 22]
  |  191  |                     break;  |  192  |                 ch3 = (unsigned char)s[2];  |  193  |                 if (!IS_CONTINUATION_BYTE(ch3))  |  194  |                     goto InvalidContinuation2;  |  195  |                 break;  |  196  |             }  |  197  |             ch2 = (unsigned char)s[1];  |  198  |             ch3 = (unsigned char)s[2];  |  199  |             ch4 = (unsigned char)s[3];  |  200  |             if (!IS_CONTINUATION_BYTE(ch2)) { |  201  |                 /* invalid continuation byte */  |  202  |                 goto InvalidContinuation1;  |  203  |             }  |  204  |             if (ch == 0xF0) {  Branch (204:17): [True: 664, False: 62]
  |  205  |                 if (ch2 < 0x90)   Branch (205:21): [True: 0, False: 664]
  |  206  |                     /* invalid sequence  |  207  |                        \xF0\x80\x80\x80-\xF0\x8F\xBF\xBF -- fake 0000-FFFF */  |  208  |                     goto InvalidContinuation1;  |  209  |             } else if (62 ch == 0xF462  && ch2 >= 0x9055 ) {   Branch (209:24): [True: 55, False: 7]
  Branch (209:38): [True: 0, False: 55]
  |  210  |                 /* invalid sequence  |  211  |                    \xF4\x90\x80\x80- -- 110000- overflow */  |  212  |                 goto InvalidContinuation1;  |  213  |             }  |  214  |             if (!IS_CONTINUATION_BYTE(ch3)) { |  215  |                 /* invalid continuation byte */  |  216  |                 goto InvalidContinuation2;  |  217  |             }  |  218  |             if (!IS_CONTINUATION_BYTE(ch4)) { |  219  |                 /* invalid continuation byte */  |  220  |                 goto InvalidContinuation3;  |  221  |             }  |  222  |             ch = (ch << 18) + (ch2 << 12) + (ch3 << 6) + ch4 -  |  223  |                  ((0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80);  |  224  |             assert ((ch > 0xFFFF) && (ch <= 0x10FFFF));  |  225  |             s += 4;  |  226  |             if (STRINGLIB_MAX_CHAR <= 0xFFFF ||   Branch (226:17): [Folded - Ignored]
  |  227  |                 (0 STRINGLIB_MAX_CHAR0  < 0x10FFFF0  && ch > 0 STRINGLIB_MAX_CHAR0 ))   Branch (227:18): [Folded - Ignored]
  Branch (227:51): [True: 0, False: 0]
  |  228  |                 /* Out-of-range */  |  229  |                 goto Return;  |  230  |             *p++ = ch;  |  231  |             continue;  |  232  |         }  |  233  |         goto InvalidStart;  |  234  |     }  |  235  |     ch = 0;  |  236  | Return:  |  237  |     *inptr = s;  |  238  |     *outpos = p - dest;  |  239  |     return ch;  |  240  | InvalidStart:  |  241  |     ch = 1;  |  242  |     goto Return;  |  243  | InvalidContinuation1:  |  244  |     ch = 2;  |  245  |     goto Return;  |  246  | InvalidContinuation2:  |  247  |     ch = 3;  |  248  |     goto Return;  |  249  | InvalidContinuation3:  |  250  |     ch = 4;  |  251  |     goto Return;  |  252  | }  |  
 unicodeobject.c:ucs4lib_utf8_decode Line  | Count  | Source  |  26  | { |  27  |     Py_UCS4 ch;  |  28  |     const char *s = *inptr;  |  29  |     STRINGLIB_CHAR *p = dest + *outpos;  |  30  |  |  31  |     while (s < end) {  Branch (31:12): [True: 1.07M, False: 2.60k]
  |  32  |         ch = (unsigned char)*s;  |  33  |  |  34  |         if (ch < 0x80) {  Branch (34:13): [True: 17.4k, False: 1.05M]
  |  35  |             /* Fast path for runs of ASCII characters. Given that common UTF-8  |  36  |                input will consist of an overwhelming majority of ASCII  |  37  |                characters, we try to optimize for this case by checking  |  38  |                as many characters as a C 'size_t' can contain.  |  39  |                First, check if we can do an aligned read, as most CPUs have  |  40  |                a penalty for unaligned reads.  |  41  |             */  |  42  |             if (_Py_IS_ALIGNED(s, ALIGNOF_SIZE_T)) { |  43  |                 /* Help register allocation */  |  44  |                 const char *_s = s;  |  45  |                 STRINGLIB_CHAR *_p = p;  |  46  |                 while (_s + SIZEOF_SIZE_T <= end) {  Branch (46:24): [True: 87.1k, False: 1.99k]
  |  47  |                     /* Read a whole size_t at a time (either 4 or 8 bytes),  |  48  |                        and do a fast unrolled copy if it only contains ASCII  |  49  |                        characters. */  |  50  |                     size_t value = *(const size_t *) _s;  |  51  |                     if (value & ASCII_CHAR_MASK)   Branch (51:25): [True: 179, False: 86.9k]
  |  52  |                         break;  |  53  | #if PY_LITTLE_ENDIAN  |  54  |                     _p[0] = (STRINGLIB_CHAR)(value & 0xFFu);  |  55  |                     _p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  |  56  |                     _p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  |  57  |                     _p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  |  58  | # if SIZEOF_SIZE_T == 8  |  59  |                     _p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);  |  60  |                     _p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);  |  61  |                     _p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);  |  62  |                     _p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);  |  63  | # endif  |  64  | #else  |  65  | # if SIZEOF_SIZE_T == 8  |  66  |                     _p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);  |  67  |                     _p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);  |  68  |                     _p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);  |  69  |                     _p[3] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);  |  70  |                     _p[4] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  |  71  |                     _p[5] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  |  72  |                     _p[6] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  |  73  |                     _p[7] = (STRINGLIB_CHAR)(value & 0xFFu);  |  74  | # else  |  75  |                     _p[0] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  |  76  |                     _p[1] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  |  77  |                     _p[2] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  |  78  |                     _p[3] = (STRINGLIB_CHAR)(value & 0xFFu);  |  79  | # endif  |  80  | #endif  |  81  |                     _s += SIZEOF_SIZE_T;  |  82  |                     _p += SIZEOF_SIZE_T;  |  83  |                 }  |  84  |                 s = _s;  |  85  |                 p = _p;  |  86  |             
    if (s == end)   Branch (86:21): [True: 128, False: 2.04k]
  |  87  |                     break;  |  88  |                 ch = (unsigned char)*s;  |  89  |             }  |  90  |             if (ch < 0x80) {  Branch (90:17): [True: 17.2k, False: 10]
  |  91  |                 s++;  |  92  |                 *p++ = ch;  |  93  |                 continue;  |  94  |             }  |  95  |         }  |  96  |  |  97  |         if (ch < 0xE0) {  Branch (97:13): [True: 857, False: 1.05M]
  |  98  |             /* \xC2\x80-\xDF\xBF -- 0080-07FF */  |  99  |             Py_UCS4 ch2;  |  100  |             if (ch < 0xC2) {  Branch (100:17): [True: 12, False: 845]
  |  101  |                 /* invalid sequence  |  102  |                 \x80-\xBF -- continuation byte  |  103  |                 \xC0-\xC1 -- fake 0000-007F */  |  104  |                 goto InvalidStart;  |  105  |             }  |  106  |             if (end - s < 2) {  Branch (106:17): [True: 24, False: 821]
  |  107  |                 /* unexpected end of data: the caller will decide whether  |  108  |                    it's an error or not */  |  109  |                 break;  |  110  |             }  |  111  |             ch2 = (unsigned char)s[1];  |  112  |             if (!IS_CONTINUATION_BYTE(ch2))  |  113  |                 /* invalid continuation byte */  |  114  |                 goto InvalidContinuation1;  |  115  |             ch = (ch << 6) + ch2 -  |  116  |                  ((0xC0 << 6) + 0x80);  |  117  |             assert ((ch > 0x007F) && (ch <= 0x07FF));  |  118  |             s += 2;  |  119  |             if (STRINGLIB_MAX_CHAR <= 0x007F ||   Branch (119:17): [Folded - Ignored]
  |  120  |                 (0 STRINGLIB_MAX_CHAR0  < 0x07FF0  && ch > 0 STRINGLIB_MAX_CHAR0 ))   Branch (120:18): [Folded - Ignored]
  Branch (120:49): [True: 0, False: 0]
  |  121  |                 /* Out-of-range */  |  122  |                 goto Return;  |  123  |             *p++ = ch;  |  124  |             continue;  |  125  |         }  |  126  |  |  127  |         if (ch < 0xF0) {  Branch (127:13): [True: 5.30k, False: 1.05M]
  |  128  |             /* \xE0\xA0\x80-\xEF\xBF\xBF -- 0800-FFFF */  |  129  |             Py_UCS4 ch2, ch3;  |  130  |             if (end - s < 3) {  Branch (130:17): [True: 99, False: 5.20k]
  |  131  |                 /* unexpected end of data: the caller will decide whether  |  132  |                    it's an error or not */  |  133  |                 if (end - s < 2)   Branch (133:21): [True: 81, False: 18]
  |  134  |                     break;  |  135  |                 ch2 = (unsigned char)s[1];  |  136  |                 if (!IS_CONTINUATION_BYTE(ch2) ||  |  137  |                     (ch2 < 0xA0 ? ch == 0xE00  : ch == 0xED))   Branch (137:21): [True: 0, False: 18]
  Branch (137:22): [True: 0, False: 18]
  |  138  |                     /* for clarification see comments below */  |  139  |                     goto InvalidContinuation1;  |  140  |                 break;  |  141  |             }  |  142  |             ch2 = (unsigned char)s[1];  |  143  |             ch3 = (unsigned char)s[2];  |  144  |             if (!IS_CONTINUATION_BYTE(ch2)) { |  145  |                 /* invalid continuation byte */  |  146  |                 goto InvalidContinuation1;  |  147  |             }  |  148  |             if (ch == 0xE0) {  Branch (148:17): [True: 4, False: 5.19k]
  |  149  |                 if (ch2 < 0xA0)   Branch (149:21): [True: 0, False: 4]
  |  150  |                     /* invalid sequence  |  151  |                        \xE0\x80\x80-\xE0\x9F\xBF -- fake 0000-0800 */  |  152  |                     goto InvalidContinuation1;  |  153  |             } else if (ch == 0xED && ch2 >= 0xA017 ) {   Branch (153:24): [True: 17, False: 5.18k]
  Branch (153:38): [True: 17, False: 0]
  |  154  |                 /* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF  |  155  |                    will result in surrogates in range D800-DFFF. Surrogates are  |  156  |                    not valid UTF-8 so they are rejected.  |  157  |                    See https://www.unicode.org/versions/Unicode5.2.0/ch03.pdf  |  158  |                    (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */  |  159  |                 goto InvalidContinuation1;  |  160  |             }  |  161  |             if (!IS_CONTINUATION_BYTE(ch3)) { |  162  |                 /* invalid continuation byte */  |  163  |                 goto InvalidContinuation2;  |  164  |             }  |  165  |             ch = (ch << 12) + (ch2 << 6) + ch3 -  |  166  |                  ((0xE0 << 12) + (0x80 << 6) + 0x80);  |  167  |             assert ((ch > 0x07FF) && (ch <= 0xFFFF));  |  168  |             s += 3;  |  169  |             if (STRINGLIB_MAX_CHAR <= 0x07FF ||   Branch (169:17): [Folded - Ignored]
  |  170  |                 (0 STRINGLIB_MAX_CHAR0  < 0xFFFF0  && ch > 0 STRINGLIB_MAX_CHAR0 ))   Branch (170:18): [Folded - Ignored]
  Branch (170:49): [True: 0, False: 0]
  |  171  |                 /* Out-of-range */  |  172  |                 goto Return;  |  173  |             *p++ = ch;  |  174  |             continue;  |  175  |         }  |  176  |  |  177  |         if (ch < 0xF5) {  Branch (177:13): [True: 1.05M, False: 0]
  |  178  |             /* \xF0\x90\x80\x80-\xF4\x8F\xBF\xBF -- 10000-10FFFF */  |  179  |             Py_UCS4 ch2, ch3, ch4;  |  180  |             if (end - s < 4) {  Branch (180:17): [True: 90, False: 1.05M]
  |  181  |                 /* unexpected end of data: the caller will decide whether  |  182  |                    it's an error or not */  |  183  |                 if (end - s < 2)   Branch (183:21): [True: 54, False: 36]
  |  184  |                     break;  |  185  |                 ch2 = (unsigned char)s[1];  |  186  |                 if (!IS_CONTINUATION_BYTE(ch2) ||  |  187  |                     (ch2 < 0x90 ? ch == 0xF00  : ch == 0xF4))   Branch (187:21): [True: 0, False: 36]
  Branch (187:22): [True: 0, False: 36]
  |  188  |                     /* for clarification see comments below */  |  189  |                     goto InvalidContinuation1;  |  190  |                 if (end - s < 3)   Branch (190:21): [True: 18, False: 18]
  |  191  |                     break;  |  192  |                 ch3 = (unsigned char)s[2];  |  193  |                 if (!IS_CONTINUATION_BYTE(ch3))  |  194  |                     goto InvalidContinuation2;  |  195  |                 break;  |  196  |             }  |  197  |             ch2 = (unsigned char)s[1];  |  198  |             ch3 = (unsigned char)s[2];  |  199  |             ch4 = (unsigned char)s[3];  |  200  |             if (!IS_CONTINUATION_BYTE(ch2)) { |  201  |                 /* invalid continuation byte */  |  202  |                 goto InvalidContinuation1;  |  203  |             }  |  204  |             if (ch == 0xF0) {  Branch (204:17): [True: 199k, False: 852k]
  |  205  |                 if (ch2 < 0x90)   Branch (205:21): [True: 0, False: 199k]
  |  206  |                     /* invalid sequence  |  207  |                        \xF0\x80\x80\x80-\xF0\x8F\xBF\xBF -- fake 0000-FFFF */  |  208  |                     goto InvalidContinuation1;  |  209  |             } else if (ch == 0xF4 && ch2 >= 0x9066.4k ) {   Branch (209:24): [True: 66.4k, False: 786k]
  Branch (209:38): [True: 0, False: 66.4k]
  |  210  |                 /* invalid sequence  |  211  |                    \xF4\x90\x80\x80- -- 110000- overflow */  |  212  |                 goto InvalidContinuation1;  |  213  |             }  |  214  |             if (!IS_CONTINUATION_BYTE(ch3)) { |  215  |                 /* invalid continuation byte */  |  216  |                 goto InvalidContinuation2;  |  217  |             }  |  218  |             if (!IS_CONTINUATION_BYTE(ch4)) { |  219  |                 /* invalid continuation byte */  |  220  |                 goto InvalidContinuation3;  |  221  |             }  |  222  |             ch = (ch << 18) + (ch2 << 12) + (ch3 << 6) + ch4 -  |  223  |                  ((0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80);  |  224  |             assert ((ch > 0xFFFF) && (ch <= 0x10FFFF));  |  225  |             s += 4;  |  226  |             if (STRINGLIB_MAX_CHAR <= 0xFFFF ||   Branch (226:17): [Folded - Ignored]
  |  227  |                 (0 STRINGLIB_MAX_CHAR0  < 0x10FFFF0  && ch > 0 STRINGLIB_MAX_CHAR0 ))   Branch (227:18): [Folded - Ignored]
  Branch (227:51): [True: 0, False: 0]
  |  228  |                 /* Out-of-range */  |  229  |                 goto Return;  |  230  |             *p++ = ch;  |  231  |             continue;  |  232  |         }  |  233  |         goto InvalidStart;  |  234  |     }  |  235  |     ch = 0;  |  236  | Return:  |  237  |     *inptr = s;  |  238  |     *outpos = p - dest;  |  239  |     return ch;  |  240  | InvalidStart:  |  241  |     ch = 1;  |  242  |     goto Return;  |  243  | InvalidContinuation1:  |  244  |     ch = 2;  |  245  |     goto Return;  |  246  | InvalidContinuation2:  |  247  |     ch = 3;  |  248  |     goto Return;  |  249  | InvalidContinuation3:  |  250  |     ch = 4;  |  251  |     goto Return;  |  252  | }  |  
  | 
253  |  | 
254  | #undef ASCII_CHAR_MASK  | 
255  |  | 
256  |  | 
257  | /* UTF-8 encoder specialized for a Unicode kind to avoid the slow  | 
258  |    PyUnicode_READ() macro. Delete some parts of the code depending on the kind:  | 
259  |    UCS-1 strings don't need to handle surrogates for example. */  | 
260  | Py_LOCAL_INLINE(char *)  | 
261  | STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,  | 
262  |                         PyObject *unicode,  | 
263  |                         const STRINGLIB_CHAR *data,  | 
264  |                         Py_ssize_t size,  | 
265  |                         _Py_error_handler error_handler,  | 
266  |                         const char *errors)  | 
267  | { | 
268  |     Py_ssize_t i;                /* index into data of next input character */  | 
269  |     char *p;                     /* next free byte in output buffer */  | 
270  | #if STRINGLIB_SIZEOF_CHAR > 1  | 
271  |     PyObject *error_handler_obj = NULL;  | 
272  |     PyObject *exc = NULL;  | 
273  |     PyObject *rep = NULL;  | 
274  | #endif  | 
275  | #if STRINGLIB_SIZEOF_CHAR == 1  | 
276  |     const Py_ssize_t max_char_size = 2;  | 
277  | #elif STRINGLIB_SIZEOF_CHAR == 2  | 
278  |     const Py_ssize_t max_char_size = 3;  | 
279  | #else /*  STRINGLIB_SIZEOF_CHAR == 4 */  | 
280  |     const Py_ssize_t max_char_size = 4;  | 
281  | #endif  | 
282  |  | 
283  |     assert(size >= 0);  | 
284  |     if (size > PY_SSIZE_T_MAX / max_char_size) {  Branch (284:9): [True: 0, False: 76.7k]
   Branch (284:9): [True: 0, False: 155k]
   Branch (284:9): [True: 0, False: 1.34k]
  | 
285  |         /* integer overflow */  | 
286  |         PyErr_NoMemory();  | 
287  |         return NULL;  | 
288  |     }  | 
289  |  | 
290  |     _PyBytesWriter_Init(writer);  | 
291  |     p = _PyBytesWriter_Alloc(writer, size * max_char_size);  | 
292  |     if (p == NULL)   Branch (292:9): [True: 0, False: 76.7k]
   Branch (292:9): [True: 0, False: 155k]
   Branch (292:9): [True: 0, False: 1.34k]
  | 
293  |         return NULL;  | 
294  |  | 
295  |     for (i = 0; 76.7k i < size;) {   Branch (295:17): [True: 26.4M, False: 76.7k]
   Branch (295:17): [True: 3.01M, False: 154k]
   Branch (295:17): [True: 1.13M, False: 1.34k]
  | 
296  |         Py_UCS4 ch = data[i++];  | 
297  |  | 
298  |         if (ch < 0x80) {  Branch (298:13): [True: 26.3M, False: 149k]
   Branch (298:13): [True: 2.23M, False: 778k]
   Branch (298:13): [True: 11.0k, False: 1.12M]
  | 
299  |             /* Encode ASCII */  | 
300  |             *p++ = (char) ch;  | 
301  |  | 
302  |         }  | 
303  |         else  | 
304  | #if STRINGLIB_SIZEOF_CHAR > 1  | 
305  |         if (778k ch < 0x0800)   Branch (305:13): [True: 29.3k, False: 748k]
   Branch (305:13): [True: 3.34k, False: 1.12M]
  | 
306  | #endif  | 
307  |         { | 
308  |             /* Encode Latin-1 */  | 
309  |             *p++ = (char)(0xc0 | (ch >> 6));  | 
310  |             *p++ = (char)(0x80 | (ch & 0x3f));  | 
311  |         }  | 
312  | #if STRINGLIB_SIZEOF_CHAR > 1  | 
313  |         else if (748k Py_UNICODE_IS_SURROGATE(ch)) {   Branch (313:18): [True: 3.54k, False: 745k]
   Branch (313:18): [True: 6, False: 1.12M]
  | 
314  |             Py_ssize_t startpos, endpos, newpos;  | 
315  |             Py_ssize_t k;  | 
316  |             if (error_handler == _Py_ERROR_UNKNOWN) {  Branch (316:17): [True: 1.65k, False: 1.89k]
   Branch (316:17): [True: 6, False: 0]
  | 
317  |                 error_handler = _Py_GetErrorHandler(errors);  | 
318  |             }  | 
319  |  | 
320  |             startpos = i-1;  | 
321  |             endpos = startpos+1;  | 
322  |  | 
323  |             while ((endpos < size) && Py_UNICODE_IS_SURROGATE(data[endpos])2.28k )   Branch (323:20): [True: 2.28k, False: 1.69k]
  Branch (323:39): [True: 438, False: 1.84k]
   Branch (323:20): [True: 4, False: 2]
  Branch (323:39): [True: 0, False: 4]
  | 
324  |                 endpos++;  | 
325  |  | 
326  |             /* Only overallocate the buffer if it's not the last write */  | 
327  |             writer->overallocate = (endpos < size);  | 
328  |  | 
329  |             switch (error_handler)  | 
330  |             { | 
331  |             case _Py_ERROR_REPLACE:   Branch (331:13): [True: 4, False: 3.54k]
   Branch (331:13): [True: 0, False: 6]
  | 
332  |                 memset(p, '?', endpos - startpos);  | 
333  |                 p += (endpos - startpos);  | 
334  |                 /* fall through */  | 
335  |             case _Py_ERROR_IGNORE:   Branch (335:13): [True: 4, False: 3.54k]
   Branch (335:13): [True: 0, False: 6]
  | 
336  |                 i += (endpos - startpos - 1);  | 
337  |                 break;  | 
338  |  | 
339  |             case _Py_ERROR_SURROGATEPASS:   Branch (339:13): [True: 50, False: 3.49k]
   Branch (339:13): [True: 6, False: 0]
  | 
340  |                 for (k=startpos; k<endpos; k++60 ) {   Branch (340:34): [True: 54, False: 50]
   Branch (340:34): [True: 6, False: 6]
  | 
341  |                     ch = data[k];  | 
342  |                     *p++ = (char)(0xe0 | (ch >> 12));  | 
343  |                     *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));  | 
344  |                     *p++ = (char)(0x80 | (ch & 0x3f));  | 
345  |                 }  | 
346  |                 i += (endpos - startpos - 1);  | 
347  |                 break;  | 
348  |  | 
349  |             case _Py_ERROR_BACKSLASHREPLACE:   Branch (349:13): [True: 4, False: 3.54k]
   Branch (349:13): [True: 0, False: 6]
  | 
350  |                 /* subtract preallocated bytes */  | 
351  |                 writer->min_size -= max_char_size * (endpos - startpos);  | 
352  |                 p = backslashreplace(writer, p,  | 
353  |                                      unicode, startpos, endpos);  | 
354  |                 if (p == NULL)   Branch (354:21): [True: 0, False: 4]
   Branch (354:21): [True: 0, False: 0]
  | 
355  |                     goto error;  | 
356  |                 i += (endpos - startpos - 1);  | 
357  |                 break;  | 
358  |  | 
359  |             case _Py_ERROR_XMLCHARREFREPLACE:   Branch (359:13): [True: 2, False: 3.54k]
   Branch (359:13): [True: 0, False: 6]
  | 
360  |                 /* subtract preallocated bytes */  | 
361  |                 writer->min_size -= max_char_size * (endpos - startpos);  | 
362  |                 p = xmlcharrefreplace(writer, p,  | 
363  |                                       unicode, startpos, endpos);  | 
364  |                 if (p == NULL)   Branch (364:21): [True: 0, False: 2]
   Branch (364:21): [True: 0, False: 0]
  | 
365  |                     goto error;  | 
366  |                 i += (endpos - startpos - 1);  | 
367  |                 break;  | 
368  |  | 
369  |             case _Py_ERROR_SURROGATEESCAPE:   Branch (369:13): [True: 3.10k, False: 444]
   Branch (369:13): [True: 0, False: 6]
  | 
370  |                 for (k=startpos; k<endpos; k++3.10k ) {   Branch (370:34): [True: 3.15k, False: 3.04k]
   Branch (370:34): [True: 0, False: 0]
  | 
371  |                     ch = data[k];  | 
372  |                     if (!(0xDC80 <= ch && ch <= 0xDCFF3.14k ))   Branch (372:27): [True: 3.14k, False: 10]
  Branch (372:43): [True: 3.10k, False: 46]
   Branch (372:27): [True: 0, False: 0]
  Branch (372:43): [True: 0, False: 0]
  | 
373  |                         break;  | 
374  |                     *p++ = (char)(ch & 0xff);  | 
375  |                 }  | 
376  |                 if (k >= endpos) {  Branch (376:21): [True: 3.04k, False: 56]
   Branch (376:21): [True: 0, False: 0]
  | 
377  |                     i += (endpos - startpos - 1);  | 
378  |                     break;  | 
379  |                 }  | 
380  |                 startpos = k;  | 
381  |                 assert(startpos < endpos);  | 
382  |                 /* fall through */  | 
383  |             default:   Branch (383:13): [True: 380, False: 3.16k]
   Branch (383:13): [True: 0, False: 6]
  | 
384  |                 rep = unicode_encode_call_errorhandler(  | 
385  |                       errors, &error_handler_obj, "utf-8", "surrogates not allowed",  | 
386  |                       unicode, &exc, startpos, endpos, &newpos);  | 
387  |                 if (!rep)   Branch (387:21): [True: 379, False: 57]
   Branch (387:21): [True: 0, False: 0]
  | 
388  |                     goto error;  | 
389  |  | 
390  |                 if (newpos < startpos) {  Branch (390:21): [True: 51, False: 6]
   Branch (390:21): [True: 0, False: 0]
  | 
391  |                     writer->overallocate = 1;  | 
392  |                     p = _PyBytesWriter_Prepare(writer, p,  | 
393  |                                                max_char_size * (startpos - newpos));  | 
394  |                     if (p == NULL)   Branch (394:25): [True: 0, False: 51]
   Branch (394:25): [True: 0, False: 0]
  | 
395  |                         goto error;  | 
396  |                 }  | 
397  |                 else { | 
398  |                     /* subtract preallocated bytes */  | 
399  |                     writer->min_size -= max_char_size * (newpos - startpos);  | 
400  |                     /* Only overallocate the buffer if it's not the last write */  | 
401  |                     writer->overallocate = (newpos < size);  | 
402  |                 }  | 
403  |  | 
404  |                 if (PyBytes_Check(rep)) { | 
405  |                     p = _PyBytesWriter_WriteBytes(writer, p,  | 
406  |                                                   PyBytes_AS_STRING(rep),  | 
407  |                                                   PyBytes_GET_SIZE(rep));  | 
408  |                 }  | 
409  |                 else { | 
410  |                     /* rep is unicode */  | 
411  |                     if (PyUnicode_READY(rep) < 0)   Branch (411:25): [True: 0, False: 56]
   Branch (411:25): [True: 0, False: 0]
  | 
412  |                         goto error;  | 
413  |  | 
414  |                     if (!PyUnicode_IS_ASCII(rep)) {  Branch (414:25): [True: 3, False: 53]
   Branch (414:25): [True: 0, False: 0]
  | 
415  |                         raise_encode_exception(&exc, "utf-8", unicode,  | 
416  |                                                startpos, endpos,  | 
417  |                                                "surrogates not allowed");  | 
418  |                         goto error;  | 
419  |                     }  | 
420  |  | 
421  |                     p = _PyBytesWriter_WriteBytes(writer, p,  | 
422  |                                                   PyUnicode_DATA(rep),  | 
423  |                                                   PyUnicode_GET_LENGTH(rep));  | 
424  |                 }  | 
425  |  | 
426  |                 if (p == NULL)   Branch (426:21): [True: 0, False: 54]
   Branch (426:21): [True: 0, False: 0]
  | 
427  |                     goto error;  | 
428  |                 Py_CLEAR(rep);  | 
429  |  | 
430  |                 i = newpos;  | 
431  |             }  | 
432  |  | 
433  |             /* If overallocation was disabled, ensure that it was the last  | 
434  |                write. Otherwise, we missed an optimization */  | 
435  |             assert(writer->overallocate || i == size);  | 
436  |         }  | 
437  |         else  | 
438  | #if STRINGLIB_SIZEOF_CHAR > 2  | 
439  |         if (ch < 0x10000)   Branch (439:13): [True: 75.0k, False: 1.05M]
  | 
440  | #endif  | 
441  |         { | 
442  |             *p++ = (char)(0xe0 | (ch >> 12));  | 
443  |             *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));  | 
444  |             *p++ = (char)(0x80 | (ch & 0x3f));  | 
445  |         }  | 
446  | #if STRINGLIB_SIZEOF_CHAR > 2  | 
447  |         else /* ch >= 0x10000 */  | 
448  |         { | 
449  |             assert(ch <= MAX_UNICODE);  | 
450  |             /* Encode UCS4 Unicode ordinals */  | 
451  |             *p++ = (char)(0xf0 | (ch >> 18));  | 
452  |             *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));  | 
453  |             *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));  | 
454  |             *p++ = (char)(0x80 | (ch & 0x3f));  | 
455  |         }  | 
456  | #endif /* STRINGLIB_SIZEOF_CHAR > 2 */  | 
457  | #endif /* STRINGLIB_SIZEOF_CHAR > 1 */  | 
458  |     }  | 
459  |  | 
460  | #if STRINGLIB_SIZEOF_CHAR > 1  | 
461  |     Py_XDECREF(error_handler_obj);  | 
462  |     Py_XDECREF(exc);  | 
463  | #endif  | 
464  |     return p;  | 
465  |  | 
466  | #if STRINGLIB_SIZEOF_CHAR > 1  | 
467  |  error:  | 
468  |     Py_XDECREF(rep);  | 
469  |     Py_XDECREF(error_handler_obj);  | 
470  |     Py_XDECREF(exc);  | 
471  |     return NULL;  | 
472  | #endif  | 
473  | } unicodeobject.c:ucs1lib_utf8_encoder Line  | Count  | Source  |  267  | { |  268  |     Py_ssize_t i;                /* index into data of next input character */  |  269  |     char *p;                     /* next free byte in output buffer */  |  270  | #if STRINGLIB_SIZEOF_CHAR > 1  |  271  |     PyObject *error_handler_obj = NULL;  |  272  |     PyObject *exc = NULL;  |  273  |     PyObject *rep = NULL;  |  274  | #endif  |  275  | #if STRINGLIB_SIZEOF_CHAR == 1  |  276  |     const Py_ssize_t max_char_size = 2;  |  277  | #elif STRINGLIB_SIZEOF_CHAR == 2  |  278  |     const Py_ssize_t max_char_size = 3;  |  279  | #else /*  STRINGLIB_SIZEOF_CHAR == 4 */  |  280  |     const Py_ssize_t max_char_size = 4;  |  281  | #endif  |  282  |  |  283  |     assert(size >= 0);  |  284  |     if (size > PY_SSIZE_T_MAX / max_char_size) {  Branch (284:9): [True: 0, False: 76.7k]
  |  285  |         /* integer overflow */  |  286  |         PyErr_NoMemory();  |  287  |         return NULL;  |  288  |     }  |  289  |  |  290  |     _PyBytesWriter_Init(writer);  |  291  |     p = _PyBytesWriter_Alloc(writer, size * max_char_size);  |  292  |     if (p == NULL)   Branch (292:9): [True: 0, False: 76.7k]
  |  293  |         return NULL;  |  294  |  |  295  |     for (i = 0; 76.7k i < size;) {   Branch (295:17): [True: 26.4M, False: 76.7k]
  |  296  |         Py_UCS4 ch = data[i++];  |  297  |  |  298  |         if (ch < 0x80) {  Branch (298:13): [True: 26.3M, False: 149k]
  |  299  |             /* Encode ASCII */  |  300  |             *p++ = (char) ch;  |  301  |  |  302  |         }  |  303  |         else  |  304  | #if STRINGLIB_SIZEOF_CHAR > 1  |  305  |         if (ch < 0x0800)  |  306  | #endif  |  307  |         { |  308  |             /* Encode Latin-1 */  |  309  |             *p++ = (char)(0xc0 | (ch >> 6));  |  310  |             *p++ = (char)(0x80 | (ch & 0x3f));  |  311  |         }  |  312  | #if STRINGLIB_SIZEOF_CHAR > 1  |  313  |         else if (Py_UNICODE_IS_SURROGATE(ch)) { |  314  |             Py_ssize_t startpos, endpos, newpos;  |  315  |             Py_ssize_t k;  |  316  |             if (error_handler == _Py_ERROR_UNKNOWN) { |  317  |                 error_handler = _Py_GetErrorHandler(errors);  |  318  |             }  |  319  |  |  320  |             startpos = i-1;  |  321  |             endpos = startpos+1;  |  322  |  |  323  |             while ((endpos < size) && Py_UNICODE_IS_SURROGATE(data[endpos]))  |  324  |                 endpos++;  |  325  |  |  326  |             /* Only overallocate the buffer if it's not the last write */  |  327  |             writer->overallocate = (endpos < size);  |  328  |  |  329  |             switch (error_handler)  |  330  |             { |  331  |             case _Py_ERROR_REPLACE:  |  332  |                 memset(p, '?', endpos - startpos);  |  333  |                 p += (endpos - startpos);  |  334  |                 /* fall through */  |  335  |             case _Py_ERROR_IGNORE:  |  336  |                 i += (endpos - startpos - 1);  |  337  |                 break;  |  338  |  |  339  |             case _Py_ERROR_SURROGATEPASS:  |  340  |                 for (k=startpos; k<endpos; k++) { |  341  |                     ch = data[k];  |  342  |                     *p++ = (char)(0xe0 | (ch >> 12));  |  343  |                     *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));  |  344  |                     *p++ = (char)(0x80 | (ch & 0x3f));  |  345  |           
      }  |  346  |                 i += (endpos - startpos - 1);  |  347  |                 break;  |  348  |  |  349  |             case _Py_ERROR_BACKSLASHREPLACE:  |  350  |                 /* subtract preallocated bytes */  |  351  |                 writer->min_size -= max_char_size * (endpos - startpos);  |  352  |                 p = backslashreplace(writer, p,  |  353  |                                      unicode, startpos, endpos);  |  354  |                 if (p == NULL)  |  355  |                     goto error;  |  356  |                 i += (endpos - startpos - 1);  |  357  |                 break;  |  358  |  |  359  |             case _Py_ERROR_XMLCHARREFREPLACE:  |  360  |                 /* subtract preallocated bytes */  |  361  |                 writer->min_size -= max_char_size * (endpos - startpos);  |  362  |                 p = xmlcharrefreplace(writer, p,  |  363  |                                       unicode, startpos, endpos);  |  364  |                 if (p == NULL)  |  365  |                     goto error;  |  366  |                 i += (endpos - startpos - 1);  |  367  |                 break;  |  368  |  |  369  |             case _Py_ERROR_SURROGATEESCAPE:  |  370  |                 for (k=startpos; k<endpos; k++) { |  371  |                     ch = data[k];  |  372  |                     if (!(0xDC80 <= ch && ch <= 0xDCFF))  |  373  |                         break;  |  374  |                     *p++ = (char)(ch & 0xff);  |  375  |                 }  |  376  |                 if (k >= endpos) { |  377  |                     i += (endpos - startpos - 1);  |  378  |                     break;  |  379  |                 }  |  380  |                 startpos = k;  |  381  |                 assert(startpos < endpos);  |  382  |                 /* fall through */  |  383  |             default:  |  384  |                 rep = unicode_encode_call_errorhandler(  |  385  |                       errors, &error_handler_obj, "utf-8", 
"surrogates not allowed",  |  386  |                       unicode, &exc, startpos, endpos, &newpos);  |  387  |                 if (!rep)  |  388  |                     goto error;  |  389  |  |  390  |                 if (newpos < startpos) { |  391  |                     writer->overallocate = 1;  |  392  |                     p = _PyBytesWriter_Prepare(writer, p,  |  393  |                                                max_char_size * (startpos - newpos));  |  394  |                     if (p == NULL)  |  395  |                         goto error;  |  396  |                 }  |  397  |                 else { |  398  |                     /* subtract preallocated bytes */  |  399  |                     writer->min_size -= max_char_size * (newpos - startpos);  |  400  |                     /* Only overallocate the buffer if it's not the last write */  |  401  |                     writer->overallocate = (newpos < size);  |  402  |                 }  |  403  |  |  404  |                 if (PyBytes_Check(rep)) { |  405  |                     p = _PyBytesWriter_WriteBytes(writer, p,  |  406  |                                                   PyBytes_AS_STRING(rep),  |  407  |                                                   PyBytes_GET_SIZE(rep));  |  408  |                 }  |  409  |                 else { |  410  |                     /* rep is unicode */  |  411  |                     if (PyUnicode_READY(rep) < 0)  |  412  |                         goto error;  |  413  |  |  414  |                     if (!PyUnicode_IS_ASCII(rep)) { |  415  |                         raise_encode_exception(&exc, "utf-8", unicode,  |  416  |                                                startpos, endpos,  |  417  |                                                "surrogates not allowed");  |  418  |                         goto error;  |  419  |                     }  |  420  |  |  421  |                     p = _PyBytesWriter_WriteBytes(writer, p,  |  422  |                 
                                  PyUnicode_DATA(rep),  |  423  |                                                   PyUnicode_GET_LENGTH(rep));  |  424  |                 }  |  425  |  |  426  |                 if (p == NULL)  |  427  |                     goto error;  |  428  |                 Py_CLEAR(rep);  |  429  |  |  430  |                 i = newpos;  |  431  |             }  |  432  |  |  433  |             /* If overallocation was disabled, ensure that it was the last  |  434  |                write. Otherwise, we missed an optimization */  |  435  |             assert(writer->overallocate || i == size);  |  436  |         }  |  437  |         else  |  438  | #if STRINGLIB_SIZEOF_CHAR > 2  |  439  |         if (ch < 0x10000)  |  440  | #endif  |  441  |         { |  442  |             *p++ = (char)(0xe0 | (ch >> 12));  |  443  |             *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));  |  444  |             *p++ = (char)(0x80 | (ch & 0x3f));  |  445  |         }  |  446  | #if STRINGLIB_SIZEOF_CHAR > 2  |  447  |         else /* ch >= 0x10000 */  |  448  |         { |  449  |             assert(ch <= MAX_UNICODE);  |  450  |             /* Encode UCS4 Unicode ordinals */  |  451  |             *p++ = (char)(0xf0 | (ch >> 18));  |  452  |             *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));  |  453  |             *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));  |  454  |             *p++ = (char)(0x80 | (ch & 0x3f));  |  455  |         }  |  456  | #endif /* STRINGLIB_SIZEOF_CHAR > 2 */  |  457  | #endif /* STRINGLIB_SIZEOF_CHAR > 1 */  |  458  |     }  |  459  |  |  460  | #if STRINGLIB_SIZEOF_CHAR > 1  |  461  |     Py_XDECREF(error_handler_obj);  |  462  |     Py_XDECREF(exc);  |  463  | #endif  |  464  |     return p;  |  465  |  |  466  | #if STRINGLIB_SIZEOF_CHAR > 1  |  467  |  error:  |  468  |     Py_XDECREF(rep);  |  469  |     Py_XDECREF(error_handler_obj);  |  470  |     Py_XDECREF(exc);  |  471  |     return NULL;  |  472  | #endif  |  473  | }  |  
 unicodeobject.c:ucs2lib_utf8_encoder Line  | Count  | Source  |  267  | { |  268  |     Py_ssize_t i;                /* index into data of next input character */  |  269  |     char *p;                     /* next free byte in output buffer */  |  270  | #if STRINGLIB_SIZEOF_CHAR > 1  |  271  |     PyObject *error_handler_obj = NULL;  |  272  |     PyObject *exc = NULL;  |  273  |     PyObject *rep = NULL;  |  274  | #endif  |  275  | #if STRINGLIB_SIZEOF_CHAR == 1  |  276  |     const Py_ssize_t max_char_size = 2;  |  277  | #elif STRINGLIB_SIZEOF_CHAR == 2  |  278  |     const Py_ssize_t max_char_size = 3;  |  279  | #else /*  STRINGLIB_SIZEOF_CHAR == 4 */  |  280  |     const Py_ssize_t max_char_size = 4;  |  281  | #endif  |  282  |  |  283  |     assert(size >= 0);  |  284  |     if (size > PY_SSIZE_T_MAX / max_char_size) {  Branch (284:9): [True: 0, False: 155k]
  |  285  |         /* integer overflow */  |  286  |         PyErr_NoMemory();  |  287  |         return NULL;  |  288  |     }  |  289  |  |  290  |     _PyBytesWriter_Init(writer);  |  291  |     p = _PyBytesWriter_Alloc(writer, size * max_char_size);  |  292  |     if (p == NULL)   Branch (292:9): [True: 0, False: 155k]
  |  293  |         return NULL;  |  294  |  |  295  |     for (i = 0; 155k i < size;) {   Branch (295:17): [True: 3.01M, False: 154k]
  |  296  |         Py_UCS4 ch = data[i++];  |  297  |  |  298  |         if (ch < 0x80) {  Branch (298:13): [True: 2.23M, False: 778k]
  |  299  |             /* Encode ASCII */  |  300  |             *p++ = (char) ch;  |  301  |  |  302  |         }  |  303  |         else  |  304  | #if STRINGLIB_SIZEOF_CHAR > 1  |  305  |         if (ch < 0x0800)   Branch (305:13): [True: 29.3k, False: 748k]
  |  306  | #endif  |  307  |         { |  308  |             /* Encode Latin-1 */  |  309  |             *p++ = (char)(0xc0 | (ch >> 6));  |  310  |             *p++ = (char)(0x80 | (ch & 0x3f));  |  311  |         }  |  312  | #if STRINGLIB_SIZEOF_CHAR > 1  |  313  |         else if (Py_UNICODE_IS_SURROGATE(ch)) {  Branch (313:18): [True: 3.54k, False: 745k]
  |  314  |             Py_ssize_t startpos, endpos, newpos;  |  315  |             Py_ssize_t k;  |  316  |             if (error_handler == _Py_ERROR_UNKNOWN) {  Branch (316:17): [True: 1.65k, False: 1.89k]
  |  317  |                 error_handler = _Py_GetErrorHandler(errors);  |  318  |             }  |  319  |  |  320  |             startpos = i-1;  |  321  |             endpos = startpos+1;  |  322  |  |  323  |             while ((endpos < size) && Py_UNICODE_IS_SURROGATE(data[endpos])2.28k )   Branch (323:20): [True: 2.28k, False: 1.69k]
  Branch (323:39): [True: 438, False: 1.84k]
  |  324  |                 endpos++;  |  325  |  |  326  |             /* Only overallocate the buffer if it's not the last write */  |  327  |             writer->overallocate = (endpos < size);  |  328  |  |  329  |             switch (error_handler)  |  330  |             { |  331  |             case _Py_ERROR_REPLACE:   Branch (331:13): [True: 4, False: 3.54k]
  |  332  |                 memset(p, '?', endpos - startpos);  |  333  |                 p += (endpos - startpos);  |  334  |                 /* fall through */  |  335  |             case _Py_ERROR_IGNORE:   Branch (335:13): [True: 4, False: 3.54k]
  |  336  |                 i += (endpos - startpos - 1);  |  337  |                 break;  |  338  |  |  339  |             case _Py_ERROR_SURROGATEPASS:   Branch (339:13): [True: 50, False: 3.49k]
  |  340  |                 for (k=startpos; k<endpos; k++54 ) {   Branch (340:34): [True: 54, False: 50]
  |  341  |                     ch = data[k];  |  342  |                     *p++ = (char)(0xe0 | (ch >> 12));  |  343  |                     *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));  |  344  |                     *p++ = (char)(0x80 | (ch & 0x3f));  |  345  |                 }  |  346  |                 i += (endpos - startpos - 1);  |  347  |                 break;  |  348  |  |  349  |             case _Py_ERROR_BACKSLASHREPLACE:   Branch (349:13): [True: 4, False: 3.54k]
  |  350  |                 /* subtract preallocated bytes */  |  351  |                 writer->min_size -= max_char_size * (endpos - startpos);  |  352  |                 p = backslashreplace(writer, p,  |  353  |                                      unicode, startpos, endpos);  |  354  |                 if (p == NULL)   Branch (354:21): [True: 0, False: 4]
  |  355  |                     goto error;  |  356  |                 i += (endpos - startpos - 1);  |  357  |                 break;  |  358  |  |  359  |             case _Py_ERROR_XMLCHARREFREPLACE:   Branch (359:13): [True: 2, False: 3.54k]
  |  360  |                 /* subtract preallocated bytes */  |  361  |                 writer->min_size -= max_char_size * (endpos - startpos);  |  362  |                 p = xmlcharrefreplace(writer, p,  |  363  |                                       unicode, startpos, endpos);  |  364  |                 if (p == NULL)   Branch (364:21): [True: 0, False: 2]
  |  365  |                     goto error;  |  366  |                 i += (endpos - startpos - 1);  |  367  |                 break;  |  368  |  |  369  |             case _Py_ERROR_SURROGATEESCAPE:   Branch (369:13): [True: 3.10k, False: 444]
  |  370  |                 for (k=startpos; k<endpos; k++3.10k ) {   Branch (370:34): [True: 3.15k, False: 3.04k]
  |  371  |                     ch = data[k];  |  372  |                     if (!(0xDC80 <= ch && ch <= 0xDCFF3.14k ))   Branch (372:27): [True: 3.14k, False: 10]
  Branch (372:43): [True: 3.10k, False: 46]
  |  373  |                         break;  |  374  |                     *p++ = (char)(ch & 0xff);  |  375  |                 }  |  376  |                 if (k >= endpos) {  Branch (376:21): [True: 3.04k, False: 56]
  |  377  |                     i += (endpos - startpos - 1);  |  378  |                     break;  |  379  |                 }  |  380  |                 startpos = k;  |  381  |                 assert(startpos < endpos);  |  382  |                 /* fall through */  |  383  |             default:   Branch (383:13): [True: 380, False: 3.16k]
  |  384  |                 rep = unicode_encode_call_errorhandler(  |  385  |                       errors, &error_handler_obj, "utf-8", "surrogates not allowed",  |  386  |                       unicode, &exc, startpos, endpos, &newpos);  |  387  |                 if (!rep)   Branch (387:21): [True: 379, False: 57]
  |  388  |                     goto error;  |  389  |  |  390  |                 if (newpos < startpos) {  Branch (390:21): [True: 51, False: 6]
  |  391  |                     writer->overallocate = 1;  |  392  |                     p = _PyBytesWriter_Prepare(writer, p,  |  393  |                                                max_char_size * (startpos - newpos));  |  394  |                     if (p == NULL)   Branch (394:25): [True: 0, False: 51]
  |  395  |                         goto error;  |  396  |                 }  |  397  |                 else { |  398  |                     /* subtract preallocated bytes */  |  399  |                     writer->min_size -= max_char_size * (newpos - startpos);  |  400  |                     /* Only overallocate the buffer if it's not the last write */  |  401  |                     writer->overallocate = (newpos < size);  |  402  |                 }  |  403  |  |  404  |                 if (PyBytes_Check(rep)) { |  405  |                     p = _PyBytesWriter_WriteBytes(writer, p,  |  406  |                                                   PyBytes_AS_STRING(rep),  |  407  |                                                   PyBytes_GET_SIZE(rep));  |  408  |                 }  |  409  |                 else { |  410  |                     /* rep is unicode */  |  411  |                     if (PyUnicode_READY(rep) < 0)   Branch (411:25): [True: 0, False: 56]
  |  412  |                         goto error;  |  413  |  |  414  |                     if (!PyUnicode_IS_ASCII(rep)) {  Branch (414:25): [True: 3, False: 53]
  |  415  |                         raise_encode_exception(&exc, "utf-8", unicode,  |  416  |                                                startpos, endpos,  |  417  |                                                "surrogates not allowed");  |  418  |                         goto error;  |  419  |                     }  |  420  |  |  421  |                     p = _PyBytesWriter_WriteBytes(writer, p,  |  422  |                                                   PyUnicode_DATA(rep),  |  423  |                                                   PyUnicode_GET_LENGTH(rep));  |  424  |                 }  |  425  |  |  426  |                 if (p == NULL)   Branch (426:21): [True: 0, False: 54]
  |  427  |                     goto error;  |  428  |                 Py_CLEAR(rep);  |  429  |  |  430  |                 i = newpos;  |  431  |             }  |  432  |  |  433  |             /* If overallocation was disabled, ensure that it was the last  |  434  |                write. Otherwise, we missed an optimization */  |  435  |             assert(writer->overallocate || i == size);  |  436  |         }  |  437  |         else  |  438  | #if STRINGLIB_SIZEOF_CHAR > 2  |  439  |         if (ch < 0x10000)  |  440  | #endif  |  441  |         { |  442  |             *p++ = (char)(0xe0 | (ch >> 12));  |  443  |             *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));  |  444  |             *p++ = (char)(0x80 | (ch & 0x3f));  |  445  |         }  |  446  | #if STRINGLIB_SIZEOF_CHAR > 2  |  447  |         else /* ch >= 0x10000 */  |  448  |         { |  449  |             assert(ch <= MAX_UNICODE);  |  450  |             /* Encode UCS4 Unicode ordinals */  |  451  |             *p++ = (char)(0xf0 | (ch >> 18));  |  452  |             *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));  |  453  |             *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));  |  454  |             *p++ = (char)(0x80 | (ch & 0x3f));  |  455  |         }  |  456  | #endif /* STRINGLIB_SIZEOF_CHAR > 2 */  |  457  | #endif /* STRINGLIB_SIZEOF_CHAR > 1 */  |  458  |     }  |  459  |  |  460  | #if STRINGLIB_SIZEOF_CHAR > 1  |  461  |     Py_XDECREF(error_handler_obj);  |  462  |     Py_XDECREF(exc);  |  463  | #endif  |  464  |     return p;  |  465  |  |  466  | #if STRINGLIB_SIZEOF_CHAR > 1  |  467  |  error:  |  468  |     Py_XDECREF(rep);  |  469  |     Py_XDECREF(error_handler_obj);  |  470  |     Py_XDECREF(exc);  |  471  |     return NULL;  |  472  | #endif  |  473  | }  |  
 unicodeobject.c:ucs4lib_utf8_encoder Line  | Count  | Source  |  267  | { |  268  |     Py_ssize_t i;                /* index into data of next input character */  |  269  |     char *p;                     /* next free byte in output buffer */  |  270  | #if STRINGLIB_SIZEOF_CHAR > 1  |  271  |     PyObject *error_handler_obj = NULL;  |  272  |     PyObject *exc = NULL;  |  273  |     PyObject *rep = NULL;  |  274  | #endif  |  275  | #if STRINGLIB_SIZEOF_CHAR == 1  |  276  |     const Py_ssize_t max_char_size = 2;  |  277  | #elif STRINGLIB_SIZEOF_CHAR == 2  |  278  |     const Py_ssize_t max_char_size = 3;  |  279  | #else /*  STRINGLIB_SIZEOF_CHAR == 4 */  |  280  |     const Py_ssize_t max_char_size = 4;  |  281  | #endif  |  282  |  |  283  |     assert(size >= 0);  |  284  |     if (size > PY_SSIZE_T_MAX / max_char_size) {  Branch (284:9): [True: 0, False: 1.34k]
  |  285  |         /* integer overflow */  |  286  |         PyErr_NoMemory();  |  287  |         return NULL;  |  288  |     }  |  289  |  |  290  |     _PyBytesWriter_Init(writer);  |  291  |     p = _PyBytesWriter_Alloc(writer, size * max_char_size);  |  292  |     if (p == NULL)   Branch (292:9): [True: 0, False: 1.34k]
  |  293  |         return NULL;  |  294  |  |  295  |     for (i = 0; 1.34k i < size;) {   Branch (295:17): [True: 1.13M, False: 1.34k]
  |  296  |         Py_UCS4 ch = data[i++];  |  297  |  |  298  |         if (ch < 0x80) {  Branch (298:13): [True: 11.0k, False: 1.12M]
  |  299  |             /* Encode ASCII */  |  300  |             *p++ = (char) ch;  |  301  |  |  302  |         }  |  303  |         else  |  304  | #if STRINGLIB_SIZEOF_CHAR > 1  |  305  |         if (ch < 0x0800)   Branch (305:13): [True: 3.34k, False: 1.12M]
  |  306  | #endif  |  307  |         { |  308  |             /* Encode Latin-1 */  |  309  |             *p++ = (char)(0xc0 | (ch >> 6));  |  310  |             *p++ = (char)(0x80 | (ch & 0x3f));  |  311  |         }  |  312  | #if STRINGLIB_SIZEOF_CHAR > 1  |  313  |         else if (Py_UNICODE_IS_SURROGATE(ch)) {  Branch (313:18): [True: 6, False: 1.12M]
  |  314  |             Py_ssize_t startpos, endpos, newpos;  |  315  |             Py_ssize_t k;  |  316  |             if (error_handler == _Py_ERROR_UNKNOWN) {  Branch (316:17): [True: 6, False: 0]
  |  317  |                 error_handler = _Py_GetErrorHandler(errors);  |  318  |             }  |  319  |  |  320  |             startpos = i-1;  |  321  |             endpos = startpos+1;  |  322  |  |  323  |             while ((endpos < size) && Py_UNICODE_IS_SURROGATE(data[endpos])4 )   Branch (323:20): [True: 4, False: 2]
  Branch (323:39): [True: 0, False: 4]
  |  324  |                 endpos++;  |  325  |  |  326  |             /* Only overallocate the buffer if it's not the last write */  |  327  |             writer->overallocate = (endpos < size);  |  328  |  |  329  |             switch (error_handler)  |  330  |             { |  331  |             case _Py_ERROR_REPLACE:   Branch (331:13): [True: 0, False: 6]
  |  332  |                 memset(p, '?', endpos - startpos);  |  333  |                 p += (endpos - startpos);  |  334  |                 /* fall through */  |  335  |             case _Py_ERROR_IGNORE:   Branch (335:13): [True: 0, False: 6]
  |  336  |                 i += (endpos - startpos - 1);  |  337  |                 break;  |  338  |  |  339  |             case _Py_ERROR_SURROGATEPASS:   Branch (339:13): [True: 6, False: 0]
  |  340  |                 for (k=startpos; k<endpos; k++6 ) {   Branch (340:34): [True: 6, False: 6]
  |  341  |                     ch = data[k];  |  342  |                     *p++ = (char)(0xe0 | (ch >> 12));  |  343  |                     *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));  |  344  |                     *p++ = (char)(0x80 | (ch & 0x3f));  |  345  |                 }  |  346  |                 i += (endpos - startpos - 1);  |  347  |                 break;  |  348  |  |  349  |             case _Py_ERROR_BACKSLASHREPLACE:   Branch (349:13): [True: 0, False: 6]
  |  350  |                 /* subtract preallocated bytes */  |  351  |                 writer->min_size -= max_char_size * (endpos - startpos);  |  352  |                 p = backslashreplace(writer, p,  |  353  |                                      unicode, startpos, endpos);  |  354  |                 if (p == NULL)   Branch (354:21): [True: 0, False: 0]
  |  355  |                     goto error;  |  356  |                 i += (endpos - startpos - 1);  |  357  |                 break;  |  358  |  |  359  |             case _Py_ERROR_XMLCHARREFREPLACE:   Branch (359:13): [True: 0, False: 6]
  |  360  |                 /* subtract preallocated bytes */  |  361  |                 writer->min_size -= max_char_size * (endpos - startpos);  |  362  |                 p = xmlcharrefreplace(writer, p,  |  363  |                                       unicode, startpos, endpos);  |  364  |                 if (p == NULL)   Branch (364:21): [True: 0, False: 0]
  |  365  |                     goto error;  |  366  |                 i += (endpos - startpos - 1);  |  367  |                 break;  |  368  |  |  369  |             case _Py_ERROR_SURROGATEESCAPE:   Branch (369:13): [True: 0, False: 6]
  |  370  |                 for (k=startpos; k<endpos; k++) {  Branch (370:34): [True: 0, False: 0]
  |  371  |                     ch = data[k];  |  372  |                     if (!(0xDC80 <= ch && ch <= 0xDCFF))   Branch (372:27): [True: 0, False: 0]
  Branch (372:43): [True: 0, False: 0]
  |  373  |                         break;  |  374  |                     *p++ = (char)(ch & 0xff);  |  375  |                 }  |  376  |                 if (k >= endpos) {  Branch (376:21): [True: 0, False: 0]
  |  377  |                     i += (endpos - startpos - 1);  |  378  |                     break;  |  379  |                 }  |  380  |                 startpos = k;  |  381  |                 assert(startpos < endpos);  |  382  |                 /* fall through */  |  383  |             default:   Branch (383:13): [True: 0, False: 6]
  |  384  |                 rep = unicode_encode_call_errorhandler(  |  385  |                       errors, &error_handler_obj, "utf-8", "surrogates not allowed",  |  386  |                       unicode, &exc, startpos, endpos, &newpos);  |  387  |                 if (!rep)   Branch (387:21): [True: 0, False: 0]
  |  388  |                     goto error;  |  389  |  |  390  |                 if (newpos < startpos) {  Branch (390:21): [True: 0, False: 0]
  |  391  |                     writer->overallocate = 1;  |  392  |                     p = _PyBytesWriter_Prepare(writer, p,  |  393  |                                                max_char_size * (startpos - newpos));  |  394  |                     if (p == NULL)   Branch (394:25): [True: 0, False: 0]
  |  395  |                         goto error;  |  396  |                 }  |  397  |                 else { |  398  |                     /* subtract preallocated bytes */  |  399  |                     writer->min_size -= max_char_size * (newpos - startpos);  |  400  |                     /* Only overallocate the buffer if it's not the last write */  |  401  |                     writer->overallocate = (newpos < size);  |  402  |                 }  |  403  |  |  404  |                 if (PyBytes_Check(rep)) { |  405  |                     p = _PyBytesWriter_WriteBytes(writer, p,  |  406  |                                                   PyBytes_AS_STRING(rep),  |  407  |                                                   PyBytes_GET_SIZE(rep));  |  408  |                 }  |  409  |                 else { |  410  |                     /* rep is unicode */  |  411  |                     if (PyUnicode_READY(rep) < 0)   Branch (411:25): [True: 0, False: 0]
  |  412  |                         goto error;  |  413  |  |  414  |                     if (!PyUnicode_IS_ASCII(rep)) {  Branch (414:25): [True: 0, False: 0]
  |  415  |                         raise_encode_exception(&exc, "utf-8", unicode,  |  416  |                                                startpos, endpos,  |  417  |                                                "surrogates not allowed");  |  418  |                         goto error;  |  419  |                     }  |  420  |  |  421  |                     p = _PyBytesWriter_WriteBytes(writer, p,  |  422  |                                                   PyUnicode_DATA(rep),  |  423  |                                                   PyUnicode_GET_LENGTH(rep));  |  424  |                 }  |  425  |  |  426  |                 if (p == NULL)   Branch (426:21): [True: 0, False: 0]
  |  427  |                     goto error;  |  428  |                 Py_CLEAR(rep);  |  429  | 
  |  430  |                 i = newpos;  |  431  |             }  |  432  |  |  433  |             /* If overallocation was disabled, ensure that it was the last  |  434  |                write. Otherwise, we missed an optimization */  |  435  |             assert(writer->overallocate || i == size);  |  436  |         }  |  437  |         else  |  438  | #if STRINGLIB_SIZEOF_CHAR > 2  |  439  |         if (ch < 0x10000)   Branch (439:13): [True: 75.0k, False: 1.05M]
  |  440  | #endif  |  441  |         { |  442  |             *p++ = (char)(0xe0 | (ch >> 12));  |  443  |             *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));  |  444  |             *p++ = (char)(0x80 | (ch & 0x3f));  |  445  |         }  |  446  | #if STRINGLIB_SIZEOF_CHAR > 2  |  447  |         else /* ch >= 0x10000 */  |  448  |         { |  449  |             assert(ch <= MAX_UNICODE);  |  450  |             /* Encode UCS4 Unicode ordinals */  |  451  |             *p++ = (char)(0xf0 | (ch >> 18));  |  452  |             *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));  |  453  |             *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));  |  454  |             *p++ = (char)(0x80 | (ch & 0x3f));  |  455  |         }  |  456  | #endif /* STRINGLIB_SIZEOF_CHAR > 2 */  |  457  | #endif /* STRINGLIB_SIZEOF_CHAR > 1 */  |  458  |     }  |  459  |  |  460  | #if STRINGLIB_SIZEOF_CHAR > 1  |  461  |     Py_XDECREF(error_handler_obj);  |  462  |     Py_XDECREF(exc);  |  463  | #endif  |  464  |     return p;  |  465  |  |  466  | #if STRINGLIB_SIZEOF_CHAR > 1  |  467  |  error:  |  468  |     Py_XDECREF(rep);  |  469  |     Py_XDECREF(error_handler_obj);  |  470  |     Py_XDECREF(exc);  |  471  |     return NULL;  |  472  | #endif  |  473  | }  |  
 Unexecuted instantiation: unicodeobject.c:asciilib_utf8_encoder  | 
474  |  | 
475  | /* Multiplier that copies a 16-bit pattern into every 16-bit lane of a C 'long'. */  | 
476  | #if SIZEOF_LONG == 8  | 
477  | # define UCS2_REPEAT_MASK 0x0001000100010001ul  | 
478  | #elif SIZEOF_LONG == 4  | 
479  | # define UCS2_REPEAT_MASK 0x00010001ul  | 
480  | #else  | 
481  | # error C 'long' size should be either 4 or 8!  | 
482  | #endif  | 
483  |  | 
484  | /* FAST_CHAR_MASK: per-lane bits that make utf16_decode leave its fast path. */  | 
485  | #if STRINGLIB_SIZEOF_CHAR == 1  | 
486  | /* Mask for quickly checking whether a C 'long' contains any UTF-16 code unit above  | 
487  |    STRINGLIB_MAX_CHAR (non-ASCII or non-Latin-1, depending on the instantiation). */  | 
488  | # define FAST_CHAR_MASK         (UCS2_REPEAT_MASK * (0xFFFFu & ~STRINGLIB_MAX_CHAR))  | 
489  | #else  | 
490  | /* Mask for quickly checking whether a C 'long' may contain  | 
491  |    UTF-16 surrogate code units. Only bit 15 of each lane is tested, so this is  | 
492  |    a heuristic: it assumes that non-surrogate characters with a code point  | 
493  |    >= 0x8000 are rare in most input.  | 
494  | */  | 
495  | # define FAST_CHAR_MASK         (UCS2_REPEAT_MASK * 0x8000u)  | 
496  | #endif  | 
497  | /* Selects the low byte of every 16-bit lane; building block of SWAB. */  | 
498  | #define STRIPPED_MASK           (UCS2_REPEAT_MASK * 0x00FFu)  | 
499  | /* Swap the two bytes inside every 16-bit lane of 'value'. */  | 
500  | #define SWAB(value)             ((((value) >> 8) & STRIPPED_MASK) | \  | 
501  |                                  (((value) & STRIPPED_MASK) << 8))  | 
502  |  | 
/* Decode UTF-16 code units from *inptr into dest, starting at dest[*outpos].

   Input is consumed two bytes at a time up to e (NOTE(review): e is
   presumably one past the last input byte -- confirm against the caller).
   native_ordering selects which byte of each pair is the high byte (see
   ihi/ilo below).

   On return *inptr and *outpos hold the positions reached, and the result is:
     0  the loop ran out of complete 2-byte units;
     1  a high surrogate was read but no complete unit follows it
        (UnexpectedEnd);
     2  a surrogate that is not a high surrogate was read (IllegalEncoding);
     3  a high surrogate was followed by a unit that is not a low surrogate
        (IllegalSurrogate);
     otherwise the decoded code point that is not stored in dest: a unit
     above STRINGLIB_MAX_CHAR when STRINGLIB_SIZEOF_CHAR < 2, or a joined
     surrogate pair when STRINGLIB_SIZEOF_CHAR < 4.
   In every non-zero case *inptr already points past the unit(s) that were
   read. */
503  | Py_LOCAL_INLINE(Py_UCS4)  | 
504  | STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,  | 
505  |                         STRINGLIB_CHAR *dest, Py_ssize_t *outpos,  | 
506  |                         int native_ordering)  | 
507  | { | 
508  |     Py_UCS4 ch;  | 
509  |     const unsigned char *q = *inptr;  | 
510  |     STRINGLIB_CHAR *p = dest + *outpos;  | 
511  |     /* Byte offsets from q of the high (ihi) and low (ilo) byte of each 16-bit unit. */  | 
512  | #if PY_LITTLE_ENDIAN  | 
513  |     int ihi = !!native_ordering, ilo = !native_ordering;  | 
514  | #else  | 
515  |     int ihi = !native_ordering, ilo = !!native_ordering;  | 
516  | #endif  | 
517  |     --e;  /* with e moved back one byte, q < e implies q[0] and q[1] both lie before the original e */  | 
518  |  | 
519  |     while (q < e) {  Branch (519:12): [True: 100k, False: 24.2k]
   Branch (519:12): [True: 67, False: 10]
   Branch (519:12): [True: 20.6k, False: 4.07k]
   Branch (519:12): [True: 85, False: 23]
  | 
520  |         Py_UCS4 ch2;  | 
521  |         /* First check for a possible aligned read of a C 'long'. Unaligned reads are  | 
522  |            more expensive, so an unaligned q is decoded one unit at a time below. */  | 
523  |         if (_Py_IS_ALIGNED(q, ALIGNOF_LONG)) { | 
524  |             /* Fast path: copy SIZEOF_LONG bytes per step while no lane trips FAST_CHAR_MASK. */  | 
525  |             const unsigned char *_q = q;  | 
526  |             while (_q + SIZEOF_LONG <= e) {  Branch (526:20): [True: 191k, False: 26.4k]
   Branch (526:20): [True: 9, False: 10]
   Branch (526:20): [True: 16.5k, False: 3.83k]
   Branch (526:20): [True: 18, False: 14]
  | 
527  |                 unsigned long block = * (const unsigned long *) _q;  | 
528  |                 if (native_ordering) {  Branch (528:21): [True: 129k, False: 62.3k]
   Branch (528:21): [True: 2, False: 7]
   Branch (528:21): [True: 10.9k, False: 5.66k]
   Branch (528:21): [True: 12, False: 6]
  | 
529  |                     /* Native byte order: test the block exactly as read */  | 
530  |                     if (block & FAST_CHAR_MASK)   Branch (530:25): [True: 2.53k, False: 126k]
   Branch (530:25): [True: 2, False: 0]
   Branch (530:25): [True: 0, False: 10.9k]
   Branch (530:25): [True: 5, False: 7]
  | 
531  |                         break;  | 
532  |                 }  | 
533  |                 else { | 
534  |                     /* Opposite byte order: test against the byte-swapped mask, then fix up the block */  | 
535  |                     if (block & SWAB(FAST_CHAR_MASK))   Branch (535:25): [True: 1.31k, False: 61.0k]
   Branch (535:25): [True: 2, False: 5]
   Branch (535:25): [True: 0, False: 5.66k]
   Branch (535:25): [True: 3, False: 3]
  | 
536  |                         break;  | 
537  | #if STRINGLIB_SIZEOF_CHAR == 1  | 
538  |                     block >>= 8;  /* shift instead of a full SWAB; NOTE(review): relies on the (STRINGLIB_CHAR) casts below keeping only each lane's low byte -- confirm */  | 
539  | #else  | 
540  |                     block = SWAB(block);  | 
541  | #endif  | 
542  |                 }  | 
543  | #if PY_LITTLE_ENDIAN  | 
544  | # if SIZEOF_LONG == 4  | 
545  |                 p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);  | 
546  |                 p[1] = (STRINGLIB_CHAR)(block >> 16);  | 
547  | # elif SIZEOF_LONG == 8  | 
548  |                 p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);  | 
549  |                 p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);  | 
550  |                 p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);  | 
551  |                 p[3] = (STRINGLIB_CHAR)(block >> 48);  | 
552  | # endif  | 
553  | #else  | 
554  | # if SIZEOF_LONG == 4  | 
555  |                 p[0] = (STRINGLIB_CHAR)(block >> 16);  | 
556  |                 p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu);  | 
557  | # elif SIZEOF_LONG == 8  | 
558  |                 p[0] = (STRINGLIB_CHAR)(block >> 48);  | 
559  |                 p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);  | 
560  |                 p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);  | 
561  |                 p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu);  | 
562  | # endif  | 
563  | #endif  | 
564  |                 _q += SIZEOF_LONG;  | 
565  |                 p += SIZEOF_LONG / 2;  | 
566  |             }  | 
567  |             q = _q;  | 
568  |             if (q >= e)   Branch (568:17): [True: 88, False: 30.2k]
   Branch (568:17): [True: 0, False: 14]
   Branch (568:17): [True: 0, False: 3.83k]
   Branch (568:17): [True: 0, False: 22]
  | 
569  |                 break;  /* no complete 2-byte unit is left */  | 
570  |         }  | 
571  |  | 
572  |         ch = (q[ihi] << 8) | q[ilo];  /* assemble one 16-bit code unit */  | 
573  |         q += 2;  | 
574  |         if (!Py_UNICODE_IS_SURROGATE(ch)) {  Branch (574:13): [True: 99.8k, False: 179]
   Branch (574:13): [True: 65, False: 2]
   Branch (574:13): [True: 20.6k, False: 18]
   Branch (574:13): [True: 50, False: 35]
  | 
575  | #if STRINGLIB_SIZEOF_CHAR < 2  | 
576  |             if (ch > STRINGLIB_MAX_CHAR)   Branch (576:17): [True: 6.92k, False: 92.9k]
   Branch (576:17): [True: 6, False: 59]
  | 
577  |                 /* Out-of-range for STRINGLIB_CHAR: hand ch back to the caller */  | 
578  |                 goto Return;  | 
579  | #endif  | 
580  |             *p++ = (STRINGLIB_CHAR)ch;  | 
581  |             continue;  | 
582  |         }  | 
583  |  | 
584  |         /* Surrogate: expect a high surrogate followed by a low surrogate. */  | 
585  |         if (!Py_UNICODE_IS_HIGH_SURROGATE(ch))   Branch (585:13): [True: 90, False: 89]
   Branch (585:13): [True: 0, False: 2]
   Branch (585:13): [True: 0, False: 18]
   Branch (585:13): [True: 15, False: 20]
  | 
586  |             goto IllegalEncoding;  | 
587  |         if (q >= e)   Branch (587:13): [True: 44, False: 45]
   Branch (587:13): [True: 0, False: 2]
   Branch (587:13): [True: 0, False: 18]
   Branch (587:13): [True: 0, False: 20]
  | 
588  |             goto UnexpectedEnd;  | 
589  |         ch2 = (q[ihi] << 8) | q[ilo];  | 
590  |         q += 2;  | 
591  |         if (!Py_UNICODE_IS_LOW_SURROGATE(ch2))   Branch (591:13): [True: 6, False: 39]
   Branch (591:13): [True: 0, False: 2]
   Branch (591:13): [True: 6, False: 12]
   Branch (591:13): [True: 0, False: 20]
  | 
592  |             goto IllegalSurrogate;  | 
593  |         ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2);  | 
594  | #if STRINGLIB_SIZEOF_CHAR < 4  | 
595  |         /* Out-of-range: the joined pair is returned to the caller, not stored */  | 
596  |         goto Return;  | 
597  | #else  | 
598  |         *p++ = (STRINGLIB_CHAR)ch;  | 
599  | #endif  | 
600  |     }  | 
601  |     ch = 0;  /* loop ended without an error or an out-of-range unit */  | 
602  | Return:  | 
603  |     *inptr = q;  | 
604  |     *outpos = p - dest;  | 
605  |     return ch;  | 
606  | UnexpectedEnd:  | 
607  |     ch = 1;  | 
608  |     goto Return;  | 
609  | IllegalEncoding:  | 
610  |     ch = 2;  | 
611  |     goto Return;  | 
612  | IllegalSurrogate:  | 
613  |     ch = 3;  | 
614  |     goto Return;  | 
615  | } unicodeobject.c:asciilib_utf16_decode Line  | Count  | Source  |  507  | { |  508  |     Py_UCS4 ch;  |  509  |     const unsigned char *q = *inptr;  |  510  |     STRINGLIB_CHAR *p = dest + *outpos;  |  511  |     /* Offsets from q for retrieving byte pairs in the right order. */  |  512  | #if PY_LITTLE_ENDIAN  |  513  |     int ihi = !!native_ordering, ilo = !native_ordering;  |  514  | #else  |  515  |     int ihi = !native_ordering, ilo = !!native_ordering;  |  516  | #endif  |  517  |     --e;  |  518  |  |  519  |     while (q < e) {  Branch (519:12): [True: 100k, False: 24.2k]
  |  520  |         Py_UCS4 ch2;  |  521  |         /* First check for possible aligned read of a C 'long'. Unaligned  |  522  |            reads are more expensive, better to defer to another iteration. */  |  523  |         if (_Py_IS_ALIGNED(q, ALIGNOF_LONG)) { |  524  |             /* Fast path for runs of in-range non-surrogate chars. */  |  525  |             const unsigned char *_q = q;  |  526  |             while (_q + SIZEOF_LONG <= e) {  Branch (526:20): [True: 191k, False: 26.4k]
  |  527  |                 unsigned long block = * (const unsigned long *) _q;  |  528  |                 if (native_ordering) {  Branch (528:21): [True: 129k, False: 62.3k]
  |  529  |                     /* Can use buffer directly */  |  530  |                     if (block & FAST_CHAR_MASK)   Branch (530:25): [True: 2.53k, False: 126k]
  |  531  |                         break;  |  532  |                 }  |  533  |                 else { |  534  |                     /* Need to byte-swap */  |  535  |                     if (block & SWAB(FAST_CHAR_MASK))   Branch (535:25): [True: 1.31k, False: 61.0k]
  |  536  |                         break;  |  537  | #if STRINGLIB_SIZEOF_CHAR == 1  |  538  |                     block >>= 8;  |  539  | #else  |  540  |                     block = SWAB(block);  |  541  | #endif  |  542  |                 }  |  543  | #if PY_LITTLE_ENDIAN  |  544  | # if SIZEOF_LONG == 4  |  545  |                 p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);  |  546  |                 p[1] = (STRINGLIB_CHAR)(block >> 16);  |  547  | # elif SIZEOF_LONG == 8  |  548  |                 p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);  |  549  |                 p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);  |  550  |                 p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);  |  551  |                 p[3] = (STRINGLIB_CHAR)(block >> 48);  |  552  | # endif  |  553  | #else  |  554  | # if SIZEOF_LONG == 4  |  555  |                 p[0] = (STRINGLIB_CHAR)(block >> 16);  |  556  |                 p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu);  |  557  | # elif SIZEOF_LONG == 8  |  558  |                 p[0] = (STRINGLIB_CHAR)(block >> 48);  |  559  |                 p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);  |  560  |                 p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);  |  561  |                 p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu);  |  562  | # endif  |  563  | #endif  |  564  |                 _q += SIZEOF_LONG;  |  565  |                 p += SIZEOF_LONG / 2;  |  566  |             }  |  567  |             q = _q;  |  568  |             if (q >= e)   Branch (568:17): [True: 88, False: 30.2k]
  |  569  |                 break;  |  570  |         }  |  571  |  |  572  |         ch = (q[ihi] << 8) | q[ilo];  |  573  |         q += 2;  |  574  |         if (!Py_UNICODE_IS_SURROGATE(ch)) {  Branch (574:13): [True: 99.8k, False: 179]
  |  575  | #if STRINGLIB_SIZEOF_CHAR < 2  |  576  |             if (ch > STRINGLIB_MAX_CHAR)   Branch (576:17): [True: 6.92k, False: 92.9k]
  |  577  |                 /* Out-of-range */  |  578  |                 goto Return;  |  579  | #endif  |  580  |             *p++ = (STRINGLIB_CHAR)ch;  |  581  |             continue;  |  582  |         }  |  583  |  |  584  |         /* UTF-16 code pair: */  |  585  |         if (!Py_UNICODE_IS_HIGH_SURROGATE(ch))   Branch (585:13): [True: 90, False: 89]
  |  586  |             goto IllegalEncoding;  |  587  |         if (q >= e)   Branch (587:13): [True: 44, False: 45]
  |  588  |             goto UnexpectedEnd;  |  589  |         ch2 = (q[ihi] << 8) | q[ilo];  |  590  |         q += 2;  |  591  |         if (!Py_UNICODE_IS_LOW_SURROGATE(ch2))   Branch (591:13): [True: 6, False: 39]
  |  592  |             goto IllegalSurrogate;  |  593  |         ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2);  |  594  | #if STRINGLIB_SIZEOF_CHAR < 4  |  595  |         /* Out-of-range */  |  596  |         goto Return;  |  597  | #else  |  598  |         *p++ = (STRINGLIB_CHAR)ch;  |  599  | #endif  |  600  |     }  |  601  |     ch = 0;  |  602  | Return:  |  603  |     *inptr = q;  |  604  |     *outpos = p - dest;  |  605  |     return ch;  |  606  | UnexpectedEnd:  |  607  |     ch = 1;  |  608  |     goto Return;  |  609  | IllegalEncoding:  |  610  |     ch = 2;  |  611  |     goto Return;  |  612  | IllegalSurrogate:  |  613  |     ch = 3;  |  614  |     goto Return;  |  615  | }  |  
 unicodeobject.c:ucs1lib_utf16_decode Line  | Count  | Source  |  507  | { |  508  |     Py_UCS4 ch;  |  509  |     const unsigned char *q = *inptr;  |  510  |     STRINGLIB_CHAR *p = dest + *outpos;  |  511  |     /* Offsets from q for retrieving byte pairs in the right order. */  |  512  | #if PY_LITTLE_ENDIAN  |  513  |     int ihi = !!native_ordering, ilo = !native_ordering;  |  514  | #else  |  515  |     int ihi = !native_ordering, ilo = !!native_ordering;  |  516  | #endif  |  517  |     --e;  |  518  |  |  519  |     while (q < e) {  Branch (519:12): [True: 67, False: 10]
  |  520  |         Py_UCS4 ch2;  |  521  |         /* First check for possible aligned read of a C 'long'. Unaligned  |  522  |            reads are more expensive, better to defer to another iteration. */  |  523  |         if (_Py_IS_ALIGNED(q, ALIGNOF_LONG)) { |  524  |             /* Fast path for runs of in-range non-surrogate chars. */  |  525  |             const unsigned char *_q = q;  |  526  |             while (_q + SIZEOF_LONG <= e) {  Branch (526:20): [True: 9, False: 10]
  |  527  |                 unsigned long block = * (const unsigned long *) _q;  |  528  |                 if (native_ordering) {  Branch (528:21): [True: 2, False: 7]
  |  529  |                     /* Can use buffer directly */  |  530  |                     if (block & FAST_CHAR_MASK)   Branch (530:25): [True: 2, False: 0]
  |  531  |                         break;  |  532  |                 }  |  533  |                 else { |  534  |                     /* Need to byte-swap */  |  535  |                     if (block & SWAB(FAST_CHAR_MASK))   Branch (535:25): [True: 2, False: 5]
  |  536  |                         break;  |  537  | #if STRINGLIB_SIZEOF_CHAR == 1  |  538  |                     block >>= 8;  |  539  | #else  |  540  |                     block = SWAB(block);  |  541  | #endif  |  542  |                 }  |  543  | #if PY_LITTLE_ENDIAN  |  544  | # if SIZEOF_LONG == 4  |  545  |                 p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);  |  546  |                 p[1] = (STRINGLIB_CHAR)(block >> 16);  |  547  | # elif SIZEOF_LONG == 8  |  548  |                 p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);  |  549  |                 p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);  |  550  |                 p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);  |  551  |                 p[3] = (STRINGLIB_CHAR)(block >> 48);  |  552  | # endif  |  553  | #else  |  554  | # if SIZEOF_LONG == 4  |  555  |                 p[0] = (STRINGLIB_CHAR)(block >> 16);  |  556  |                 p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu);  |  557  | # elif SIZEOF_LONG == 8  |  558  |                 p[0] = (STRINGLIB_CHAR)(block >> 48);  |  559  |                 p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);  |  560  |                 p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);  |  561  |                 p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu);  |  562  | # endif  |  563  | #endif  |  564  |                 _q += SIZEOF_LONG;  |  565  |                 p += SIZEOF_LONG / 2;  |  566  |             }  |  567  |             q = _q;  |  568  |             if (q >= e)   Branch (568:17): [True: 0, False: 14]
  |  569  |                 break;  |  570  |         }  |  571  |  |  572  |         ch = (q[ihi] << 8) | q[ilo];  |  573  |         q += 2;  |  574  |         if (!Py_UNICODE_IS_SURROGATE(ch)) {  Branch (574:13): [True: 65, False: 2]
  |  575  | #if STRINGLIB_SIZEOF_CHAR < 2  |  576  |             if (ch > STRINGLIB_MAX_CHAR)   Branch (576:17): [True: 6, False: 59]
  |  577  |                 /* Out-of-range */  |  578  |                 goto Return;  |  579  | #endif  |  580  |             *p++ = (STRINGLIB_CHAR)ch;  |  581  |             continue;  |  582  |         }  |  583  |  |  584  |         /* UTF-16 code pair: */  |  585  |         if (!Py_UNICODE_IS_HIGH_SURROGATE(ch))   Branch (585:13): [True: 0, False: 2]
  |  586  |             goto IllegalEncoding;  |  587  |         if (q >= e)   Branch (587:13): [True: 0, False: 2]
  |  588  |             goto UnexpectedEnd;  |  589  |         ch2 = (q[ihi] << 8) | q[ilo];  |  590  |         q += 2;  |  591  |         if (!Py_UNICODE_IS_LOW_SURROGATE(ch2))   Branch (591:13): [True: 0, False: 2]
  |  592  |             goto IllegalSurrogate;  |  593  |         ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2);  |  594  | #if STRINGLIB_SIZEOF_CHAR < 4  |  595  |         /* Out-of-range */  |  596  |         goto Return;  |  597  | #else  |  598  |         *p++ = (STRINGLIB_CHAR)ch;  |  599  | #endif  |  600  |     }  |  601  |     ch = 0;  |  602  | Return:  |  603  |     *inptr = q;  |  604  |     *outpos = p - dest;  |  605  |     return ch;  |  606  | UnexpectedEnd:  |  607  |     ch = 1;  |  608  |     goto Return;  |  609  | IllegalEncoding:  |  610  |     ch = 2;  |  611  |     goto Return;  |  612  | IllegalSurrogate:  |  613  |     ch = 3;  |  614  |     goto Return;  |  615  | }  |  
 unicodeobject.c:ucs2lib_utf16_decode Line  | Count  | Source  |  507  | { |  508  |     Py_UCS4 ch;  |  509  |     const unsigned char *q = *inptr;  |  510  |     STRINGLIB_CHAR *p = dest + *outpos;  |  511  |     /* Offsets from q for retrieving byte pairs in the right order. */  |  512  | #if PY_LITTLE_ENDIAN  |  513  |     int ihi = !!native_ordering, ilo = !native_ordering;  |  514  | #else  |  515  |     int ihi = !native_ordering, ilo = !!native_ordering;  |  516  | #endif  |  517  |     --e;  |  518  |  |  519  |     while (q < e) {  Branch (519:12): [True: 20.6k, False: 4.07k]
  |  520  |         Py_UCS4 ch2;  |  521  |         /* First check for possible aligned read of a C 'long'. Unaligned  |  522  |            reads are more expensive, better to defer to another iteration. */  |  523  |         if (_Py_IS_ALIGNED(q, ALIGNOF_LONG)) { |  524  |             /* Fast path for runs of in-range non-surrogate chars. */  |  525  |             const unsigned char *_q = q;  |  526  |             while (_q + SIZEOF_LONG <= e) {  Branch (526:20): [True: 16.5k, False: 3.83k]
  |  527  |                 unsigned long block = * (const unsigned long *) _q;  |  528  |                 if (native_ordering) {  Branch (528:21): [True: 10.9k, False: 5.66k]
  |  529  |                     /* Can use buffer directly */  |  530  |                     if (block & FAST_CHAR_MASK)   Branch (530:25): [True: 0, False: 10.9k]
  |  531  |                         break;  |  532  |                 }  |  533  |                 else { |  534  |                     /* Need to byte-swap */  |  535  |                     if (block & SWAB(FAST_CHAR_MASK))   Branch (535:25): [True: 0, False: 5.66k]
  |  536  |                         break;  |  537  | #if STRINGLIB_SIZEOF_CHAR == 1  |  538  |                     block >>= 8;  |  539  | #else  |  540  |                     block = SWAB(block);  |  541  | #endif  |  542  |                 }  |  543  | #if PY_LITTLE_ENDIAN  |  544  | # if SIZEOF_LONG == 4  |  545  |                 p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);  |  546  |                 p[1] = (STRINGLIB_CHAR)(block >> 16);  |  547  | # elif SIZEOF_LONG == 8  |  548  |                 p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);  |  549  |                 p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);  |  550  |                 p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);  |  551  |                 p[3] = (STRINGLIB_CHAR)(block >> 48);  |  552  | # endif  |  553  | #else  |  554  | # if SIZEOF_LONG == 4  |  555  |                 p[0] = (STRINGLIB_CHAR)(block >> 16);  |  556  |                 p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu);  |  557  | # elif SIZEOF_LONG == 8  |  558  |                 p[0] = (STRINGLIB_CHAR)(block >> 48);  |  559  |                 p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);  |  560  |                 p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);  |  561  |                 p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu);  |  562  | # endif  |  563  | #endif  |  564  |                 _q += SIZEOF_LONG;  |  565  |                 p += SIZEOF_LONG / 2;  |  566  |             }  |  567  |             q = _q;  |  568  |             if (q >= e)   Branch (568:17): [True: 0, False: 3.83k]
  |  569  |                 break;  |  570  |         }  |  571  |  |  572  |         ch = (q[ihi] << 8) | q[ilo];  |  573  |         q += 2;  |  574  |         if (!Py_UNICODE_IS_SURROGATE(ch)) {  Branch (574:13): [True: 20.6k, False: 18]
  |  575  | #if STRINGLIB_SIZEOF_CHAR < 2  |  576  |             if (ch > STRINGLIB_MAX_CHAR)  |  577  |                 /* Out-of-range */  |  578  |                 goto Return;  |  579  | #endif  |  580  |             *p++ = (STRINGLIB_CHAR)ch;  |  581  |             continue;  |  582  |         }  |  583  |  |  584  |         /* UTF-16 code pair: */  |  585  |         if (!Py_UNICODE_IS_HIGH_SURROGATE(ch))   Branch (585:13): [True: 0, False: 18]
  |  586  |             goto IllegalEncoding;  |  587  |         if (q >= e)   Branch (587:13): [True: 0, False: 18]
  |  588  |             goto UnexpectedEnd;  |  589  |         ch2 = (q[ihi] << 8) | q[ilo];  |  590  |         q += 2;  |  591  |         if (!Py_UNICODE_IS_LOW_SURROGATE(ch2))   Branch (591:13): [True: 6, False: 12]
  |  592  |             goto IllegalSurrogate;  |  593  |         ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2);  |  594  | #if STRINGLIB_SIZEOF_CHAR < 4  |  595  |         /* Out-of-range */  |  596  |         goto Return;  |  597  | #else  |  598  |         *p++ = (STRINGLIB_CHAR)ch;  |  599  | #endif  |  600  |     }  |  601  |     ch = 0;  |  602  | Return:  |  603  |     *inptr = q;  |  604  |     *outpos = p - dest;  |  605  |     return ch;  |  606  | UnexpectedEnd:  |  607  |     ch = 1;  |  608  |     goto Return;  |  609  | IllegalEncoding:  |  610  |     ch = 2;  |  611  |     goto Return;  |  612  | IllegalSurrogate:  |  613  |     ch = 3;  |  614  |     goto Return;  |  615  | }  |  
 unicodeobject.c:ucs4lib_utf16_decode Line  | Count  | Source  |  507  | { |  508  |     Py_UCS4 ch;  |  509  |     const unsigned char *q = *inptr;  |  510  |     STRINGLIB_CHAR *p = dest + *outpos;  |  511  |     /* Offsets from q for retrieving byte pairs in the right order. */  |  512  | #if PY_LITTLE_ENDIAN  |  513  |     int ihi = !!native_ordering, ilo = !native_ordering;  |  514  | #else  |  515  |     int ihi = !native_ordering, ilo = !!native_ordering;  |  516  | #endif  |  517  |     --e;  |  518  |  |  519  |     while (q < e) {  Branch (519:12): [True: 85, False: 23]
  |  520  |         Py_UCS4 ch2;  |  521  |         /* First check for possible aligned read of a C 'long'. Unaligned  |  522  |            reads are more expensive, better to defer to another iteration. */  |  523  |         if (_Py_IS_ALIGNED(q, ALIGNOF_LONG)) { |  524  |             /* Fast path for runs of in-range non-surrogate chars. */  |  525  |             const unsigned char *_q = q;  |  526  |             while (_q + SIZEOF_LONG <= e) {  Branch (526:20): [True: 18, False: 14]
  |  527  |                 unsigned long block = * (const unsigned long *) _q;  |  528  |                 if (native_ordering) {  Branch (528:21): [True: 12, False: 6]
  |  529  |                     /* Can use buffer directly */  |  530  |                     if (block & FAST_CHAR_MASK)   Branch (530:25): [True: 5, False: 7]
  |  531  |                         break;  |  532  |                 }  |  533  |                 else { |  534  |                     /* Need to byte-swap */  |  535  |                     if (block & SWAB(FAST_CHAR_MASK))   Branch (535:25): [True: 3, False: 3]
  |  536  |                         break;  |  537  | #if STRINGLIB_SIZEOF_CHAR == 1  |  538  |                     block >>= 8;  |  539  | #else  |  540  |                     block = SWAB(block);  |  541  | #endif  |  542  |                 }  |  543  | #if PY_LITTLE_ENDIAN  |  544  | # if SIZEOF_LONG == 4  |  545  |                 p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);  |  546  |                 p[1] = (STRINGLIB_CHAR)(block >> 16);  |  547  | # elif SIZEOF_LONG == 8  |  548  |                 p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);  |  549  |                 p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);  |  550  |                 p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);  |  551  |                 p[3] = (STRINGLIB_CHAR)(block >> 48);  |  552  | # endif  |  553  | #else  |  554  | # if SIZEOF_LONG == 4  |  555  |                 p[0] = (STRINGLIB_CHAR)(block >> 16);  |  556  |                 p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu);  |  557  | # elif SIZEOF_LONG == 8  |  558  |                 p[0] = (STRINGLIB_CHAR)(block >> 48);  |  559  |                 p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);  |  560  |                 p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);  |  561  |                 p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu);  |  562  | # endif  |  563  | #endif  |  564  |                 _q += SIZEOF_LONG;  |  565  |                 p += SIZEOF_LONG / 2;  |  566  |             }  |  567  |             q = _q;  |  568  |             if (q >= e)   Branch (568:17): [True: 0, False: 22]
  |  569  |                 break;  |  570  |         }  |  571  |  |  572  |         ch = (q[ihi] << 8) | q[ilo];  |  573  |         q += 2;  |  574  |         if (!Py_UNICODE_IS_SURROGATE(ch)) {  Branch (574:13): [True: 50, False: 35]
  |  575  | #if STRINGLIB_SIZEOF_CHAR < 2  |  576  |             if (ch > STRINGLIB_MAX_CHAR)  |  577  |                 /* Out-of-range */  |  578  |                 goto Return;  |  579  | #endif  |  580  |             *p++ = (STRINGLIB_CHAR)ch;  |  581  |             continue;  |  582  |         }  |  583  |  |  584  |         /* UTF-16 code pair: */  |  585  |         if (!Py_UNICODE_IS_HIGH_SURROGATE(ch))   Branch (585:13): [True: 15, False: 20]
  |  586  |             goto IllegalEncoding;  |  587  |         if (q >= e)   Branch (587:13): [True: 0, False: 20]
  |  588  |             goto UnexpectedEnd;  |  589  |         ch2 = (q[ihi] << 8) | q[ilo];  |  590  |         q += 2;  |  591  |         if (!Py_UNICODE_IS_LOW_SURROGATE(ch2))   Branch (591:13): [True: 0, False: 20]
  |  592  |             goto IllegalSurrogate;  |  593  |         ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2);  |  594  | #if STRINGLIB_SIZEOF_CHAR < 4  |  595  |         /* Out-of-range */  |  596  |         goto Return;  |  597  | #else  |  598  |         *p++ = (STRINGLIB_CHAR)ch;  |  599  | #endif  |  600  |     }  |  601  |     ch = 0;  |  602  | Return:  |  603  |     *inptr = q;  |  604  |     *outpos = p - dest;  |  605  |     return ch;  |  606  | UnexpectedEnd:  |  607  |     ch = 1;  |  608  |     goto Return;  |  609  | IllegalEncoding:  |  610  |     ch = 2;  |  611  |     goto Return;  |  612  | IllegalSurrogate:  |  613  |     ch = 3;  |  614  |     goto Return;  |  615  | }  |  
  | 
616  | #undef UCS2_REPEAT_MASK  | 
617  | #undef FAST_CHAR_MASK  | 
618  | #undef STRIPPED_MASK  | 
619  | #undef SWAB  | 
620  |  | 
621  |  | 
622  | #if STRINGLIB_MAX_CHAR >= 0x80  | 
623  | Py_LOCAL_INLINE(Py_ssize_t)  | 
624  | STRINGLIB(utf16_encode)(const STRINGLIB_CHAR *in,  | 
625  |                         Py_ssize_t len,  | 
626  |                         unsigned short **outptr,  | 
627  |                         int native_ordering)  | 
628  | { | 
629  |     unsigned short *out = *outptr;  | 
630  |     const STRINGLIB_CHAR *end = in + len;  | 
631  | #if STRINGLIB_SIZEOF_CHAR == 1  | 
632  |     if (native_ordering) {  Branch (632:9): [True: 2.00k, False: 902]
  | 
633  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  | 
634  |         while (in < unrolled_end) {  Branch (634:16): [True: 122k, False: 2.00k]
  | 
635  |             out[0] = in[0];  | 
636  |             out[1] = in[1];  | 
637  |             out[2] = in[2];  | 
638  |             out[3] = in[3];  | 
639  |             in += 4; out += 4;  | 
640  |         }  | 
641  |         while (in < end) {  Branch (641:16): [True: 2.13k, False: 2.00k]
  | 
642  |             *out++ = *in++;  | 
643  |         }  | 
644  |     } else { | 
645  | # define SWAB2(CH)  ((CH) << 8) /* high byte is zero */  | 
646  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  | 
647  |         while (in < unrolled_end) {  Branch (647:16): [True: 55.5k, False: 902]
  | 
648  |             out[0] = SWAB2(in[0]);  | 
649  |             out[1] = SWAB2(in[1]);  | 
650  |             out[2] = SWAB2(in[2]);  | 
651  |             out[3] = SWAB2(in[3]);  | 
652  |             in += 4; out += 4;  | 
653  |         }  | 
654  |         while (in < end) {  Branch (654:16): [True: 954, False: 902]
  | 
655  |             Py_UCS4 ch = *in++;  | 
656  |             *out++ = SWAB2((Py_UCS2)ch);  | 
657  |         }  | 
658  | #undef SWAB2  | 
659  |     }  | 
660  |     *outptr = out;  | 
661  |     return len;  | 
662  | #else  | 
663  |     if (native_ordering) {  Branch (663:9): [True: 2.02k, False: 979]
   Branch (663:9): [True: 40, False: 22]
  | 
664  | #if STRINGLIB_MAX_CHAR < 0x10000  | 
665  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  | 
666  |         while (in < unrolled_end) {  Branch (666:16): [True: 40.7k, False: 2.02k]
  | 
667  |             /* check if any character is a surrogate character */  | 
668  |             if (((in[0] ^ 0xd800) &   Branch (668:17): [True: 4, False: 40.7k]
  | 
669  |                  (in[1] ^ 0xd800) &  | 
670  |                  (in[2] ^ 0xd800) &  | 
671  |                  (in[3] ^ 0xd800) & 0xf800) == 0)  | 
672  |                 break;  | 
673  |             out[0] = in[0];  | 
674  |             out[1] = in[1];  | 
675  |             out[2] = in[2];  | 
676  |             out[3] = in[3];  | 
677  |             in += 4; out += 4;  | 
678  |         }  | 
679  | #endif  | 
680  |         while (in < end) {  Branch (680:16): [True: 2.07k, False: 1.94k]
   Branch (680:16): [True: 923, False: 36]
  | 
681  |             Py_UCS4 ch;  | 
682  |             ch = *in++;  | 
683  |             if (ch < 0xd800)   Branch (683:17): [True: 1.99k, False: 83]
   Branch (683:17): [True: 850, False: 73]
  | 
684  |                 *out++ = ch;  | 
685  |             else if (ch < 0xe000)   Branch (685:22): [True: 83, False: 0]
   Branch (685:22): [True: 4, False: 69]
  | 
686  |                 /* reject surrogate characters (U+D800-U+DFFF) */  | 
687  |                 goto fail;  | 
688  | #if STRINGLIB_MAX_CHAR >= 0x10000  | 
689  |             else if (ch >= 0x10000) {  Branch (689:22): [True: 61, False: 8]
  | 
690  |                 out[0] = Py_UNICODE_HIGH_SURROGATE(ch);  | 
691  |                 out[1] = Py_UNICODE_LOW_SURROGATE(ch);  | 
692  |                 out += 2;  | 
693  |             }  | 
694  | #endif  | 
695  |             else  | 
696  |                 *out++ = ch;  | 
697  |         }  | 
698  |     } else { | 
699  | #define SWAB2(CH)  (((CH) << 8) | ((CH) >> 8))  | 
700  | #if STRINGLIB_MAX_CHAR < 0x10000  | 
701  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  | 
702  |         while (in < unrolled_end) {  Branch (702:16): [True: 18.8k, False: 977]
  | 
703  |             /* check if any character is a surrogate character */  | 
704  |             if (((in[0] ^ 0xd800) &   Branch (704:17): [True: 2, False: 18.8k]
  | 
705  |                  (in[1] ^ 0xd800) &  | 
706  |                  (in[2] ^ 0xd800) &  | 
707  |                  (in[3] ^ 0xd800) & 0xf800) == 0)  | 
708  |                 break;  | 
709  |             out[0] = SWAB2(in[0]);  | 
710  |             out[1] = SWAB2(in[1]);  | 
711  |             out[2] = SWAB2(in[2]);  | 
712  |             out[3] = SWAB2(in[3]);  | 
713  |             in += 4; out += 4;  | 
714  |         }  | 
715  | #endif  | 
716  |         while (in < end) {  Branch (716:16): [True: 996, False: 961]
   Branch (716:16): [True: 475, False: 20]
  | 
717  |             Py_UCS4 ch = *in++;  | 
718  |             if (ch < 0xd800)   Branch (718:17): [True: 978, False: 18]
   Branch (718:17): [True: 436, False: 39]
  | 
719  |                 *out++ = SWAB2((Py_UCS2)ch);  | 
720  |             else if (ch < 0xe000)   Branch (720:22): [True: 18, False: 0]
   Branch (720:22): [True: 2, False: 37]
  | 
721  |                 /* reject surrogate characters (U+D800-U+DFFF) */  | 
722  |                 goto fail;  | 
723  | #if STRINGLIB_MAX_CHAR >= 0x10000  | 
724  |             else if (ch >= 0x10000) {  Branch (724:22): [True: 33, False: 4]
  | 
725  |                 Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch);  | 
726  |                 Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch);  | 
727  |                 out[0] = SWAB2(ch1);  | 
728  |                 out[1] = SWAB2(ch2);  | 
729  |                 out += 2;  | 
730  |             }  | 
731  | #endif  | 
732  |             else  | 
733  |                 *out++ = SWAB2((Py_UCS2)ch);  | 
734  |         }  | 
735  | #undef SWAB2  | 
736  |     }  | 
737  |     *outptr = out;  | 
738  |     return len;  | 
739  |   fail:  | 
740  |     *outptr = out;  | 
741  |     return len - (end - in + 1);  | 
742  | #endif  | 
743  | } unicodeobject.c:ucs1lib_utf16_encode Line  | Count  | Source  |  628  | { |  629  |     unsigned short *out = *outptr;  |  630  |     const STRINGLIB_CHAR *end = in + len;  |  631  | #if STRINGLIB_SIZEOF_CHAR == 1  |  632  |     if (native_ordering) {  Branch (632:9): [True: 2.00k, False: 902]
  |  633  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  634  |         while (in < unrolled_end) {  Branch (634:16): [True: 122k, False: 2.00k]
  |  635  |             out[0] = in[0];  |  636  |             out[1] = in[1];  |  637  |             out[2] = in[2];  |  638  |             out[3] = in[3];  |  639  |             in += 4; out += 4;  |  640  |         }  |  641  |         while (in < end) {  Branch (641:16): [True: 2.13k, False: 2.00k]
  |  642  |             *out++ = *in++;  |  643  |         }  |  644  |     } else { |  645  | # define SWAB2(CH)  ((CH) << 8) /* high byte is zero */  |  646  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  647  |         while (in < unrolled_end) {  Branch (647:16): [True: 55.5k, False: 902]
  |  648  |             out[0] = SWAB2(in[0]);  |  649  |             out[1] = SWAB2(in[1]);  |  650  |             out[2] = SWAB2(in[2]);  |  651  |             out[3] = SWAB2(in[3]);  |  652  |             in += 4; out += 4;  |  653  |         }  |  654  |         while (in < end) {  Branch (654:16): [True: 954, False: 902]
  |  655  |             Py_UCS4 ch = *in++;  |  656  |             *out++ = SWAB2((Py_UCS2)ch);  |  657  |         }  |  658  | #undef SWAB2  |  659  |     }  |  660  |     *outptr = out;  |  661  |     return len;  |  662  | #else  |  663  |     if (native_ordering) { |  664  | #if STRINGLIB_MAX_CHAR < 0x10000  |  665  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  666  |         while (in < unrolled_end) { |  667  |             /* check if any character is a surrogate character */  |  668  |             if (((in[0] ^ 0xd800) &  |  669  |                  (in[1] ^ 0xd800) &  |  670  |                  (in[2] ^ 0xd800) &  |  671  |                  (in[3] ^ 0xd800) & 0xf800) == 0)  |  672  |                 break;  |  673  |             out[0] = in[0];  |  674  |             out[1] = in[1];  |  675  |             out[2] = in[2];  |  676  |             out[3] = in[3];  |  677  |             in += 4; out += 4;  |  678  |         }  |  679  | #endif  |  680  |         while (in < end) { |  681  |             Py_UCS4 ch;  |  682  |             ch = *in++;  |  683  |             if (ch < 0xd800)  |  684  |                 *out++ = ch;  |  685  |             else if (ch < 0xe000)  |  686  |                 /* reject surrogate characters (U+D800-U+DFFF) */  |  687  |                 goto fail;  |  688  | #if STRINGLIB_MAX_CHAR >= 0x10000  |  689  |             else if (ch >= 0x10000) { |  690  |                 out[0] = Py_UNICODE_HIGH_SURROGATE(ch);  |  691  |                 out[1] = Py_UNICODE_LOW_SURROGATE(ch);  |  692  |                 out += 2;  |  693  |             }  |  694  | #endif  |  695  |             else  |  696  |                 *out++ = ch;  |  697  |         }  |  698  |     } else { |  699  | #define SWAB2(CH)  (((CH) << 8) | ((CH) >> 8))  |  700  | #if STRINGLIB_MAX_CHAR < 0x10000  |  701  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  702  |         while (in < 
unrolled_end) { |  703  |             /* check if any character is a surrogate character */  |  704  |             if (((in[0] ^ 0xd800) &  |  705  |                  (in[1] ^ 0xd800) &  |  706  |                  (in[2] ^ 0xd800) &  |  707  |                  (in[3] ^ 0xd800) & 0xf800) == 0)  |  708  |                 break;  |  709  |             out[0] = SWAB2(in[0]);  |  710  |             out[1] = SWAB2(in[1]);  |  711  |             out[2] = SWAB2(in[2]);  |  712  |             out[3] = SWAB2(in[3]);  |  713  |             in += 4; out += 4;  |  714  |         }  |  715  | #endif  |  716  |         while (in < end) { |  717  |             Py_UCS4 ch = *in++;  |  718  |             if (ch < 0xd800)  |  719  |                 *out++ = SWAB2((Py_UCS2)ch);  |  720  |             else if (ch < 0xe000)  |  721  |                 /* reject surrogate characters (U+D800-U+DFFF) */  |  722  |                 goto fail;  |  723  | #if STRINGLIB_MAX_CHAR >= 0x10000  |  724  |             else if (ch >= 0x10000) { |  725  |                 Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch);  |  726  |                 Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch);  |  727  |                 out[0] = SWAB2(ch1);  |  728  |                 out[1] = SWAB2(ch2);  |  729  |                 out += 2;  |  730  |             }  |  731  | #endif  |  732  |             else  |  733  |                 *out++ = SWAB2((Py_UCS2)ch);  |  734  |         }  |  735  | #undef SWAB2  |  736  |     }  |  737  |     *outptr = out;  |  738  |     return len;  |  739  |   fail:  |  740  |     *outptr = out;  |  741  |     return len - (end - in + 1);  |  742  | #endif  |  743  | }  |  
 unicodeobject.c:ucs2lib_utf16_encode Line  | Count  | Source  |  628  | { |  629  |     unsigned short *out = *outptr;  |  630  |     const STRINGLIB_CHAR *end = in + len;  |  631  | #if STRINGLIB_SIZEOF_CHAR == 1  |  632  |     if (native_ordering) { |  633  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  634  |         while (in < unrolled_end) { |  635  |             out[0] = in[0];  |  636  |             out[1] = in[1];  |  637  |             out[2] = in[2];  |  638  |             out[3] = in[3];  |  639  |             in += 4; out += 4;  |  640  |         }  |  641  |         while (in < end) { |  642  |             *out++ = *in++;  |  643  |         }  |  644  |     } else { |  645  | # define SWAB2(CH)  ((CH) << 8) /* high byte is zero */  |  646  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  647  |         while (in < unrolled_end) { |  648  |             out[0] = SWAB2(in[0]);  |  649  |             out[1] = SWAB2(in[1]);  |  650  |             out[2] = SWAB2(in[2]);  |  651  |             out[3] = SWAB2(in[3]);  |  652  |             in += 4; out += 4;  |  653  |         }  |  654  |         while (in < end) { |  655  |             Py_UCS4 ch = *in++;  |  656  |             *out++ = SWAB2((Py_UCS2)ch);  |  657  |         }  |  658  | #undef SWAB2  |  659  |     }  |  660  |     *outptr = out;  |  661  |     return len;  |  662  | #else  |  663  |     if (native_ordering) {  Branch (663:9): [True: 2.02k, False: 979]
  |  664  | #if STRINGLIB_MAX_CHAR < 0x10000  |  665  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  666  |         while (in < unrolled_end) {  Branch (666:16): [True: 40.7k, False: 2.02k]
  |  667  |             /* check if any character is a surrogate character */  |  668  |             if (((in[0] ^ 0xd800) &   Branch (668:17): [True: 4, False: 40.7k]
  |  669  |                  (in[1] ^ 0xd800) &  |  670  |                  (in[2] ^ 0xd800) &  |  671  |                  (in[3] ^ 0xd800) & 0xf800) == 0)  |  672  |                 break;  |  673  |             out[0] = in[0];  |  674  |             out[1] = in[1];  |  675  |             out[2] = in[2];  |  676  |             out[3] = in[3];  |  677  |             in += 4; out += 4;  |  678  |         }  |  679  | #endif  |  680  |         while (in < end) {  Branch (680:16): [True: 2.07k, False: 1.94k]
  |  681  |             Py_UCS4 ch;  |  682  |             ch = *in++;  |  683  |             if (ch < 0xd800)   Branch (683:17): [True: 1.99k, False: 83]
  |  684  |                 *out++ = ch;  |  685  |             else if (ch < 0xe000)   Branch (685:22): [True: 83, False: 0]
  |  686  |                 /* reject surrogate characters (U+D800-U+DFFF) */  |  687  |                 goto fail;  |  688  | #if STRINGLIB_MAX_CHAR >= 0x10000  |  689  |             else if (ch >= 0x10000) { |  690  |                 out[0] = Py_UNICODE_HIGH_SURROGATE(ch);  |  691  |                 out[1] = Py_UNICODE_LOW_SURROGATE(ch);  |  692  |                 out += 2;  |  693  |             }  |  694  | #endif  |  695  |             else  |  696  |                 *out++ = ch;  |  697  |         }  |  698  |     } else { |  699  | #define SWAB2(CH)  (((CH) << 8) | ((CH) >> 8))  |  700  | #if STRINGLIB_MAX_CHAR < 0x10000  |  701  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  702  |         while (in < unrolled_end) {  Branch (702:16): [True: 18.8k, False: 977]
  |  703  |             /* check if any character is a surrogate character */  |  704  |             if (((in[0] ^ 0xd800) &   Branch (704:17): [True: 2, False: 18.8k]
  |  705  |                  (in[1] ^ 0xd800) &  |  706  |                  (in[2] ^ 0xd800) &  |  707  |                  (in[3] ^ 0xd800) & 0xf800) == 0)  |  708  |                 break;  |  709  |             out[0] = SWAB2(in[0]);  |  710  |             out[1] = SWAB2(in[1]);  |  711  |             out[2] = SWAB2(in[2]);  |  712  |             out[3] = SWAB2(in[3]);  |  713  |             in += 4; out += 4;  |  714  |         }  |  715  | #endif  |  716  |         while (in < end) {  Branch (716:16): [True: 996, False: 961]
  |  717  |             Py_UCS4 ch = *in++;  |  718  |             if (ch < 0xd800)   Branch (718:17): [True: 978, False: 18]
  |  719  |                 *out++ = SWAB2((Py_UCS2)ch);  |  720  |             else if (ch < 0xe000)   Branch (720:22): [True: 18, False: 0]
  |  721  |                 /* reject surrogate characters (U+D800-U+DFFF) */  |  722  |                 goto fail;  |  723  | #if STRINGLIB_MAX_CHAR >= 0x10000  |  724  |             else if (ch >= 0x10000) { |  725  |                 Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch);  |  726  |                 Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch);  |  727  |                 out[0] = SWAB2(ch1);  |  728  |                 out[1] = SWAB2(ch2);  |  729  |                 out += 2;  |  730  |             }  |  731  | #endif  |  732  |             else  |  733  |                 *out++ = SWAB2((Py_UCS2)ch);  |  734  |         }  |  735  | #undef SWAB2  |  736  |     }  |  737  |     *outptr = out;  |  738  |     return len;  |  739  |   fail:  |  740  |     *outptr = out;  |  741  |     return len - (end - in + 1);  |  742  | #endif  |  743  | }  |  
 unicodeobject.c:ucs4lib_utf16_encode Line  | Count  | Source  |  628  | { |  629  |     unsigned short *out = *outptr;  |  630  |     const STRINGLIB_CHAR *end = in + len;  |  631  | #if STRINGLIB_SIZEOF_CHAR == 1  |  632  |     if (native_ordering) { |  633  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  634  |         while (in < unrolled_end) { |  635  |             out[0] = in[0];  |  636  |             out[1] = in[1];  |  637  |             out[2] = in[2];  |  638  |             out[3] = in[3];  |  639  |             in += 4; out += 4;  |  640  |         }  |  641  |         while (in < end) { |  642  |             *out++ = *in++;  |  643  |         }  |  644  |     } else { |  645  | # define SWAB2(CH)  ((CH) << 8) /* high byte is zero */  |  646  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  647  |         while (in < unrolled_end) { |  648  |             out[0] = SWAB2(in[0]);  |  649  |             out[1] = SWAB2(in[1]);  |  650  |             out[2] = SWAB2(in[2]);  |  651  |             out[3] = SWAB2(in[3]);  |  652  |             in += 4; out += 4;  |  653  |         }  |  654  |         while (in < end) { |  655  |             Py_UCS4 ch = *in++;  |  656  |             *out++ = SWAB2((Py_UCS2)ch);  |  657  |         }  |  658  | #undef SWAB2  |  659  |     }  |  660  |     *outptr = out;  |  661  |     return len;  |  662  | #else  |  663  |     if (native_ordering) {  Branch (663:9): [True: 40, False: 22]
  |  664  | #if STRINGLIB_MAX_CHAR < 0x10000  |  665  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  666  |         while (in < unrolled_end) { |  667  |             /* check if any character is a surrogate character */  |  668  |             if (((in[0] ^ 0xd800) &  |  669  |                  (in[1] ^ 0xd800) &  |  670  |                  (in[2] ^ 0xd800) &  |  671  |                  (in[3] ^ 0xd800) & 0xf800) == 0)  |  672  |                 break;  |  673  |             out[0] = in[0];  |  674  |             out[1] = in[1];  |  675  |             out[2] = in[2];  |  676  |             out[3] = in[3];  |  677  |             in += 4; out += 4;  |  678  |         }  |  679  | #endif  |  680  |         while (in < end) {  Branch (680:16): [True: 923, False: 36]
  |  681  |             Py_UCS4 ch;  |  682  |             ch = *in++;  |  683  |             if (ch < 0xd800)   Branch (683:17): [True: 850, False: 73]
  |  684  |                 *out++ = ch;  |  685  |             else if (ch < 0xe000)   Branch (685:22): [True: 4, False: 69]
  |  686  |                 /* reject surrogate characters (U+D800-U+DFFF) */  |  687  |                 goto fail;  |  688  | #if STRINGLIB_MAX_CHAR >= 0x10000  |  689  |             else if (ch >= 0x10000) {  Branch (689:22): [True: 61, False: 8]
  |  690  |                 out[0] = Py_UNICODE_HIGH_SURROGATE(ch);  |  691  |                 out[1] = Py_UNICODE_LOW_SURROGATE(ch);  |  692  |                 out += 2;  |  693  |             }  |  694  | #endif  |  695  |             else  |  696  |                 *out++ = ch;  |  697  |         }  |  698  |     } else { |  699  | #define SWAB2(CH)  (((CH) << 8) | ((CH) >> 8))  |  700  | #if STRINGLIB_MAX_CHAR < 0x10000  |  701  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  702  |         while (in < unrolled_end) { |  703  |             /* check if any character is a surrogate character */  |  704  |             if (((in[0] ^ 0xd800) &  |  705  |                  (in[1] ^ 0xd800) &  |  706  |                  (in[2] ^ 0xd800) &  |  707  |                  (in[3] ^ 0xd800) & 0xf800) == 0)  |  708  |                 break;  |  709  |             out[0] = SWAB2(in[0]);  |  710  |             out[1] = SWAB2(in[1]);  |  711  |             out[2] = SWAB2(in[2]);  |  712  |             out[3] = SWAB2(in[3]);  |  713  |             in += 4; out += 4;  |  714  |         }  |  715  | #endif  |  716  |         while (in < end) {  Branch (716:16): [True: 475, False: 20]
  |  717  |             Py_UCS4 ch = *in++;  |  718  |             if (ch < 0xd800)   Branch (718:17): [True: 436, False: 39]
  |  719  |                 *out++ = SWAB2((Py_UCS2)ch);  |  720  |             else if (ch < 0xe000)   Branch (720:22): [True: 2, False: 37]
  |  721  |                 /* reject surrogate characters (U+D800-U+DFFF) */  |  722  |                 goto fail;  |  723  | #if STRINGLIB_MAX_CHAR >= 0x10000  |  724  |             else if (ch >= 0x10000) {  Branch (724:22): [True: 33, False: 4]
  |  725  |                 Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch);  |  726  |                 Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch);  |  727  |                 out[0] = SWAB2(ch1);  |  728  |                 out[1] = SWAB2(ch2);  |  729  |                 out += 2;  |  730  |             }  |  731  | #endif  |  732  |             else  |  733  |                 *out++ = SWAB2((Py_UCS2)ch);  |  734  |         }  |  735  | #undef SWAB2  |  736  |     }  |  737  |     *outptr = out;  |  738  |     return len;  |  739  |   fail:  |  740  |     *outptr = out;  |  741  |     return len - (end - in + 1);  |  742  | #endif  |  743  | }  |  
  | 
744  |  | 
745  | static inline uint32_t  | 
746  | STRINGLIB(SWAB4)(STRINGLIB_CHAR ch)  | 
747  | { | 
748  |     uint32_t word = ch;  | 
749  | #if STRINGLIB_SIZEOF_CHAR == 1  | 
750  |     /* high bytes are zero */  | 
751  |     return (word << 24);  | 
752  | #elif STRINGLIB_SIZEOF_CHAR == 2  | 
753  |     /* high bytes are zero */  | 
754  |     return ((word & 0x00FFu) << 24) | ((word & 0xFF00u) << 8);  | 
755  | #else  | 
756  |     return _Py_bswap32(word);  | 
757  | #endif  | 
758  | } unicodeobject.c:ucs1lib_SWAB4 Line  | Count  | Source  |  747  | { |  748  |     uint32_t word = ch;  |  749  | #if STRINGLIB_SIZEOF_CHAR == 1  |  750  |     /* high bytes are zero */  |  751  |     return (word << 24);  |  752  | #elif STRINGLIB_SIZEOF_CHAR == 2  |  753  |     /* high bytes are zero */  |  754  |     return ((word & 0x00FFu) << 24) | ((word & 0xFF00u) << 8);  |  755  | #else  |  756  |     return _Py_bswap32(word);  |  757  | #endif  |  758  | }  |  
 unicodeobject.c:ucs2lib_SWAB4 Line  | Count  | Source  |  747  | { |  748  |     uint32_t word = ch;  |  749  | #if STRINGLIB_SIZEOF_CHAR == 1  |  750  |     /* high bytes are zero */  |  751  |     return (word << 24);  |  752  | #elif STRINGLIB_SIZEOF_CHAR == 2  |  753  |     /* high bytes are zero */  |  754  |     return ((word & 0x00FFu) << 24) | ((word & 0xFF00u) << 8);  |  755  | #else  |  756  |     return _Py_bswap32(word);  |  757  | #endif  |  758  | }  |  
 unicodeobject.c:ucs4lib_SWAB4 Line  | Count  | Source  |  747  | { |  748  |     uint32_t word = ch;  |  749  | #if STRINGLIB_SIZEOF_CHAR == 1  |  750  |     /* high bytes are zero */  |  751  |     return (word << 24);  |  752  | #elif STRINGLIB_SIZEOF_CHAR == 2  |  753  |     /* high bytes are zero */  |  754  |     return ((word & 0x00FFu) << 24) | ((word & 0xFF00u) << 8);  |  755  | #else  |  756  |     return _Py_bswap32(word);  |  757  | #endif  |  758  | }  |  
  | 
759  |  | 
760  | Py_LOCAL_INLINE(Py_ssize_t)  | 
761  | STRINGLIB(utf32_encode)(const STRINGLIB_CHAR *in,  | 
762  |                         Py_ssize_t len,  | 
763  |                         uint32_t **outptr,  | 
764  |                         int native_ordering)  | 
765  | { | 
766  |     uint32_t *out = *outptr;  | 
767  |     const STRINGLIB_CHAR *end = in + len;  | 
768  |     if (native_ordering) {  Branch (768:9): [True: 1.31k, False: 577]
   Branch (768:9): [True: 480, False: 210]
   Branch (768:9): [True: 22, False: 12]
  | 
769  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  | 
770  |         while (in < unrolled_end) {  Branch (770:16): [True: 119k, False: 1.31k]
   Branch (770:16): [True: 40.7k, False: 476]
   Branch (770:16): [True: 12, False: 14]
  | 
771  | #if STRINGLIB_SIZEOF_CHAR > 1  | 
772  |             /* check if any character is a surrogate character */  | 
773  |             if (((in[0] ^ 0xd800) &   Branch (773:17): [True: 4, False: 40.7k]
   Branch (773:17): [True: 8, False: 4]
  | 
774  |                  (in[1] ^ 0xd800) &  | 
775  |                  (in[2] ^ 0xd800) &  | 
776  |                  (in[3] ^ 0xd800) & 0xf800) == 0)  | 
777  |                 break;  | 
778  | #endif  | 
779  |             out[0] = in[0];  | 
780  |             out[1] = in[1];  | 
781  |             out[2] = in[2];  | 
782  |             out[3] = in[3];  | 
783  |             in += 4; out += 4;  | 
784  |         }  | 
785  |         while (in < end) {  Branch (785:16): [True: 1.40k, False: 1.31k]
   Branch (785:16): [True: 511, False: 395]
   Branch (785:16): [True: 57, False: 18]
  | 
786  |             Py_UCS4 ch;  | 
787  |             ch = *in++;  | 
788  | #if STRINGLIB_SIZEOF_CHAR > 1  | 
789  |             if (Py_UNICODE_IS_SURROGATE(ch)) {  Branch (789:17): [True: 85, False: 426]
   Branch (789:17): [True: 4, False: 53]
  | 
790  |                 /* reject surrogate characters (U+D800-U+DFFF) */  | 
791  |                 goto fail;  | 
792  |             }  | 
793  | #endif  | 
794  |             *out++ = ch;  | 
795  |         }  | 
796  |     } else { | 
797  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  | 
798  |         while (in < unrolled_end) {  Branch (798:16): [True: 54.6k, False: 577]
   Branch (798:16): [True: 18.8k, False: 208]
   Branch (798:16): [True: 8, False: 8]
  | 
799  | #if STRINGLIB_SIZEOF_CHAR > 1  | 
800  |             /* check if any character is a surrogate character */  | 
801  |             if (((in[0] ^ 0xd800) &   Branch (801:17): [True: 2, False: 18.8k]
   Branch (801:17): [True: 4, False: 4]
  | 
802  |                  (in[1] ^ 0xd800) &  | 
803  |                  (in[2] ^ 0xd800) &  | 
804  |                  (in[3] ^ 0xd800) & 0xf800) == 0)  | 
805  |                 break;  | 
806  | #endif  | 
807  |             out[0] = 18.8k STRINGLIB(SWAB4)(in[0]);  | 
808  |             out[1] = STRINGLIB(SWAB4)(in[1]);  | 
809  |             out[2] = STRINGLIB(SWAB4)(in[2]);  | 
810  |             out[3] = STRINGLIB(SWAB4)(in[3]);  | 
811  |             in += 4; out += 4;  | 
812  |         }  | 
813  |         while (in < end) {  Branch (813:16): [True: 603, False: 577]
   Branch (813:16): [True: 229, False: 192]
   Branch (813:16): [True: 29, False: 10]
  | 
814  |             Py_UCS4 ch = *in++;  | 
815  | #if STRINGLIB_SIZEOF_CHAR > 1  | 
816  |             if (Py_UNICODE_IS_SURROGATE(ch)) {  Branch (816:17): [True: 18, False: 211]
   Branch (816:17): [True: 2, False: 27]
  | 
817  |                 /* reject surrogate characters (U+D800-U+DFFF) */  | 
818  |                 goto fail;  | 
819  |             }  | 
820  | #endif  | 
821  |             *out++ = 238 STRINGLIB(SWAB4)(ch);  | 
822  |         }  | 
823  |     }  | 
824  |     *outptr = out;  | 
825  |     return len;  | 
826  | #if STRINGLIB_SIZEOF_CHAR > 1  | 
827  |   fail:  | 
828  |     *outptr = out;  | 
829  |     return len - (end - in + 1);  | 
830  | #endif  | 
831  | } unicodeobject.c:ucs1lib_utf32_encode Line  | Count  | Source  |  765  | { |  766  |     uint32_t *out = *outptr;  |  767  |     const STRINGLIB_CHAR *end = in + len;  |  768  |     if (native_ordering) {  Branch (768:9): [True: 1.31k, False: 577]
  |  769  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  770  |         while (in < unrolled_end) {  Branch (770:16): [True: 119k, False: 1.31k]
  |  771  | #if STRINGLIB_SIZEOF_CHAR > 1  |  772  |             /* check if any character is a surrogate character */  |  773  |             if (((in[0] ^ 0xd800) &  |  774  |                  (in[1] ^ 0xd800) &  |  775  |                  (in[2] ^ 0xd800) &  |  776  |                  (in[3] ^ 0xd800) & 0xf800) == 0)  |  777  |                 break;  |  778  | #endif  |  779  |             out[0] = in[0];  |  780  |             out[1] = in[1];  |  781  |             out[2] = in[2];  |  782  |             out[3] = in[3];  |  783  |             in += 4; out += 4;  |  784  |         }  |  785  |         while (in < end) {  Branch (785:16): [True: 1.40k, False: 1.31k]
  |  786  |             Py_UCS4 ch;  |  787  |             ch = *in++;  |  788  | #if STRINGLIB_SIZEOF_CHAR > 1  |  789  |             if (Py_UNICODE_IS_SURROGATE(ch)) { |  790  |                 /* reject surrogate characters (U+D800-U+DFFF) */  |  791  |                 goto fail;  |  792  |             }  |  793  | #endif  |  794  |             *out++ = ch;  |  795  |         }  |  796  |     } else { |  797  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  798  |         while (in < unrolled_end) {  Branch (798:16): [True: 54.6k, False: 577]
  |  799  | #if STRINGLIB_SIZEOF_CHAR > 1  |  800  |             /* check if any character is a surrogate character */  |  801  |             if (((in[0] ^ 0xd800) &  |  802  |                  (in[1] ^ 0xd800) &  |  803  |                  (in[2] ^ 0xd800) &  |  804  |                  (in[3] ^ 0xd800) & 0xf800) == 0)  |  805  |                 break;  |  806  | #endif  |  807  |             out[0] = STRINGLIB(SWAB4)(in[0]);  |  808  |             out[1] = STRINGLIB(SWAB4)(in[1]);  |  809  |             out[2] = STRINGLIB(SWAB4)(in[2]);  |  810  |             out[3] = STRINGLIB(SWAB4)(in[3]);  |  811  |             in += 4; out += 4;  |  812  |         }  |  813  |         while (in < end) {  Branch (813:16): [True: 603, False: 577]
  |  814  |             Py_UCS4 ch = *in++;  |  815  | #if STRINGLIB_SIZEOF_CHAR > 1  |  816  |             if (Py_UNICODE_IS_SURROGATE(ch)) { |  817  |                 /* reject surrogate characters (U+D800-U+DFFF) */  |  818  |                 goto fail;  |  819  |             }  |  820  | #endif  |  821  |             *out++ = STRINGLIB(SWAB4)(ch);  |  822  |         }  |  823  |     }  |  824  |     *outptr = out;  |  825  |     return len;  |  826  | #if STRINGLIB_SIZEOF_CHAR > 1  |  827  |   fail:  |  828  |     *outptr = out;  |  829  |     return len - (end - in + 1);  |  830  | #endif  |  831  | }  |  
 unicodeobject.c:ucs2lib_utf32_encode Line  | Count  | Source  |  765  | { |  766  |     uint32_t *out = *outptr;  |  767  |     const STRINGLIB_CHAR *end = in + len;  |  768  |     if (native_ordering) {  Branch (768:9): [True: 480, False: 210]
  |  769  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  770  |         while (in < unrolled_end) {  Branch (770:16): [True: 40.7k, False: 476]
  |  771  | #if STRINGLIB_SIZEOF_CHAR > 1  |  772  |             /* check if any character is a surrogate character */  |  773  |             if (((in[0] ^ 0xd800) &   Branch (773:17): [True: 4, False: 40.7k]
  |  774  |                  (in[1] ^ 0xd800) &  |  775  |                  (in[2] ^ 0xd800) &  |  776  |                  (in[3] ^ 0xd800) & 0xf800) == 0)  |  777  |                 break;  |  778  | #endif  |  779  |             out[0] = in[0];  |  780  |             out[1] = in[1];  |  781  |             out[2] = in[2];  |  782  |             out[3] = in[3];  |  783  |             in += 4; out += 4;  |  784  |         }  |  785  |         while (in < end) {  Branch (785:16): [True: 511, False: 395]
  |  786  |             Py_UCS4 ch;  |  787  |             ch = *in++;  |  788  | #if STRINGLIB_SIZEOF_CHAR > 1  |  789  |             if (Py_UNICODE_IS_SURROGATE(ch)) {  Branch (789:17): [True: 85, False: 426]
  |  790  |                 /* reject surrogate characters (U+D800-U+DFFF) */  |  791  |                 goto fail;  |  792  |             }  |  793  | #endif  |  794  |             *out++ = ch;  |  795  |         }  |  796  |     } else { |  797  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  798  |         while (in < unrolled_end) {  Branch (798:16): [True: 18.8k, False: 208]
  |  799  | #if STRINGLIB_SIZEOF_CHAR > 1  |  800  |             /* check if any character is a surrogate character */  |  801  |             if (((in[0] ^ 0xd800) &   Branch (801:17): [True: 2, False: 18.8k]
  |  802  |                  (in[1] ^ 0xd800) &  |  803  |                  (in[2] ^ 0xd800) &  |  804  |                  (in[3] ^ 0xd800) & 0xf800) == 0)  |  805  |                 break;  |  806  | #endif  |  807  |             out[0] = STRINGLIB(SWAB4)(in[0]);  |  808  |             out[1] = STRINGLIB(SWAB4)(in[1]);  |  809  |             out[2] = STRINGLIB(SWAB4)(in[2]);  |  810  |             out[3] = STRINGLIB(SWAB4)(in[3]);  |  811  |             in += 4; out += 4;  |  812  |         }  |  813  |         while (in < end) {  Branch (813:16): [True: 229, False: 192]
  |  814  |             Py_UCS4 ch = *in++;  |  815  | #if STRINGLIB_SIZEOF_CHAR > 1  |  816  |             if (Py_UNICODE_IS_SURROGATE(ch)) {  Branch (816:17): [True: 18, False: 211]
  |  817  |                 /* reject surrogate characters (U+D800-U+DFFF) */  |  818  |                 goto fail;  |  819  |             }  |  820  | #endif  |  821  |             *out++ = STRINGLIB(SWAB4)(ch);  |  822  |         }  |  823  |     }  |  824  |     *outptr = out;  |  825  |     return len;  |  826  | #if STRINGLIB_SIZEOF_CHAR > 1  |  827  |   fail:  |  828  |     *outptr = out;  |  829  |     return len - (end - in + 1);  |  830  | #endif  |  831  | }  |  
 unicodeobject.c:ucs4lib_utf32_encode Line  | Count  | Source  |  765  | { |  766  |     uint32_t *out = *outptr;  |  767  |     const STRINGLIB_CHAR *end = in + len;  |  768  |     if (native_ordering) {  Branch (768:9): [True: 22, False: 12]
  |  769  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  770  |         while (in < unrolled_end) {  Branch (770:16): [True: 12, False: 14]
  |  771  | #if STRINGLIB_SIZEOF_CHAR > 1  |  772  |             /* check if any character is a surrogate character */  |  773  |             if (((in[0] ^ 0xd800) &   Branch (773:17): [True: 8, False: 4]
  |  774  |                  (in[1] ^ 0xd800) &  |  775  |                  (in[2] ^ 0xd800) &  |  776  |                  (in[3] ^ 0xd800) & 0xf800) == 0)  |  777  |                 break;  |  778  | #endif  |  779  |             out[0] = in[0];  |  780  |             out[1] = in[1];  |  781  |             out[2] = in[2];  |  782  |             out[3] = in[3];  |  783  |             in += 4; out += 4;  |  784  |         }  |  785  |         while (in < end) {  Branch (785:16): [True: 57, False: 18]
  |  786  |             Py_UCS4 ch;  |  787  |             ch = *in++;  |  788  | #if STRINGLIB_SIZEOF_CHAR > 1  |  789  |             if (Py_UNICODE_IS_SURROGATE(ch)) {  Branch (789:17): [True: 4, False: 53]
  |  790  |                 /* reject surrogate characters (U+D800-U+DFFF) */  |  791  |                 goto fail;  |  792  |             }  |  793  | #endif  |  794  |             *out++ = ch;  |  795  |         }  |  796  |     } else { |  797  |         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  |  798  |         while (in < unrolled_end) {  Branch (798:16): [True: 8, False: 8]
  |  799  | #if STRINGLIB_SIZEOF_CHAR > 1  |  800  |             /* check if any character is a surrogate character */  |  801  |             if (((in[0] ^ 0xd800) &   Branch (801:17): [True: 4, False: 4]
  |  802  |                  (in[1] ^ 0xd800) &  |  803  |                  (in[2] ^ 0xd800) &  |  804  |                  (in[3] ^ 0xd800) & 0xf800) == 0)  |  805  |                 break;  |  806  | #endif  |  807  |             out[0] = STRINGLIB(SWAB4)(in[0]);  |  808  |             out[1] = STRINGLIB(SWAB4)(in[1]);  |  809  |             out[2] = STRINGLIB(SWAB4)(in[2]);  |  810  |             out[3] = STRINGLIB(SWAB4)(in[3]);  |  811  |             in += 4; out += 4;  |  812  |         }  |  813  |         while (in < end) {  Branch (813:16): [True: 29, False: 10]
  |  814  |             Py_UCS4 ch = *in++;  |  815  | #if STRINGLIB_SIZEOF_CHAR > 1  |  816  |             if (Py_UNICODE_IS_SURROGATE(ch)) {  Branch (816:17): [True: 2, False: 27]
  |  817  |                 /* reject surrogate characters (U+D800-U+DFFF) */  |  818  |                 goto fail;  |  819  |             }  |  820  | #endif  |  821  |             *out++ = STRINGLIB(SWAB4)(ch);  |  822  |         }  |  823  |     }  |  824  |     *outptr = out;  |  825  |     return len;  |  826  | #if STRINGLIB_SIZEOF_CHAR > 1  |  827  |   fail:  |  828  |     *outptr = out;  |  829  |     return len - (end - in + 1);  |  830  | #endif  |  831  | }  |  
  | 
832  |  | 
833  | #endif  |