Coverage Report

Created: 2022-07-08 09:39

/home/mdboom/Work/builds/cpython/Python/fileutils.c
Line
Count
Source (jump to first uncovered line)
1
#include "Python.h"
2
#include "pycore_fileutils.h"     // fileutils definitions
3
#include "pycore_runtime.h"       // _PyRuntime
4
#include "osdefs.h"               // SEP
5
#include <locale.h>
6
#include <stdlib.h>               // mbstowcs()
7
8
#ifdef MS_WINDOWS
9
#  include <malloc.h>
10
#  include <windows.h>
11
#  include <pathcch.h>            // PathCchCombineEx
12
extern int winerror_to_errno(int);
13
#endif
14
15
#ifdef HAVE_LANGINFO_H
16
#include <langinfo.h>
17
#endif
18
19
#ifdef HAVE_SYS_IOCTL_H
20
#include <sys/ioctl.h>
21
#endif
22
23
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
24
#include <iconv.h>
25
#endif
26
27
#ifdef HAVE_FCNTL_H
28
#include <fcntl.h>
29
#endif /* HAVE_FCNTL_H */
30
31
#ifdef O_CLOEXEC
32
/* Does open() support the O_CLOEXEC flag? Possible values:
33
34
   -1: unknown
35
    0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
36
    1: open() supports O_CLOEXEC flag, close-on-exec is set
37
38
   The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
39
   and os.open(). */
40
int _Py_open_cloexec_works = -1;
41
#endif
42
43
// The value must be the same in unicodeobject.c.
44
#define MAX_UNICODE 0x10ffff
45
46
// mbstowcs() and mbrtowc() errors
47
static const size_t DECODE_ERROR = ((size_t)-1);
48
static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
49
50
51
static int
52
get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
53
{
54
    switch (errors)
55
    {
56
    case _Py_ERROR_STRICT:
  Branch (56:5): [True: 20.0k, False: 271k]
57
        *surrogateescape = 0;
58
        return 0;
59
    case _Py_ERROR_SURROGATEESCAPE:
  Branch (59:5): [True: 271k, False: 20.0k]
60
        *surrogateescape = 1;
61
        return 0;
62
    default:
  Branch (62:5): [True: 4, False: 291k]
63
        return -1;
64
    }
65
}
66
67
68
PyObject *
69
_Py_device_encoding(int fd)
70
{
71
    int valid;
72
    Py_BEGIN_ALLOW_THREADS
73
    _Py_BEGIN_SUPPRESS_IPH
74
    valid = isatty(fd);
75
    _Py_END_SUPPRESS_IPH
76
    Py_END_ALLOW_THREADS
77
    if (!valid)
  Branch (77:9): [True: 1, False: 1]
78
        Py_RETURN_NONE;
79
80
#if defined(MS_WINDOWS)
81
    UINT cp;
82
    if (fd == 0)
83
        cp = GetConsoleCP();
84
    else if (fd == 1 || fd == 2)
85
        cp = GetConsoleOutputCP();
86
    else
87
        cp = 0;
88
    /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
89
       has no console */
90
    if (cp == 0) {
91
        Py_RETURN_NONE;
92
    }
93
94
    return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
95
#else
96
    if (_PyRuntime.preconfig.utf8_mode) {
  Branch (96:9): [True: 0, False: 1]
97
        _Py_DECLARE_STR(utf_8, "utf-8");
98
        return Py_NewRef(&_Py_STR(utf_8));
99
    }
100
    return _Py_GetLocaleEncodingObject();
101
#endif
102
}
103
104
105
static size_t
106
is_valid_wide_char(wchar_t ch)
107
{
108
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
109
    /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
110
       for non-Unicode locales, which makes values higher than MAX_UNICODE
111
       possibly valid. */
112
    return 1;
113
#endif
114
    if (Py_UNICODE_IS_SURROGATE(ch)) {
  Branch (114:9): [True: 0, False: 5.22M]
115
        // Reject lone surrogate characters
116
        return 0;
117
    }
118
    if (ch > MAX_UNICODE) {
  Branch (118:9): [True: 0, False: 5.22M]
119
        // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
120
        // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
121
        // it creates characters outside the [U+0000; U+10ffff] range:
122
        // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
123
        return 0;
124
    }
125
    return 1;
126
}
127
128
129
static size_t
130
_Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
131
{
132
    size_t count = mbstowcs(dest, src, n);
133
    if (dest != NULL && 
count != DECODE_ERROR282k
) {
  Branch (133:9): [True: 282k, False: 281k]
  Branch (133:25): [True: 281k, False: 1.15k]
134
        for (size_t i=0; i < count; 
i++5.22M
) {
  Branch (134:26): [True: 5.22M, False: 281k]
135
            wchar_t ch = dest[i];
136
            if (!is_valid_wide_char(ch)) {
  Branch (136:17): [True: 0, False: 5.22M]
137
                return DECODE_ERROR;
138
            }
139
        }
140
    }
141
    return count;
142
}
143
144
145
#ifdef HAVE_MBRTOWC
146
static size_t
147
_Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
148
{
149
    assert(pwc != NULL);
150
    size_t count = mbrtowc(pwc, str, len, pmbs);
151
    if (count != 0 && 
count != DECODE_ERROR85
&&
count != INCOMPLETE_CHARACTER70
) {
  Branch (151:9): [True: 85, False: 4]
  Branch (151:23): [True: 70, False: 15]
  Branch (151:48): [True: 70, False: 0]
152
        if (!is_valid_wide_char(*pwc)) {
  Branch (152:13): [True: 0, False: 70]
153
            return DECODE_ERROR;
154
        }
155
    }
156
    return count;
157
}
158
#endif
159
160
161
#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
162
163
#define USE_FORCE_ASCII
164
165
extern int _Py_normalize_encoding(const char *, char *, size_t);
166
167
/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
168
   and POSIX locale. nl_langinfo(CODESET) announces an alias of the
169
   ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
170
   ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
171
   locale.getpreferredencoding() codec. For example, if command line arguments
172
   are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
173
   UnicodeEncodeError instead of retrieving the original byte string.
174
175
   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
176
   nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
177
   one byte in range 0x80-0xff can be decoded from the locale encoding. The
178
   workaround is also enabled on error, for example if getting the locale
179
   failed.
180
181
   On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
182
   announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
183
   ASCII encoding in this case.
184
185
   Values of force_ascii:
186
187
       1: the workaround is used: Py_EncodeLocale() uses
188
          encode_ascii_surrogateescape() and Py_DecodeLocale() uses
189
          decode_ascii()
190
       0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
191
          Py_DecodeLocale() uses mbstowcs()
192
      -1: unknown, need to call check_force_ascii() to get the value
193
*/
194
static int force_ascii = -1;
195
196
/* Compute the value of force_ascii for the current LC_CTYPE locale.

   Return 0 if the LC_CTYPE locale is neither "C" nor "POSIX".

   If nl_langinfo(CODESET) is available:
   - on HP-UX, return 1 if the codeset is "roman8" but the byte 0xA7 is
     decoded as U+00A7 (Latin1 behaviour), otherwise return 0;
   - on other platforms, return 0 if the codeset is not an alias of ASCII,
     return 1 if at least one byte in range 0x80-0xff can be decoded,
     otherwise return 0.

   Return 1 if nl_langinfo(CODESET) is not available, or on error (the
   locale cannot be retrieved, the codeset is missing or empty, or the
   codeset name cannot be normalized).

   The probe buffers passed to _Py_mbstowcs() are NUL-terminated: mbstowcs()
   may need to read the bytes following a multibyte lead byte, and must not
   read past the end of the buffer. */
static int
check_force_ascii(void)
{
    const char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* one byte to decode, followed by a NUL terminator */
        const unsigned char probe[2] = {0xA7, 0};
        wchar_t wch;
        size_t res;

        res = _Py_mbstowcs(&wch, (const char *)probe, 1);
        if (res != DECODE_ERROR && wch == L'\xA7') {
            /* On HP-UX with C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    static const char * const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char * const *alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i = 0x80; i <= 0xff; i++) {
        /* one byte to decode, followed by a NUL terminator */
        unsigned char probe[2] = {0, 0};
        wchar_t wch[1];
        size_t res;

        probe[0] = (unsigned char)i;
        res = _Py_mbstowcs(wch, (const char *)probe, 1);
        if (res != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding succeed:
               the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
295
296
297
int
298
_Py_GetForceASCII(void)
299
{
300
    if (force_ascii == -1) {
  Branch (300:9): [True: 78, False: 25]
301
        force_ascii = check_force_ascii();
302
    }
303
    return force_ascii;
304
}
305
306
307
void
308
_Py_ResetForceASCII(void)
309
{
310
    force_ascii = -1;
311
}
312
313
314
static int
315
encode_ascii(const wchar_t *text, char **str,
316
             size_t *error_pos, const char **reason,
317
             int raw_malloc, _Py_error_handler errors)
318
{
319
    char *result = NULL, *out;
320
    size_t len, i;
321
    wchar_t ch;
322
323
    int surrogateescape;
324
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
  Branch (324:9): [True: 0, False: 0]
325
        return -3;
326
    }
327
328
    len = wcslen(text);
329
330
    /* +1 for NULL byte */
331
    if (raw_malloc) {
  Branch (331:9): [True: 0, False: 0]
332
        result = PyMem_RawMalloc(len + 1);
333
    }
334
    else {
335
        result = PyMem_Malloc(len + 1);
336
    }
337
    if (result == NULL) {
  Branch (337:9): [True: 0, False: 0]
338
        return -1;
339
    }
340
341
    out = result;
342
    for (i=0; i<len; i++) {
  Branch (342:15): [True: 0, False: 0]
343
        ch = text[i];
344
345
        if (ch <= 0x7f) {
  Branch (345:13): [True: 0, False: 0]
346
            /* ASCII character */
347
            *out++ = (char)ch;
348
        }
349
        else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
  Branch (349:18): [True: 0, False: 0]
  Branch (349:37): [True: 0, False: 0]
  Branch (349:53): [True: 0, False: 0]
350
            /* UTF-8b surrogate */
351
            *out++ = (char)(ch - 0xdc00);
352
        }
353
        else {
354
            if (raw_malloc) {
  Branch (354:17): [True: 0, False: 0]
355
                PyMem_RawFree(result);
356
            }
357
            else {
358
                PyMem_Free(result);
359
            }
360
            if (error_pos != NULL) {
  Branch (360:17): [True: 0, False: 0]
361
                *error_pos = i;
362
            }
363
            if (reason) {
  Branch (363:17): [True: 0, False: 0]
364
                *reason = "encoding error";
365
            }
366
            return -2;
367
        }
368
    }
369
    *out = '\0';
370
    *str = result;
371
    return 0;
372
}
373
#else
374
/* The ASCII workaround is not compiled in this configuration:
   always return 0. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
379
380
/* The ASCII workaround is not compiled in this configuration:
   there is no cached value to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
385
#endif   /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
386
387
388
#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
389
static int
390
decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
391
             const char **reason, _Py_error_handler errors)
392
{
393
    wchar_t *res;
394
    unsigned char *in;
395
    wchar_t *out;
396
    size_t argsize = strlen(arg) + 1;
397
398
    int surrogateescape;
399
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
  Branch (399:9): [True: 0, False: 0]
400
        return -3;
401
    }
402
403
    if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
  Branch (403:9): [True: 0, False: 0]
404
        return -1;
405
    }
406
    res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
407
    if (!res) {
  Branch (407:9): [True: 0, False: 0]
408
        return -1;
409
    }
410
411
    out = res;
412
    for (in = (unsigned char*)arg; *in; in++) {
  Branch (412:36): [True: 0, False: 0]
413
        unsigned char ch = *in;
414
        if (ch < 128) {
  Branch (414:13): [True: 0, False: 0]
415
            *out++ = ch;
416
        }
417
        else {
418
            if (!surrogateescape) {
  Branch (418:17): [True: 0, False: 0]
419
                PyMem_RawFree(res);
420
                if (wlen) {
  Branch (420:21): [True: 0, False: 0]
421
                    *wlen = in - (unsigned char*)arg;
422
                }
423
                if (reason) {
  Branch (423:21): [True: 0, False: 0]
424
                    *reason = "decoding error";
425
                }
426
                return -2;
427
            }
428
            *out++ = 0xdc00 + ch;
429
        }
430
    }
431
    *out = 0;
432
433
    if (wlen != NULL) {
  Branch (433:9): [True: 0, False: 0]
434
        *wlen = out - res;
435
    }
436
    *wstr = res;
437
    return 0;
438
}
439
#endif   /* !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII) */
440
441
static int
442
decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
443
                      const char **reason, _Py_error_handler errors)
444
{
445
    wchar_t *res;
446
    size_t argsize;
447
    size_t count;
448
#ifdef HAVE_MBRTOWC
449
    unsigned char *in;
450
    wchar_t *out;
451
    mbstate_t mbs;
452
#endif
453
454
    int surrogateescape;
455
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
  Branch (455:9): [True: 2, False: 281k]
456
        return -3;
457
    }
458
459
#ifdef HAVE_BROKEN_MBSTOWCS
460
    /* Some platforms have a broken implementation of
461
     * mbstowcs which does not count the characters that
462
     * would result from conversion.  Use an upper bound.
463
     */
464
    argsize = strlen(arg);
465
#else
466
    argsize = _Py_mbstowcs(NULL, arg, 0);
467
#endif
468
    if (argsize != DECODE_ERROR) {
  Branch (468:9): [True: 281k, False: 8]
469
        if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
  Branch (469:13): [True: 0, False: 281k]
470
            return -1;
471
        }
472
        res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
473
        if (!res) {
  Branch (473:13): [True: 0, False: 281k]
474
            return -1;
475
        }
476
477
        count = _Py_mbstowcs(res, arg, argsize + 1);
478
        if (count != DECODE_ERROR) {
  Branch (478:13): [True: 281k, False: 0]
479
            *wstr = res;
480
            if (wlen != NULL) {
  Branch (480:17): [True: 279k, False: 1.40k]
481
                *wlen = count;
482
            }
483
            return 0;
484
        }
485
        PyMem_RawFree(res);
486
    }
487
488
    /* Conversion failed. Fall back to escaping with surrogateescape. */
489
#ifdef HAVE_MBRTOWC
490
    /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
491
492
    /* Overallocate; as multi-byte characters are in the argument, the
493
       actual output could use less memory. */
494
    argsize = strlen(arg) + 1;
495
    if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
  Branch (495:9): [True: 0, False: 8]
496
        return -1;
497
    }
498
    res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
499
    if (!res) {
  Branch (499:9): [True: 0, False: 8]
500
        return -1;
501
    }
502
503
    in = (unsigned char*)arg;
504
    out = res;
505
    memset(&mbs, 0, sizeof mbs);
506
    while (argsize) {
  Branch (506:12): [True: 89, False: 0]
507
        size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
508
        if (converted == 0) {
  Branch (508:13): [True: 4, False: 85]
509
            /* Reached end of string; null char stored. */
510
            break;
511
        }
512
513
        if (converted == INCOMPLETE_CHARACTER) {
  Branch (513:13): [True: 0, False: 85]
514
            /* Incomplete character. This should never happen,
515
               since we provide everything that we have -
516
               unless there is a bug in the C library, or I
517
               misunderstood how mbrtowc works. */
518
            goto decode_error;
519
        }
520
521
        if (converted == DECODE_ERROR) {
  Branch (521:13): [True: 15, False: 70]
522
            if (!surrogateescape) {
  Branch (522:17): [True: 4, False: 11]
523
                goto decode_error;
524
            }
525
526
            /* Decoding error. Escape as UTF-8b, and start over in the initial
527
               shift state. */
528
            *out++ = 0xdc00 + *in++;
529
            argsize--;
530
            memset(&mbs, 0, sizeof mbs);
531
            continue;
532
        }
533
534
        // _Py_mbrtowc() reject lone surrogate characters
535
        assert(!Py_UNICODE_IS_SURROGATE(*out));
536
537
        /* successfully converted some bytes */
538
        in += converted;
539
        argsize -= converted;
540
        out++;
541
    }
542
    if (wlen != NULL) {
  Branch (542:9): [True: 4, False: 0]
543
        *wlen = out - res;
544
    }
545
    *wstr = res;
546
    return 0;
547
548
decode_error:
549
    PyMem_RawFree(res);
550
    if (wlen) {
  Branch (550:9): [True: 4, False: 0]
551
        *wlen = in - (unsigned char*)arg;
552
    }
553
    if (reason) {
  Branch (553:9): [True: 4, False: 0]
554
        *reason = "decoding error";
555
    }
556
    return -2;
557
#else   /* HAVE_MBRTOWC */
558
    /* Cannot use C locale for escaping; manually escape as if charset
559
       is ASCII (i.e. escape all bytes > 128. This will still roundtrip
560
       correctly in the locale's charset, which must be an ASCII superset. */
561
    return decode_ascii(arg, wstr, wlen, reason, errors);
562
#endif   /* HAVE_MBRTOWC */
563
}
564
565
566
/* Decode a byte string from the locale encoding.
567
568
   Use the strict error handler if 'errors' is _Py_ERROR_STRICT.  Use the
569
   surrogateescape error handler if 'errors' is _Py_ERROR_SURROGATEESCAPE: undecodable
570
   bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
571
   can be decoded as a surrogate character, escape the bytes using the
572
   surrogateescape error handler instead of decoding them.
573
574
   On success, return 0 and write the newly allocated wide character string into
575
   *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
576
   the number of wide characters excluding the null character into *wlen.
577
578
   On memory allocation failure, return -1.
579
580
   On decoding error, return -2. If wlen is not NULL, write the start of
581
   invalid byte sequence in the input string into *wlen. If reason is not NULL,
582
   write the decoding error message into *reason.
583
584
   Return -3 if the error handler 'errors' is not supported.
585
586
   Use the Py_EncodeLocaleEx() function to encode the character string back to
587
   a byte string. */
588
int
589
_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
590
                   const char **reason,
591
                   int current_locale, _Py_error_handler errors)
592
{
593
    if (current_locale) {
  Branch (593:9): [True: 187k, False: 93.7k]
594
#ifdef _Py_FORCE_UTF8_LOCALE
595
        return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
596
                                errors);
597
#else
598
        return decode_current_locale(arg, wstr, wlen, reason, errors);
599
#endif
600
    }
601
602
#ifdef _Py_FORCE_UTF8_FS_ENCODING
603
    return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
604
                            errors);
605
#else
606
    int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
607
#ifdef MS_WINDOWS
608
    use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
609
#endif
610
    if (use_utf8) {
  Branch (610:9): [True: 1.70k, False: 92.0k]
611
        return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
612
                                errors);
613
    }
614
615
#ifdef USE_FORCE_ASCII
616
    if (force_ascii == -1) {
  Branch (616:9): [True: 40, False: 92.0k]
617
        force_ascii = check_force_ascii();
618
    }
619
620
    if (force_ascii) {
  Branch (620:9): [True: 0, False: 92.0k]
621
        /* force ASCII encoding to workaround mbstowcs() issue */
622
        return decode_ascii(arg, wstr, wlen, reason, errors);
623
    }
624
#endif
625
626
    return decode_current_locale(arg, wstr, wlen, reason, errors);
627
#endif   /* !_Py_FORCE_UTF8_FS_ENCODING */
628
}
629
630
631
/* Decode a byte string from the locale encoding with the
632
   surrogateescape error handler: undecodable bytes are decoded as characters
633
   in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
634
   character, escape the bytes using the surrogateescape error handler instead
635
   of decoding them.
636
637
   Return a pointer to a newly allocated wide character string, use
638
   PyMem_RawFree() to free the memory. If wlen is not NULL, write the number of
639
   wide characters excluding the null character into *wlen.
640
641
   Return NULL on decoding error or memory allocation error. If wlen is not
642
   NULL, *wlen is set to (size_t)-1 on memory error or set to (size_t)-2 on
643
   decoding error.
644
645
   Decoding errors should never happen, unless there is a bug in the C
646
   library.
647
648
   Use the Py_EncodeLocale() function to encode the character string back to a
649
   byte string. */
650
wchar_t*
651
Py_DecodeLocale(const char* arg, size_t *wlen)
652
{
653
    wchar_t *wstr;
654
    int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
655
                                 NULL, 0,
656
                                 _Py_ERROR_SURROGATEESCAPE);
657
    if (res != 0) {
  Branch (657:9): [True: 0, False: 1.58k]
658
        assert(res != -3);
659
        if (wlen != NULL) {
  Branch (659:13): [True: 0, False: 0]
660
            *wlen = (size_t)res;
661
        }
662
        return NULL;
663
    }
664
    return wstr;
665
}
666
667
668
static int
669
encode_current_locale(const wchar_t *text, char **str,
670
                      size_t *error_pos, const char **reason,
671
                      int raw_malloc, _Py_error_handler errors)
672
{
673
    const size_t len = wcslen(text);
674
    char *result = NULL, *bytes = NULL;
675
    size_t i, size, converted;
676
    wchar_t c, buf[2];
677
678
    int surrogateescape;
679
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
  Branch (679:9): [True: 2, False: 9.94k]
680
        return -3;
681
    }
682
683
    /* The function works in two steps:
684
       1. compute the length of the output buffer in bytes (size)
685
       2. outputs the bytes */
686
    size = 0;
687
    buf[1] = 0;
688
    while (1) {
  Branch (688:12): [Folded - Ignored]
689
        for (i=0; i < len; 
i++922k
) {
  Branch (689:19): [True: 922k, False: 19.8k]
690
            c = text[i];
691
            if (c >= 0xdc80 && 
c <= 0xdcff9
) {
  Branch (691:17): [True: 9, False: 922k]
  Branch (691:32): [True: 5, False: 4]
692
                if (!surrogateescape) {
  Branch (692:21): [True: 1, False: 4]
693
                    goto encode_error;
694
                }
695
                /* UTF-8b surrogate */
696
                if (bytes != NULL) {
  Branch (696:21): [True: 2, False: 2]
697
                    *bytes++ = c - 0xdc00;
698
                    size--;
699
                }
700
                else {
701
                    size++;
702
                }
703
                continue;
704
            }
705
            else {
706
                buf[0] = c;
707
                if (bytes != NULL) {
  Branch (707:21): [True: 461k, False: 461k]
708
                    converted = wcstombs(bytes, buf, size);
709
                }
710
                else {
711
                    converted = wcstombs(NULL, buf, 0);
712
                }
713
                if (converted == DECODE_ERROR) {
  Branch (713:21): [True: 0, False: 922k]
714
                    goto encode_error;
715
                }
716
                if (bytes != NULL) {
  Branch (716:21): [True: 461k, False: 461k]
717
                    bytes += converted;
718
                    size -= converted;
719
                }
720
                else {
721
                    size += converted;
722
                }
723
            }
724
        }
725
        if (result != NULL) {
  Branch (725:13): [True: 9.94k, False: 9.94k]
726
            *bytes = '\0';
727
            break;
728
        }
729
730
        size += 1; /* nul byte at the end */
731
        if (raw_malloc) {
  Branch (731:13): [True: 9.94k, False: 0]
732
            result = PyMem_RawMalloc(size);
733
        }
734
        else {
735
            result = PyMem_Malloc(size);
736
        }
737
        if (result == NULL) {
  Branch (737:13): [True: 0, False: 9.94k]
738
            return -1;
739
        }
740
        bytes = result;
741
    }
742
    *str = result;
743
    return 0;
744
745
encode_error:
746
    if (raw_malloc) {
  Branch (746:9): [True: 1, False: 0]
747
        PyMem_RawFree(result);
748
    }
749
    else {
750
        PyMem_Free(result);
751
    }
752
    if (error_pos != NULL) {
  Branch (752:9): [True: 1, False: 0]
753
        *error_pos = i;
754
    }
755
    if (reason) {
  Branch (755:9): [True: 1, False: 0]
756
        *reason = "encoding error";
757
    }
758
    return -2;
759
}
760
761
762
/* Encode a string to the locale encoding.
763
764
   Parameters:
765
766
   * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
767
     of PyMem_Malloc().
768
   * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
769
     Python filesystem encoding.
770
   * errors: error handler like "strict" or "surrogateescape".
771
772
   Return value:
773
774
    0: success, *str is set to a newly allocated encoded string.
775
   -1: memory allocation failure
776
   -2: encoding error, set *error_pos and *reason (if set).
777
   -3: the error handler 'errors' is not supported.
778
 */
779
static int
780
encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
781
                 const char **reason,
782
                 int raw_malloc, int current_locale, _Py_error_handler errors)
783
{
784
    if (current_locale) {
  Branch (784:9): [True: 1.28k, False: 8.82k]
785
#ifdef _Py_FORCE_UTF8_LOCALE
786
        return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
787
                                raw_malloc, errors);
788
#else
789
        return encode_current_locale(text, str, error_pos, reason,
790
                                     raw_malloc, errors);
791
#endif
792
    }
793
794
#ifdef _Py_FORCE_UTF8_FS_ENCODING
795
    return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
796
                            raw_malloc, errors);
797
#else
798
    int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
799
#ifdef MS_WINDOWS
800
    use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
801
#endif
802
    if (use_utf8) {
  Branch (802:9): [True: 167, False: 8.65k]
803
        return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
804
                                raw_malloc, errors);
805
    }
806
807
#ifdef USE_FORCE_ASCII
808
    if (force_ascii == -1) {
  Branch (808:9): [True: 0, False: 8.65k]
809
        force_ascii = check_force_ascii();
810
    }
811
812
    if (force_ascii) {
  Branch (812:9): [True: 0, False: 8.65k]
813
        return encode_ascii(text, str, error_pos, reason,
814
                            raw_malloc, errors);
815
    }
816
#endif
817
818
    return encode_current_locale(text, str, error_pos, reason,
819
                                 raw_malloc, errors);
820
#endif   /* _Py_FORCE_UTF8_FS_ENCODING */
821
}
822
823
static char*
824
encode_locale(const wchar_t *text, size_t *error_pos,
825
              int raw_malloc, int current_locale)
826
{
827
    char *str;
828
    int res = encode_locale_ex(text, &str, error_pos, NULL,
829
                               raw_malloc, current_locale,
830
                               _Py_ERROR_SURROGATEESCAPE);
831
    if (res != -2 && error_pos) {
  Branch (831:9): [True: 2.10k, False: 0]
  Branch (831:22): [True: 0, False: 2.10k]
832
        *error_pos = (size_t)-1;
833
    }
834
    if (res != 0) {
  Branch (834:9): [True: 0, False: 2.10k]
835
        return NULL;
836
    }
837
    return str;
838
}
839
840
/* Encode a wide character string to the locale encoding with the
841
   surrogateescape error handler: surrogate characters in the range
842
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
843
844
   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
845
   the memory. Return NULL on encoding or memory allocation error.
846
847
   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
848
   to the index of the invalid character on encoding error.
849
850
   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
851
   character string. */
852
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    /* PyMem_Malloc() allocator, not the current locale */
    const int raw_malloc = 0;
    const int current_locale = 0;
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
857
858
859
/* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree()
860
   instead of PyMem_Free(). */
861
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    /* PyMem_RawMalloc() allocator, not the current locale */
    const int raw_malloc = 1;
    const int current_locale = 0;
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
866
867
868
int
869
_Py_EncodeLocaleEx(const wchar_t *text, char **str,
870
                   size_t *error_pos, const char **reason,
871
                   int current_locale, _Py_error_handler errors)
872
{
873
    return encode_locale_ex(text, str, error_pos, reason, 1,
874
                            current_locale, errors);
875
}
876
877
878
// Get the current locale encoding name:
879
//
880
// - Return "utf-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
881
// - Return "utf-8" if the UTF-8 Mode is enabled
882
// - On Windows, return the ANSI code page (ex: "cp1250")
883
// - Return "utf-8" if nl_langinfo(CODESET) returns an empty string.
884
// - Otherwise, return nl_langinfo(CODESET).
885
//
886
// Return NULL on memory allocation failure.
887
//
888
// See also config_get_locale_encoding()
889
wchar_t*
890
_Py_GetLocaleEncoding(void)
891
{
892
#ifdef _Py_FORCE_UTF8_LOCALE
893
    // On Android langinfo.h and CODESET are missing,
894
    // and UTF-8 is always used in mbstowcs() and wcstombs().
895
    return _PyMem_RawWcsdup(L"utf-8");
896
#else
897
898
#ifdef MS_WINDOWS
899
    wchar_t encoding[23];
900
    unsigned int ansi_codepage = GetACP();
901
    swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
902
    encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
903
    return _PyMem_RawWcsdup(encoding);
904
#else
905
    const char *encoding = nl_langinfo(CODESET);
906
    if (!encoding || encoding[0] == '\0') {
  Branch (906:9): [True: 0, False: 1.39k]
  Branch (906:22): [True: 0, False: 1.39k]
907
        // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
908
        // macOS if the LC_CTYPE locale is not supported.
909
        return _PyMem_RawWcsdup(L"utf-8");
910
    }
911
912
    wchar_t *wstr;
913
    int res = decode_current_locale(encoding, &wstr, NULL,
914
                                    NULL, _Py_ERROR_SURROGATEESCAPE);
915
    if (res < 0) {
  Branch (915:9): [True: 0, False: 1.39k]
916
        return NULL;
917
    }
918
    return wstr;
919
#endif  // !MS_WINDOWS
920
921
#endif  // !_Py_FORCE_UTF8_LOCALE
922
}
923
924
925
/* Return the current locale encoding name, as computed by
   _Py_GetLocaleEncoding(), as a new Python str object.

   Raise MemoryError and return NULL if _Py_GetLocaleEncoding() fails.
   Return NULL if PyUnicode_FromWideChar() fails. */
PyObject *
_Py_GetLocaleEncodingObject(void)
{
    wchar_t *name = _Py_GetLocaleEncoding();
    if (!name) {
        PyErr_NoMemory();
        return NULL;
    }

    /* -1: let PyUnicode_FromWideChar() compute the length itself. */
    PyObject *result = PyUnicode_FromWideChar(name, -1);
    /* The temporary wide string is released whether or not the
       conversion succeeded. */
    PyMem_RawFree(name);
    return result;
}
938
939
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
940
941
/* Tell whether the current locale uses a non-Unicode internal wchar_t form.

   Return 0 if nl_langinfo(CODESET) returns NULL, "UTF-8" or "646";
   return 1 for any other codeset. */
int
_Py_LocaleUsesNonUnicodeWchar(void)
{
    /* Oracle Solaris uses non-Unicode internal wchar_t form for
       non-Unicode locales and hence needs conversion to UTF first. */
    const char *name = nl_langinfo(CODESET);
    if (name == NULL) {
        return 0;
    }
    if (strcmp(name, "UTF-8") == 0) {
        return 0;
    }
    /* 646 refers to ISO/IEC 646 standard that corresponds to ASCII encoding */
    if (strcmp(name, "646") == 0) {
        return 0;
    }
    return 1;
}
954
955
static wchar_t *
956
_Py_ConvertWCharForm(const wchar_t *source, Py_ssize_t size,
957
                     const char *tocode, const char *fromcode)
958
{
959
    static_assert(sizeof(wchar_t) == 4, "wchar_t must be 32-bit");
960
961
    /* Ensure we won't overflow the size. */
962
    if (size > (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t))) {
963
        PyErr_NoMemory();
964
        return NULL;
965
    }
966
967
    /* the string doesn't have to be NULL terminated */
968
    wchar_t* target = PyMem_Malloc(size * sizeof(wchar_t));
969
    if (target == NULL) {
970
        PyErr_NoMemory();
971
        return NULL;
972
    }
973
974
    iconv_t cd = iconv_open(tocode, fromcode);
975
    if (cd == (iconv_t)-1) {
976
        PyErr_Format(PyExc_ValueError, "iconv_open() failed");
977
        PyMem_Free(target);
978
        return NULL;
979
    }
980
981
    char *inbuf = (char *) source;
982
    char *outbuf = (char *) target;
983
    size_t inbytesleft = sizeof(wchar_t) * size;
984
    size_t outbytesleft = inbytesleft;
985
986
    size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
987
    if (ret == DECODE_ERROR) {
988
        PyErr_Format(PyExc_ValueError, "iconv() failed");
989
        PyMem_Free(target);
990
        iconv_close(cd);
991
        return NULL;
992
    }
993
994
    iconv_close(cd);
995
    return target;
996
}
997
998
/* Convert a wide character string to the UCS-4 encoded string. This
999
   is necessary on systems where internal form of wchar_t are not Unicode
1000
   code points (e.g. Oracle Solaris).
1001
1002
   Return a pointer to a newly allocated string, use PyMem_Free() to free
1003
   the memory. Return NULL and raise exception on conversion or memory
1004
   allocation error. */
1005
wchar_t *
1006
_Py_DecodeNonUnicodeWchar(const wchar_t *native, Py_ssize_t size)
1007
{
1008
    return _Py_ConvertWCharForm(native, size, "UCS-4-INTERNAL", "wchar_t");
1009
}
1010
1011
/* Convert a UCS-4 encoded string to native wide character string. This
1012
   is necessary on systems where internal form of wchar_t are not Unicode
1013
   code points (e.g. Oracle Solaris).
1014
1015
   The conversion is done in place. This can be done because both wchar_t
1016
   and UCS-4 use 4-byte encoding, and one wchar_t symbol always correspond
1017
   to a single UCS-4 symbol and vice versa. (This is true for Oracle Solaris,
1018
   which is currently the only system using these functions; it doesn't have
1019
   to be for other systems).
1020
1021
   Return 0 on success. Return -1 and raise exception on conversion
1022
   or memory allocation error. */
1023
int
1024
_Py_EncodeNonUnicodeWchar_InPlace(wchar_t *unicode, Py_ssize_t size)
1025
{
1026
    wchar_t* result = _Py_ConvertWCharForm(unicode, size, "wchar_t", "UCS-4-INTERNAL");
1027
    if (!result) {
1028
        return -1;
1029
    }
1030
    memcpy(unicode, result, size * sizeof(wchar_t));
1031
    PyMem_Free(result);
1032
    return 0;
1033
}
1034
#endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */
1035
1036
#ifdef MS_WINDOWS
1037
static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
1038
1039
static void
1040
FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
1041
{
1042
    /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
1043
    /* Cannot simply cast and dereference in_ptr,
1044
       since it might not be aligned properly */
1045
    __int64 in;
1046
    memcpy(&in, in_ptr, sizeof(in));
1047
    *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1048
    *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
1049
}
1050
1051
void
1052
_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
1053
{
1054
    /* XXX endianness */
1055
    __int64 out;
1056
    out = time_in + secs_between_epochs;
1057
    out = out * 10000000 + nsec_in / 100;
1058
    memcpy(out_ptr, &out, sizeof(out));
1059
}
1060
1061
/* Below, we *know* that ugo+r is 0444 */
1062
#if _S_IREAD != 0400
1063
#error Unsupported C library
1064
#endif
1065
static int
1066
attributes_to_mode(DWORD attr)
1067
{
1068
    int m = 0;
1069
    if (attr & FILE_ATTRIBUTE_DIRECTORY)
1070
        m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
1071
    else
1072
        m |= _S_IFREG;
1073
    if (attr & FILE_ATTRIBUTE_READONLY)
1074
        m |= 0444;
1075
    else
1076
        m |= 0666;
1077
    return m;
1078
}
1079
1080
void
1081
_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
1082
                           struct _Py_stat_struct *result)
1083
{
1084
    memset(result, 0, sizeof(*result));
1085
    result->st_mode = attributes_to_mode(info->dwFileAttributes);
1086
    result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
1087
    result->st_dev = info->dwVolumeSerialNumber;
1088
    result->st_rdev = result->st_dev;
1089
    FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
1090
    FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1091
    FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
1092
    result->st_nlink = info->nNumberOfLinks;
1093
    result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
1094
    /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1095
       open other name surrogate reparse points without traversing them. To
1096
       detect/handle these, check st_file_attributes and st_reparse_tag. */
1097
    result->st_reparse_tag = reparse_tag;
1098
    if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1099
        reparse_tag == IO_REPARSE_TAG_SYMLINK) {
1100
        /* first clear the S_IFMT bits */
1101
        result->st_mode ^= (result->st_mode & S_IFMT);
1102
        /* now set the bits that make this a symlink */
1103
        result->st_mode |= S_IFLNK;
1104
    }
1105
    result->st_file_attributes = info->dwFileAttributes;
1106
}
1107
#endif
1108
1109
/* Get the status of the file referred to by the file descriptor fd.

   POSIX: plain call to fstat().

   Windows: GetFileType() and GetFileInformationByHandle() are used because
   they support files larger than 2 GiB, whereas fstat() may fail with
   EOVERFLOW on such files since its file size type is a signed 32-bit
   integer: see issue #23152.

   Fill *status and return 0 on success. Return nonzero on error, with the
   last Windows error set on Windows and errno set on POSIX. No Python
   exception is raised. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    HANDLE h = _Py_get_osfhandle_noraise(fd);
    if (h == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    int type = GetFileType(h);
    if (type == FILE_TYPE_UNKNOWN) {
        /* FILE_TYPE_UNKNOWN is only an error when a Win32 error code
           is also set; otherwise it is a valid but unknown file. */
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
    }

    /* Only disk files carry the detailed information queried below; for
       everything else at most the file type goes into st_mode. */
    switch (type) {
    case FILE_TYPE_DISK:
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        return 0;
    }

    BY_HANDLE_FILE_INFORMATION info;
    if (!GetFileInformationByHandle(h, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, status);
#endif
}
1172
1173
/* Return information about a file.
1174
1175
   On POSIX, use fstat().
1176
1177
   On Windows, use GetFileType() and GetFileInformationByHandle() which support
1178
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
1179
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
1180
   #23152.
1181
1182
   Raise an exception and return -1 on error. On Windows, set the last Windows
1183
   error on error. On POSIX, set errno on error. Fill status and return 0 on
1184
   success.
1185
1186
   Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1187
   to call fstat(). The caller must hold the GIL. */
1188
int
1189
_Py_fstat(int fd, struct _Py_stat_struct *status)
1190
{
1191
    int res;
1192
1193
    assert(PyGILState_Check());
1194
1195
    Py_BEGIN_ALLOW_THREADS
1196
    res = _Py_fstat_noraise(fd, status);
1197
    Py_END_ALLOW_THREADS
1198
1199
    if (res != 0) {
  Branch (1199:9): [True: 2, False: 295]
1200
#ifdef MS_WINDOWS
1201
        PyErr_SetFromWindowsErr(0);
1202
#else
1203
        PyErr_SetFromErrno(PyExc_OSError);
1204
#endif
1205
        return -1;
1206
    }
1207
    return 0;
1208
}
1209
1210
/* Like _Py_stat() but with a raw filename (wide character string) instead
   of a Python object.

   On Windows, call _wstat() and copy only st_mode into *buf on success.
   Otherwise, encode the path with _Py_EncodeLocaleRaw() and call stat().

   Return 0 on success, or -1 with errno set on error. errno is set to EINVAL
   if the path cannot be encoded. No Python exception is raised. */
int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
    int err;
#ifdef MS_WINDOWS
    struct _stat wstatbuf;
    err = _wstat(path, &wstatbuf);
    if (!err) {
        buf->st_mode = wstatbuf.st_mode;
    }
#else
    char *fname = _Py_EncodeLocaleRaw(path, NULL);
    if (fname == NULL) {
        errno = EINVAL;
        return -1;
    }
    err = stat(fname, buf);

    /* Releasing the encoded path may modify errno: keep the value set by
       stat() so the caller can rely on it when err != 0. */
    int saved_errno = errno;
    PyMem_RawFree(fname);
    errno = saved_errno;
#endif
    return err;
}
1233
1234
1235
/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1236
   call stat() otherwise. Only fill st_mode attribute on Windows.
1237
1238
   Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1239
   raised. */
1240
1241
int
1242
_Py_stat(PyObject *path, struct stat *statbuf)
1243
{
1244
#ifdef MS_WINDOWS
1245
    int err;
1246
1247
    wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1248
    if (wpath == NULL)
1249
        return -2;
1250
1251
    err = _Py_wstat(wpath, statbuf);
1252
    PyMem_Free(wpath);
1253
    return err;
1254
#else
1255
    int ret;
1256
    PyObject *bytes;
1257
    char *cpath;
1258
1259
    bytes = PyUnicode_EncodeFSDefault(path);
1260
    if (bytes == NULL)
  Branch (1260:9): [True: 0, False: 0]
1261
        return -2;
1262
1263
    /* check for embedded null bytes */
1264
    if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
  Branch (1264:9): [True: 0, False: 0]
1265
        Py_DECREF(bytes);
1266
        return -2;
1267
    }
1268
1269
    ret = stat(cpath, statbuf);
1270
    Py_DECREF(bytes);
1271
    return ret;
1272
#endif
1273
}
1274
1275
1276
/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1277
static int
1278
get_inheritable(int fd, int raise)
1279
{
1280
#ifdef MS_WINDOWS
1281
    HANDLE handle;
1282
    DWORD flags;
1283
1284
    handle = _Py_get_osfhandle_noraise(fd);
1285
    if (handle == INVALID_HANDLE_VALUE) {
1286
        if (raise)
1287
            PyErr_SetFromErrno(PyExc_OSError);
1288
        return -1;
1289
    }
1290
1291
    if (!GetHandleInformation(handle, &flags)) {
1292
        if (raise)
1293
            PyErr_SetFromWindowsErr(0);
1294
        return -1;
1295
    }
1296
1297
    return (flags & HANDLE_FLAG_INHERIT);
1298
#else
1299
    int flags;
1300
1301
    flags = fcntl(fd, F_GETFD, 0);
1302
    if (flags == -1) {
  Branch (1302:9): [True: 2, False: 201]
1303
        if (raise)
  Branch (1303:13): [True: 2, False: 0]
1304
            PyErr_SetFromErrno(PyExc_OSError);
1305
        return -1;
1306
    }
1307
    return !(flags & FD_CLOEXEC);
1308
#endif
1309
}
1310
1311
/* Get the inheritable flag of the specified file descriptor.

   Return 1 when the file descriptor can be inherited and 0 when it cannot.
   On error, raise an exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    /* raise=1: errors are reported as Python exceptions. */
    int inheritable = get_inheritable(fd, 1);
    return inheritable;
}
1319
1320
1321
/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1322
static int
1323
set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1324
{
1325
#ifdef MS_WINDOWS
1326
    HANDLE handle;
1327
    DWORD flags;
1328
#else
1329
#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1330
    static int ioctl_works = -1;
1331
    int request;
1332
    int err;
1333
#endif
1334
    int flags, new_flags;
1335
    int res;
1336
#endif
1337
1338
    /* atomic_flag_works can only be used to make the file descriptor
1339
       non-inheritable */
1340
    assert(!(atomic_flag_works != NULL && inheritable));
1341
1342
    if (atomic_flag_works != NULL && 
!inheritable236k
) {
  Branch (1342:9): [True: 236k, False: 4.23k]
  Branch (1342:38): [True: 236k, False: 0]
1343
        if (*atomic_flag_works == -1) {
  Branch (1343:13): [True: 73, False: 236k]
1344
            int isInheritable = get_inheritable(fd, raise);
1345
            if (isInheritable == -1)
  Branch (1345:17): [True: 0, False: 73]
1346
                return -1;
1347
            *atomic_flag_works = !isInheritable;
1348
        }
1349
1350
        if (*atomic_flag_works)
  Branch (1350:13): [True: 236k, False: 0]
1351
            return 0;
1352
    }
1353
1354
#ifdef MS_WINDOWS
1355
    handle = _Py_get_osfhandle_noraise(fd);
1356
    if (handle == INVALID_HANDLE_VALUE) {
1357
        if (raise)
1358
            PyErr_SetFromErrno(PyExc_OSError);
1359
        return -1;
1360
    }
1361
1362
    if (inheritable)
1363
        flags = HANDLE_FLAG_INHERIT;
1364
    else
1365
        flags = 0;
1366
1367
    /* This check can be removed once support for Windows 7 ends. */
1368
#define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
1369
        GetFileType(handle) == FILE_TYPE_CHAR)
1370
1371
    if (!CONSOLE_PSEUDOHANDLE(handle) &&
1372
        !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
1373
        if (raise)
1374
            PyErr_SetFromWindowsErr(0);
1375
        return -1;
1376
    }
1377
#undef CONSOLE_PSEUDOHANDLE
1378
    return 0;
1379
1380
#else
1381
1382
#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1383
    if (ioctl_works != 0 && raise != 0) {
  Branch (1383:9): [True: 4.23k, False: 0]
  Branch (1383:29): [True: 1.66k, False: 2.57k]
1384
        /* fast-path: ioctl() only requires one syscall */
1385
        /* caveat: raise=0 is an indicator that we must be async-signal-safe
1386
         * thus avoid using ioctl() so we skip the fast-path. */
1387
        if (inheritable)
  Branch (1387:13): [True: 31, False: 1.63k]
1388
            request = FIONCLEX;
1389
        else
1390
            request = FIOCLEX;
1391
        err = ioctl(fd, request, NULL);
1392
        if (!err) {
  Branch (1392:13): [True: 1.65k, False: 5]
1393
            ioctl_works = 1;
1394
            return 0;
1395
        }
1396
1397
#ifdef O_PATH
1398
        if (errno == EBADF) {
  Branch (1398:13): [True: 5, False: 0]
1399
            // bpo-44849: On Linux and FreeBSD, ioctl(FIOCLEX) fails with EBADF
1400
            // on O_PATH file descriptors. Fall through to the fcntl()
1401
            // implementation.
1402
        }
1403
        else
1404
#endif
1405
        if (errno != ENOTTY && errno != EACCES) {
  Branch (1405:13): [True: 0, False: 0]
  Branch (1405:32): [True: 0, False: 0]
1406
            if (raise)
  Branch (1406:17): [True: 0, False: 0]
1407
                PyErr_SetFromErrno(PyExc_OSError);
1408
            return -1;
1409
        }
1410
        else {
1411
            /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1412
               device". The ioctl is declared but not supported by the kernel.
1413
               Remember that ioctl() doesn't work. It is the case on
1414
               Illumos-based OS for example.
1415
1416
               Issue #27057: When SELinux policy disallows ioctl it will fail
1417
               with EACCES. While FIOCLEX is safe operation it may be
1418
               unavailable because ioctl was denied altogether.
1419
               This can be the case on Android. */
1420
            ioctl_works = 0;
1421
        }
1422
        /* fallback to fcntl() if ioctl() does not work */
1423
    }
1424
#endif
1425
1426
    /* slow-path: fcntl() requires two syscalls */
1427
    flags = fcntl(fd, F_GETFD);
1428
    if (flags < 0) {
  Branch (1428:9): [True: 3, False: 2.57k]
1429
        if (raise)
  Branch (1429:13): [True: 3, False: 0]
1430
            PyErr_SetFromErrno(PyExc_OSError);
1431
        return -1;
1432
    }
1433
1434
    if (inheritable) {
  Branch (1434:9): [True: 2.28k, False: 297]
1435
        new_flags = flags & ~FD_CLOEXEC;
1436
    }
1437
    else {
1438
        new_flags = flags | FD_CLOEXEC;
1439
    }
1440
1441
    if (new_flags == flags) {
  Branch (1441:9): [True: 66, False: 2.51k]
1442
        /* FD_CLOEXEC flag already set/cleared: nothing to do */
1443
        return 0;
1444
    }
1445
1446
    res = fcntl(fd, F_SETFD, new_flags);
1447
    if (res < 0) {
  Branch (1447:9): [True: 0, False: 2.51k]
1448
        if (raise)
  Branch (1448:13): [True: 0, False: 0]
1449
            PyErr_SetFromErrno(PyExc_OSError);
1450
        return -1;
1451
    }
1452
    return 0;
1453
#endif
1454
}
1455
1456
/* Clear the inheritable flag of the file descriptor fd without ever raising
   a Python exception.

   Return 0 on success; on error, set errno and return -1. */
static int
make_non_inheritable(int fd)
{
    /* inheritable=0, raise=0, no atomic flag cache. */
    int status = set_inheritable(fd, 0, 0, NULL);
    return status;
}
1463
1464
/* Set the inheritable flag of the specified file descriptor.
   Return 0 on success. On error, raise an exception and return -1.

   atomic_flag_works is either NULL (no atomic flag was used to create the
   file descriptor) or points to a cached tri-state value:

    * *atomic_flag_works==-1: check if the inheritable flag is set on the
      file descriptor: if yes, set *atomic_flag_works to 1, otherwise set it
      to 0 and set the inheritable flag
    * *atomic_flag_works==1: do nothing
    * *atomic_flag_works==0: set inheritable flag to False

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: it must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    /* raise=1: errors are reported as Python exceptions. */
    int status = set_inheritable(fd, inheritable, 1, atomic_flag_works);
    return status;
}
1485
1486
/* Variant of _Py_set_inheritable() which never raises an exception: on
   error, errno is set and -1 is returned.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    /* raise=0: no exception is raised on error. */
    int status = set_inheritable(fd, inheritable, 0, atomic_flag_works);
    return status;
}
1494
1495
static int
1496
_Py_open_impl(const char *pathname, int flags, int gil_held)
1497
{
1498
    int fd;
1499
    int async_err = 0;
1500
#ifndef MS_WINDOWS
1501
    int *atomic_flag_works;
1502
#endif
1503
1504
#ifdef MS_WINDOWS
1505
    flags |= O_NOINHERIT;
1506
#elif defined(O_CLOEXEC)
1507
    atomic_flag_works = &_Py_open_cloexec_works;
1508
    flags |= O_CLOEXEC;
1509
#else
1510
    atomic_flag_works = NULL;
1511
#endif
1512
1513
    if (gil_held) {
  Branch (1513:9): [True: 0, False: 4.69k]
1514
        PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1515
        if (pathname_obj == NULL) {
  Branch (1515:13): [True: 0, False: 0]
1516
            return -1;
1517
        }
1518
        if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
  Branch (1518:13): [True: 0, False: 0]
1519
            Py_DECREF(pathname_obj);
1520
            return -1;
1521
        }
1522
1523
        do {
1524
            Py_BEGIN_ALLOW_THREADS
1525
            fd = open(pathname, flags);
1526
            Py_END_ALLOW_THREADS
1527
        } while (fd < 0
  Branch (1527:18): [True: 0, False: 0]
1528
                 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
  Branch (1528:21): [True: 0, False: 0]
  Branch (1528:39): [True: 0, False: 0]
1529
        if (async_err) {
  Branch (1529:13): [True: 0, False: 0]
1530
            Py_DECREF(pathname_obj);
1531
            return -1;
1532
        }
1533
        if (fd < 0) {
  Branch (1533:13): [True: 0, False: 0]
1534
            PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1535
            Py_DECREF(pathname_obj);
1536
            return -1;
1537
        }
1538
        Py_DECREF(pathname_obj);
1539
    }
1540
    else {
1541
        fd = open(pathname, flags);
1542
        if (fd < 0)
  Branch (1542:13): [True: 0, False: 4.69k]
1543
            return -1;
1544
    }
1545
1546
#ifndef MS_WINDOWS
1547
    if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
  Branch (1547:9): [True: 0, False: 4.69k]
1548
        close(fd);
1549
        return -1;
1550
    }
1551
#endif
1552
1553
    return fd;
1554
}
1555
1556
/* Wrapper to the open() function: open `pathname` with the specified flags.

   Return a file descriptor on success. On error, raise an exception and
   return -1.

   The file descriptor is created non-inheritable.

   If open() is interrupted by a signal (fails with EINTR), the syscall is
   retried, unless the Python signal handler raises an exception.

   The GIL is released to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());

    int fd = _Py_open_impl(pathname, flags, 1);
    return fd;
}
1573
1574
/* Wrapper to the open() function which never raises an exception: open
   `pathname` with the specified flags.

   Return a file descriptor on success. On error, set errno and return -1.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    /* gil_held=0: _Py_open_impl() takes its branch without Python calls. */
    int fd = _Py_open_impl(pathname, flags, 0);
    return fd;
}
1585
1586
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
1587
   encoding and use fopen() otherwise.
1588
1589
   The file descriptor is created non-inheritable.
1590
1591
   If interrupted by a signal, fail with EINTR. */
1592
FILE *
1593
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
1594
{
1595
    FILE *f;
1596
    if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
  Branch (1596:9): [True: 0, False: 1.35k]
1597
        return NULL;
1598
    }
1599
#ifndef MS_WINDOWS
1600
    char *cpath;
1601
    char cmode[10];
1602
    size_t r;
1603
    r = wcstombs(cmode, mode, 10);
1604
    if (r == DECODE_ERROR || r >= 10) {
  Branch (1604:9): [True: 0, False: 1.35k]
  Branch (1604:30): [True: 0, False: 1.35k]
1605
        errno = EINVAL;
1606
        return NULL;
1607
    }
1608
    cpath = _Py_EncodeLocaleRaw(path, NULL);
1609
    if (cpath == NULL) {
  Branch (1609:9): [True: 0, False: 1.35k]
1610
        return NULL;
1611
    }
1612
    f = fopen(cpath, cmode);
1613
    PyMem_RawFree(cpath);
1614
#else
1615
    f = _wfopen(path, mode);
1616
#endif
1617
    if (f == NULL)
  Branch (1617:9): [True: 1.07k, False: 274]
1618
        return NULL;
1619
    if (make_non_inheritable(fileno(f)) < 0) {
  Branch (1619:9): [True: 0, False: 274]
1620
        fclose(f);
1621
        return NULL;
1622
    }
1623
    return f;
1624
}
1625
1626
1627
/* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
1628
   encoding and call fopen() otherwise.
1629
1630
   Return the new file object on success. Raise an exception and return NULL
1631
   on error.
1632
1633
   The file descriptor is created non-inheritable.
1634
1635
   When interrupted by a signal (open() fails with EINTR), retry the syscall,
1636
   except if the Python signal handler raises an exception.
1637
1638
   Release the GIL to call _wfopen() or fopen(). The caller must hold
1639
   the GIL. */
1640
FILE*
1641
_Py_fopen_obj(PyObject *path, const char *mode)
1642
{
1643
    FILE *f;
1644
    int async_err = 0;
1645
#ifdef MS_WINDOWS
1646
    wchar_t wmode[10];
1647
    int usize;
1648
1649
    assert(PyGILState_Check());
1650
1651
    if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1652
        return NULL;
1653
    }
1654
    if (!PyUnicode_Check(path)) {
1655
        PyErr_Format(PyExc_TypeError,
1656
                     "str file path expected under Windows, got %R",
1657
                     Py_TYPE(path));
1658
        return NULL;
1659
    }
1660
1661
    wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1662
    if (wpath == NULL)
1663
        return NULL;
1664
1665
    usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1666
                                wmode, Py_ARRAY_LENGTH(wmode));
1667
    if (usize == 0) {
1668
        PyErr_SetFromWindowsErr(0);
1669
        PyMem_Free(wpath);
1670
        return NULL;
1671
    }
1672
1673
    do {
1674
        Py_BEGIN_ALLOW_THREADS
1675
        f = _wfopen(wpath, wmode);
1676
        Py_END_ALLOW_THREADS
1677
    } while (f == NULL
1678
             && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1679
    PyMem_Free(wpath);
1680
#else
1681
    PyObject *bytes;
1682
    const char *path_bytes;
1683
1684
    assert(PyGILState_Check());
1685
1686
    if (!PyUnicode_FSConverter(path, &bytes))
  Branch (1686:9): [True: 2, False: 1.05k]
1687
        return NULL;
1688
    path_bytes = PyBytes_AS_STRING(bytes);
1689
1690
    if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
  Branch (1690:9): [True: 0, False: 1.05k]
1691
        Py_DECREF(bytes);
1692
        return NULL;
1693
    }
1694
1695
    do {
1696
        Py_BEGIN_ALLOW_THREADS
1697
        f = fopen(path_bytes, mode);
1698
        Py_END_ALLOW_THREADS
1699
    } while (f == NULL
  Branch (1699:14): [True: 963, False: 89]
1700
             && errno
== EINTR963
&&
!(async_err = PyErr_CheckSignals())0
);
  Branch (1700:17): [True: 0, False: 963]
  Branch (1700:35): [True: 0, False: 0]
1701
1702
    Py_DECREF(bytes);
1703
#endif
1704
    if (async_err)
  Branch (1704:9): [True: 0, False: 1.05k]
1705
        return NULL;
1706
1707
    if (f == NULL) {
  Branch (1707:9): [True: 963, False: 89]
1708
        PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1709
        return NULL;
1710
    }
1711
1712
    if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
  Branch (1712:9): [True: 0, False: 89]
1713
        fclose(f);
1714
        return NULL;
1715
    }
1716
    return f;
1717
}
1718
1719
/* Read count bytes from fd into buf.
1720
1721
   On success, return the number of read bytes, it can be lower than count.
1722
   If the current file offset is at or past the end of file, no bytes are read,
1723
   and read() returns zero.
1724
1725
   On error, raise an exception, set errno and return -1.
1726
1727
   When interrupted by a signal (read() fails with EINTR), retry the syscall.
1728
   If the Python signal handler raises an exception, the function returns -1
1729
   (the syscall is not retried).
1730
1731
   Release the GIL to call read(). The caller must hold the GIL. */
1732
Py_ssize_t
1733
_Py_read(int fd, void *buf, size_t count)
1734
{
1735
    Py_ssize_t n;
1736
    int err;
1737
    int async_err = 0;
1738
1739
    assert(PyGILState_Check());
1740
1741
    /* _Py_read() must not be called with an exception set, otherwise the
1742
     * caller may think that read() was interrupted by a signal and the signal
1743
     * handler raised an exception. */
1744
    assert(!PyErr_Occurred());
1745
1746
    if (count > _PY_READ_MAX) {
  Branch (1746:9): [True: 0, False: 720k]
1747
        count = _PY_READ_MAX;
1748
    }
1749
1750
    _Py_BEGIN_SUPPRESS_IPH
1751
    do {
1752
        Py_BEGIN_ALLOW_THREADS
1753
        errno = 0;
1754
#ifdef MS_WINDOWS
1755
        n = read(fd, buf, (int)count);
1756
#else
1757
        n = read(fd, buf, count);
1758
#endif
1759
        /* save/restore errno because PyErr_CheckSignals()
1760
         * and PyErr_SetFromErrno() can modify it */
1761
        err = errno;
1762
        Py_END_ALLOW_THREADS
1763
    } while (n < 0 && 
err == EINTR46
&&
  Branch (1763:14): [True: 46, False: 720k]
  Branch (1763:23): [True: 8, False: 38]
1764
            
!(async_err = PyErr_CheckSignals())8
);
  Branch (1764:13): [True: 8, False: 0]
1765
    _Py_END_SUPPRESS_IPH
1766
1767
    if (async_err) {
  Branch (1767:9): [True: 0, False: 720k]
1768
        /* read() was interrupted by a signal (failed with EINTR)
1769
         * and the Python signal handler raised an exception */
1770
        errno = err;
1771
        assert(errno == EINTR && PyErr_Occurred());
1772
        return -1;
1773
    }
1774
    if (n < 0) {
  Branch (1774:9): [True: 38, False: 720k]
1775
        PyErr_SetFromErrno(PyExc_OSError);
1776
        errno = err;
1777
        return -1;
1778
    }
1779
1780
    return n;
1781
}
1782
1783
static Py_ssize_t
1784
_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
1785
{
1786
    Py_ssize_t n;
1787
    int err;
1788
    int async_err = 0;
1789
1790
    _Py_BEGIN_SUPPRESS_IPH
1791
#ifdef MS_WINDOWS
1792
    if (count > 32767) {
1793
        /* Issue #11395: the Windows console returns an error (12: not
1794
           enough space error) on writing into stdout if stdout mode is
1795
           binary and the length is greater than 66,000 bytes (or less,
1796
           depending on heap usage). */
1797
        if (gil_held) {
1798
            Py_BEGIN_ALLOW_THREADS
1799
            if (isatty(fd)) {
1800
                count = 32767;
1801
            }
1802
            Py_END_ALLOW_THREADS
1803
        } else {
1804
            if (isatty(fd)) {
1805
                count = 32767;
1806
            }
1807
        }
1808
    }
1809
#endif
1810
    if (count > _PY_WRITE_MAX) {
  Branch (1810:9): [True: 0, False: 3.56M]
1811
        count = _PY_WRITE_MAX;
1812
    }
1813
1814
    if (gil_held) {
  Branch (1814:9): [True: 3.55M, False: 6.35k]
1815
        do {
1816
            Py_BEGIN_ALLOW_THREADS
1817
            errno = 0;
1818
#ifdef MS_WINDOWS
1819
            n = write(fd, buf, (int)count);
1820
#else
1821
            n = write(fd, buf, count);
1822
#endif
1823
            /* save/restore errno because PyErr_CheckSignals()
1824
             * and PyErr_SetFromErrno() can modify it */
1825
            err = errno;
1826
            Py_END_ALLOW_THREADS
1827
        } while (n < 0 && 
err == EINTR47
&&
  Branch (1827:18): [True: 47, False: 3.55M]
  Branch (1827:27): [True: 4, False: 43]
1828
                
!(async_err = PyErr_CheckSignals())4
);
  Branch (1828:17): [True: 4, False: 0]
1829
    }
1830
    else {
1831
        do {
1832
            errno = 0;
1833
#ifdef MS_WINDOWS
1834
            n = write(fd, buf, (int)count);
1835
#else
1836
            n = write(fd, buf, count);
1837
#endif
1838
            err = errno;
1839
        } while (n < 0 && 
err == EINTR0
);
  Branch (1839:18): [True: 0, False: 6.35k]
  Branch (1839:27): [True: 0, False: 0]
1840
    }
1841
    _Py_END_SUPPRESS_IPH
1842
1843
    if (async_err) {
  Branch (1843:9): [True: 0, False: 3.56M]
1844
        /* write() was interrupted by a signal (failed with EINTR)
1845
           and the Python signal handler raised an exception (if gil_held is
1846
           nonzero). */
1847
        errno = err;
1848
        assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
1849
        return -1;
1850
    }
1851
    if (n < 0) {
  Branch (1851:9): [True: 43, False: 3.56M]
1852
        if (gil_held)
  Branch (1852:13): [True: 43, False: 0]
1853
            PyErr_SetFromErrno(PyExc_OSError);
1854
        errno = err;
1855
        return -1;
1856
    }
1857
1858
    return n;
1859
}
1860
1861
/* Write count bytes of buf into fd.
1862
1863
   On success, return the number of written bytes, it can be lower than count
1864
   including 0. On error, raise an exception, set errno and return -1.
1865
1866
   When interrupted by a signal (write() fails with EINTR), retry the syscall.
1867
   If the Python signal handler raises an exception, the function returns -1
1868
   (the syscall is not retried).
1869
1870
   Release the GIL to call write(). The caller must hold the GIL. */
1871
Py_ssize_t
1872
_Py_write(int fd, const void *buf, size_t count)
1873
{
1874
    assert(PyGILState_Check());
1875
1876
    /* _Py_write() must not be called with an exception set, otherwise the
1877
     * caller may think that write() was interrupted by a signal and the signal
1878
     * handler raised an exception. */
1879
    assert(!PyErr_Occurred());
1880
1881
    return _Py_write_impl(fd, buf, count, 1);
1882
}
1883
1884
/* Write count bytes of buf into fd.
1885
 *
1886
 * On success, return the number of written bytes, it can be lower than count
1887
 * including 0. On error, set errno and return -1.
1888
 *
1889
 * When interrupted by a signal (write() fails with EINTR), retry the syscall
1890
 * without calling the Python signal handler. */
1891
Py_ssize_t
1892
_Py_write_noraise(int fd, const void *buf, size_t count)
1893
{
1894
    return _Py_write_impl(fd, buf, count, 0);
1895
}
1896
1897
#ifdef HAVE_READLINK
1898
1899
/* Read value of symbolic link. Encode the path to the locale encoding, decode
1900
   the result from the locale encoding.
1901
1902
   Return -1 on encoding error, on readlink() error, if the internal buffer is
1903
   too short, on decoding error, or if 'buf' is too short. */
1904
int
1905
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
1906
{
1907
    char *cpath;
1908
    char cbuf[MAXPATHLEN];
1909
    size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
1910
    wchar_t *wbuf;
1911
    Py_ssize_t res;
1912
    size_t r1;
1913
1914
    cpath = _Py_EncodeLocaleRaw(path, NULL);
1915
    if (cpath == NULL) {
  Branch (1915:9): [True: 0, False: 283]
1916
        errno = EINVAL;
1917
        return -1;
1918
    }
1919
    res = readlink(cpath, cbuf, cbuf_len);
1920
    PyMem_RawFree(cpath);
1921
    if (res == -1) {
  Branch (1921:9): [True: 283, False: 0]
1922
        return -1;
1923
    }
1924
    if ((size_t)res == cbuf_len) {
  Branch (1924:9): [True: 0, False: 0]
1925
        errno = EINVAL;
1926
        return -1;
1927
    }
1928
    cbuf[res] = '\0'; /* buf will be null terminated */
1929
    wbuf = Py_DecodeLocale(cbuf, &r1);
1930
    if (wbuf == NULL) {
  Branch (1930:9): [True: 0, False: 0]
1931
        errno = EINVAL;
1932
        return -1;
1933
    }
1934
    /* wbuf must have space to store the trailing NUL character */
1935
    if (buflen <= r1) {
  Branch (1935:9): [True: 0, False: 0]
1936
        PyMem_RawFree(wbuf);
1937
        errno = EINVAL;
1938
        return -1;
1939
    }
1940
    wcsncpy(buf, wbuf, buflen);
1941
    PyMem_RawFree(wbuf);
1942
    return (int)r1;
1943
}
1944
#endif
1945
1946
#ifdef HAVE_REALPATH
1947
1948
/* Return the canonicalized absolute pathname. Encode path to the locale
1949
   encoding, decode the result from the locale encoding.
1950
1951
   Return NULL on encoding error, realpath() error, decoding error
1952
   or if 'resolved_path' is too short. */
1953
wchar_t*
1954
_Py_wrealpath(const wchar_t *path,
1955
              wchar_t *resolved_path, size_t resolved_path_len)
1956
{
1957
    char *cpath;
1958
    char cresolved_path[MAXPATHLEN];
1959
    wchar_t *wresolved_path;
1960
    char *res;
1961
    size_t r;
1962
    cpath = _Py_EncodeLocaleRaw(path, NULL);
1963
    if (cpath == NULL) {
  Branch (1963:9): [True: 0, False: 4]
1964
        errno = EINVAL;
1965
        return NULL;
1966
    }
1967
    res = realpath(cpath, cresolved_path);
1968
    PyMem_RawFree(cpath);
1969
    if (res == NULL)
  Branch (1969:9): [True: 4, False: 0]
1970
        return NULL;
1971
1972
    wresolved_path = Py_DecodeLocale(cresolved_path, &r);
1973
    if (wresolved_path == NULL) {
  Branch (1973:9): [True: 0, False: 0]
1974
        errno = EINVAL;
1975
        return NULL;
1976
    }
1977
    /* wresolved_path must have space to store the trailing NUL character */
1978
    if (resolved_path_len <= r) {
  Branch (1978:9): [True: 0, False: 0]
1979
        PyMem_RawFree(wresolved_path);
1980
        errno = EINVAL;
1981
        return NULL;
1982
    }
1983
    wcsncpy(resolved_path, wresolved_path, resolved_path_len);
1984
    PyMem_RawFree(wresolved_path);
1985
    return resolved_path;
1986
}
1987
#endif
1988
1989
1990
int
1991
_Py_isabs(const wchar_t *path)
1992
{
1993
#ifdef MS_WINDOWS
1994
    const wchar_t *tail;
1995
    HRESULT hr = PathCchSkipRoot(path, &tail);
1996
    if (FAILED(hr) || path == tail) {
1997
        return 0;
1998
    }
1999
    if (tail == &path[1] && (path[0] == SEP || path[0] == ALTSEP)) {
2000
        // Exclude paths with leading SEP
2001
        return 0;
2002
    }
2003
    if (tail == &path[2] && path[1] == L':') {
2004
        // Exclude drive-relative paths (e.g. C:filename.ext)
2005
        return 0;
2006
    }
2007
    return 1;
2008
#else
2009
    return (path[0] == SEP);
2010
#endif
2011
}
2012
2013
2014
/* Get an absolute path.
2015
   On error (ex: fail to get the current directory), return -1.
2016
   On memory allocation failure, set *abspath_p to NULL and return 0.
2017
   On success, return a newly allocated to *abspath_p to and return 0.
2018
   The string must be freed by PyMem_RawFree(). */
2019
int
2020
_Py_abspath(const wchar_t *path, wchar_t **abspath_p)
2021
{
2022
    if (path[0] == '\0' || !wcscmp(path, L".")) {
  Branch (2022:9): [True: 0, False: 106]
  Branch (2022:28): [True: 5, False: 101]
2023
        wchar_t cwd[MAXPATHLEN + 1];
2024
        cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2025
        if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
  Branch (2025:13): [True: 0, False: 5]
2026
            /* unable to get the current directory */
2027
            return -1;
2028
        }
2029
        *abspath_p = _PyMem_RawWcsdup(cwd);
2030
        return 0;
2031
    }
2032
2033
    if (_Py_isabs(path)) {
  Branch (2033:9): [True: 30, False: 71]
2034
        *abspath_p = _PyMem_RawWcsdup(path);
2035
        return 0;
2036
    }
2037
2038
#ifdef MS_WINDOWS
2039
    return _PyOS_getfullpathname(path, abspath_p);
2040
#else
2041
    wchar_t cwd[MAXPATHLEN + 1];
2042
    cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2043
    if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
  Branch (2043:9): [True: 0, False: 71]
2044
        /* unable to get the current directory */
2045
        return -1;
2046
    }
2047
2048
    size_t cwd_len = wcslen(cwd);
2049
    size_t path_len = wcslen(path);
2050
    size_t len = cwd_len + 1 + path_len + 1;
2051
    if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
  Branch (2051:9): [True: 71, False: 0]
2052
        *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
2053
    }
2054
    else {
2055
        *abspath_p = NULL;
2056
    }
2057
    if (*abspath_p == NULL) {
  Branch (2057:9): [True: 0, False: 71]
2058
        return 0;
2059
    }
2060
2061
    wchar_t *abspath = *abspath_p;
2062
    memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
2063
    abspath += cwd_len;
2064
2065
    *abspath = (wchar_t)SEP;
2066
    abspath++;
2067
2068
    memcpy(abspath, path, path_len * sizeof(wchar_t));
2069
    abspath += path_len;
2070
2071
    *abspath = 0;
2072
    return 0;
2073
#endif
2074
}
2075
2076
2077
// The caller must ensure "buffer" is big enough.
2078
static int
2079
join_relfile(wchar_t *buffer, size_t bufsize,
2080
             const wchar_t *dirname, const wchar_t *relfile)
2081
{
2082
#ifdef MS_WINDOWS
2083
    if (FAILED(PathCchCombineEx(buffer, bufsize, dirname, relfile,
2084
        PATHCCH_ALLOW_LONG_PATHS))) {
2085
        return -1;
2086
    }
2087
#else
2088
    assert(!_Py_isabs(relfile));
2089
    size_t dirlen = wcslen(dirname);
2090
    size_t rellen = wcslen(relfile);
2091
    size_t maxlen = bufsize - 1;
2092
    if (maxlen > MAXPATHLEN || dirlen >= maxlen || rellen >= maxlen - dirlen) {
  Branch (2092:9): [True: 0, False: 1.94k]
  Branch (2092:32): [True: 0, False: 1.94k]
  Branch (2092:52): [True: 0, False: 1.94k]
2093
        return -1;
2094
    }
2095
    if (dirlen == 0) {
  Branch (2095:9): [True: 0, False: 1.94k]
2096
        // We do not add a leading separator.
2097
        wcscpy(buffer, relfile);
2098
    }
2099
    else {
2100
        if (dirname != buffer) {
  Branch (2100:13): [True: 0, False: 1.94k]
2101
            wcscpy(buffer, dirname);
2102
        }
2103
        size_t relstart = dirlen;
2104
        if (dirlen > 1 && dirname[dirlen - 1] != SEP) {
  Branch (2104:13): [True: 1.94k, False: 0]
  Branch (2104:27): [True: 1.93k, False: 15]
2105
            buffer[dirlen] = SEP;
2106
            relstart += 1;
2107
        }
2108
        wcscpy(&buffer[relstart], relfile);
2109
    }
2110
#endif
2111
    return 0;
2112
}
2113
2114
/* Join the two paths together, like os.path.join().  Return NULL
2115
   if memory could not be allocated.  The caller is responsible
2116
   for calling PyMem_RawFree() on the result. */
2117
wchar_t *
2118
_Py_join_relfile(const wchar_t *dirname, const wchar_t *relfile)
2119
{
2120
    assert(dirname != NULL && relfile != NULL);
2121
#ifndef MS_WINDOWS
2122
    assert(!_Py_isabs(relfile));
2123
#endif
2124
    size_t maxlen = wcslen(dirname) + 1 + wcslen(relfile);
2125
    size_t bufsize = maxlen + 1;
2126
    wchar_t *filename = PyMem_RawMalloc(bufsize * sizeof(wchar_t));
2127
    if (filename == NULL) {
  Branch (2127:9): [True: 0, False: 0]
2128
        return NULL;
2129
    }
2130
    assert(wcslen(dirname) < MAXPATHLEN);
2131
    assert(wcslen(relfile) < MAXPATHLEN - wcslen(dirname));
2132
    join_relfile(filename, bufsize, dirname, relfile);
2133
    return filename;
2134
}
2135
2136
/* Append 'relfile' to the path already stored in 'dirname', in place, like
   os.path.join().
     dirname: the target buffer, already holding the directory name
              including its trailing NUL
     relfile: the path to append; this must be a relative path
     bufsize: total allocated size of the 'dirname' buffer
   Return 0 on success, -1 if anything is wrong with the path lengths. */
int
_Py_add_relfile(wchar_t *dirname, const wchar_t *relfile, size_t bufsize)
{
    assert(dirname != NULL && relfile != NULL);
    assert(bufsize > 0);

    /* The buffer is both the source directory and the destination. */
    wchar_t *target = dirname;
    return join_relfile(target, bufsize, dirname, relfile);
}
2149
2150
2151
size_t
2152
_Py_find_basename(const wchar_t *filename)
2153
{
2154
    for (size_t i = wcslen(filename); i > 0; --i) {
  Branch (2154:39): [True: 0, False: 0]
2155
        if (filename[i] == SEP) {
  Branch (2155:13): [True: 0, False: 0]
2156
            return i + 1;
2157
        }
2158
    }
2159
    return 0;
2160
}
2161
2162
/* In-place path normalisation. Returns the start of the normalized
2163
   path, which will be within the original buffer. Guaranteed to not
2164
   make the path longer, and will not fail. 'size' is the length of
2165
   the path, if known. If -1, the first null character will be assumed
2166
   to be the end of the path. */
2167
wchar_t *
2168
_Py_normpath(wchar_t *path, Py_ssize_t size)
2169
{
2170
    if (!path[0] || 
size == 027.6k
) {
  Branch (2170:9): [True: 43, False: 27.6k]
  Branch (2170:21): [True: 0, False: 27.6k]
2171
        return path;
2172
    }
2173
    wchar_t *pEnd = size >= 0 ? 
&path[size]25.3k
: NULL;
  Branch (2173:21): [True: 25.3k, False: 2.32k]
2174
    wchar_t *p1 = path;     // sequentially scanned address in the path
2175
    wchar_t *p2 = path;     // destination of a scanned character to be ljusted
2176
    wchar_t *minP2 = path;  // the beginning of the destination range
2177
    wchar_t lastC = L'\0';  // the last ljusted character, p2[-1] in most cases
2178
2179
#define IS_END(x) (pEnd ? 
(x) == pEnd1.27M
:
!*(x)97.0k
)
2180
#ifdef ALTSEP
2181
#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP)
2182
#else
2183
#define IS_SEP(x) (*(x) == 
SEP53.5k
)
2184
#endif
2185
#define SEP_OR_END(x) 
(2.48k
IS_SEP2.48k
(x) ||
IS_END1.91k
(x))
2186
2187
    // Skip leading '.\'
2188
    if (p1[0] == L'.' && 
IS_SEP189
(&p1[1])) {
  Branch (2188:9): [True: 189, False: 27.4k]
2189
        path = &path[2];
2190
        while (IS_SEP(path) && 
!2
IS_END2
(path)) {
  Branch (2190:32): [True: 2, False: 0]
2191
            path++;
2192
        }
2193
        p1 = p2 = minP2 = path;
2194
        lastC = SEP;
2195
    }
2196
#ifdef MS_WINDOWS
2197
    // Skip past drive segment and update minP2
2198
    else if (p1[0] && p1[1] == L':') {
2199
        *p2++ = *p1++;
2200
        *p2++ = *p1++;
2201
        minP2 = p2;
2202
        lastC = L':';
2203
    }
2204
    // Skip past all \\-prefixed paths, including \\?\, \\.\,
2205
    // and network paths, including the first segment.
2206
    else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1])) {
2207
        int sepCount = 2;
2208
        *p2++ = SEP;
2209
        *p2++ = SEP;
2210
        p1 += 2;
2211
        for (; !IS_END(p1) && sepCount; ++p1) {
2212
            if (IS_SEP(p1)) {
2213
                --sepCount;
2214
                *p2++ = lastC = SEP;
2215
            } else {
2216
                *p2++ = lastC = *p1;
2217
            }
2218
        }
2219
        if (sepCount) {
2220
            minP2 = p2;      // Invalid path
2221
        } else {
2222
            minP2 = p2 - 1;  // Absolute path has SEP at minP2
2223
        }
2224
    }
2225
#else
2226
    // Skip past two leading SEPs
2227
    else if (IS_SEP(&p1[0]) && 
IS_SEP23.1k
(&p1[1]) &&
!55
IS_SEP55
(&p1[2])) {
  Branch (2227:50): [True: 30, False: 25]
2228
        *p2++ = *p1++;
2229
        *p2++ = *p1++;
2230
        minP2 = p2 - 1;  // Absolute path has SEP at minP2
2231
        lastC = SEP;
2232
    }
2233
#endif /* MS_WINDOWS */
2234
2235
    /* if pEnd is specified, check that. Else, check for null terminator */
2236
    for (; !IS_END(p1); 
++p11.34M
) {
  Branch (2236:12): [True: 1.34M, False: 27.6k]
2237
        wchar_t c = *p1;
2238
#ifdef ALTSEP
2239
        if (c == ALTSEP) {
2240
            c = SEP;
2241
        }
2242
#endif
2243
        if (lastC == SEP) {
  Branch (2243:13): [True: 167k, False: 1.17M]
2244
            if (c == L'.') {
  Branch (2244:17): [True: 1.41k, False: 165k]
2245
                int sep_at_1 = SEP_OR_END(&p1[1]);
2246
                int sep_at_2 = !sep_at_1 && 
SEP_OR_END1.06k
(&p1[2]);
  Branch (2246:32): [True: 1.06k, False: 352]
2247
                if (sep_at_2 && 
p1[1] == L'.'432
) {
  Branch (2247:21): [True: 432, False: 986]
  Branch (2247:33): [True: 419, False: 13]
2248
                    wchar_t *p3 = p2;
2249
                    while (p3 != minP2 && 
*--p3 == 555
SEP555
)
{ }410
  Branch (2249:28): [True: 555, False: 274]
  Branch (2249:43): [True: 410, False: 145]
2250
                    while (p3 != minP2 && 
*(p3 - 1) != 719
SEP719
)
{ --p3; }612
  Branch (2250:28): [True: 719, False: 312]
  Branch (2250:43): [True: 612, False: 107]
2251
                    if (p2 == minP2
  Branch (2251:25): [True: 9, False: 410]
2252
                        || 
(410
p3[0] == L'.'410
&&
p3[1] == L'.'10
&&
IS_SEP10
(&p3[2])))
  Branch (2252:29): [True: 10, False: 400]
  Branch (2252:46): [True: 10, False: 0]
2253
                    {
2254
                        // Previous segment is also ../, so append instead.
2255
                        // Relative path does not absorb ../ at minP2 as well.
2256
                        *p2++ = L'.';
2257
                        *p2++ = L'.';
2258
                        lastC = L'.';
2259
                    } else if (p3[0] == SEP) {
  Branch (2259:32): [True: 265, False: 135]
2260
                        // Absolute path, so absorb segment
2261
                        p2 = p3 + 1;
2262
                    } else {
2263
                        p2 = p3;
2264
                    }
2265
                    p1 += 1;
2266
                } else if (sep_at_1) {
  Branch (2266:28): [True: 352, False: 647]
2267
                } else {
2268
                    *p2++ = lastC = c;
2269
                }
2270
            } else if (c == SEP) {
  Branch (2270:24): [True: 662, False: 165k]
2271
            } else {
2272
                *p2++ = lastC = c;
2273
            }
2274
        } else {
2275
            *p2++ = lastC = c;
2276
        }
2277
    }
2278
    *p2 = L'\0';
2279
    if (p2 != minP2) {
  Branch (2279:9): [True: 27.6k, False: 2]
2280
        while (--p2 != minP2 && 
*p2 == 28.0k
SEP28.0k
) {
  Branch (2280:16): [True: 28.0k, False: 144]
  Branch (2280:33): [True: 494, False: 27.5k]
2281
            *p2 = L'\0';
2282
        }
2283
    }
2284
#undef SEP_OR_END
2285
#undef IS_SEP
2286
#undef IS_END
2287
    return path;
2288
}
2289
2290
2291
/* Get the current directory. buflen is the buffer size in wide characters
2292
   including the null character. Decode the path from the locale encoding.
2293
2294
   Return NULL on getcwd() error, on decoding error, or if 'buf' is
2295
   too short. */
2296
wchar_t*
2297
_Py_wgetcwd(wchar_t *buf, size_t buflen)
2298
{
2299
#ifdef MS_WINDOWS
2300
    int ibuflen = (int)Py_MIN(buflen, INT_MAX);
2301
    return _wgetcwd(buf, ibuflen);
2302
#else
2303
    char fname[MAXPATHLEN];
2304
    wchar_t *wname;
2305
    size_t len;
2306
2307
    if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
  Branch (2307:9): [True: 0, False: 77]
2308
        return NULL;
2309
    wname = Py_DecodeLocale(fname, &len);
2310
    if (wname == NULL)
  Branch (2310:9): [True: 0, False: 77]
2311
        return NULL;
2312
    /* wname must have space to store the trailing NUL character */
2313
    if (buflen <= len) {
  Branch (2313:9): [True: 0, False: 77]
2314
        PyMem_RawFree(wname);
2315
        return NULL;
2316
    }
2317
    wcsncpy(buf, wname, buflen);
2318
    PyMem_RawFree(wname);
2319
    return buf;
2320
#endif
2321
}
2322
2323
/* Duplicate a file descriptor. The new file descriptor is created as
2324
   non-inheritable. Return a new file descriptor on success, raise an OSError
2325
   exception and return -1 on error.
2326
2327
   The GIL is released to call dup(). The caller must hold the GIL. */
2328
int
2329
_Py_dup(int fd)
2330
{
2331
#ifdef MS_WINDOWS
2332
    HANDLE handle;
2333
#endif
2334
2335
    assert(PyGILState_Check());
2336
2337
#ifdef MS_WINDOWS
2338
    handle = _Py_get_osfhandle(fd);
2339
    if (handle == INVALID_HANDLE_VALUE)
2340
        return -1;
2341
2342
    Py_BEGIN_ALLOW_THREADS
2343
    _Py_BEGIN_SUPPRESS_IPH
2344
    fd = dup(fd);
2345
    _Py_END_SUPPRESS_IPH
2346
    Py_END_ALLOW_THREADS
2347
    if (fd < 0) {
2348
        PyErr_SetFromErrno(PyExc_OSError);
2349
        return -1;
2350
    }
2351
2352
    if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2353
        _Py_BEGIN_SUPPRESS_IPH
2354
        close(fd);
2355
        _Py_END_SUPPRESS_IPH
2356
        return -1;
2357
    }
2358
#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2359
    Py_BEGIN_ALLOW_THREADS
2360
    _Py_BEGIN_SUPPRESS_IPH
2361
    fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
2362
    _Py_END_SUPPRESS_IPH
2363
    Py_END_ALLOW_THREADS
2364
    if (fd < 0) {
  Branch (2364:9): [True: 2, False: 13.2k]
2365
        PyErr_SetFromErrno(PyExc_OSError);
2366
        return -1;
2367
    }
2368
2369
#else
2370
    Py_BEGIN_ALLOW_THREADS
2371
    _Py_BEGIN_SUPPRESS_IPH
2372
    fd = dup(fd);
2373
    _Py_END_SUPPRESS_IPH
2374
    Py_END_ALLOW_THREADS
2375
    if (fd < 0) {
2376
        PyErr_SetFromErrno(PyExc_OSError);
2377
        return -1;
2378
    }
2379
2380
    if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2381
        _Py_BEGIN_SUPPRESS_IPH
2382
        close(fd);
2383
        _Py_END_SUPPRESS_IPH
2384
        return -1;
2385
    }
2386
#endif
2387
    return fd;
2388
}
2389
2390
#ifndef MS_WINDOWS
2391
/* Get the blocking mode of the file descriptor.
2392
   Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2393
   raise an exception and return -1 on error. */
2394
int
2395
_Py_get_blocking(int fd)
2396
{
2397
    int flags;
2398
    _Py_BEGIN_SUPPRESS_IPH
2399
    flags = fcntl(fd, F_GETFL, 0);
2400
    _Py_END_SUPPRESS_IPH
2401
    if (flags < 0) {
  Branch (2401:9): [True: 1, False: 297]
2402
        PyErr_SetFromErrno(PyExc_OSError);
2403
        return -1;
2404
    }
2405
2406
    return !(flags & O_NONBLOCK);
2407
}
2408
2409
/* Set the blocking mode of the specified file descriptor.
2410
2411
   Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2412
   otherwise.
2413
2414
   Return 0 on success, raise an exception and return -1 on error. */
2415
int
2416
_Py_set_blocking(int fd, int blocking)
2417
{
2418
/* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
2419
   Use fcntl() instead. */
2420
#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
2421
    int arg = !blocking;
2422
    if (ioctl(fd, FIONBIO, &arg) < 0)
  Branch (2422:9): [True: 1, False: 251]
2423
        goto error;
2424
#else
2425
    int flags, res;
2426
2427
    _Py_BEGIN_SUPPRESS_IPH
2428
    flags = fcntl(fd, F_GETFL, 0);
2429
    if (flags >= 0) {
2430
        if (blocking)
2431
            flags = flags & (~O_NONBLOCK);
2432
        else
2433
            flags = flags | O_NONBLOCK;
2434
2435
        res = fcntl(fd, F_SETFL, flags);
2436
    } else {
2437
        res = -1;
2438
    }
2439
    _Py_END_SUPPRESS_IPH
2440
2441
    if (res < 0)
2442
        goto error;
2443
#endif
2444
    return 0;
2445
2446
error:
2447
    PyErr_SetFromErrno(PyExc_OSError);
2448
    return -1;
2449
}
2450
#else   /* MS_WINDOWS */
2451
void*
2452
_Py_get_osfhandle_noraise(int fd)
2453
{
2454
    void *handle;
2455
    _Py_BEGIN_SUPPRESS_IPH
2456
    handle = (void*)_get_osfhandle(fd);
2457
    _Py_END_SUPPRESS_IPH
2458
    return handle;
2459
}
2460
2461
void*
2462
_Py_get_osfhandle(int fd)
2463
{
2464
    void *handle = _Py_get_osfhandle_noraise(fd);
2465
    if (handle == INVALID_HANDLE_VALUE)
2466
        PyErr_SetFromErrno(PyExc_OSError);
2467
2468
    return handle;
2469
}
2470
2471
int
2472
_Py_open_osfhandle_noraise(void *handle, int flags)
2473
{
2474
    int fd;
2475
    _Py_BEGIN_SUPPRESS_IPH
2476
    fd = _open_osfhandle((intptr_t)handle, flags);
2477
    _Py_END_SUPPRESS_IPH
2478
    return fd;
2479
}
2480
2481
int
2482
_Py_open_osfhandle(void *handle, int flags)
2483
{
2484
    int fd = _Py_open_osfhandle_noraise(handle, flags);
2485
    if (fd == -1)
2486
        PyErr_SetFromErrno(PyExc_OSError);
2487
2488
    return fd;
2489
}
2490
#endif  /* MS_WINDOWS */
2491
2492
int
2493
_Py_GetLocaleconvNumeric(struct lconv *lc,
2494
                         PyObject **decimal_point, PyObject **thousands_sep)
2495
{
2496
    assert(decimal_point != NULL);
2497
    assert(thousands_sep != NULL);
2498
2499
#ifndef MS_WINDOWS
2500
    int change_locale = 0;
2501
    if ((strlen(lc->decimal_point) > 1 || 
((unsigned char)lc->decimal_point[0]) > 127745
)) {
  Branch (2501:10): [True: 10, False: 745]
  Branch (2501:43): [True: 0, False: 745]
2502
        change_locale = 1;
2503
    }
2504
    if ((strlen(lc->thousands_sep) > 1 || 
((unsigned char)lc->thousands_sep[0]) > 127740
)) {
  Branch (2504:10): [True: 15, False: 740]
  Branch (2504:43): [True: 111, False: 629]
2505
        change_locale = 1;
2506
    }
2507
2508
    /* Keep a copy of the LC_CTYPE locale */
2509
    char *oldloc = NULL, *loc = NULL;
2510
    if (change_locale) {
  Branch (2510:9): [True: 126, False: 629]
2511
        oldloc = setlocale(LC_CTYPE, NULL);
2512
        if (!oldloc) {
  Branch (2512:13): [True: 0, False: 126]
2513
            PyErr_SetString(PyExc_RuntimeWarning,
2514
                            "failed to get LC_CTYPE locale");
2515
            return -1;
2516
        }
2517
2518
        oldloc = _PyMem_Strdup(oldloc);
2519
        if (!oldloc) {
  Branch (2519:13): [True: 0, False: 126]
2520
            PyErr_NoMemory();
2521
            return -1;
2522
        }
2523
2524
        loc = setlocale(LC_NUMERIC, NULL);
2525
        if (loc != NULL && strcmp(loc, oldloc) == 0) {
  Branch (2525:13): [True: 126, False: 0]
  Branch (2525:28): [True: 125, False: 1]
2526
            loc = NULL;
2527
        }
2528
2529
        if (loc != NULL) {
  Branch (2529:13): [True: 1, False: 125]
2530
            /* Only set the locale temporarily the LC_CTYPE locale
2531
               if LC_NUMERIC locale is different than LC_CTYPE locale and
2532
               decimal_point and/or thousands_sep are non-ASCII or longer than
2533
               1 byte */
2534
            setlocale(LC_CTYPE, loc);
2535
        }
2536
    }
2537
2538
#define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2539
#else /* MS_WINDOWS */
2540
/* Use _W_* fields of Windows strcut lconv */
2541
#define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2542
#endif /* MS_WINDOWS */
2543
2544
    int res = -1;
2545
2546
    *decimal_point = GET_LOCALE_STRING(decimal_point);
2547
    if (*decimal_point == NULL) {
  Branch (2547:9): [True: 0, False: 755]
2548
        goto done;
2549
    }
2550
2551
    *thousands_sep = GET_LOCALE_STRING(thousands_sep);
2552
    if (*thousands_sep == NULL) {
  Branch (2552:9): [True: 0, False: 755]
2553
        goto done;
2554
    }
2555
2556
    res = 0;
2557
2558
done:
2559
#ifndef MS_WINDOWS
2560
    if (loc != NULL) {
  Branch (2560:9): [True: 1, False: 754]
2561
        setlocale(LC_CTYPE, oldloc);
2562
    }
2563
    PyMem_Free(oldloc);
2564
#endif
2565
    return res;
2566
2567
#undef GET_LOCALE_STRING
2568
}
2569
2570
/* Our selection logic for which function to use is as follows:
2571
 * 1. If close_range(2) is available, always prefer that; it's better for
2572
 *    contiguous ranges like this than fdwalk(3) which entails iterating over
2573
 *    the entire fd space and simply doing nothing for those outside the range.
2574
 * 2. If closefrom(2) is available, we'll attempt to use that next if we're
2575
 *    closing up to sysconf(_SC_OPEN_MAX).
2576
 * 2a. Fallback to fdwalk(3) if we're not closing up to sysconf(_SC_OPEN_MAX),
2577
 *    as that will be more performant if the range happens to have any chunk of
2578
 *    non-opened fd in the middle.
2579
 * 2b. If fdwalk(3) isn't available, just do a plain close(2) loop.
2580
 */
2581
#ifdef __FreeBSD__
2582
#  define USE_CLOSEFROM
2583
#endif /* __FreeBSD__ */
2584
2585
#ifdef HAVE_FDWALK
2586
#  define USE_FDWALK
2587
#endif /* HAVE_FDWALK */
2588
2589
#ifdef USE_FDWALK
2590
/* fdwalk() callback used by _Py_closerange().

   'lohi' points to two ints: lohi[0] is the first descriptor to close and
   lohi[1] is one past the last descriptor to close.  Descriptors inside
   that range are closed, ignoring errors.

   Return 1 once 'fd' is at or beyond lohi[1], 0 otherwise. */
static int
_fdwalk_close_func(void *lohi, int fd)
{
    const int *bounds = (int *)lohi;
    const int range_start = bounds[0];
    const int range_stop = bounds[1];

    if (fd >= range_stop) {
        return 1;
    }
    if (fd >= range_start) {
        /* Ignore errors */
        (void)close(fd);
    }
    return 0;
}
2605
#endif /* USE_FDWALK */
2606
2607
/* Closes all file descriptors in [first, last], ignoring errors. */
2608
void
2609
_Py_closerange(int first, int last)
2610
{
2611
    first = Py_MAX(first, 0);
2612
    _Py_BEGIN_SUPPRESS_IPH
2613
#ifdef HAVE_CLOSE_RANGE
2614
    if (close_range(first, last, 0) == 0) {
2615
        /* close_range() ignores errors when it closes file descriptors.
2616
         * Possible reasons of an error return are lack of kernel support
2617
         * or denial of the underlying syscall by a seccomp sandbox on Linux.
2618
         * Fallback to other methods in case of any error. */
2619
    }
2620
    else
2621
#endif /* HAVE_CLOSE_RANGE */
2622
#ifdef USE_CLOSEFROM
2623
    if (last >= sysconf(_SC_OPEN_MAX)) {
2624
        /* Any errors encountered while closing file descriptors are ignored */
2625
        closefrom(first);
2626
    }
2627
    else
2628
#endif /* USE_CLOSEFROM */
2629
#ifdef USE_FDWALK
2630
    {
2631
        int lohi[2];
2632
        lohi[0] = first;
2633
        lohi[1] = last + 1;
2634
        fdwalk(_fdwalk_close_func, lohi);
2635
    }
2636
#else
2637
    {
2638
        for (int i = first; i <= last; i++) {
  Branch (2638:29): [True: 0, False: 0]
2639
            /* Ignore errors */
2640
            (void)close(i);
2641
        }
2642
    }
2643
#endif /* USE_FDWALK */
2644
    _Py_END_SUPPRESS_IPH
2645
}