LCOV - code coverage report
Current view: top level - Python - fileutils.c (source / functions) Hit Total Coverage
Test: CPython lcov report Lines: 543 744 73.0 %
Date: 2022-07-07 18:19:46 Functions: 49 55 89.1 %

          Line data    Source code
       1             : #include "Python.h"
       2             : #include "pycore_fileutils.h"     // fileutils definitions
       3             : #include "pycore_runtime.h"       // _PyRuntime
       4             : #include "osdefs.h"               // SEP
       5             : #include <locale.h>
       6             : #include <stdlib.h>               // mbstowcs()
       7             : 
       8             : #ifdef MS_WINDOWS
       9             : #  include <malloc.h>
      10             : #  include <windows.h>
      11             : #  include <pathcch.h>            // PathCchCombineEx
      12             : extern int winerror_to_errno(int);
      13             : #endif
      14             : 
      15             : #ifdef HAVE_LANGINFO_H
      16             : #include <langinfo.h>
      17             : #endif
      18             : 
      19             : #ifdef HAVE_SYS_IOCTL_H
      20             : #include <sys/ioctl.h>
      21             : #endif
      22             : 
      23             : #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
      24             : #include <iconv.h>
      25             : #endif
      26             : 
      27             : #ifdef HAVE_FCNTL_H
      28             : #include <fcntl.h>
      29             : #endif /* HAVE_FCNTL_H */
      30             : 
      31             : #ifdef O_CLOEXEC
      32             : /* Does open() support the O_CLOEXEC flag? Possible values:
      33             : 
      34             :    -1: unknown
      35             :     0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
      36             :     1: open() supports O_CLOEXEC flag, close-on-exec is set
      37             : 
      38             :    The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
      39             :    and os.open(). */
      40             : int _Py_open_cloexec_works = -1;
      41             : #endif
      42             : 
      43             : // The value must be the same in unicodeobject.c.
      44             : #define MAX_UNICODE 0x10ffff
      45             : 
      46             : // mbstowcs() and mbrtowc() errors
      47             : static const size_t DECODE_ERROR = ((size_t)-1);
      48             : static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
      49             : 
      50             : 
      51             : static int
      52     2572250 : get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
      53             : {
      54     2572250 :     switch (errors)
      55             :     {
      56       18525 :     case _Py_ERROR_STRICT:
      57       18525 :         *surrogateescape = 0;
      58       18525 :         return 0;
      59     2553720 :     case _Py_ERROR_SURROGATEESCAPE:
      60     2553720 :         *surrogateescape = 1;
      61     2553720 :         return 0;
      62           4 :     default:
      63           4 :         return -1;
      64             :     }
      65             : }
      66             : 
      67             : 
      68             : PyObject *
      69           1 : _Py_device_encoding(int fd)
      70             : {
      71             :     int valid;
      72           1 :     Py_BEGIN_ALLOW_THREADS
      73             :     _Py_BEGIN_SUPPRESS_IPH
      74           1 :     valid = isatty(fd);
      75             :     _Py_END_SUPPRESS_IPH
      76           1 :     Py_END_ALLOW_THREADS
      77           1 :     if (!valid)
      78           1 :         Py_RETURN_NONE;
      79             : 
      80             : #if defined(MS_WINDOWS)
      81             :     UINT cp;
      82             :     if (fd == 0)
      83             :         cp = GetConsoleCP();
      84             :     else if (fd == 1 || fd == 2)
      85             :         cp = GetConsoleOutputCP();
      86             :     else
      87             :         cp = 0;
      88             :     /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
      89             :        has no console */
      90             :     if (cp == 0) {
      91             :         Py_RETURN_NONE;
      92             :     }
      93             : 
      94             :     return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
      95             : #else
      96           0 :     if (_PyRuntime.preconfig.utf8_mode) {
      97             :         _Py_DECLARE_STR(utf_8, "utf-8");
      98           0 :         return Py_NewRef(&_Py_STR(utf_8));
      99             :     }
     100           0 :     return _Py_GetLocaleEncodingObject();
     101             : #endif
     102             : }
     103             : 
     104             : 
     105             : static size_t
     106    41008300 : is_valid_wide_char(wchar_t ch)
     107             : {
     108             : #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
     109             :     /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
     110             :        for non-Unicode locales, which makes values higher than MAX_UNICODE
     111             :        possibly valid. */
     112             :     return 1;
     113             : #endif
     114    41008300 :     if (Py_UNICODE_IS_SURROGATE(ch)) {
     115             :         // Reject lone surrogate characters
     116           0 :         return 0;
     117             :     }
     118    41008300 :     if (ch > MAX_UNICODE) {
     119             :         // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
     120             :         // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
     121             :         // it creates characters outside the [U+0000; U+10ffff] range:
     122             :         // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
     123           3 :         return 0;
     124             :     }
     125    41008300 :     return 1;
     126             : }
     127             : 
     128             : 
     129             : static size_t
     130     4941290 : _Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
     131             : {
     132     4941290 :     size_t count = mbstowcs(dest, src, n);
     133     4941290 :     if (dest != NULL && count != DECODE_ERROR) {
     134    43467200 :         for (size_t i=0; i < count; i++) {
     135    41007900 :             wchar_t ch = dest[i];
     136    41007900 :             if (!is_valid_wide_char(ch)) {
     137           0 :                 return DECODE_ERROR;
     138             :             }
     139             :         }
     140             :     }
     141     4941290 :     return count;
     142             : }
     143             : 
     144             : 
     145             : #ifdef HAVE_MBRTOWC
     146             : static size_t
     147         486 : _Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
     148             : {
     149         486 :     assert(pwc != NULL);
     150         486 :     size_t count = mbrtowc(pwc, str, len, pmbs);
     151         486 :     if (count != 0 && count != DECODE_ERROR && count != INCOMPLETE_CHARACTER) {
     152         362 :         if (!is_valid_wide_char(*pwc)) {
     153           3 :             return DECODE_ERROR;
     154             :         }
     155             :     }
     156         483 :     return count;
     157             : }
     158             : #endif
     159             : 
     160             : 
     161             : #if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
     162             : 
     163             : #define USE_FORCE_ASCII
     164             : 
     165             : extern int _Py_normalize_encoding(const char *, char *, size_t);
     166             : 
     167             : /* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
     168             :    and POSIX locale. nl_langinfo(CODESET) announces an alias of the
     169             :    ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
     170             :    ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
     171             :    locale.getpreferredencoding() codec. For example, if command line arguments
     172             :    are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
     173             :    UnicodeEncodeError instead of retrieving the original byte string.
     174             : 
     175             :    The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C" or
     176             :    "POSIX", nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and
     177             :    at least one byte in range 0x80-0xff can be decoded from the locale
     178             :    encoding. The workaround is also enabled on error, for example if getting
     179             :    the locale failed.
     180             : 
     181             :    On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
     182             :    announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
     183             :    ASCII encoding in this case.
     184             : 
     185             :    Values of force_ascii:
     186             : 
     187             :        1: the workaround is used: Py_EncodeLocale() uses
     188             :           encode_ascii_surrogateescape() and Py_DecodeLocale() uses
     189             :           decode_ascii()
     190             :        0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
     191             :           Py_DecodeLocale() uses mbstowcs()
     192             :       -1: unknown, need to call check_force_ascii() to get the value
     193             : */
     194             : static int force_ascii = -1;
     195             : 
     196             : static int
     197        5825 : check_force_ascii(void)
     198             : {
     199        5825 :     char *loc = setlocale(LC_CTYPE, NULL);
     200        5825 :     if (loc == NULL) {
     201           0 :         goto error;
     202             :     }
     203        5825 :     if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
     204             :         /* the LC_CTYPE locale is different than C and POSIX */
     205        5648 :         return 0;
     206             :     }
     207             : 
     208             : #if defined(HAVE_LANGINFO_H) && defined(CODESET)
     209         177 :     const char *codeset = nl_langinfo(CODESET);
     210         177 :     if (!codeset || codeset[0] == '\0') {
     211             :         /* CODESET is not set or empty */
     212           0 :         goto error;
     213             :     }
     214             : 
     215             :     char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
     216         177 :     if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
     217           0 :         goto error;
     218             :     }
     219             : 
     220             : #ifdef __hpux
     221             :     if (strcmp(encoding, "roman8") == 0) {
     222             :         unsigned char ch;
     223             :         wchar_t wch;
     224             :         size_t res;
     225             : 
     226             :         ch = (unsigned char)0xA7;
     227             :         res = _Py_mbstowcs(&wch, (char*)&ch, 1);
     228             :         if (res != DECODE_ERROR && wch == L'\xA7') {
     229             :             /* On HP-UX with C locale or the POSIX locale,
     230             :                nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
     231             :                Latin1 encoding in practice. Force ASCII in this case.
     232             : 
     233             :                Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
     234             :             return 1;
     235             :         }
     236             :     }
     237             : #else
     238         177 :     const char* ascii_aliases[] = {
     239             :         "ascii",
     240             :         /* Aliases from Lib/encodings/aliases.py */
     241             :         "646",
     242             :         "ansi_x3.4_1968",
     243             :         "ansi_x3.4_1986",
     244             :         "ansi_x3_4_1968",
     245             :         "cp367",
     246             :         "csascii",
     247             :         "ibm367",
     248             :         "iso646_us",
     249             :         "iso_646.irv_1991",
     250             :         "iso_ir_6",
     251             :         "us",
     252             :         "us_ascii",
     253             :         NULL
     254             :     };
     255             : 
     256         177 :     int is_ascii = 0;
     257         531 :     for (const char **alias=ascii_aliases; *alias != NULL; alias++) {
     258         531 :         if (strcmp(encoding, *alias) == 0) {
     259         177 :             is_ascii = 1;
     260         177 :             break;
     261             :         }
     262             :     }
     263         177 :     if (!is_ascii) {
     264             :         /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
     265           0 :         return 0;
     266             :     }
     267             : 
     268       22833 :     for (unsigned int i=0x80; i<=0xff; i++) {
     269             :         char ch[1];
     270             :         wchar_t wch[1];
     271             :         size_t res;
     272             : 
     273       22656 :         unsigned uch = (unsigned char)i;
     274       22656 :         ch[0] = (char)uch;
     275       22656 :         res = _Py_mbstowcs(wch, ch, 1);
     276       22656 :         if (res != DECODE_ERROR) {
     277             :             /* decoding a non-ASCII character from the locale encoding succeeded:
     278             :                the locale encoding is not ASCII, force ASCII */
     279           0 :             return 1;
     280             :         }
     281             :     }
     282             :     /* None of the bytes in the range 0x80-0xff can be decoded from the locale
     283             :        encoding: the locale encoding is really ASCII */
     284             : #endif   /* !defined(__hpux) */
     285         177 :     return 0;
     286             : #else
     287             :     /* nl_langinfo(CODESET) is not available: always force ASCII */
     288             :     return 1;
     289             : #endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */
     290             : 
     291           0 : error:
     292             :     /* if an error occurred, force the ASCII encoding */
     293           0 :     return 1;
     294             : }
     295             : 
     296             : 
     297             : int
     298        2897 : _Py_GetForceASCII(void)
     299             : {
     300        2897 :     if (force_ascii == -1) {
     301          82 :         force_ascii = check_force_ascii();
     302             :     }
     303        2897 :     return force_ascii;
     304             : }
     305             : 
     306             : 
     307             : void
     308        6103 : _Py_ResetForceASCII(void)
     309             : {
     310        6103 :     force_ascii = -1;
     311        6103 : }
     312             : 
     313             : 
     314             : static int
     315           0 : encode_ascii(const wchar_t *text, char **str,
     316             :              size_t *error_pos, const char **reason,
     317             :              int raw_malloc, _Py_error_handler errors)
     318             : {
     319           0 :     char *result = NULL, *out;
     320             :     size_t len, i;
     321             :     wchar_t ch;
     322             : 
     323             :     int surrogateescape;
     324           0 :     if (get_surrogateescape(errors, &surrogateescape) < 0) {
     325           0 :         return -3;
     326             :     }
     327             : 
     328           0 :     len = wcslen(text);
     329             : 
     330             :     /* +1 for the terminating NUL byte */
     331           0 :     if (raw_malloc) {
     332           0 :         result = PyMem_RawMalloc(len + 1);
     333             :     }
     334             :     else {
     335           0 :         result = PyMem_Malloc(len + 1);
     336             :     }
     337           0 :     if (result == NULL) {
     338           0 :         return -1;
     339             :     }
     340             : 
     341           0 :     out = result;
     342           0 :     for (i=0; i<len; i++) {
     343           0 :         ch = text[i];
     344             : 
     345           0 :         if (ch <= 0x7f) {
     346             :             /* ASCII character */
     347           0 :             *out++ = (char)ch;
     348             :         }
     349           0 :         else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
     350             :             /* UTF-8b surrogate */
     351           0 :             *out++ = (char)(ch - 0xdc00);
     352             :         }
     353             :         else {
     354           0 :             if (raw_malloc) {
     355           0 :                 PyMem_RawFree(result);
     356             :             }
     357             :             else {
     358           0 :                 PyMem_Free(result);
     359             :             }
     360           0 :             if (error_pos != NULL) {
     361           0 :                 *error_pos = i;
     362             :             }
     363           0 :             if (reason) {
     364           0 :                 *reason = "encoding error";
     365             :             }
     366           0 :             return -2;
     367             :         }
     368             :     }
     369           0 :     *out = '\0';
     370           0 :     *str = result;
     371           0 :     return 0;
     372             : }
     373             : #else
     374             : int
     375             : _Py_GetForceASCII(void)
     376             : {
     377             :     return 0;
     378             : }
     379             : 
     380             : void
     381             : _Py_ResetForceASCII(void)
     382             : {
     383             :     /* nothing to do */
     384             : }
     385             : #endif   /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
     386             : 
     387             : 
     388             : #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
     389             : static int
     390           0 : decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
     391             :              const char **reason, _Py_error_handler errors)
     392             : {
     393             :     wchar_t *res;
     394             :     unsigned char *in;
     395             :     wchar_t *out;
     396           0 :     size_t argsize = strlen(arg) + 1;
     397             : 
     398             :     int surrogateescape;
     399           0 :     if (get_surrogateescape(errors, &surrogateescape) < 0) {
     400           0 :         return -3;
     401             :     }
     402             : 
     403           0 :     if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
     404           0 :         return -1;
     405             :     }
     406           0 :     res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
     407           0 :     if (!res) {
     408           0 :         return -1;
     409             :     }
     410             : 
     411           0 :     out = res;
     412           0 :     for (in = (unsigned char*)arg; *in; in++) {
     413           0 :         unsigned char ch = *in;
     414           0 :         if (ch < 128) {
     415           0 :             *out++ = ch;
     416             :         }
     417             :         else {
     418           0 :             if (!surrogateescape) {
     419           0 :                 PyMem_RawFree(res);
     420           0 :                 if (wlen) {
     421           0 :                     *wlen = in - (unsigned char*)arg;
     422             :                 }
     423           0 :                 if (reason) {
     424           0 :                     *reason = "decoding error";
     425             :                 }
     426           0 :                 return -2;
     427             :             }
     428           0 :             *out++ = 0xdc00 + ch;
     429             :         }
     430             :     }
     431           0 :     *out = 0;
     432             : 
     433           0 :     if (wlen != NULL) {
     434           0 :         *wlen = out - res;
     435             :     }
     436           0 :     *wstr = res;
     437           0 :     return 0;
     438             : }
     439             : #endif   /* !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII) */
     440             : 
     441             : static int
     442     2459330 : decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
     443             :                       const char **reason, _Py_error_handler errors)
     444             : {
     445             :     wchar_t *res;
     446             :     size_t argsize;
     447             :     size_t count;
     448             : #ifdef HAVE_MBRTOWC
     449             :     unsigned char *in;
     450             :     wchar_t *out;
     451             :     mbstate_t mbs;
     452             : #endif
     453             : 
     454             :     int surrogateescape;
     455     2459330 :     if (get_surrogateescape(errors, &surrogateescape) < 0) {
     456           2 :         return -3;
     457             :     }
     458             : 
     459             : #ifdef HAVE_BROKEN_MBSTOWCS
     460             :     /* Some platforms have a broken implementation of
     461             :      * mbstowcs which does not count the characters that
     462             :      * would result from conversion.  Use an upper bound.
     463             :      */
     464             :     argsize = strlen(arg);
     465             : #else
     466     2459330 :     argsize = _Py_mbstowcs(NULL, arg, 0);
     467             : #endif
     468     2459330 :     if (argsize != DECODE_ERROR) {
     469     2459310 :         if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
     470           0 :             return -1;
     471             :         }
     472     2459310 :         res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
     473     2459310 :         if (!res) {
     474           0 :             return -1;
     475             :         }
     476             : 
     477     2459310 :         count = _Py_mbstowcs(res, arg, argsize + 1);
     478     2459310 :         if (count != DECODE_ERROR) {
     479     2459310 :             *wstr = res;
     480     2459310 :             if (wlen != NULL) {
     481     2451820 :                 *wlen = count;
     482             :             }
     483     2459310 :             return 0;
     484             :         }
     485           0 :         PyMem_RawFree(res);
     486             :     }
     487             : 
     488             :     /* Conversion failed. Fall back to escaping with surrogateescape. */
     489             : #ifdef HAVE_MBRTOWC
     490             :     /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */
     491             : 
     492             :     /* Overallocate; as multi-byte characters are in the argument, the
     493             :        actual output could use less memory. */
     494          22 :     argsize = strlen(arg) + 1;
     495          22 :     if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
     496           0 :         return -1;
     497             :     }
     498          22 :     res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
     499          22 :     if (!res) {
     500           0 :         return -1;
     501             :     }
     502             : 
     503          22 :     in = (unsigned char*)arg;
     504          22 :     out = res;
     505          22 :     memset(&mbs, 0, sizeof mbs);
     506         486 :     while (argsize) {
     507         486 :         size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
     508         486 :         if (converted == 0) {
     509             :             /* Reached end of string; null char stored. */
     510          18 :             break;
     511             :         }
     512             : 
     513         468 :         if (converted == INCOMPLETE_CHARACTER) {
     514             :             /* Incomplete character. This should never happen,
     515             :                since we provide everything that we have -
     516             :                unless there is a bug in the C library, or I
     517             :                misunderstood how mbrtowc works. */
     518           0 :             goto decode_error;
     519             :         }
     520             : 
     521         468 :         if (converted == DECODE_ERROR) {
     522         109 :             if (!surrogateescape) {
     523           4 :                 goto decode_error;
     524             :             }
     525             : 
     526             :             /* Decoding error. Escape as UTF-8b, and start over in the initial
     527             :                shift state. */
     528         105 :             *out++ = 0xdc00 + *in++;
     529         105 :             argsize--;
     530         105 :             memset(&mbs, 0, sizeof mbs);
     531         105 :             continue;
     532             :         }
     533             : 
     534             :         // _Py_mbrtowc() rejects lone surrogate characters
     535         359 :         assert(!Py_UNICODE_IS_SURROGATE(*out));
     536             : 
     537             :         /* successfully converted some bytes */
     538         359 :         in += converted;
     539         359 :         argsize -= converted;
     540         359 :         out++;
     541             :     }
     542          18 :     if (wlen != NULL) {
     543          18 :         *wlen = out - res;
     544             :     }
     545          18 :     *wstr = res;
     546          18 :     return 0;
     547             : 
     548           4 : decode_error:
     549           4 :     PyMem_RawFree(res);
     550           4 :     if (wlen) {
     551           4 :         *wlen = in - (unsigned char*)arg;
     552             :     }
     553           4 :     if (reason) {
     554           4 :         *reason = "decoding error";
     555             :     }
     556           4 :     return -2;
     557             : #else   /* HAVE_MBRTOWC */
     558             :     /* Cannot use C locale for escaping; manually escape as if charset
     559             :        is ASCII (i.e. escape all bytes >= 128). This will still round-trip
     560             :        correctly in the locale's charset, which must be an ASCII superset. */
     561             :     return decode_ascii(arg, wstr, wlen, reason, errors);
     562             : #endif   /* HAVE_MBRTOWC */
     563             : }
     564             : 
     565             : 
     566             : /* Decode a byte string from the locale encoding.
     567             : 
     568             :    Use the strict error handler if 'errors' is _Py_ERROR_STRICT.  Use the
     569             :    surrogateescape error handler if it is _Py_ERROR_SURROGATEESCAPE: undecodable
     570             :    bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
     571             :    can be decoded as a surrogate character, escape the bytes using the
     572             :    surrogateescape error handler instead of decoding them.
     573             : 
     574             :    On success, return 0 and write the newly allocated wide character string into
     575             :    *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
     576             :    the number of wide characters excluding the null character into *wlen.
     577             : 
     578             :    On memory allocation failure, return -1.
     579             : 
     580             :    On decoding error, return -2. If wlen is not NULL, write the start of
     581             :    invalid byte sequence in the input string into *wlen. If reason is not NULL,
     582             :    write the decoding error message into *reason.
     583             : 
     584             :    Return -3 if the error handler 'errors' is not supported.
     585             : 
     586             :    Use the Py_EncodeLocaleEx() function to encode the character string back to
     587             :    a byte string. */
     588             : int
     589     2475030 : _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
     590             :                    const char **reason,
     591             :                    int current_locale, _Py_error_handler errors)
     592             : {
     593     2475030 :     if (current_locale) {
     594             : #ifdef _Py_FORCE_UTF8_LOCALE
     595             :         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
     596             :                                 errors);
     597             : #else
     598     1396150 :         return decode_current_locale(arg, wstr, wlen, reason, errors);
     599             : #endif
     600             :     }
     601             : 
     602             : #ifdef _Py_FORCE_UTF8_FS_ENCODING
     603             :     return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
     604             :                             errors);
     605             : #else
     606     1078890 :     int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
     607             : #ifdef MS_WINDOWS
     608             :     use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
     609             : #endif
     610     1078890 :     if (use_utf8) {
     611       23184 :         return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
     612             :                                 errors);
     613             :     }
     614             : 
     615             : #ifdef USE_FORCE_ASCII
     616     1055700 :     if (force_ascii == -1) {
     617        5743 :         force_ascii = check_force_ascii();
     618             :     }
     619             : 
     620     1055700 :     if (force_ascii) {
     621             :         /* force ASCII encoding to workaround mbstowcs() issue */
     622           0 :         return decode_ascii(arg, wstr, wlen, reason, errors);
     623             :     }
     624             : #endif
     625             : 
     626     1055700 :     return decode_current_locale(arg, wstr, wlen, reason, errors);
     627             : #endif   /* !_Py_FORCE_UTF8_FS_ENCODING */
     628             : }
     629             : 
     630             : 
     631             : /* Decode a byte string from the locale encoding with the
     632             :    surrogateescape error handler: undecodable bytes are decoded as characters
     633             :    in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
     634             :    character, escape the bytes using the surrogateescape error handler instead
     635             :    of decoding them.
     636             : 
     637             :    Return a pointer to a newly allocated wide character string, use
     638             :    PyMem_RawFree() to free the memory. If wlen is not NULL, write the number of
     639             :    wide characters excluding the null character into *wlen.
     640             : 
     641             :    Return NULL on decoding error or memory allocation error. If wlen is not
     642             :    NULL, *wlen is set to (size_t)-1 on memory error or set to (size_t)-2 on
     643             :    decoding error.
     644             : 
     645             :    Decoding errors should never happen, unless there is a bug in the C
     646             :    library.
     647             : 
     648             :    Use the Py_EncodeLocale() function to encode the character string back to a
     649             :    byte string. */
     650             : wchar_t*
     651       54342 : Py_DecodeLocale(const char* arg, size_t *wlen)
     652             : {
     653             :     wchar_t *wstr;
     654       54342 :     int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
     655             :                                  NULL, 0,
     656             :                                  _Py_ERROR_SURROGATEESCAPE);
     657       54342 :     if (res != 0) {
     658           0 :         assert(res != -3);
     659           0 :         if (wlen != NULL) {
     660           0 :             *wlen = (size_t)res;
     661             :         }
     662           0 :         return NULL;
     663             :     }
     664       54342 :     return wstr;
     665             : }
     666             : 
     667             : 
     668             : static int
     669      112919 : encode_current_locale(const wchar_t *text, char **str,
     670             :                       size_t *error_pos, const char **reason,
     671             :                       int raw_malloc, _Py_error_handler errors)
     672             : {
     673      112919 :     const size_t len = wcslen(text);
     674      112919 :     char *result = NULL, *bytes = NULL;
     675             :     size_t i, size, converted;
     676             :     wchar_t c, buf[2];
     677             : 
     678             :     int surrogateescape;
     679      112919 :     if (get_surrogateescape(errors, &surrogateescape) < 0) {
     680           2 :         return -3;
     681             :     }
     682             : 
     683             :     /* The function works in two steps:
     684             :        1. compute the length of the output buffer in bytes (size)
     685             :        2. outputs the bytes */
     686      112917 :     size = 0;
     687      112917 :     buf[1] = 0;
     688             :     while (1) {
     689    11800000 :         for (i=0; i < len; i++) {
     690    11574200 :             c = text[i];
     691    11574200 :             if (c >= 0xdc80 && c <= 0xdcff) {
     692           5 :                 if (!surrogateescape) {
     693           1 :                     goto encode_error;
     694             :                 }
     695             :                 /* UTF-8b surrogate */
     696           4 :                 if (bytes != NULL) {
     697           2 :                     *bytes++ = c - 0xdc00;
     698           2 :                     size--;
     699             :                 }
     700             :                 else {
     701           2 :                     size++;
     702             :                 }
     703           4 :                 continue;
     704             :             }
     705             :             else {
     706    11574200 :                 buf[0] = c;
     707    11574200 :                 if (bytes != NULL) {
     708     5787100 :                     converted = wcstombs(bytes, buf, size);
     709             :                 }
     710             :                 else {
     711     5787110 :                     converted = wcstombs(NULL, buf, 0);
     712             :                 }
     713    11574200 :                 if (converted == DECODE_ERROR) {
     714           0 :                     goto encode_error;
     715             :                 }
     716    11574200 :                 if (bytes != NULL) {
     717     5787100 :                     bytes += converted;
     718     5787100 :                     size -= converted;
     719             :                 }
     720             :                 else {
     721     5787110 :                     size += converted;
     722             :                 }
     723             :             }
     724             :         }
     725      225832 :         if (result != NULL) {
     726      112916 :             *bytes = '\0';
     727      112916 :             break;
     728             :         }
     729             : 
     730      112916 :         size += 1; /* nul byte at the end */
     731      112916 :         if (raw_malloc) {
     732      112916 :             result = PyMem_RawMalloc(size);
     733             :         }
     734             :         else {
     735           0 :             result = PyMem_Malloc(size);
     736             :         }
     737      112916 :         if (result == NULL) {
     738           0 :             return -1;
     739             :         }
     740      112916 :         bytes = result;
     741             :     }
     742      112916 :     *str = result;
     743      112916 :     return 0;
     744             : 
     745           1 : encode_error:
     746           1 :     if (raw_malloc) {
     747           1 :         PyMem_RawFree(result);
     748             :     }
     749             :     else {
     750           0 :         PyMem_Free(result);
     751             :     }
     752           1 :     if (error_pos != NULL) {
     753           1 :         *error_pos = i;
     754             :     }
     755           1 :     if (reason) {
     756           1 :         *reason = "encoding error";
     757             :     }
     758           1 :     return -2;
     759             : }
     760             : 
     761             : 
     762             : /* Encode a string to the locale encoding.
     763             : 
     764             :    Parameters:
     765             : 
     766             :    * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
     767             :      of PyMem_Malloc().
     768             :    * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
     769             :      Python filesystem encoding.
     770             :    * errors: error handler like "strict" or "surrogateescape".
     771             : 
     772             :    Return value:
     773             : 
     774             :     0: success, *str is set to a newly allocated encoded string.
     775             :    -1: memory allocation failure
     776             :    -2: encoding error, set *error_pos and *reason (if set).
     777             :    -3: the error handler 'errors' is not supported.
     778             :  */
     779             : static int
     780      115670 : encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
     781             :                  const char **reason,
     782             :                  int raw_malloc, int current_locale, _Py_error_handler errors)
     783             : {
     784      115670 :     if (current_locale) {
     785             : #ifdef _Py_FORCE_UTF8_LOCALE
     786             :         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
     787             :                                 raw_malloc, errors);
     788             : #else
     789        1392 :         return encode_current_locale(text, str, error_pos, reason,
     790             :                                      raw_malloc, errors);
     791             : #endif
     792             :     }
     793             : 
     794             : #ifdef _Py_FORCE_UTF8_FS_ENCODING
     795             :     return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
     796             :                             raw_malloc, errors);
     797             : #else
     798      114278 :     int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
     799             : #ifdef MS_WINDOWS
     800             :     use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
     801             : #endif
     802      114278 :     if (use_utf8) {
     803        2751 :         return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
     804             :                                 raw_malloc, errors);
     805             :     }
     806             : 
     807             : #ifdef USE_FORCE_ASCII
     808      111527 :     if (force_ascii == -1) {
     809           0 :         force_ascii = check_force_ascii();
     810             :     }
     811             : 
     812      111527 :     if (force_ascii) {
     813           0 :         return encode_ascii(text, str, error_pos, reason,
     814             :                             raw_malloc, errors);
     815             :     }
     816             : #endif
     817             : 
     818      111527 :     return encode_current_locale(text, str, error_pos, reason,
     819             :                                  raw_malloc, errors);
     820             : #endif   /* _Py_FORCE_UTF8_FS_ENCODING */
     821             : }
     822             : 
     823             : static char*
     824       22813 : encode_locale(const wchar_t *text, size_t *error_pos,
     825             :               int raw_malloc, int current_locale)
     826             : {
     827             :     char *str;
     828       22813 :     int res = encode_locale_ex(text, &str, error_pos, NULL,
     829             :                                raw_malloc, current_locale,
     830             :                                _Py_ERROR_SURROGATEESCAPE);
     831       22813 :     if (res != -2 && error_pos) {
     832           0 :         *error_pos = (size_t)-1;
     833             :     }
     834       22813 :     if (res != 0) {
     835           0 :         return NULL;
     836             :     }
     837       22813 :     return str;
     838             : }
     839             : 
     840             : /* Encode a wide character string to the locale encoding with the
     841             :    surrogateescape error handler: surrogate characters in the range
     842             :    U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
     843             : 
     844             :    Return a pointer to a newly allocated byte string, use PyMem_Free() to free
     845             :    the memory. Return NULL on encoding or memory allocation error.
     846             : 
     847             :    If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
     848             :    to the index of the invalid character on encoding error.
     849             : 
     850             :    Use the Py_DecodeLocale() function to decode the bytes string back to a wide
     851             :    character string. */
     852             : char*
     853           0 : Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
     854             : {
     855           0 :     return encode_locale(text, error_pos, 0, 0);
     856             : }
     857             : 
     858             : 
     859             : /* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree()
     860             :    instead of PyMem_Free(). */
     861             : char*
     862       22813 : _Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
     863             : {
     864       22813 :     return encode_locale(text, error_pos, 1, 0);
     865             : }
     866             : 
     867             : 
     868             : int
     869       92857 : _Py_EncodeLocaleEx(const wchar_t *text, char **str,
     870             :                    size_t *error_pos, const char **reason,
     871             :                    int current_locale, _Py_error_handler errors)
     872             : {
     873       92857 :     return encode_locale_ex(text, str, error_pos, reason, 1,
     874             :                             current_locale, errors);
     875             : }
     876             : 
     877             : 
     878             : // Get the current locale encoding name:
     879             : //
     880             : // - Return "utf-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
     881             : // - The UTF-8 Mode is not checked by this function
     882             : // - On Windows, return the ANSI code page (ex: "cp1250")
     883             : // - Return "utf-8" if nl_langinfo(CODESET) returns an empty string.
     884             : // - Otherwise, return nl_langinfo(CODESET).
     885             : //
     886             : // Return NULL on memory allocation failure.
     887             : //
     888             : // See also config_get_locale_encoding()
     889             : wchar_t*
     890        7483 : _Py_GetLocaleEncoding(void)
     891             : {
     892             : #ifdef _Py_FORCE_UTF8_LOCALE
     893             :     // On Android langinfo.h and CODESET are missing,
     894             :     // and UTF-8 is always used in mbstowcs() and wcstombs().
     895             :     return _PyMem_RawWcsdup(L"utf-8");
     896             : #else
     897             : 
     898             : #ifdef MS_WINDOWS
     899             :     wchar_t encoding[23];
     900             :     unsigned int ansi_codepage = GetACP();
     901             :     swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
     902             :     encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
     903             :     return _PyMem_RawWcsdup(encoding);
     904             : #else
     905        7483 :     const char *encoding = nl_langinfo(CODESET);
     906        7483 :     if (!encoding || encoding[0] == '\0') {
     907             :         // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
     908             :         // macOS if the LC_CTYPE locale is not supported.
     909           0 :         return _PyMem_RawWcsdup(L"utf-8");
     910             :     }
     911             : 
     912             :     wchar_t *wstr;
     913        7483 :     int res = decode_current_locale(encoding, &wstr, NULL,
     914             :                                     NULL, _Py_ERROR_SURROGATEESCAPE);
     915        7483 :     if (res < 0) {
     916           0 :         return NULL;
     917             :     }
     918        7483 :     return wstr;
     919             : #endif  // !MS_WINDOWS
     920             : 
     921             : #endif  // !_Py_FORCE_UTF8_LOCALE
     922             : }
     923             : 
     924             : 
     925             : PyObject *
     926        1870 : _Py_GetLocaleEncodingObject(void)
     927             : {
     928        1870 :     wchar_t *encoding = _Py_GetLocaleEncoding();
     929        1870 :     if (encoding == NULL) {
     930           0 :         PyErr_NoMemory();
     931           0 :         return NULL;
     932             :     }
     933             : 
     934        1870 :     PyObject *str = PyUnicode_FromWideChar(encoding, -1);
     935        1870 :     PyMem_RawFree(encoding);
     936        1870 :     return str;
     937             : }
     938             : 
     939             : #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
     940             : 
     941             : /* Check whether the current locale uses a non-Unicode internal wchar_t form. */
     942             : int
     943             : _Py_LocaleUsesNonUnicodeWchar(void)
     944             : {
     945             :     /* Oracle Solaris uses non-Unicode internal wchar_t form for
     946             :        non-Unicode locales and hence needs conversion to UTF first. */
     947             :     char* codeset = nl_langinfo(CODESET);
     948             :     if (!codeset) {
     949             :         return 0;
     950             :     }
     951             :     /* 646 refers to ISO/IEC 646 standard that corresponds to ASCII encoding */
     952             :     return (strcmp(codeset, "UTF-8") != 0 && strcmp(codeset, "646") != 0);
     953             : }
     954             : 
     955             : static wchar_t *
     956             : _Py_ConvertWCharForm(const wchar_t *source, Py_ssize_t size,
     957             :                      const char *tocode, const char *fromcode)
     958             : {
     959             :     static_assert(sizeof(wchar_t) == 4, "wchar_t must be 32-bit");
     960             : 
     961             :     /* Ensure we won't overflow the size. */
     962             :     if (size > (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t))) {
     963             :         PyErr_NoMemory();
     964             :         return NULL;
     965             :     }
     966             : 
     967             :     /* the string doesn't have to be NULL terminated */
     968             :     wchar_t* target = PyMem_Malloc(size * sizeof(wchar_t));
     969             :     if (target == NULL) {
     970             :         PyErr_NoMemory();
     971             :         return NULL;
     972             :     }
     973             : 
     974             :     iconv_t cd = iconv_open(tocode, fromcode);
     975             :     if (cd == (iconv_t)-1) {
     976             :         PyErr_Format(PyExc_ValueError, "iconv_open() failed");
     977             :         PyMem_Free(target);
     978             :         return NULL;
     979             :     }
     980             : 
     981             :     char *inbuf = (char *) source;
     982             :     char *outbuf = (char *) target;
     983             :     size_t inbytesleft = sizeof(wchar_t) * size;
     984             :     size_t outbytesleft = inbytesleft;
     985             : 
     986             :     size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
     987             :     if (ret == DECODE_ERROR) {
     988             :         PyErr_Format(PyExc_ValueError, "iconv() failed");
     989             :         PyMem_Free(target);
     990             :         iconv_close(cd);
     991             :         return NULL;
     992             :     }
     993             : 
     994             :     iconv_close(cd);
     995             :     return target;
     996             : }
     997             : 
     998             : /* Convert a wide character string to a UCS-4 encoded string. This
     999             :    is necessary on systems where the internal form of wchar_t is not Unicode
    1000             :    code points (e.g. Oracle Solaris).
    1001             : 
    1002             :    Return a pointer to a newly allocated string, use PyMem_Free() to free
    1003             :    the memory. Return NULL and raise exception on conversion or memory
    1004             :    allocation error. */
    1005             : wchar_t *
    1006             : _Py_DecodeNonUnicodeWchar(const wchar_t *native, Py_ssize_t size)
    1007             : {
    1008             :     return _Py_ConvertWCharForm(native, size, "UCS-4-INTERNAL", "wchar_t");
    1009             : }
    1010             : 
    1011             : /* Convert a UCS-4 encoded string to native wide character string. This
    1012             :    is necessary on systems where the internal form of wchar_t is not Unicode
    1013             :    code points (e.g. Oracle Solaris).
    1014             : 
    1015             :    The conversion is done in place. This can be done because both wchar_t
    1016             :    and UCS-4 use 4-byte encoding, and one wchar_t symbol always corresponds
    1017             :    to a single UCS-4 symbol and vice versa. (This is true for Oracle Solaris,
    1018             :    which is currently the only system using these functions; it doesn't have
    1019             :    to be for other systems).
    1020             : 
    1021             :    Return 0 on success. Return -1 and raise exception on conversion
    1022             :    or memory allocation error. */
    1023             : int
    1024             : _Py_EncodeNonUnicodeWchar_InPlace(wchar_t *unicode, Py_ssize_t size)
    1025             : {
    1026             :     wchar_t* result = _Py_ConvertWCharForm(unicode, size, "wchar_t", "UCS-4-INTERNAL");
    1027             :     if (!result) {
    1028             :         return -1;
    1029             :     }
    1030             :     memcpy(unicode, result, size * sizeof(wchar_t));
    1031             :     PyMem_Free(result);
    1032             :     return 0;
    1033             : }
    1034             : #endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */
    1035             : 
    1036             : #ifdef MS_WINDOWS
    1037             : static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
    1038             : 
    1039             : static void
    1040             : FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
    1041             : {
    1042             :     /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
    1043             :     /* Cannot simply cast and dereference in_ptr,
    1044             :        since it might not be aligned properly */
    1045             :     __int64 in;
    1046             :     memcpy(&in, in_ptr, sizeof(in));
    1047             :     *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
    1048             :     *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
    1049             : }
    1050             : 
    1051             : void
    1052             : _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
    1053             : {
    1054             :     /* XXX endianness */
    1055             :     __int64 out;
    1056             :     out = time_in + secs_between_epochs;
    1057             :     out = out * 10000000 + nsec_in / 100;
    1058             :     memcpy(out_ptr, &out, sizeof(out));
    1059             : }
    1060             : 
    1061             : /* Below, we *know* that ugo+r is 0444 */
    1062             : #if _S_IREAD != 0400
    1063             : #error Unsupported C library
    1064             : #endif
    1065             : static int
    1066             : attributes_to_mode(DWORD attr)
    1067             : {
    1068             :     int m = 0;
    1069             :     if (attr & FILE_ATTRIBUTE_DIRECTORY)
    1070             :         m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
    1071             :     else
    1072             :         m |= _S_IFREG;
    1073             :     if (attr & FILE_ATTRIBUTE_READONLY)
    1074             :         m |= 0444;
    1075             :     else
    1076             :         m |= 0666;
    1077             :     return m;
    1078             : }
    1079             : 
    1080             : void
    1081             : _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
    1082             :                            struct _Py_stat_struct *result)
    1083             : {
    1084             :     memset(result, 0, sizeof(*result));
    1085             :     result->st_mode = attributes_to_mode(info->dwFileAttributes);
    1086             :     result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
    1087             :     result->st_dev = info->dwVolumeSerialNumber;
    1088             :     result->st_rdev = result->st_dev;
    1089             :     FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
    1090             :     FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
    1091             :     FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
    1092             :     result->st_nlink = info->nNumberOfLinks;
    1093             :     result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
    1094             :     /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
    1095             :        open other name surrogate reparse points without traversing them. To
    1096             :        detect/handle these, check st_file_attributes and st_reparse_tag. */
    1097             :     result->st_reparse_tag = reparse_tag;
    1098             :     if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
    1099             :         reparse_tag == IO_REPARSE_TAG_SYMLINK) {
    1100             :         /* first clear the S_IFMT bits */
    1101             :         result->st_mode ^= (result->st_mode & S_IFMT);
    1102             :         /* now set the bits that make this a symlink */
    1103             :         result->st_mode |= S_IFLNK;
    1104             :     }
    1105             :     result->st_file_attributes = info->dwFileAttributes;
    1106             : }
    1107             : #endif
    1108             : 
    1109             : /* Return information about a file.
    1110             : 
    1111             :    On POSIX, use fstat().
    1112             : 
    1113             :    On Windows, use GetFileType() and GetFileInformationByHandle() which support
    1114             :    files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
    1115             :    than 2 GiB because the file size type is a signed 32-bit integer: see issue
    1116             :    #23152.
    1117             : 
    1118             :    On Windows, set the last Windows error and return nonzero on error. On
    1119             :    POSIX, set errno and return nonzero on error. Fill status and return 0 on
    1120             :    success. */
    1121             : int
    1122      548270 : _Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
    1123             : {
    1124             : #ifdef MS_WINDOWS
    1125             :     BY_HANDLE_FILE_INFORMATION info;
    1126             :     HANDLE h;
    1127             :     int type;
    1128             : 
    1129             :     h = _Py_get_osfhandle_noraise(fd);
    1130             : 
    1131             :     if (h == INVALID_HANDLE_VALUE) {
    1132             :         /* errno is already set by _get_osfhandle, but we also set
    1133             :            the Win32 error for callers who expect that */
    1134             :         SetLastError(ERROR_INVALID_HANDLE);
    1135             :         return -1;
    1136             :     }
    1137             :     memset(status, 0, sizeof(*status));
    1138             : 
    1139             :     type = GetFileType(h);
    1140             :     if (type == FILE_TYPE_UNKNOWN) {
    1141             :         DWORD error = GetLastError();
    1142             :         if (error != 0) {
    1143             :             errno = winerror_to_errno(error);
    1144             :             return -1;
    1145             :         }
    1146             :         /* else: valid but unknown file */
    1147             :     }
    1148             : 
    1149             :     if (type != FILE_TYPE_DISK) {
    1150             :         if (type == FILE_TYPE_CHAR)
    1151             :             status->st_mode = _S_IFCHR;
    1152             :         else if (type == FILE_TYPE_PIPE)
    1153             :             status->st_mode = _S_IFIFO;
    1154             :         return 0;
    1155             :     }
    1156             : 
    1157             :     if (!GetFileInformationByHandle(h, &info)) {
    1158             :         /* The Win32 error is already set, but we also set errno for
    1159             :            callers who expect it */
    1160             :         errno = winerror_to_errno(GetLastError());
    1161             :         return -1;
    1162             :     }
    1163             : 
    1164             :     _Py_attribute_data_to_stat(&info, 0, status);
    1165             :     /* specific to fstat() */
    1166             :     status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    1167             :     return 0;
    1168             : #else
    1169      548270 :     return fstat(fd, status);
    1170             : #endif
    1171             : }
    1172             : 
    1173             : /* Return information about a file.
    1174             : 
    1175             :    On POSIX, use fstat().
    1176             : 
    1177             :    On Windows, use GetFileType() and GetFileInformationByHandle() which support
    1178             :    files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
    1179             :    than 2 GiB because the file size type is a signed 32-bit integer: see issue
    1180             :    #23152.
    1181             : 
    1182             :    Raise an exception and return -1 on error. On Windows, set the last Windows
    1183             :    error on error. On POSIX, set errno on error. Fill status and return 0 on
    1184             :    success.
    1185             : 
    1186             :    Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
    1187             :    to call fstat(). The caller must hold the GIL. */
    1188             : int
    1189         342 : _Py_fstat(int fd, struct _Py_stat_struct *status)
    1190             : {
    1191             :     int res;
    1192             : 
    1193         342 :     assert(PyGILState_Check());
    1194             : 
    1195         342 :     Py_BEGIN_ALLOW_THREADS
    1196         342 :     res = _Py_fstat_noraise(fd, status);
    1197         342 :     Py_END_ALLOW_THREADS
    1198             : 
    1199         342 :     if (res != 0) {
    1200             : #ifdef MS_WINDOWS
    1201             :         PyErr_SetFromWindowsErr(0);
    1202             : #else
    1203           2 :         PyErr_SetFromErrno(PyExc_OSError);
    1204             : #endif
    1205           2 :         return -1;
    1206             :     }
    1207         340 :     return 0;
    1208             : }
    1209             : 
    1210             : /* Like _Py_stat() but with a raw filename. */
    1211             : int
    1212        3337 : _Py_wstat(const wchar_t* path, struct stat *buf)
    1213             : {
    1214             :     int err;
    1215             : #ifdef MS_WINDOWS
    1216             :     struct _stat wstatbuf;
    1217             :     err = _wstat(path, &wstatbuf);
    1218             :     if (!err) {
    1219             :         buf->st_mode = wstatbuf.st_mode;
    1220             :     }
    1221             : #else
    1222             :     char *fname;
    1223        3337 :     fname = _Py_EncodeLocaleRaw(path, NULL);
    1224        3337 :     if (fname == NULL) {
    1225           0 :         errno = EINVAL;
    1226           0 :         return -1;
    1227             :     }
    1228        3337 :     err = stat(fname, buf);
    1229        3337 :     PyMem_RawFree(fname);
    1230             : #endif
    1231        3337 :     return err;
    1232             : }
    1233             : 
    1234             : 
    1235             : /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
    1236             :    call stat() otherwise. Only fill st_mode attribute on Windows.
    1237             : 
    1238             :    Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
    1239             :    raised. */
    1240             : 
    1241             : int
    1242           0 : _Py_stat(PyObject *path, struct stat *statbuf)
    1243             : {
    1244             : #ifdef MS_WINDOWS
    1245             :     int err;
    1246             : 
    1247             :     wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
    1248             :     if (wpath == NULL)
    1249             :         return -2;  /* conversion to wchar_t* failed */
    1250             : 
    1251             :     err = _Py_wstat(wpath, statbuf);  /* does the _wstat() call and the st_mode copy */
    1252             :     PyMem_Free(wpath);
    1253             :     return err;
    1254             : #else
    1255             :     int ret;
    1256             :     PyObject *bytes;
    1257             :     char *cpath;
    1258             : 
    1259           0 :     bytes = PyUnicode_EncodeFSDefault(path);
    1260           0 :     if (bytes == NULL)
    1261           0 :         return -2;  /* encoding to the filesystem encoding failed */
    1262             : 
    1263             :     /* check for embedded null bytes */
    1264           0 :     if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
    1265           0 :         Py_DECREF(bytes);
    1266           0 :         return -2;
    1267             :     }
    1268             : 
    1269           0 :     ret = stat(cpath, statbuf);
    1270           0 :     Py_DECREF(bytes);  /* released only after stat(): cpath was obtained from bytes */
    1271           0 :     return ret;
    1272             : #endif
    1273             : }
    1274             : 
    1275             : 
    1276             : /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
    1277             : static int  /* nonzero: fd is inheritable, 0: it is not, -1: error */
    1278        3055 : get_inheritable(int fd, int raise)
    1279             : {
    1280             : #ifdef MS_WINDOWS
    1281             :     HANDLE handle;
    1282             :     DWORD flags;
    1283             : 
    1284             :     handle = _Py_get_osfhandle_noraise(fd);
    1285             :     if (handle == INVALID_HANDLE_VALUE) {
    1286             :         if (raise)  /* an exception is set only when the caller asked for it */
    1287             :             PyErr_SetFromErrno(PyExc_OSError);
    1288             :         return -1;
    1289             :     }
    1290             : 
    1291             :     if (!GetHandleInformation(handle, &flags)) {
    1292             :         if (raise)
    1293             :             PyErr_SetFromWindowsErr(0);
    1294             :         return -1;
    1295             :     }
    1296             : 
    1297             :     return (flags & HANDLE_FLAG_INHERIT);  /* nonzero iff the inherit flag is set on the handle */
    1298             : #else
    1299             :     int flags;
    1300             : 
    1301        3055 :     flags = fcntl(fd, F_GETFD, 0);  /* read the file descriptor flags */
    1302        3055 :     if (flags == -1) {
    1303           2 :         if (raise)
    1304           2 :             PyErr_SetFromErrno(PyExc_OSError);
    1305           2 :         return -1;
    1306             :     }
    1307        3053 :     return !(flags & FD_CLOEXEC);  /* inheritable means FD_CLOEXEC is clear */
    1308             : #endif
    1309             : }
    1310             : 
    1311             : /* Get the inheritable flag of the specified file descriptor.
    1312             :    Return 1 if the file descriptor can be inherited, 0 if it cannot,
    1313             :    raise an exception and return -1 on error. */
    1314             : int
    1315         130 : _Py_get_inheritable(int fd)
    1316             : {
    1317         130 :     return get_inheritable(fd, 1);  /* raise=1: failures set an exception */
    1318             : }
    1319             : 
    1320             : 
    1321             : /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
    1322             : static int  /* 0 on success, -1 on error (an exception is set only if raise is nonzero) */
    1323      483230 : set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
    1324             : {
    1325             : #ifdef MS_WINDOWS
    1326             :     HANDLE handle;
    1327             :     DWORD flags;
    1328             : #else
    1329             : #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
    1330             :     static int ioctl_works = -1;  /* -1: not known yet, 0: ioctl() unusable, 1: ioctl() succeeded once */
    1331             :     int request;
    1332             :     int err;
    1333             : #endif
    1334             :     int flags, new_flags;
    1335             :     int res;
    1336             : #endif
    1337             : 
    1338             :     /* atomic_flag_works can only be used to make the file descriptor
    1339             :        non-inheritable */
    1340      483230 :     assert(!(atomic_flag_works != NULL && inheritable));
    1341             : 
    1342      483230 :     if (atomic_flag_works != NULL && !inheritable) {
    1343      474355 :         if (*atomic_flag_works == -1) {  /* first use: probe the descriptor */
    1344        2925 :             int isInheritable = get_inheritable(fd, raise);
    1345        2925 :             if (isInheritable == -1)
    1346           0 :                 return -1;
    1347        2925 :             *atomic_flag_works = !isInheritable;  /* 1 iff fd is already non-inheritable */
    1348             :         }
    1349             : 
    1350      474355 :         if (*atomic_flag_works)
    1351      474355 :             return 0;  /* fd is treated as already non-inheritable: nothing to do */
    1352             :     }
    1353             : 
    1354             : #ifdef MS_WINDOWS
    1355             :     handle = _Py_get_osfhandle_noraise(fd);
    1356             :     if (handle == INVALID_HANDLE_VALUE) {
    1357             :         if (raise)
    1358             :             PyErr_SetFromErrno(PyExc_OSError);
    1359             :         return -1;
    1360             :     }
    1361             : 
    1362             :     if (inheritable)
    1363             :         flags = HANDLE_FLAG_INHERIT;
    1364             :     else
    1365             :         flags = 0;
    1366             : 
    1367             :     /* This check can be removed once support for Windows 7 ends. */
    1368             : #define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
    1369             :         GetFileType(handle) == FILE_TYPE_CHAR)
    1370             : 
    1371             :     if (!CONSOLE_PSEUDOHANDLE(handle) &&
    1372             :         !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
    1373             :         if (raise)
    1374             :             PyErr_SetFromWindowsErr(0);
    1375             :         return -1;
    1376             :     }
    1377             : #undef CONSOLE_PSEUDOHANDLE
    1378             :     return 0;  /* also reached for console pseudo-handles, which are left untouched */
    1379             : 
    1380             : #else
    1381             : 
    1382             : #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
    1383        8875 :     if (ioctl_works != 0 && raise != 0) {
    1384             :         /* fast-path: ioctl() only requires one syscall */
    1385             :         /* caveat: raise=0 is an indicator that we must be async-signal-safe
    1386             :          * thus avoid using ioctl() so we skip the fast-path. */
    1387        2558 :         if (inheritable)
    1388          57 :             request = FIONCLEX;  /* clear close-on-exec */
    1389             :         else
    1390        2501 :             request = FIOCLEX;  /* set close-on-exec */
    1391        2558 :         err = ioctl(fd, request, NULL);
    1392        2558 :         if (!err) {
    1393        2553 :             ioctl_works = 1;
    1394        2553 :             return 0;
    1395             :         }
    1396             : 
    1397             : #ifdef O_PATH
    1398           5 :         if (errno == EBADF) {
    1399             :             // bpo-44849: On Linux and FreeBSD, ioctl(FIOCLEX) fails with EBADF
    1400             :             // on O_PATH file descriptors. Fall through to the fcntl()
    1401             :             // implementation.
    1402             :         }
    1403             :         else
    1404             : #endif
    1405           0 :         if (errno != ENOTTY && errno != EACCES) {
    1406           0 :             if (raise)
    1407           0 :                 PyErr_SetFromErrno(PyExc_OSError);
    1408           0 :             return -1;
    1409             :         }
    1410             :         else {
    1411             :             /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
    1412             :                device". The ioctl is declared but not supported by the kernel.
    1413             :                Remember that ioctl() doesn't work. It is the case on
    1414             :                Illumos-based OS for example.
    1415             : 
    1416             :                Issue #27057: When SELinux policy disallows ioctl it will fail
    1417             :                with EACCES. While FIOCLEX is safe operation it may be
    1418             :                unavailable because ioctl was denied altogether.
    1419             :                This can be the case on Android. */
    1420           0 :             ioctl_works = 0;  /* later calls skip the fast-path */
    1421             :         }
    1422             :         /* fallback to fcntl() if ioctl() does not work */
    1423             :     }
    1424             : #endif
    1425             : 
    1426             :     /* slow-path: fcntl() requires two syscalls */
    1427        6322 :     flags = fcntl(fd, F_GETFD);
    1428        6322 :     if (flags < 0) {
    1429           3 :         if (raise)
    1430           3 :             PyErr_SetFromErrno(PyExc_OSError);
    1431           3 :         return -1;
    1432             :     }
    1433             : 
    1434        6319 :     if (inheritable) {
    1435        3111 :         new_flags = flags & ~FD_CLOEXEC;
    1436             :     }
    1437             :     else {
    1438        3208 :         new_flags = flags | FD_CLOEXEC;
    1439             :     }
    1440             : 
    1441        6319 :     if (new_flags == flags) {
    1442             :         /* FD_CLOEXEC flag already set/cleared: nothing to do */
    1443         143 :         return 0;  /* saves the second fcntl() call */
    1444             :     }
    1445             : 
    1446        6176 :     res = fcntl(fd, F_SETFD, new_flags);
    1447        6176 :     if (res < 0) {
    1448           0 :         if (raise)
    1449           0 :             PyErr_SetFromErrno(PyExc_OSError);
    1450           0 :         return -1;
    1451             :     }
    1452        6176 :     return 0;
    1453             : #endif
    1454             : }
    1455             : 
    1456             : /* Make the file descriptor non-inheritable.
    1457             :    Return 0 on success, set errno and return -1 on error. */
    1458             : static int
    1459        3183 : make_non_inheritable(int fd)
    1460             : {
    1461        3183 :     return set_inheritable(fd, 0, 0, NULL);  /* inheritable=0, raise=0, no atomic flag */
    1462             : }
    1463             : 
    1464             : /* Set the inheritable flag of the specified file descriptor.
    1465             :    On success: return 0, on error: raise an exception and return -1.
    1466             : 
    1467             :    If atomic_flag_works is not NULL:
    1468             : 
    1469             :     * if *atomic_flag_works==-1, check if the file descriptor is already
    1470             :       non-inheritable: if yes, set *atomic_flag_works to 1, otherwise set it
    1471             :       to 0 and make the file descriptor non-inheritable
    1472             :     * if *atomic_flag_works==1: do nothing
    1473             :     * if *atomic_flag_works==0: make the file descriptor non-inheritable
    1474             : 
    1475             :    Set atomic_flag_works to NULL if no atomic flag was used to create the
    1476             :    file descriptor.
    1477             : 
    1478             :    atomic_flag_works can only be used to make a file descriptor
    1479             :    non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
    1480             : int
    1481      470808 : _Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
    1482             : {
    1483      470808 :     return set_inheritable(fd, inheritable, 1, atomic_flag_works);  /* raise=1 */
    1484             : }
    1485             : 
    1486             : /* Same as _Py_set_inheritable() but on error, set errno and
    1487             :    don't raise an exception.
    1488             :    This function is async-signal-safe. */
    1489             : int
    1490        3134 : _Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
    1491             : {
    1492        3134 :     return set_inheritable(fd, inheritable, 0, atomic_flag_works);  /* raise=0: no exception */
    1493             : }
    1494             : /* Shared body of _Py_open() (gil_held=1) and _Py_open_noraise() (gil_held=0). */
    1495             : static int
    1496        5636 : _Py_open_impl(const char *pathname, int flags, int gil_held)
    1497             : {
    1498             :     int fd;
    1499        5636 :     int async_err = 0;
    1500             : #ifndef MS_WINDOWS
    1501             :     int *atomic_flag_works;
    1502             : #endif
    1503             : 
    1504             : #ifdef MS_WINDOWS
    1505             :     flags |= O_NOINHERIT;
    1506             : #elif defined(O_CLOEXEC)
    1507        5636 :     atomic_flag_works = &_Py_open_cloexec_works;  /* lets set_inheritable() skip work once O_CLOEXEC is known to work */
    1508        5636 :     flags |= O_CLOEXEC;
    1509             : #else
    1510             :     atomic_flag_works = NULL;  /* no atomic flag available at open() time */
    1511             : #endif
    1512             : 
    1513        5636 :     if (gil_held) {
    1514           0 :         PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);  /* used for the audit event and for error reporting */
    1515           0 :         if (pathname_obj == NULL) {
    1516           0 :             return -1;
    1517             :         }
    1518           0 :         if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
    1519           0 :             Py_DECREF(pathname_obj);
    1520           0 :             return -1;
    1521             :         }
    1522             : 
    1523             :         do {
    1524           0 :             Py_BEGIN_ALLOW_THREADS
    1525           0 :             fd = open(pathname, flags);
    1526           0 :             Py_END_ALLOW_THREADS
    1527             :         } while (fd < 0
    1528           0 :                  && errno == EINTR && !(async_err = PyErr_CheckSignals()));  /* retry on EINTR unless PyErr_CheckSignals() reports failure */
    1529           0 :         if (async_err) {
    1530           0 :             Py_DECREF(pathname_obj);
    1531           0 :             return -1;
    1532             :         }
    1533           0 :         if (fd < 0) {
    1534           0 :             PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
    1535           0 :             Py_DECREF(pathname_obj);
    1536           0 :             return -1;
    1537             :         }
    1538           0 :         Py_DECREF(pathname_obj);
    1539             :     }
    1540             :     else {
    1541        5636 :         fd = open(pathname, flags);  /* no audit event, no EINTR retry, no exception on this path */
    1542        5636 :         if (fd < 0)
    1543           0 :             return -1;
    1544             :     }
    1545             : 
    1546             : #ifndef MS_WINDOWS
    1547        5636 :     if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {  /* gil_held doubles as the raise flag */
    1548           0 :         close(fd);  /* NOTE(review): close() may overwrite the errno left by set_inheritable() -- confirm callers do not depend on it */
    1549           0 :         return -1;
    1550             :     }
    1551             : #endif
    1552             : 
    1553        5636 :     return fd;
    1554             : }
    1555             : 
    1556             : /* Open a file with the specified flags (wrapper to open() function).
    1557             :    Return a file descriptor on success. Raise an exception and return -1 on
    1558             :    error.
    1559             : 
    1560             :    The file descriptor is created non-inheritable.
    1561             : 
    1562             :    When interrupted by a signal (open() fails with EINTR), retry the syscall,
    1563             :    except if the Python signal handler raises an exception.
    1564             : 
    1565             :    Release the GIL to call open(). The caller must hold the GIL. */
    1566             : int
    1567           0 : _Py_open(const char *pathname, int flags)
    1568             : {
    1569             :     /* _Py_open() must be called with the GIL held. */
    1570           0 :     assert(PyGILState_Check());
    1571           0 :     return _Py_open_impl(pathname, flags, 1);  /* gil_held=1 */
    1572             : }
    1573             : 
    1574             : /* Open a file with the specified flags (wrapper to open() function).
    1575             :    Return a file descriptor on success. Set errno and return -1 on error.
    1576             : 
    1577             :    The file descriptor is created non-inheritable.
    1578             : 
    1579             :    If interrupted by a signal, fail with EINTR. */
    1580             : int
    1581        5636 : _Py_open_noraise(const char *pathname, int flags)
    1582             : {
    1583        5636 :     return _Py_open_impl(pathname, flags, 0);  /* gil_held=0: no audit event, no exception */
    1584             : }
    1585             : 
    1586             : /* Open a file. Use _wfopen() on Windows, encode the path to the locale
    1587             :    encoding and use fopen() otherwise.
    1588             : 
    1589             :    The file descriptor is created non-inheritable.
    1590             : 
    1591             :    Return NULL on error. If interrupted by a signal, fail with EINTR. */
    1592             : FILE *
    1593       15778 : _Py_wfopen(const wchar_t *path, const wchar_t *mode)
    1594             : {
    1595             :     FILE *f;
    1596       15778 :     if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
    1597           0 :         return NULL;
    1598             :     }
    1599             : #ifndef MS_WINDOWS
    1600             :     char *cpath;
    1601             :     char cmode[10];
    1602             :     size_t r;
    1603       15778 :     r = wcstombs(cmode, mode, 10);
    1604       15778 :     if (r == DECODE_ERROR || r >= 10) {  /* conversion failed, or the buffer was filled without room for a NUL */
    1605           0 :         errno = EINVAL;
    1606           0 :         return NULL;
    1607             :     }
    1608       15778 :     cpath = _Py_EncodeLocaleRaw(path, NULL);
    1609       15778 :     if (cpath == NULL) {
    1610           0 :         return NULL;  /* NOTE(review): errno is not assigned here, unlike the same failure in _Py_wstat() */
    1611             :     }
    1612       15778 :     f = fopen(cpath, cmode);
    1613       15778 :     PyMem_RawFree(cpath);
    1614             : #else
    1615             :     f = _wfopen(path, mode);
    1616             : #endif
    1617       15778 :     if (f == NULL)
    1618       12595 :         return NULL;
    1619        3183 :     if (make_non_inheritable(fileno(f)) < 0) {
    1620           0 :         fclose(f);  /* do not hand back a stream whose descriptor could still be inherited */
    1621           0 :         return NULL;
    1622             :     }
    1623        3183 :     return f;
    1624             : }
    1625             : 
    1626             : 
    1627             : /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
    1628             :    encoding and call fopen() otherwise.
    1629             : 
    1630             :    Return the new file object on success. Raise an exception and return NULL
    1631             :    on error.
    1632             : 
    1633             :    The file descriptor is created non-inheritable.
    1634             : 
    1635             :    When interrupted by a signal (the call fails with EINTR), retry it,
    1636             :    except if the Python signal handler raises an exception.
    1637             : 
    1638             :    Release the GIL to call _wfopen() or fopen(). The caller must hold
    1639             :    the GIL. */
    1640             : FILE*
    1641        1440 : _Py_fopen_obj(PyObject *path, const char *mode)
    1642             : {
    1643             :     FILE *f;
    1644        1440 :     int async_err = 0;
    1645             : #ifdef MS_WINDOWS
    1646             :     wchar_t wmode[10];
    1647             :     int usize;
    1648             : 
    1649             :     assert(PyGILState_Check());
    1650             : 
    1651             :     if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
    1652             :         return NULL;
    1653             :     }
    1654             :     if (!PyUnicode_Check(path)) {
    1655             :         PyErr_Format(PyExc_TypeError,
    1656             :                      "str file path expected under Windows, got %R",
    1657             :                      Py_TYPE(path));
    1658             :         return NULL;
    1659             :     }
    1660             : 
    1661             :     wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
    1662             :     if (wpath == NULL)
    1663             :         return NULL;
    1664             : 
    1665             :     usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
    1666             :                                 wmode, Py_ARRAY_LENGTH(wmode));
    1667             :     if (usize == 0) {  /* mode could not be converted into wmode */
    1668             :         PyErr_SetFromWindowsErr(0);
    1669             :         PyMem_Free(wpath);
    1670             :         return NULL;
    1671             :     }
    1672             : 
    1673             :     do {
    1674             :         Py_BEGIN_ALLOW_THREADS
    1675             :         f = _wfopen(wpath, wmode);
    1676             :         Py_END_ALLOW_THREADS
    1677             :     } while (f == NULL
    1678             :              && errno == EINTR && !(async_err = PyErr_CheckSignals()));
    1679             :     PyMem_Free(wpath);
    1680             : #else
    1681             :     PyObject *bytes;
    1682             :     const char *path_bytes;
    1683             : 
    1684        1440 :     assert(PyGILState_Check());
    1685             : 
    1686        1440 :     if (!PyUnicode_FSConverter(path, &bytes))
    1687           2 :         return NULL;
    1688        1438 :     path_bytes = PyBytes_AS_STRING(bytes);  /* obtained from bytes: used only before Py_DECREF(bytes) */
    1689             : 
    1690        1438 :     if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
    1691           1 :         Py_DECREF(bytes);
    1692           1 :         return NULL;
    1693             :     }
    1694             : 
    1695             :     do {
    1696        1437 :         Py_BEGIN_ALLOW_THREADS
    1697        1437 :         f = fopen(path_bytes, mode);
    1698        1437 :         Py_END_ALLOW_THREADS
    1699             :     } while (f == NULL
    1700        1437 :              && errno == EINTR && !(async_err = PyErr_CheckSignals()));  /* retry on EINTR unless PyErr_CheckSignals() reports failure */
    1701             : 
    1702        1437 :     Py_DECREF(bytes);
    1703             : #endif
    1704        1437 :     if (async_err)  /* PyErr_CheckSignals() failed: give up without touching f */
    1705           0 :         return NULL;
    1706             : 
    1707        1437 :     if (f == NULL) {
    1708         968 :         PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
    1709         968 :         return NULL;
    1710             :     }
    1711             : 
    1712         469 :     if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {  /* inheritable=0, raise=1, no atomic flag */
    1713           0 :         fclose(f);
    1714           0 :         return NULL;
    1715             :     }
    1716         469 :     return f;
    1717             : }
    1718             : 
    1719             : /* Read count bytes from fd into buf.
    1720             : 
    1721             :    On success, return the number of read bytes, it can be lower than count.
    1722             :    If the current file offset is at or past the end of file, no bytes are read,
    1723             :    and read() returns zero.
    1724             : 
    1725             :    On error, raise an exception, set errno and return -1.
    1726             : 
    1727             :    When interrupted by a signal (read() fails with EINTR), retry the syscall.
    1728             :    If the Python signal handler raises an exception, the function returns -1
    1729             :    (the syscall is not retried).
    1730             : 
    1731             :    Release the GIL to call read(). The caller must hold the GIL. */
    1732             : Py_ssize_t
    1733      793158 : _Py_read(int fd, void *buf, size_t count)
    1734             : {
    1735             :     Py_ssize_t n;
    1736             :     int err;
    1737      793158 :     int async_err = 0;
    1738             : 
    1739      793158 :     assert(PyGILState_Check());
    1740             : 
    1741             :     /* _Py_read() must not be called with an exception set, otherwise the
    1742             :      * caller may think that read() was interrupted by a signal and the signal
    1743             :      * handler raised an exception. */
    1744      793158 :     assert(!PyErr_Occurred());
    1745             : 
    1746      793158 :     if (count > _PY_READ_MAX) {
    1747           0 :         count = _PY_READ_MAX;  /* clamp oversized requests; the caller may get fewer bytes than asked */
    1748             :     }
    1749             : 
    1750             :     _Py_BEGIN_SUPPRESS_IPH
    1751             :     do {
    1752      793197 :         Py_BEGIN_ALLOW_THREADS
    1753      793197 :         errno = 0;
    1754             : #ifdef MS_WINDOWS
    1755             :         n = read(fd, buf, (int)count);
    1756             : #else
    1757      793197 :         n = read(fd, buf, count);
    1758             : #endif
    1759             :         /* save/restore errno because PyErr_CheckSignals()
    1760             :          * and PyErr_SetFromErrno() can modify it */
    1761      793195 :         err = errno;
    1762      793195 :         Py_END_ALLOW_THREADS
    1763         125 :     } while (n < 0 && err == EINTR &&
    1764      793238 :             !(async_err = PyErr_CheckSignals()));
    1765             :     _Py_END_SUPPRESS_IPH
    1766             : 
    1767      793156 :     if (async_err) {
    1768             :         /* read() was interrupted by a signal (failed with EINTR)
    1769             :          * and the Python signal handler raised an exception */
    1770           4 :         errno = err;
    1771           4 :         assert(errno == EINTR && PyErr_Occurred());
    1772           4 :         return -1;
    1773             :     }
    1774      793152 :     if (n < 0) {
    1775          39 :         PyErr_SetFromErrno(PyExc_OSError);
    1776          39 :         errno = err;  /* hand the caller the errno saved right after read() */
    1777          39 :         return -1;
    1778             :     }
    1779             : 
    1780      793113 :     return n;
    1781             : }
    1782             : /* Shared body of _Py_write() (gil_held=1) and _Py_write_noraise() (gil_held=0). */
    1783             : static Py_ssize_t
    1784      317226 : _Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
    1785             : {
    1786             :     Py_ssize_t n;
    1787             :     int err;
    1788      317226 :     int async_err = 0;  /* only ever assigned in the gil_held loop below */
    1789             : 
    1790             :     _Py_BEGIN_SUPPRESS_IPH
    1791             : #ifdef MS_WINDOWS
    1792             :     if (count > 32767) {
    1793             :         /* Issue #11395: the Windows console returns an error (12: not
    1794             :            enough space error) on writing into stdout if stdout mode is
    1795             :            binary and the length is greater than 66,000 bytes (or less,
    1796             :            depending on heap usage). */
    1797             :         if (gil_held) {
    1798             :             Py_BEGIN_ALLOW_THREADS
    1799             :             if (isatty(fd)) {
    1800             :                 count = 32767;
    1801             :             }
    1802             :             Py_END_ALLOW_THREADS
    1803             :         } else {
    1804             :             if (isatty(fd)) {
    1805             :                 count = 32767;
    1806             :             }
    1807             :         }
    1808             :     }
    1809             : #endif
    1810      317226 :     if (count > _PY_WRITE_MAX) {
    1811           0 :         count = _PY_WRITE_MAX;  /* clamp oversized requests; the caller may see a short write */
    1812             :     }
    1813             : 
    1814      317226 :     if (gil_held) {
    1815             :         do {
    1816      309941 :             Py_BEGIN_ALLOW_THREADS
    1817      309941 :             errno = 0;
    1818             : #ifdef MS_WINDOWS
    1819             :             n = write(fd, buf, (int)count);
    1820             : #else
    1821      309941 :             n = write(fd, buf, count);
    1822             : #endif
    1823             :             /* save/restore errno because PyErr_CheckSignals()
    1824             :              * and PyErr_SetFromErrno() can modify it */
    1825      309941 :             err = errno;
    1826      309941 :             Py_END_ALLOW_THREADS
    1827          76 :         } while (n < 0 && err == EINTR &&
    1828      309917 :                 !(async_err = PyErr_CheckSignals()));
    1829             :     }
    1830             :     else {
    1831             :         do {
    1832        7295 :             errno = 0;
    1833             : #ifdef MS_WINDOWS
    1834             :             n = write(fd, buf, (int)count);
    1835             : #else
    1836        7295 :             n = write(fd, buf, count);
    1837             : #endif
    1838        7295 :             err = errno;
    1839        7295 :         } while (n < 0 && err == EINTR);  /* plain retry: PyErr_CheckSignals() is not called on this path */
    1840             :     }
    1841             :     _Py_END_SUPPRESS_IPH
    1842             : 
    1843      317192 :     if (async_err) {
    1844             :         /* write() was interrupted by a signal (failed with EINTR)
    1845             :            and the Python signal handler raised an exception (if gil_held is
    1846             :            nonzero). */
    1847           0 :         errno = err;
    1848           0 :         assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
    1849           0 :         return -1;
    1850             :     }
    1851      317192 :     if (n < 0) {
    1852          62 :         if (gil_held)  /* an exception is set only when the GIL is held */
    1853          56 :             PyErr_SetFromErrno(PyExc_OSError);
    1854          62 :         errno = err;  /* hand the caller the errno saved right after write() */
    1855          62 :         return -1;
    1856             :     }
    1857             : 
    1858      317130 :     return n;
    1859             : }
    1860             : 
    1861             : /* Write count bytes of buf into fd.
    1862             : 
    1863             :    On success, return the number of written bytes, it can be lower than count
    1864             :    including 0. On error, raise an exception, set errno and return -1.
    1865             : 
    1866             :    When interrupted by a signal (write() fails with EINTR), retry the syscall.
    1867             :    If the Python signal handler raises an exception, the function returns -1
    1868             :    (the syscall is not retried).
    1869             : 
    1870             :    Release the GIL to call write(). The caller must hold the GIL. */
    1871             : Py_ssize_t
    1872      309931 : _Py_write(int fd, const void *buf, size_t count)
    1873             : {
    1874      309931 :     assert(PyGILState_Check());
    1875             : 
    1876             :     /* _Py_write() must not be called with an exception set, otherwise the
    1877             :      * caller may think that write() was interrupted by a signal and the signal
    1878             :      * handler raised an exception. */
    1879      309931 :     assert(!PyErr_Occurred());
    1880             : 
    1881      309931 :     return _Py_write_impl(fd, buf, count, 1);  /* gil_held=1 */
    1882             : }
    1883             : 
    1884             : /* Write count bytes of buf into fd.
    1885             :  *
    1886             :  * On success, return the number of written bytes, it can be lower than count
    1887             :  * including 0. On error, set errno and return -1.
    1888             :  *
    1889             :  * When interrupted by a signal (write() fails with EINTR), retry the syscall
    1890             :  * without calling the Python signal handler. */
    1891             : Py_ssize_t
    1892        7295 : _Py_write_noraise(int fd, const void *buf, size_t count)
    1893             : {
    1894        7295 :     return _Py_write_impl(fd, buf, count, 0);
    1895             : }
    1896             : 
    1897             : #ifdef HAVE_READLINK
    1898             : 
    1899             : /* Read value of symbolic link. Encode the path to the locale encoding, decode
    1900             :    the result from the locale encoding.
    1901             : 
    1902             :    Return -1 on encoding error, on readlink() error, if the internal buffer is
    1903             :    too short, on decoding error, or if 'buf' is too short. */
    1904             : int
    1905        3470 : _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
    1906             : {
    1907             :     char *cpath;
    1908             :     char cbuf[MAXPATHLEN];
    1909        3470 :     size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
    1910             :     wchar_t *wbuf;
    1911             :     Py_ssize_t res;
    1912             :     size_t r1;
    1913             : 
    1914        3470 :     cpath = _Py_EncodeLocaleRaw(path, NULL);
    1915        3470 :     if (cpath == NULL) {
    1916           0 :         errno = EINVAL;
    1917           0 :         return -1;
    1918             :     }
    1919        3470 :     res = readlink(cpath, cbuf, cbuf_len);
    1920        3470 :     PyMem_RawFree(cpath);
    1921        3470 :     if (res == -1) {
    1922        3437 :         return -1;
    1923             :     }
    1924          33 :     if ((size_t)res == cbuf_len) {
    1925           0 :         errno = EINVAL;
    1926           0 :         return -1;
    1927             :     }
    1928          33 :     cbuf[res] = '\0'; /* buf will be null terminated */
    1929          33 :     wbuf = Py_DecodeLocale(cbuf, &r1);
    1930          33 :     if (wbuf == NULL) {
    1931           0 :         errno = EINVAL;
    1932           0 :         return -1;
    1933             :     }
    1934             :     /* wbuf must have space to store the trailing NUL character */
    1935          33 :     if (buflen <= r1) {
    1936           0 :         PyMem_RawFree(wbuf);
    1937           0 :         errno = EINVAL;
    1938           0 :         return -1;
    1939             :     }
    1940          33 :     wcsncpy(buf, wbuf, buflen);
    1941          33 :     PyMem_RawFree(wbuf);
    1942          33 :     return (int)r1;
    1943             : }
    1944             : #endif
    1945             : 
    1946             : #ifdef HAVE_REALPATH
    1947             : 
    1948             : /* Return the canonicalized absolute pathname. Encode path to the locale
    1949             :    encoding, decode the result from the locale encoding.
    1950             : 
    1951             :    Return NULL on encoding error, realpath() error, decoding error
    1952             :    or if 'resolved_path' is too short. */
    1953             : wchar_t*
    1954         228 : _Py_wrealpath(const wchar_t *path,
    1955             :               wchar_t *resolved_path, size_t resolved_path_len)
    1956             : {
    1957             :     char *cpath;
    1958             :     char cresolved_path[MAXPATHLEN];
    1959             :     wchar_t *wresolved_path;
    1960             :     char *res;
    1961             :     size_t r;
    1962         228 :     cpath = _Py_EncodeLocaleRaw(path, NULL);
    1963         228 :     if (cpath == NULL) {
    1964           0 :         errno = EINVAL;
    1965           0 :         return NULL;
    1966             :     }
    1967         228 :     res = realpath(cpath, cresolved_path);
    1968         228 :     PyMem_RawFree(cpath);
    1969         228 :     if (res == NULL)
    1970          20 :         return NULL;
    1971             : 
    1972         208 :     wresolved_path = Py_DecodeLocale(cresolved_path, &r);
    1973         208 :     if (wresolved_path == NULL) {
    1974           0 :         errno = EINVAL;
    1975           0 :         return NULL;
    1976             :     }
    1977             :     /* wresolved_path must have space to store the trailing NUL character */
    1978         208 :     if (resolved_path_len <= r) {
    1979           0 :         PyMem_RawFree(wresolved_path);
    1980           0 :         errno = EINVAL;
    1981           0 :         return NULL;
    1982             :     }
    1983         208 :     wcsncpy(resolved_path, wresolved_path, resolved_path_len);
    1984         208 :     PyMem_RawFree(wresolved_path);
    1985         208 :     return resolved_path;
    1986             : }
    1987             : #endif
    1988             : 
    1989             : 
    1990             : int
    1991       73848 : _Py_isabs(const wchar_t *path)
    1992             : {
    1993             : #ifdef MS_WINDOWS
    1994             :     const wchar_t *tail;
    1995             :     HRESULT hr = PathCchSkipRoot(path, &tail);
    1996             :     if (FAILED(hr) || path == tail) {
    1997             :         return 0;
    1998             :     }
    1999             :     if (tail == &path[1] && (path[0] == SEP || path[0] == ALTSEP)) {
    2000             :         // Exclude paths with leading SEP
    2001             :         return 0;
    2002             :     }
    2003             :     if (tail == &path[2] && path[1] == L':') {
    2004             :         // Exclude drive-relative paths (e.g. C:filename.ext)
    2005             :         return 0;
    2006             :     }
    2007             :     return 1;
    2008             : #else
    2009       73848 :     return (path[0] == SEP);
    2010             : #endif
    2011             : }
    2012             : 
    2013             : 
    2014             : /* Get an absolute path.
    2015             :    On error (ex: fail to get the current directory), return -1.
    2016             :    On memory allocation failure, set *abspath_p to NULL and return 0.
    2017             :    On success, return a newly allocated to *abspath_p to and return 0.
    2018             :    The string must be freed by PyMem_RawFree(). */
    2019             : int
    2020        3073 : _Py_abspath(const wchar_t *path, wchar_t **abspath_p)
    2021             : {
    2022        3073 :     if (path[0] == '\0' || !wcscmp(path, L".")) {
    2023             :         wchar_t cwd[MAXPATHLEN + 1];
    2024           6 :         cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
    2025           6 :         if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
    2026             :             /* unable to get the current directory */
    2027           0 :             return -1;
    2028             :         }
    2029           6 :         *abspath_p = _PyMem_RawWcsdup(cwd);
    2030           6 :         return 0;
    2031             :     }
    2032             : 
    2033        3067 :     if (_Py_isabs(path)) {
    2034        2878 :         *abspath_p = _PyMem_RawWcsdup(path);
    2035        2878 :         return 0;
    2036             :     }
    2037             : 
    2038             : #ifdef MS_WINDOWS
    2039             :     return _PyOS_getfullpathname(path, abspath_p);
    2040             : #else
    2041             :     wchar_t cwd[MAXPATHLEN + 1];
    2042         189 :     cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
    2043         189 :     if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
    2044             :         /* unable to get the current directory */
    2045           0 :         return -1;
    2046             :     }
    2047             : 
    2048         189 :     size_t cwd_len = wcslen(cwd);
    2049         189 :     size_t path_len = wcslen(path);
    2050         189 :     size_t len = cwd_len + 1 + path_len + 1;
    2051         189 :     if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
    2052         189 :         *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
    2053             :     }
    2054             :     else {
    2055           0 :         *abspath_p = NULL;
    2056             :     }
    2057         189 :     if (*abspath_p == NULL) {
    2058           0 :         return 0;
    2059             :     }
    2060             : 
    2061         189 :     wchar_t *abspath = *abspath_p;
    2062         189 :     memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
    2063         189 :     abspath += cwd_len;
    2064             : 
    2065         189 :     *abspath = (wchar_t)SEP;
    2066         189 :     abspath++;
    2067             : 
    2068         189 :     memcpy(abspath, path, path_len * sizeof(wchar_t));
    2069         189 :     abspath += path_len;
    2070             : 
    2071         189 :     *abspath = 0;
    2072         189 :     return 0;
    2073             : #endif
    2074             : }
    2075             : 
    2076             : 
    2077             : // The caller must ensure "buffer" is big enough.
    2078             : static int
    2079       22124 : join_relfile(wchar_t *buffer, size_t bufsize,
    2080             :              const wchar_t *dirname, const wchar_t *relfile)
    2081             : {
    2082             : #ifdef MS_WINDOWS
    2083             :     if (FAILED(PathCchCombineEx(buffer, bufsize, dirname, relfile,
    2084             :         PATHCCH_ALLOW_LONG_PATHS))) {
    2085             :         return -1;
    2086             :     }
    2087             : #else
    2088       22124 :     assert(!_Py_isabs(relfile));
    2089       22124 :     size_t dirlen = wcslen(dirname);
    2090       22124 :     size_t rellen = wcslen(relfile);
    2091       22124 :     size_t maxlen = bufsize - 1;
    2092       22124 :     if (maxlen > MAXPATHLEN || dirlen >= maxlen || rellen >= maxlen - dirlen) {
    2093           0 :         return -1;
    2094             :     }
    2095       22124 :     if (dirlen == 0) {
    2096             :         // We do not add a leading separator.
    2097           0 :         wcscpy(buffer, relfile);
    2098             :     }
    2099             :     else {
    2100       22124 :         if (dirname != buffer) {
    2101           2 :             wcscpy(buffer, dirname);
    2102             :         }
    2103       22124 :         size_t relstart = dirlen;
    2104       22124 :         if (dirlen > 1 && dirname[dirlen - 1] != SEP) {
    2105       22124 :             buffer[dirlen] = SEP;
    2106       22124 :             relstart += 1;
    2107             :         }
    2108       22124 :         wcscpy(&buffer[relstart], relfile);
    2109             :     }
    2110             : #endif
    2111       22124 :     return 0;
    2112             : }
    2113             : 
    2114             : /* Join the two paths together, like os.path.join().  Return NULL
    2115             :    if memory could not be allocated.  The caller is responsible
    2116             :    for calling PyMem_RawFree() on the result. */
    2117             : wchar_t *
    2118           2 : _Py_join_relfile(const wchar_t *dirname, const wchar_t *relfile)
    2119             : {
    2120           2 :     assert(dirname != NULL && relfile != NULL);
    2121             : #ifndef MS_WINDOWS
    2122           2 :     assert(!_Py_isabs(relfile));
    2123             : #endif
    2124           2 :     size_t maxlen = wcslen(dirname) + 1 + wcslen(relfile);
    2125           2 :     size_t bufsize = maxlen + 1;
    2126           2 :     wchar_t *filename = PyMem_RawMalloc(bufsize * sizeof(wchar_t));
    2127           2 :     if (filename == NULL) {
    2128           0 :         return NULL;
    2129             :     }
    2130           2 :     assert(wcslen(dirname) < MAXPATHLEN);
    2131           2 :     assert(wcslen(relfile) < MAXPATHLEN - wcslen(dirname));
    2132           2 :     join_relfile(filename, bufsize, dirname, relfile);
    2133           2 :     return filename;
    2134             : }
    2135             : 
    2136             : /* Join the two paths together, like os.path.join().
    2137             :      dirname: the target buffer with the dirname already in place,
    2138             :               including trailing NUL
    2139             :      relfile: this must be a relative path
    2140             :      bufsize: total allocated size of the buffer
    2141             :    Return -1 if anything is wrong with the path lengths. */
    2142             : int
    2143       22122 : _Py_add_relfile(wchar_t *dirname, const wchar_t *relfile, size_t bufsize)
    2144             : {
    2145       22122 :     assert(dirname != NULL && relfile != NULL);
    2146       22122 :     assert(bufsize > 0);
    2147       22122 :     return join_relfile(dirname, bufsize, dirname, relfile);
    2148             : }
    2149             : 
    2150             : 
    2151             : size_t
    2152           0 : _Py_find_basename(const wchar_t *filename)
    2153             : {
    2154           0 :     for (size_t i = wcslen(filename); i > 0; --i) {
    2155           0 :         if (filename[i] == SEP) {
    2156           0 :             return i + 1;
    2157             :         }
    2158             :     }
    2159           0 :     return 0;
    2160             : }
    2161             : 
    2162             : /* In-place path normalisation. Returns the start of the normalized
    2163             :    path, which will be within the original buffer. Guaranteed to not
    2164             :    make the path longer, and will not fail. 'size' is the length of
    2165             :    the path, if known. If -1, the first null character will be assumed
    2166             :    to be the end of the path. */
    2167             : wchar_t *
    2168      181686 : _Py_normpath(wchar_t *path, Py_ssize_t size)
    2169             : {
    2170      181686 :     if (!path[0] || size == 0) {
    2171          44 :         return path;
    2172             :     }
    2173      181642 :     wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
    2174      181642 :     wchar_t *p1 = path;     // sequentially scanned address in the path
    2175      181642 :     wchar_t *p2 = path;     // destination of a scanned character to be ljusted
    2176      181642 :     wchar_t *minP2 = path;  // the beginning of the destination range
    2177      181642 :     wchar_t lastC = L'\0';  // the last ljusted character, p2[-1] in most cases
    2178             : 
    2179             : #define IS_END(x) (pEnd ? (x) == pEnd : !*(x))
    2180             : #ifdef ALTSEP
    2181             : #define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP)
    2182             : #else
    2183             : #define IS_SEP(x) (*(x) == SEP)
    2184             : #endif
    2185             : #define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))
    2186             : 
    2187             :     // Skip leading '.\'
    2188      181642 :     if (p1[0] == L'.' && IS_SEP(&p1[1])) {
    2189         138 :         path = &path[2];
    2190         140 :         while (IS_SEP(path) && !IS_END(path)) {
    2191           2 :             path++;
    2192             :         }
    2193         138 :         p1 = p2 = minP2 = path;
    2194         138 :         lastC = SEP;
    2195             :     }
    2196             : #ifdef MS_WINDOWS
    2197             :     // Skip past drive segment and update minP2
    2198             :     else if (p1[0] && p1[1] == L':') {
    2199             :         *p2++ = *p1++;
    2200             :         *p2++ = *p1++;
    2201             :         minP2 = p2;
    2202             :         lastC = L':';
    2203             :     }
    2204             :     // Skip past all \\-prefixed paths, including \\?\, \\.\,
    2205             :     // and network paths, including the first segment.
    2206             :     else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1])) {
    2207             :         int sepCount = 2;
    2208             :         *p2++ = SEP;
    2209             :         *p2++ = SEP;
    2210             :         p1 += 2;
    2211             :         for (; !IS_END(p1) && sepCount; ++p1) {
    2212             :             if (IS_SEP(p1)) {
    2213             :                 --sepCount;
    2214             :                 *p2++ = lastC = SEP;
    2215             :             } else {
    2216             :                 *p2++ = lastC = *p1;
    2217             :             }
    2218             :         }
    2219             :         if (sepCount) {
    2220             :             minP2 = p2;      // Invalid path
    2221             :         } else {
    2222             :             minP2 = p2 - 1;  // Absolute path has SEP at minP2
    2223             :         }
    2224             :     }
    2225             : #else
    2226             :     // Skip past two leading SEPs
    2227      181504 :     else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1]) && !IS_SEP(&p1[2])) {
    2228          30 :         *p2++ = *p1++;
    2229          30 :         *p2++ = *p1++;
    2230          30 :         minP2 = p2 - 1;  // Absolute path has SEP at minP2
    2231          30 :         lastC = SEP;
    2232             :     }
    2233             : #endif /* MS_WINDOWS */
    2234             : 
    2235             :     /* if pEnd is specified, check that. Else, check for null terminator */
    2236    10833200 :     for (; !IS_END(p1); ++p1) {
    2237    10651600 :         wchar_t c = *p1;
    2238             : #ifdef ALTSEP
    2239             :         if (c == ALTSEP) {
    2240             :             c = SEP;
    2241             :         }
    2242             : #endif
    2243    10651600 :         if (lastC == SEP) {
    2244     1267500 :             if (c == L'.') {
    2245        4380 :                 int sep_at_1 = SEP_OR_END(&p1[1]);
    2246        4380 :                 int sep_at_2 = !sep_at_1 && SEP_OR_END(&p1[2]);
    2247        4380 :                 if (sep_at_2 && p1[1] == L'.') {
    2248         444 :                     wchar_t *p3 = p2;
    2249         879 :                     while (p3 != minP2 && *--p3 == SEP) { }
    2250        1321 :                     while (p3 != minP2 && *(p3 - 1) != SEP) { --p3; }
    2251         444 :                     if (p2 == minP2
    2252         435 :                         || (p3[0] == L'.' && p3[1] == L'.' && IS_SEP(&p3[2])))
    2253             :                     {
    2254             :                         // Previous segment is also ../, so append instead.
    2255             :                         // Relative path does not absorb ../ at minP2 as well.
    2256          19 :                         *p2++ = L'.';
    2257          19 :                         *p2++ = L'.';
    2258          19 :                         lastC = L'.';
    2259         425 :                     } else if (p3[0] == SEP) {
    2260             :                         // Absolute path, so absorb segment
    2261         263 :                         p2 = p3 + 1;
    2262             :                     } else {
    2263         162 :                         p2 = p3;
    2264             :                     }
    2265         444 :                     p1 += 1;
    2266        3936 :                 } else if (sep_at_1) {
    2267             :                 } else {
    2268        3495 :                     *p2++ = lastC = c;
    2269             :                 }
    2270     1263120 :             } else if (c == SEP) {
    2271             :             } else {
    2272     1262390 :                 *p2++ = lastC = c;
    2273             :             }
    2274             :         } else {
    2275     9384080 :             *p2++ = lastC = c;
    2276             :         }
    2277             :     }
    2278      181642 :     *p2 = L'\0';
    2279      181642 :     if (p2 != minP2) {
    2280      182190 :         while (--p2 != minP2 && *p2 == SEP) {
    2281         550 :             *p2 = L'\0';
    2282             :         }
    2283             :     }
    2284             : #undef SEP_OR_END
    2285             : #undef IS_SEP
    2286             : #undef IS_END
    2287      181642 :     return path;
    2288             : }
    2289             : 
    2290             : 
    2291             : /* Get the current directory. buflen is the buffer size in wide characters
    2292             :    including the null character. Decode the path from the locale encoding.
    2293             : 
    2294             :    Return NULL on getcwd() error, on decoding error, or if 'buf' is
    2295             :    too short. */
    2296             : wchar_t*
    2297         819 : _Py_wgetcwd(wchar_t *buf, size_t buflen)
    2298             : {
    2299             : #ifdef MS_WINDOWS
    2300             :     int ibuflen = (int)Py_MIN(buflen, INT_MAX);
    2301             :     return _wgetcwd(buf, ibuflen);
    2302             : #else
    2303             :     char fname[MAXPATHLEN];
    2304             :     wchar_t *wname;
    2305             :     size_t len;
    2306             : 
    2307         819 :     if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
    2308           0 :         return NULL;
    2309         819 :     wname = Py_DecodeLocale(fname, &len);
    2310         819 :     if (wname == NULL)
    2311           0 :         return NULL;
    2312             :     /* wname must have space to store the trailing NUL character */
    2313         819 :     if (buflen <= len) {
    2314           0 :         PyMem_RawFree(wname);
    2315           0 :         return NULL;
    2316             :     }
    2317         819 :     wcsncpy(buf, wname, buflen);
    2318         819 :     PyMem_RawFree(wname);
    2319         819 :     return buf;
    2320             : #endif
    2321             : }
    2322             : 
    2323             : /* Duplicate a file descriptor. The new file descriptor is created as
    2324             :    non-inheritable. Return a new file descriptor on success, raise an OSError
    2325             :    exception and return -1 on error.
    2326             : 
    2327             :    The GIL is released to call dup(). The caller must hold the GIL. */
    2328             : int
    2329       15795 : _Py_dup(int fd)
    2330             : {
    2331             : #ifdef MS_WINDOWS
    2332             :     HANDLE handle;
    2333             : #endif
    2334             : 
    2335       15795 :     assert(PyGILState_Check());
    2336             : 
    2337             : #ifdef MS_WINDOWS
    2338             :     handle = _Py_get_osfhandle(fd);
    2339             :     if (handle == INVALID_HANDLE_VALUE)
    2340             :         return -1;
    2341             : 
    2342             :     Py_BEGIN_ALLOW_THREADS
    2343             :     _Py_BEGIN_SUPPRESS_IPH
    2344             :     fd = dup(fd);
    2345             :     _Py_END_SUPPRESS_IPH
    2346             :     Py_END_ALLOW_THREADS
    2347             :     if (fd < 0) {
    2348             :         PyErr_SetFromErrno(PyExc_OSError);
    2349             :         return -1;
    2350             :     }
    2351             : 
    2352             :     if (_Py_set_inheritable(fd, 0, NULL) < 0) {
    2353             :         _Py_BEGIN_SUPPRESS_IPH
    2354             :         close(fd);
    2355             :         _Py_END_SUPPRESS_IPH
    2356             :         return -1;
    2357             :     }
    2358             : #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
    2359       15795 :     Py_BEGIN_ALLOW_THREADS
    2360             :     _Py_BEGIN_SUPPRESS_IPH
    2361       15795 :     fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
    2362             :     _Py_END_SUPPRESS_IPH
    2363       15795 :     Py_END_ALLOW_THREADS
    2364       15795 :     if (fd < 0) {
    2365           3 :         PyErr_SetFromErrno(PyExc_OSError);
    2366           3 :         return -1;
    2367             :     }
    2368             : 
    2369             : #else
    2370             :     Py_BEGIN_ALLOW_THREADS
    2371             :     _Py_BEGIN_SUPPRESS_IPH
    2372             :     fd = dup(fd);
    2373             :     _Py_END_SUPPRESS_IPH
    2374             :     Py_END_ALLOW_THREADS
    2375             :     if (fd < 0) {
    2376             :         PyErr_SetFromErrno(PyExc_OSError);
    2377             :         return -1;
    2378             :     }
    2379             : 
    2380             :     if (_Py_set_inheritable(fd, 0, NULL) < 0) {
    2381             :         _Py_BEGIN_SUPPRESS_IPH
    2382             :         close(fd);
    2383             :         _Py_END_SUPPRESS_IPH
    2384             :         return -1;
    2385             :     }
    2386             : #endif
    2387       15792 :     return fd;
    2388             : }
    2389             : 
    2390             : #ifndef MS_WINDOWS
    2391             : /* Get the blocking mode of the file descriptor.
    2392             :    Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
    2393             :    raise an exception and return -1 on error. */
    2394             : int
    2395         344 : _Py_get_blocking(int fd)
    2396             : {
    2397             :     int flags;
    2398             :     _Py_BEGIN_SUPPRESS_IPH
    2399         344 :     flags = fcntl(fd, F_GETFL, 0);
    2400             :     _Py_END_SUPPRESS_IPH
    2401         344 :     if (flags < 0) {
    2402           1 :         PyErr_SetFromErrno(PyExc_OSError);
    2403           1 :         return -1;
    2404             :     }
    2405             : 
    2406         343 :     return !(flags & O_NONBLOCK);
    2407             : }
    2408             : 
    2409             : /* Set the blocking mode of the specified file descriptor.
    2410             : 
    2411             :    Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
    2412             :    otherwise.
    2413             : 
    2414             :    Return 0 on success, raise an exception and return -1 on error. */
    2415             : int
    2416         327 : _Py_set_blocking(int fd, int blocking)
    2417             : {
    2418             : /* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
    2419             :    Use fcntl() instead. */
    2420             : #if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
    2421         327 :     int arg = !blocking;
    2422         327 :     if (ioctl(fd, FIONBIO, &arg) < 0)
    2423           1 :         goto error;
    2424             : #else
    2425             :     int flags, res;
    2426             : 
    2427             :     _Py_BEGIN_SUPPRESS_IPH
    2428             :     flags = fcntl(fd, F_GETFL, 0);
    2429             :     if (flags >= 0) {
    2430             :         if (blocking)
    2431             :             flags = flags & (~O_NONBLOCK);
    2432             :         else
    2433             :             flags = flags | O_NONBLOCK;
    2434             : 
    2435             :         res = fcntl(fd, F_SETFL, flags);
    2436             :     } else {
    2437             :         res = -1;
    2438             :     }
    2439             :     _Py_END_SUPPRESS_IPH
    2440             : 
    2441             :     if (res < 0)
    2442             :         goto error;
    2443             : #endif
    2444         326 :     return 0;
    2445             : 
    2446           1 : error:
    2447           1 :     PyErr_SetFromErrno(PyExc_OSError);
    2448           1 :     return -1;
    2449             : }
    2450             : #else   /* MS_WINDOWS */
    2451             : void*
    2452             : _Py_get_osfhandle_noraise(int fd)
    2453             : {
    2454             :     void *handle;
    2455             :     _Py_BEGIN_SUPPRESS_IPH
    2456             :     handle = (void*)_get_osfhandle(fd);
    2457             :     _Py_END_SUPPRESS_IPH
    2458             :     return handle;
    2459             : }
    2460             : 
    2461             : void*
    2462             : _Py_get_osfhandle(int fd)
    2463             : {
    2464             :     void *handle = _Py_get_osfhandle_noraise(fd);
    2465             :     if (handle == INVALID_HANDLE_VALUE)
    2466             :         PyErr_SetFromErrno(PyExc_OSError);
    2467             : 
    2468             :     return handle;
    2469             : }
    2470             : 
    2471             : int
    2472             : _Py_open_osfhandle_noraise(void *handle, int flags)
    2473             : {
    2474             :     int fd;
    2475             :     _Py_BEGIN_SUPPRESS_IPH
    2476             :     fd = _open_osfhandle((intptr_t)handle, flags);
    2477             :     _Py_END_SUPPRESS_IPH
    2478             :     return fd;
    2479             : }
    2480             : 
    2481             : int
    2482             : _Py_open_osfhandle(void *handle, int flags)
    2483             : {
    2484             :     int fd = _Py_open_osfhandle_noraise(handle, flags);
    2485             :     if (fd == -1)
    2486             :         PyErr_SetFromErrno(PyExc_OSError);
    2487             : 
    2488             :     return fd;
    2489             : }
    2490             : #endif  /* MS_WINDOWS */
    2491             : 
    2492             : int
    2493         389 : _Py_GetLocaleconvNumeric(struct lconv *lc,
    2494             :                          PyObject **decimal_point, PyObject **thousands_sep)
    2495             : {
    2496         389 :     assert(decimal_point != NULL);
    2497         389 :     assert(thousands_sep != NULL);
    2498             : 
    2499             : #ifndef MS_WINDOWS
    2500         389 :     int change_locale = 0;
    2501         389 :     if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
    2502           0 :         change_locale = 1;
    2503             :     }
    2504         389 :     if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
    2505           0 :         change_locale = 1;
    2506             :     }
    2507             : 
    2508             :     /* Keep a copy of the LC_CTYPE locale */
    2509         389 :     char *oldloc = NULL, *loc = NULL;
    2510         389 :     if (change_locale) {
    2511           0 :         oldloc = setlocale(LC_CTYPE, NULL);
    2512           0 :         if (!oldloc) {
    2513           0 :             PyErr_SetString(PyExc_RuntimeWarning,
    2514             :                             "failed to get LC_CTYPE locale");
    2515           0 :             return -1;
    2516             :         }
    2517             : 
    2518           0 :         oldloc = _PyMem_Strdup(oldloc);
    2519           0 :         if (!oldloc) {
    2520           0 :             PyErr_NoMemory();
    2521           0 :             return -1;
    2522             :         }
    2523             : 
    2524           0 :         loc = setlocale(LC_NUMERIC, NULL);
    2525           0 :         if (loc != NULL && strcmp(loc, oldloc) == 0) {
    2526           0 :             loc = NULL;
    2527             :         }
    2528             : 
    2529           0 :         if (loc != NULL) {
    2530             :             /* Only temporarily set the LC_CTYPE locale to the LC_NUMERIC
    2531             :                locale if the two locales differ and decimal_point and/or
    2532             :                thousands_sep are non-ASCII or longer than
    2533             :                1 byte */
    2534           0 :             setlocale(LC_CTYPE, loc);
    2535             :         }
    2536             :     }
    2537             : 
    2538             : #define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
    2539             : #else /* MS_WINDOWS */
    2540             : /* Use _W_* fields of Windows struct lconv */
    2541             : #define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
    2542             : #endif /* MS_WINDOWS */
    2543             : 
    2544         389 :     int res = -1;
    2545             : 
    2546         389 :     *decimal_point = GET_LOCALE_STRING(decimal_point);
    2547         389 :     if (*decimal_point == NULL) {
    2548           0 :         goto done;
    2549             :     }
    2550             : 
    2551         389 :     *thousands_sep = GET_LOCALE_STRING(thousands_sep);
    2552         389 :     if (*thousands_sep == NULL) {
    2553           0 :         goto done;
    2554             :     }
    2555             : 
    2556         389 :     res = 0;
    2557             : 
    2558         389 : done:
    2559             : #ifndef MS_WINDOWS
    2560         389 :     if (loc != NULL) {
    2561           0 :         setlocale(LC_CTYPE, oldloc);
    2562             :     }
    2563         389 :     PyMem_Free(oldloc);
    2564             : #endif
    2565         389 :     return res;
    2566             : 
    2567             : #undef GET_LOCALE_STRING
    2568             : }
    2569             : 
    2570             : /* Our selection logic for which function to use is as follows:
    2571             :  * 1. If close_range(2) is available, always prefer that; it's better for
    2572             :  *    contiguous ranges like this than fdwalk(3) which entails iterating over
    2573             :  *    the entire fd space and simply doing nothing for those outside the range.
    2574             :  * 2. If closefrom(2) is available, we'll attempt to use that next if we're
    2575             :  *    closing up to sysconf(_SC_OPEN_MAX).
    2576             :  * 2a. Fall back to fdwalk(3) if we're not closing up to sysconf(_SC_OPEN_MAX),
    2577             :  *    as that will be more performant if the range happens to have any chunk of
    2578             :  *    non-opened fd in the middle.
    2579             :  * 2b. If fdwalk(3) isn't available, just do a plain close(2) loop.
    2580             :  */
    2581             : #ifdef __FreeBSD__
    2582             : #  define USE_CLOSEFROM
    2583             : #endif /* __FreeBSD__ */
    2584             : 
    2585             : #ifdef HAVE_FDWALK
    2586             : #  define USE_FDWALK
    2587             : #endif /* HAVE_FDWALK */
    2588             : 
    2589             : #ifdef USE_FDWALK
    2590             : static int
    2591             : _fdwalk_close_func(void *lohi, int fd)
    2592             : {
    2593             :     int lo = ((int *)lohi)[0];
    2594             :     int hi = ((int *)lohi)[1];
    2595             : 
    2596             :     if (fd >= hi) {
    2597             :         return 1;
    2598             :     }
    2599             :     else if (fd >= lo) {
    2600             :         /* Ignore errors */
    2601             :         (void)close(fd);
    2602             :     }
    2603             :     return 0;
    2604             : }
    2605             : #endif /* USE_FDWALK */
    2606             : 
    2607             : /* Closes all file descriptors in [first, last], ignoring errors. */
    2608             : void
    2609           2 : _Py_closerange(int first, int last)
    2610             : {
    2611           2 :     first = Py_MAX(first, 0);
    2612             :     _Py_BEGIN_SUPPRESS_IPH
    2613             : #ifdef HAVE_CLOSE_RANGE
    2614             :     if (close_range(first, last, 0) == 0) {
    2615             :         /* close_range() ignores errors when it closes file descriptors.
    2616             :          * Possible reasons for an error return are lack of kernel support
    2617             :          * or denial of the underlying syscall by a seccomp sandbox on Linux.
    2618             :          * Fall back to other methods in case of any error. */
    2619             :     }
    2620             :     else
    2621             : #endif /* HAVE_CLOSE_RANGE */
    2622             : #ifdef USE_CLOSEFROM
    2623             :     if (last >= sysconf(_SC_OPEN_MAX)) {
    2624             :         /* Any errors encountered while closing file descriptors are ignored */
    2625             :         closefrom(first);
    2626             :     }
    2627             :     else
    2628             : #endif /* USE_CLOSEFROM */
    2629             : #ifdef USE_FDWALK
    2630             :     {
    2631             :         int lohi[2];
    2632             :         lohi[0] = first;
    2633             :         lohi[1] = last + 1;
    2634             :         fdwalk(_fdwalk_close_func, lohi);
    2635             :     }
    2636             : #else
    2637             :     {
    2638          12 :         for (int i = first; i <= last; i++) {
    2639             :             /* Ignore errors */
    2640          10 :             (void)close(i);
    2641             :         }
    2642             :     }
    2643             : #endif /* USE_FDWALK */
    2644             :     _Py_END_SUPPRESS_IPH
    2645           2 : }

Generated by: LCOV version 1.14