Line data Source code
1 : #include "Python.h"
2 : #include "pycore_fileutils.h" // fileutils definitions
3 : #include "pycore_runtime.h" // _PyRuntime
4 : #include "osdefs.h" // SEP
5 : #include <locale.h>
6 : #include <stdlib.h> // mbstowcs()
7 :
8 : #ifdef MS_WINDOWS
9 : # include <malloc.h>
10 : # include <windows.h>
11 : # include <pathcch.h> // PathCchCombineEx
12 : extern int winerror_to_errno(int);
13 : #endif
14 :
15 : #ifdef HAVE_LANGINFO_H
16 : #include <langinfo.h>
17 : #endif
18 :
19 : #ifdef HAVE_SYS_IOCTL_H
20 : #include <sys/ioctl.h>
21 : #endif
22 :
23 : #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
24 : #include <iconv.h>
25 : #endif
26 :
27 : #ifdef HAVE_FCNTL_H
28 : #include <fcntl.h>
29 : #endif /* HAVE_FCNTL_H */
30 :
31 : #ifdef O_CLOEXEC
32 : /* Does open() support the O_CLOEXEC flag? Possible values:
33 :
34 : -1: unknown
35 : 0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
36 : 1: open() supports O_CLOEXEC flag, close-on-exec is set
37 :
38 : The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
39 : and os.open(). */
40 : int _Py_open_cloexec_works = -1;
41 : #endif
42 :
43 : // The value must be the same in unicodeobject.c.
44 : #define MAX_UNICODE 0x10ffff
45 :
46 : // mbstowcs() and mbrtowc() errors
47 : static const size_t DECODE_ERROR = ((size_t)-1);
48 : static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
49 :
50 :
51 : static int
52 2572250 : get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
53 : {
54 2572250 : switch (errors)
55 : {
56 18525 : case _Py_ERROR_STRICT:
57 18525 : *surrogateescape = 0;
58 18525 : return 0;
59 2553720 : case _Py_ERROR_SURROGATEESCAPE:
60 2553720 : *surrogateescape = 1;
61 2553720 : return 0;
62 4 : default:
63 4 : return -1;
64 : }
65 : }
66 :
67 :
68 : PyObject *
69 1 : _Py_device_encoding(int fd)
70 : {
71 : int valid;
72 1 : Py_BEGIN_ALLOW_THREADS
73 : _Py_BEGIN_SUPPRESS_IPH
74 1 : valid = isatty(fd);
75 : _Py_END_SUPPRESS_IPH
76 1 : Py_END_ALLOW_THREADS
77 1 : if (!valid)
78 1 : Py_RETURN_NONE;
79 :
80 : #if defined(MS_WINDOWS)
81 : UINT cp;
82 : if (fd == 0)
83 : cp = GetConsoleCP();
84 : else if (fd == 1 || fd == 2)
85 : cp = GetConsoleOutputCP();
86 : else
87 : cp = 0;
88 : /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
89 : has no console */
90 : if (cp == 0) {
91 : Py_RETURN_NONE;
92 : }
93 :
94 : return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
95 : #else
96 0 : if (_PyRuntime.preconfig.utf8_mode) {
97 : _Py_DECLARE_STR(utf_8, "utf-8");
98 0 : return Py_NewRef(&_Py_STR(utf_8));
99 : }
100 0 : return _Py_GetLocaleEncodingObject();
101 : #endif
102 : }
103 :
104 :
105 : static size_t
106 41008300 : is_valid_wide_char(wchar_t ch)
107 : {
108 : #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
109 : /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
110 : for non-Unicode locales, which makes values higher than MAX_UNICODE
111 : possibly valid. */
112 : return 1;
113 : #endif
114 41008300 : if (Py_UNICODE_IS_SURROGATE(ch)) {
115 : // Reject lone surrogate characters
116 0 : return 0;
117 : }
118 41008300 : if (ch > MAX_UNICODE) {
119 : // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
120 : // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
121 : // it creates characters outside the [U+0000; U+10ffff] range:
122 : // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
123 3 : return 0;
124 : }
125 41008300 : return 1;
126 : }
127 :
128 :
129 : static size_t
130 4941290 : _Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
131 : {
132 4941290 : size_t count = mbstowcs(dest, src, n);
133 4941290 : if (dest != NULL && count != DECODE_ERROR) {
134 43467200 : for (size_t i=0; i < count; i++) {
135 41007900 : wchar_t ch = dest[i];
136 41007900 : if (!is_valid_wide_char(ch)) {
137 0 : return DECODE_ERROR;
138 : }
139 : }
140 : }
141 4941290 : return count;
142 : }
143 :
144 :
145 : #ifdef HAVE_MBRTOWC
146 : static size_t
147 486 : _Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
148 : {
149 486 : assert(pwc != NULL);
150 486 : size_t count = mbrtowc(pwc, str, len, pmbs);
151 486 : if (count != 0 && count != DECODE_ERROR && count != INCOMPLETE_CHARACTER) {
152 362 : if (!is_valid_wide_char(*pwc)) {
153 3 : return DECODE_ERROR;
154 : }
155 : }
156 483 : return count;
157 : }
158 : #endif
159 :
160 :
161 : #if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
162 :
163 : #define USE_FORCE_ASCII
164 :
165 : extern int _Py_normalize_encoding(const char *, char *, size_t);
166 :
167 : /* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
168 : and POSIX locale. nl_langinfo(CODESET) announces an alias of the
169 : ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
170 : ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
171 : locale.getpreferredencoding() codec. For example, if command line arguments
172 : are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
173 : UnicodeEncodeError instead of retrieving the original byte string.
174 :
175 : The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
176 : nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
177 : one byte in range 0x80-0xff can be decoded from the locale encoding. The
178 : workaround is also enabled on error, for example if getting the locale
179 : failed.
180 :
181 : On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
182 : announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
183 : ASCII encoding in this case.
184 :
185 : Values of force_ascii:
186 :
187 : 1: the workaround is used: Py_EncodeLocale() uses
188 : encode_ascii_surrogateescape() and Py_DecodeLocale() uses
189 : decode_ascii()
190 : 0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
191 : Py_DecodeLocale() uses mbstowcs()
192 : -1: unknown, need to call check_force_ascii() to get the value
193 : */
194 : static int force_ascii = -1;
195 :
/* Decide whether the ASCII workaround must be used for the current
   LC_CTYPE locale.

   Return 1 if ASCII must be forced, 0 otherwise. 1 is also returned when
   an error occurs (the locale or the codeset cannot be read, or the codeset
   name cannot be normalized) and when nl_langinfo(CODESET) is not
   available. */
static int
check_force_ascii(void)
{
    char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* mbstowcs() expects a null-terminated multibyte string: terminate
           the one-byte probe so the decoder never reads past the buffer. */
        char probe[2] = {(char)0xA7, '\0'};
        wchar_t wch;
        size_t res = _Py_mbstowcs(&wch, probe, 1);
        if (res != DECODE_ERROR && wch == L'\xA7') {
            /* On HP-UX with C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    const char* ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char **alias=ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i=0x80; i<=0xff; i++) {
        /* mbstowcs() expects a null-terminated multibyte string: terminate
           the one-byte probe so a byte treated as the start of a longer
           sequence cannot make the decoder read past the buffer. */
        char probe[2] = {(char)(unsigned char)i, '\0'};
        wchar_t wch[1];

        size_t res = _Py_mbstowcs(wch, probe, 1);
        if (res != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
295 :
296 :
297 : int
298 2897 : _Py_GetForceASCII(void)
299 : {
300 2897 : if (force_ascii == -1) {
301 82 : force_ascii = check_force_ascii();
302 : }
303 2897 : return force_ascii;
304 : }
305 :
306 :
307 : void
308 6103 : _Py_ResetForceASCII(void)
309 : {
310 6103 : force_ascii = -1;
311 6103 : }
312 :
313 :
314 : static int
315 0 : encode_ascii(const wchar_t *text, char **str,
316 : size_t *error_pos, const char **reason,
317 : int raw_malloc, _Py_error_handler errors)
318 : {
319 0 : char *result = NULL, *out;
320 : size_t len, i;
321 : wchar_t ch;
322 :
323 : int surrogateescape;
324 0 : if (get_surrogateescape(errors, &surrogateescape) < 0) {
325 0 : return -3;
326 : }
327 :
328 0 : len = wcslen(text);
329 :
330 : /* +1 for NULL byte */
331 0 : if (raw_malloc) {
332 0 : result = PyMem_RawMalloc(len + 1);
333 : }
334 : else {
335 0 : result = PyMem_Malloc(len + 1);
336 : }
337 0 : if (result == NULL) {
338 0 : return -1;
339 : }
340 :
341 0 : out = result;
342 0 : for (i=0; i<len; i++) {
343 0 : ch = text[i];
344 :
345 0 : if (ch <= 0x7f) {
346 : /* ASCII character */
347 0 : *out++ = (char)ch;
348 : }
349 0 : else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
350 : /* UTF-8b surrogate */
351 0 : *out++ = (char)(ch - 0xdc00);
352 : }
353 : else {
354 0 : if (raw_malloc) {
355 0 : PyMem_RawFree(result);
356 : }
357 : else {
358 0 : PyMem_Free(result);
359 : }
360 0 : if (error_pos != NULL) {
361 0 : *error_pos = i;
362 : }
363 0 : if (reason) {
364 0 : *reason = "encoding error";
365 : }
366 0 : return -2;
367 : }
368 : }
369 0 : *out = '\0';
370 0 : *str = result;
371 0 : return 0;
372 : }
373 : #else
/* The ASCII workaround is not compiled in this configuration: it is never
   forced. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
379 :
/* The ASCII workaround is not compiled in this configuration: there is no
   cached state to reset. */
void
_Py_ResetForceASCII(void)
{
}
385 : #endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
386 :
387 :
388 : #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
389 : static int
390 0 : decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
391 : const char **reason, _Py_error_handler errors)
392 : {
393 : wchar_t *res;
394 : unsigned char *in;
395 : wchar_t *out;
396 0 : size_t argsize = strlen(arg) + 1;
397 :
398 : int surrogateescape;
399 0 : if (get_surrogateescape(errors, &surrogateescape) < 0) {
400 0 : return -3;
401 : }
402 :
403 0 : if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
404 0 : return -1;
405 : }
406 0 : res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
407 0 : if (!res) {
408 0 : return -1;
409 : }
410 :
411 0 : out = res;
412 0 : for (in = (unsigned char*)arg; *in; in++) {
413 0 : unsigned char ch = *in;
414 0 : if (ch < 128) {
415 0 : *out++ = ch;
416 : }
417 : else {
418 0 : if (!surrogateescape) {
419 0 : PyMem_RawFree(res);
420 0 : if (wlen) {
421 0 : *wlen = in - (unsigned char*)arg;
422 : }
423 0 : if (reason) {
424 0 : *reason = "decoding error";
425 : }
426 0 : return -2;
427 : }
428 0 : *out++ = 0xdc00 + ch;
429 : }
430 : }
431 0 : *out = 0;
432 :
433 0 : if (wlen != NULL) {
434 0 : *wlen = out - res;
435 : }
436 0 : *wstr = res;
437 0 : return 0;
438 : }
439 : #endif /* !HAVE_MBRTOWC || USE_FORCE_ASCII */
440 :
441 : static int
442 2459330 : decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
443 : const char **reason, _Py_error_handler errors)
444 : {
445 : wchar_t *res;
446 : size_t argsize;
447 : size_t count;
448 : #ifdef HAVE_MBRTOWC
449 : unsigned char *in;
450 : wchar_t *out;
451 : mbstate_t mbs;
452 : #endif
453 :
454 : int surrogateescape;
455 2459330 : if (get_surrogateescape(errors, &surrogateescape) < 0) {
456 2 : return -3;
457 : }
458 :
459 : #ifdef HAVE_BROKEN_MBSTOWCS
460 : /* Some platforms have a broken implementation of
461 : * mbstowcs which does not count the characters that
462 : * would result from conversion. Use an upper bound.
463 : */
464 : argsize = strlen(arg);
465 : #else
466 2459330 : argsize = _Py_mbstowcs(NULL, arg, 0);
467 : #endif
468 2459330 : if (argsize != DECODE_ERROR) {
469 2459310 : if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
470 0 : return -1;
471 : }
472 2459310 : res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
473 2459310 : if (!res) {
474 0 : return -1;
475 : }
476 :
477 2459310 : count = _Py_mbstowcs(res, arg, argsize + 1);
478 2459310 : if (count != DECODE_ERROR) {
479 2459310 : *wstr = res;
480 2459310 : if (wlen != NULL) {
481 2451820 : *wlen = count;
482 : }
483 2459310 : return 0;
484 : }
485 0 : PyMem_RawFree(res);
486 : }
487 :
488 : /* Conversion failed. Fall back to escaping with surrogateescape. */
489 : #ifdef HAVE_MBRTOWC
490 : /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
491 :
492 : /* Overallocate; as multi-byte characters are in the argument, the
493 : actual output could use less memory. */
494 22 : argsize = strlen(arg) + 1;
495 22 : if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
496 0 : return -1;
497 : }
498 22 : res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
499 22 : if (!res) {
500 0 : return -1;
501 : }
502 :
503 22 : in = (unsigned char*)arg;
504 22 : out = res;
505 22 : memset(&mbs, 0, sizeof mbs);
506 486 : while (argsize) {
507 486 : size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
508 486 : if (converted == 0) {
509 : /* Reached end of string; null char stored. */
510 18 : break;
511 : }
512 :
513 468 : if (converted == INCOMPLETE_CHARACTER) {
514 : /* Incomplete character. This should never happen,
515 : since we provide everything that we have -
516 : unless there is a bug in the C library, or I
517 : misunderstood how mbrtowc works. */
518 0 : goto decode_error;
519 : }
520 :
521 468 : if (converted == DECODE_ERROR) {
522 109 : if (!surrogateescape) {
523 4 : goto decode_error;
524 : }
525 :
526 : /* Decoding error. Escape as UTF-8b, and start over in the initial
527 : shift state. */
528 105 : *out++ = 0xdc00 + *in++;
529 105 : argsize--;
530 105 : memset(&mbs, 0, sizeof mbs);
531 105 : continue;
532 : }
533 :
534 : // _Py_mbrtowc() reject lone surrogate characters
535 359 : assert(!Py_UNICODE_IS_SURROGATE(*out));
536 :
537 : /* successfully converted some bytes */
538 359 : in += converted;
539 359 : argsize -= converted;
540 359 : out++;
541 : }
542 18 : if (wlen != NULL) {
543 18 : *wlen = out - res;
544 : }
545 18 : *wstr = res;
546 18 : return 0;
547 :
548 4 : decode_error:
549 4 : PyMem_RawFree(res);
550 4 : if (wlen) {
551 4 : *wlen = in - (unsigned char*)arg;
552 : }
553 4 : if (reason) {
554 4 : *reason = "decoding error";
555 : }
556 4 : return -2;
557 : #else /* HAVE_MBRTOWC */
558 : /* Cannot use C locale for escaping; manually escape as if charset
559 : is ASCII (i.e. escape all bytes > 128. This will still roundtrip
560 : correctly in the locale's charset, which must be an ASCII superset. */
561 : return decode_ascii(arg, wstr, wlen, reason, errors);
562 : #endif /* HAVE_MBRTOWC */
563 : }
564 :
565 :
566 : /* Decode a byte string from the locale encoding.
567 :
568 : Use the strict error handler if 'surrogateescape' is zero. Use the
569 : surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
570 : bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
571 : can be decoded as a surrogate character, escape the bytes using the
572 : surrogateescape error handler instead of decoding them.
573 :
574 : On success, return 0 and write the newly allocated wide character string into
575 : *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
576 : the number of wide characters excluding the null character into *wlen.
577 :
578 : On memory allocation failure, return -1.
579 :
580 : On decoding error, return -2. If wlen is not NULL, write the start of
581 : invalid byte sequence in the input string into *wlen. If reason is not NULL,
582 : write the decoding error message into *reason.
583 :
584 : Return -3 if the error handler 'errors' is not supported.
585 :
586 : Use the Py_EncodeLocaleEx() function to encode the character string back to
587 : a byte string. */
588 : int
589 2475030 : _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
590 : const char **reason,
591 : int current_locale, _Py_error_handler errors)
592 : {
593 2475030 : if (current_locale) {
594 : #ifdef _Py_FORCE_UTF8_LOCALE
595 : return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
596 : errors);
597 : #else
598 1396150 : return decode_current_locale(arg, wstr, wlen, reason, errors);
599 : #endif
600 : }
601 :
602 : #ifdef _Py_FORCE_UTF8_FS_ENCODING
603 : return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
604 : errors);
605 : #else
606 1078890 : int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
607 : #ifdef MS_WINDOWS
608 : use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
609 : #endif
610 1078890 : if (use_utf8) {
611 23184 : return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
612 : errors);
613 : }
614 :
615 : #ifdef USE_FORCE_ASCII
616 1055700 : if (force_ascii == -1) {
617 5743 : force_ascii = check_force_ascii();
618 : }
619 :
620 1055700 : if (force_ascii) {
621 : /* force ASCII encoding to workaround mbstowcs() issue */
622 0 : return decode_ascii(arg, wstr, wlen, reason, errors);
623 : }
624 : #endif
625 :
626 1055700 : return decode_current_locale(arg, wstr, wlen, reason, errors);
627 : #endif /* !_Py_FORCE_UTF8_FS_ENCODING */
628 : }
629 :
630 :
631 : /* Decode a byte string from the locale encoding with the
632 : surrogateescape error handler: undecodable bytes are decoded as characters
633 : in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
634 : character, escape the bytes using the surrogateescape error handler instead
635 : of decoding them.
636 :
637 : Return a pointer to a newly allocated wide character string, use
638 : PyMem_RawFree() to free the memory. If size is not NULL, write the number of
639 : wide characters excluding the null character into *size
640 :
641 : Return NULL on decoding error or memory allocation error. If *size* is not
642 : NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
643 : decoding error.
644 :
645 : Decoding errors should never happen, unless there is a bug in the C
646 : library.
647 :
648 : Use the Py_EncodeLocale() function to encode the character string back to a
649 : byte string. */
650 : wchar_t*
651 54342 : Py_DecodeLocale(const char* arg, size_t *wlen)
652 : {
653 : wchar_t *wstr;
654 54342 : int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
655 : NULL, 0,
656 : _Py_ERROR_SURROGATEESCAPE);
657 54342 : if (res != 0) {
658 0 : assert(res != -3);
659 0 : if (wlen != NULL) {
660 0 : *wlen = (size_t)res;
661 : }
662 0 : return NULL;
663 : }
664 54342 : return wstr;
665 : }
666 :
667 :
668 : static int
669 112919 : encode_current_locale(const wchar_t *text, char **str,
670 : size_t *error_pos, const char **reason,
671 : int raw_malloc, _Py_error_handler errors)
672 : {
673 112919 : const size_t len = wcslen(text);
674 112919 : char *result = NULL, *bytes = NULL;
675 : size_t i, size, converted;
676 : wchar_t c, buf[2];
677 :
678 : int surrogateescape;
679 112919 : if (get_surrogateescape(errors, &surrogateescape) < 0) {
680 2 : return -3;
681 : }
682 :
683 : /* The function works in two steps:
684 : 1. compute the length of the output buffer in bytes (size)
685 : 2. outputs the bytes */
686 112917 : size = 0;
687 112917 : buf[1] = 0;
688 : while (1) {
689 11800000 : for (i=0; i < len; i++) {
690 11574200 : c = text[i];
691 11574200 : if (c >= 0xdc80 && c <= 0xdcff) {
692 5 : if (!surrogateescape) {
693 1 : goto encode_error;
694 : }
695 : /* UTF-8b surrogate */
696 4 : if (bytes != NULL) {
697 2 : *bytes++ = c - 0xdc00;
698 2 : size--;
699 : }
700 : else {
701 2 : size++;
702 : }
703 4 : continue;
704 : }
705 : else {
706 11574200 : buf[0] = c;
707 11574200 : if (bytes != NULL) {
708 5787100 : converted = wcstombs(bytes, buf, size);
709 : }
710 : else {
711 5787110 : converted = wcstombs(NULL, buf, 0);
712 : }
713 11574200 : if (converted == DECODE_ERROR) {
714 0 : goto encode_error;
715 : }
716 11574200 : if (bytes != NULL) {
717 5787100 : bytes += converted;
718 5787100 : size -= converted;
719 : }
720 : else {
721 5787110 : size += converted;
722 : }
723 : }
724 : }
725 225832 : if (result != NULL) {
726 112916 : *bytes = '\0';
727 112916 : break;
728 : }
729 :
730 112916 : size += 1; /* nul byte at the end */
731 112916 : if (raw_malloc) {
732 112916 : result = PyMem_RawMalloc(size);
733 : }
734 : else {
735 0 : result = PyMem_Malloc(size);
736 : }
737 112916 : if (result == NULL) {
738 0 : return -1;
739 : }
740 112916 : bytes = result;
741 : }
742 112916 : *str = result;
743 112916 : return 0;
744 :
745 1 : encode_error:
746 1 : if (raw_malloc) {
747 1 : PyMem_RawFree(result);
748 : }
749 : else {
750 0 : PyMem_Free(result);
751 : }
752 1 : if (error_pos != NULL) {
753 1 : *error_pos = i;
754 : }
755 1 : if (reason) {
756 1 : *reason = "encoding error";
757 : }
758 1 : return -2;
759 : }
760 :
761 :
762 : /* Encode a string to the locale encoding.
763 :
764 : Parameters:
765 :
766 : * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
767 : of PyMem_Malloc().
768 : * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
769 : Python filesystem encoding.
770 : * errors: error handler like "strict" or "surrogateescape".
771 :
772 : Return value:
773 :
774 : 0: success, *str is set to a newly allocated decoded string.
775 : -1: memory allocation failure
776 : -2: encoding error, set *error_pos and *reason (if set).
777 : -3: the error handler 'errors' is not supported.
778 : */
779 : static int
780 115670 : encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
781 : const char **reason,
782 : int raw_malloc, int current_locale, _Py_error_handler errors)
783 : {
784 115670 : if (current_locale) {
785 : #ifdef _Py_FORCE_UTF8_LOCALE
786 : return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
787 : raw_malloc, errors);
788 : #else
789 1392 : return encode_current_locale(text, str, error_pos, reason,
790 : raw_malloc, errors);
791 : #endif
792 : }
793 :
794 : #ifdef _Py_FORCE_UTF8_FS_ENCODING
795 : return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
796 : raw_malloc, errors);
797 : #else
798 114278 : int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
799 : #ifdef MS_WINDOWS
800 : use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
801 : #endif
802 114278 : if (use_utf8) {
803 2751 : return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
804 : raw_malloc, errors);
805 : }
806 :
807 : #ifdef USE_FORCE_ASCII
808 111527 : if (force_ascii == -1) {
809 0 : force_ascii = check_force_ascii();
810 : }
811 :
812 111527 : if (force_ascii) {
813 0 : return encode_ascii(text, str, error_pos, reason,
814 : raw_malloc, errors);
815 : }
816 : #endif
817 :
818 111527 : return encode_current_locale(text, str, error_pos, reason,
819 : raw_malloc, errors);
820 : #endif /* _Py_FORCE_UTF8_FS_ENCODING */
821 : }
822 :
823 : static char*
824 22813 : encode_locale(const wchar_t *text, size_t *error_pos,
825 : int raw_malloc, int current_locale)
826 : {
827 : char *str;
828 22813 : int res = encode_locale_ex(text, &str, error_pos, NULL,
829 : raw_malloc, current_locale,
830 : _Py_ERROR_SURROGATEESCAPE);
831 22813 : if (res != -2 && error_pos) {
832 0 : *error_pos = (size_t)-1;
833 : }
834 22813 : if (res != 0) {
835 0 : return NULL;
836 : }
837 22813 : return str;
838 : }
839 :
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
   to the index of the invalid character on encoding error.

   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
   character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* allocate with PyMem_Malloc() */
    const int current_locale = 0;   /* use the filesystem encoding */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
857 :
858 :
/* Same as Py_EncodeLocale(), except that the result is allocated with
   PyMem_RawMalloc() and so must be freed by PyMem_RawFree() instead of
   PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;
    const int current_locale = 0;   /* use the filesystem encoding */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
866 :
867 :
868 : int
869 92857 : _Py_EncodeLocaleEx(const wchar_t *text, char **str,
870 : size_t *error_pos, const char **reason,
871 : int current_locale, _Py_error_handler errors)
872 : {
873 92857 : return encode_locale_ex(text, str, error_pos, reason, 1,
874 : current_locale, errors);
875 : }
876 :
877 :
878 : // Get the current locale encoding name:
879 : //
880 : // - Return "utf-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
881 : // - Return "utf-8" if the UTF-8 Mode is enabled
882 : // - On Windows, return the ANSI code page (ex: "cp1250")
883 : // - Return "utf-8" if nl_langinfo(CODESET) returns an empty string.
884 : // - Otherwise, return nl_langinfo(CODESET).
885 : //
886 : // Return NULL on memory allocation failure.
887 : //
888 : // See also config_get_locale_encoding()
889 : wchar_t*
890 7483 : _Py_GetLocaleEncoding(void)
891 : {
892 : #ifdef _Py_FORCE_UTF8_LOCALE
893 : // On Android langinfo.h and CODESET are missing,
894 : // and UTF-8 is always used in mbstowcs() and wcstombs().
895 : return _PyMem_RawWcsdup(L"utf-8");
896 : #else
897 :
898 : #ifdef MS_WINDOWS
899 : wchar_t encoding[23];
900 : unsigned int ansi_codepage = GetACP();
901 : swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
902 : encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
903 : return _PyMem_RawWcsdup(encoding);
904 : #else
905 7483 : const char *encoding = nl_langinfo(CODESET);
906 7483 : if (!encoding || encoding[0] == '\0') {
907 : // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
908 : // macOS if the LC_CTYPE locale is not supported.
909 0 : return _PyMem_RawWcsdup(L"utf-8");
910 : }
911 :
912 : wchar_t *wstr;
913 7483 : int res = decode_current_locale(encoding, &wstr, NULL,
914 : NULL, _Py_ERROR_SURROGATEESCAPE);
915 7483 : if (res < 0) {
916 0 : return NULL;
917 : }
918 7483 : return wstr;
919 : #endif // !MS_WINDOWS
920 :
921 : #endif // !_Py_FORCE_UTF8_LOCALE
922 : }
923 :
924 :
925 : PyObject *
926 1870 : _Py_GetLocaleEncodingObject(void)
927 : {
928 1870 : wchar_t *encoding = _Py_GetLocaleEncoding();
929 1870 : if (encoding == NULL) {
930 0 : PyErr_NoMemory();
931 0 : return NULL;
932 : }
933 :
934 1870 : PyObject *str = PyUnicode_FromWideChar(encoding, -1);
935 1870 : PyMem_RawFree(encoding);
936 1870 : return str;
937 : }
938 :
939 : #ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
940 :
941 : /* Check whether current locale uses Unicode as internal wchar_t form. */
942 : int
943 : _Py_LocaleUsesNonUnicodeWchar(void)
944 : {
945 : /* Oracle Solaris uses non-Unicode internal wchar_t form for
946 : non-Unicode locales and hence needs conversion to UTF first. */
947 : char* codeset = nl_langinfo(CODESET);
948 : if (!codeset) {
949 : return 0;
950 : }
951 : /* 646 refers to ISO/IEC 646 standard that corresponds to ASCII encoding */
952 : return (strcmp(codeset, "UTF-8") != 0 && strcmp(codeset, "646") != 0);
953 : }
954 :
955 : static wchar_t *
956 : _Py_ConvertWCharForm(const wchar_t *source, Py_ssize_t size,
957 : const char *tocode, const char *fromcode)
958 : {
959 : static_assert(sizeof(wchar_t) == 4, "wchar_t must be 32-bit");
960 :
961 : /* Ensure we won't overflow the size. */
962 : if (size > (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t))) {
963 : PyErr_NoMemory();
964 : return NULL;
965 : }
966 :
967 : /* the string doesn't have to be NULL terminated */
968 : wchar_t* target = PyMem_Malloc(size * sizeof(wchar_t));
969 : if (target == NULL) {
970 : PyErr_NoMemory();
971 : return NULL;
972 : }
973 :
974 : iconv_t cd = iconv_open(tocode, fromcode);
975 : if (cd == (iconv_t)-1) {
976 : PyErr_Format(PyExc_ValueError, "iconv_open() failed");
977 : PyMem_Free(target);
978 : return NULL;
979 : }
980 :
981 : char *inbuf = (char *) source;
982 : char *outbuf = (char *) target;
983 : size_t inbytesleft = sizeof(wchar_t) * size;
984 : size_t outbytesleft = inbytesleft;
985 :
986 : size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
987 : if (ret == DECODE_ERROR) {
988 : PyErr_Format(PyExc_ValueError, "iconv() failed");
989 : PyMem_Free(target);
990 : iconv_close(cd);
991 : return NULL;
992 : }
993 :
994 : iconv_close(cd);
995 : return target;
996 : }
997 :
998 : /* Convert a wide character string to the UCS-4 encoded string. This
999 : is necessary on systems where internal form of wchar_t are not Unicode
1000 : code points (e.g. Oracle Solaris).
1001 :
1002 : Return a pointer to a newly allocated string, use PyMem_Free() to free
1003 : the memory. Return NULL and raise exception on conversion or memory
1004 : allocation error. */
1005 : wchar_t *
1006 : _Py_DecodeNonUnicodeWchar(const wchar_t *native, Py_ssize_t size)
1007 : {
1008 : return _Py_ConvertWCharForm(native, size, "UCS-4-INTERNAL", "wchar_t");
1009 : }
1010 :
1011 : /* Convert a UCS-4 encoded string to native wide character string. This
1012 : is necessary on systems where internal form of wchar_t are not Unicode
1013 : code points (e.g. Oracle Solaris).
1014 :
1015 : The conversion is done in place. This can be done because both wchar_t
1016 : and UCS-4 use 4-byte encoding, and one wchar_t symbol always correspond
1017 : to a single UCS-4 symbol and vice versa. (This is true for Oracle Solaris,
1018 : which is currently the only system using these functions; it doesn't have
1019 : to be for other systems).
1020 :
1021 : Return 0 on success. Return -1 and raise exception on conversion
1022 : or memory allocation error. */
1023 : int
1024 : _Py_EncodeNonUnicodeWchar_InPlace(wchar_t *unicode, Py_ssize_t size)
1025 : {
1026 : wchar_t* result = _Py_ConvertWCharForm(unicode, size, "wchar_t", "UCS-4-INTERNAL");
1027 : if (!result) {
1028 : return -1;
1029 : }
1030 : memcpy(unicode, result, size * sizeof(wchar_t));
1031 : PyMem_Free(result);
1032 : return 0;
1033 : }
1034 : #endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */
1035 :
1036 : #ifdef MS_WINDOWS
1037 : static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
1038 :
1039 : static void
1040 : FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
1041 : {
1042 : /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
1043 : /* Cannot simply cast and dereference in_ptr,
1044 : since it might not be aligned properly */
1045 : __int64 in;
1046 : memcpy(&in, in_ptr, sizeof(in));
1047 : *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1048 : *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
1049 : }
1050 :
1051 : void
1052 : _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
1053 : {
1054 : /* XXX endianness */
1055 : __int64 out;
1056 : out = time_in + secs_between_epochs;
1057 : out = out * 10000000 + nsec_in / 100;
1058 : memcpy(out_ptr, &out, sizeof(out));
1059 : }
1060 :
1061 : /* Below, we *know* that ugo+r is 0444 */
1062 : #if _S_IREAD != 0400
1063 : #error Unsupported C library
1064 : #endif
1065 : static int
1066 : attributes_to_mode(DWORD attr)
1067 : {
1068 : int m = 0;
1069 : if (attr & FILE_ATTRIBUTE_DIRECTORY)
1070 : m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
1071 : else
1072 : m |= _S_IFREG;
1073 : if (attr & FILE_ATTRIBUTE_READONLY)
1074 : m |= 0444;
1075 : else
1076 : m |= 0666;
1077 : return m;
1078 : }
1079 :
1080 : void
1081 : _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
1082 : struct _Py_stat_struct *result)
1083 : {
1084 : memset(result, 0, sizeof(*result));
1085 : result->st_mode = attributes_to_mode(info->dwFileAttributes);
1086 : result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
1087 : result->st_dev = info->dwVolumeSerialNumber;
1088 : result->st_rdev = result->st_dev;
1089 : FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
1090 : FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1091 : FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
1092 : result->st_nlink = info->nNumberOfLinks;
1093 : result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
1094 : /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1095 : open other name surrogate reparse points without traversing them. To
1096 : detect/handle these, check st_file_attributes and st_reparse_tag. */
1097 : result->st_reparse_tag = reparse_tag;
1098 : if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1099 : reparse_tag == IO_REPARSE_TAG_SYMLINK) {
1100 : /* first clear the S_IFMT bits */
1101 : result->st_mode ^= (result->st_mode & S_IFMT);
1102 : /* now set the bits that make this a symlink */
1103 : result->st_mode |= S_IFLNK;
1104 : }
1105 : result->st_file_attributes = info->dwFileAttributes;
1106 : }
1107 : #endif
1108 :
/* Return information about a file.

   On POSIX, use fstat().

   On Windows, use GetFileType() and GetFileInformationByHandle() which support
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
   #23152.

   On Windows, set the last Windows error and return nonzero on error. On
   POSIX, set errno and return nonzero on error. Fill status and return 0 on
   success. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    BY_HANDLE_FILE_INFORMATION info;
    HANDLE h = _Py_get_osfhandle_noraise(fd);

    if (h == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    int type = GetFileType(h);
    switch (type) {
    case FILE_TYPE_DISK:
        /* regular on-disk file: query the full information below */
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    case FILE_TYPE_UNKNOWN: {
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* valid but unknown file: status stays zeroed */
        return 0;
    }
    default:
        /* any other non-disk type: status stays zeroed */
        return 0;
    }

    if (!GetFileInformationByHandle(h, &info)) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    _Py_attribute_data_to_stat(&info, 0, status);
    /* specific to fstat() */
    status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow;
    return 0;
#else
    int res = fstat(fd, status);
    return res;
#endif
}
1172 :
1173 : /* Return information about a file.
1174 :
1175 : On POSIX, use fstat().
1176 :
1177 : On Windows, use GetFileType() and GetFileInformationByHandle() which support
1178 : files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger
1179 : than 2 GiB because the file size type is a signed 32-bit integer: see issue
1180 : #23152.
1181 :
1182 : Raise an exception and return -1 on error. On Windows, set the last Windows
1183 : error on error. On POSIX, set errno on error. Fill status and return 0 on
1184 : success.
1185 :
1186 : Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1187 : to call fstat(). The caller must hold the GIL. */
1188 : int
1189 342 : _Py_fstat(int fd, struct _Py_stat_struct *status)
1190 : {
1191 : int res;
1192 :
1193 342 : assert(PyGILState_Check());
1194 :
1195 342 : Py_BEGIN_ALLOW_THREADS
1196 342 : res = _Py_fstat_noraise(fd, status);
1197 342 : Py_END_ALLOW_THREADS
1198 :
1199 342 : if (res != 0) {
1200 : #ifdef MS_WINDOWS
1201 : PyErr_SetFromWindowsErr(0);
1202 : #else
1203 2 : PyErr_SetFromErrno(PyExc_OSError);
1204 : #endif
1205 2 : return -1;
1206 : }
1207 340 : return 0;
1208 : }
1209 :
/* Like _Py_stat() but with a raw filename.

   On Windows, call _wstat() and copy only st_mode into buf. Otherwise,
   encode the path with _Py_EncodeLocaleRaw() and call stat(); if the path
   cannot be encoded, set errno to EINVAL and return -1. */
int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
#ifdef MS_WINDOWS
    struct _stat wstatbuf;
    int err = _wstat(path, &wstatbuf);
    if (err == 0) {
        buf->st_mode = wstatbuf.st_mode;
    }
    return err;
#else
    char *encoded = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded == NULL) {
        errno = EINVAL;
        return -1;
    }

    int err = stat(encoded, buf);
    PyMem_RawFree(encoded);
    return err;
#endif
}
1233 :
1234 :
1235 : /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1236 : call stat() otherwise. Only fill st_mode attribute on Windows.
1237 :
1238 : Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1239 : raised. */
1240 :
1241 : int
1242 0 : _Py_stat(PyObject *path, struct stat *statbuf)
1243 : {
1244 : #ifdef MS_WINDOWS
1245 : int err;
1246 :
1247 : wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1248 : if (wpath == NULL)
1249 : return -2;
1250 :
1251 : err = _Py_wstat(wpath, statbuf);
1252 : PyMem_Free(wpath);
1253 : return err;
1254 : #else
1255 : int ret;
1256 : PyObject *bytes;
1257 : char *cpath;
1258 :
1259 0 : bytes = PyUnicode_EncodeFSDefault(path);
1260 0 : if (bytes == NULL)
1261 0 : return -2;
1262 :
1263 : /* check for embedded null bytes */
1264 0 : if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1265 0 : Py_DECREF(bytes);
1266 0 : return -2;
1267 : }
1268 :
1269 0 : ret = stat(cpath, statbuf);
1270 0 : Py_DECREF(bytes);
1271 0 : return ret;
1272 : #endif
1273 : }
1274 :
1275 :
1276 : /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1277 : static int
1278 3055 : get_inheritable(int fd, int raise)
1279 : {
1280 : #ifdef MS_WINDOWS
1281 : HANDLE handle;
1282 : DWORD flags;
1283 :
1284 : handle = _Py_get_osfhandle_noraise(fd);
1285 : if (handle == INVALID_HANDLE_VALUE) {
1286 : if (raise)
1287 : PyErr_SetFromErrno(PyExc_OSError);
1288 : return -1;
1289 : }
1290 :
1291 : if (!GetHandleInformation(handle, &flags)) {
1292 : if (raise)
1293 : PyErr_SetFromWindowsErr(0);
1294 : return -1;
1295 : }
1296 :
1297 : return (flags & HANDLE_FLAG_INHERIT);
1298 : #else
1299 : int flags;
1300 :
1301 3055 : flags = fcntl(fd, F_GETFD, 0);
1302 3055 : if (flags == -1) {
1303 2 : if (raise)
1304 2 : PyErr_SetFromErrno(PyExc_OSError);
1305 2 : return -1;
1306 : }
1307 3053 : return !(flags & FD_CLOEXEC);
1308 : #endif
1309 : }
1310 :
/* Get the inheritable flag of the specified file descriptor.
   Return 1 if the file descriptor can be inherited, 0 if it cannot,
   raise an exception and return -1 on error. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;
    return get_inheritable(fd, raise_on_error);
}
1319 :
1320 :
1321 : /* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1322 : static int
1323 483230 : set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1324 : {
1325 : #ifdef MS_WINDOWS
1326 : HANDLE handle;
1327 : DWORD flags;
1328 : #else
1329 : #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1330 : static int ioctl_works = -1;
1331 : int request;
1332 : int err;
1333 : #endif
1334 : int flags, new_flags;
1335 : int res;
1336 : #endif
1337 :
1338 : /* atomic_flag_works can only be used to make the file descriptor
1339 : non-inheritable */
1340 483230 : assert(!(atomic_flag_works != NULL && inheritable));
1341 :
1342 483230 : if (atomic_flag_works != NULL && !inheritable) {
1343 474355 : if (*atomic_flag_works == -1) {
1344 2925 : int isInheritable = get_inheritable(fd, raise);
1345 2925 : if (isInheritable == -1)
1346 0 : return -1;
1347 2925 : *atomic_flag_works = !isInheritable;
1348 : }
1349 :
1350 474355 : if (*atomic_flag_works)
1351 474355 : return 0;
1352 : }
1353 :
1354 : #ifdef MS_WINDOWS
1355 : handle = _Py_get_osfhandle_noraise(fd);
1356 : if (handle == INVALID_HANDLE_VALUE) {
1357 : if (raise)
1358 : PyErr_SetFromErrno(PyExc_OSError);
1359 : return -1;
1360 : }
1361 :
1362 : if (inheritable)
1363 : flags = HANDLE_FLAG_INHERIT;
1364 : else
1365 : flags = 0;
1366 :
1367 : /* This check can be removed once support for Windows 7 ends. */
1368 : #define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \
1369 : GetFileType(handle) == FILE_TYPE_CHAR)
1370 :
1371 : if (!CONSOLE_PSEUDOHANDLE(handle) &&
1372 : !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
1373 : if (raise)
1374 : PyErr_SetFromWindowsErr(0);
1375 : return -1;
1376 : }
1377 : #undef CONSOLE_PSEUDOHANDLE
1378 : return 0;
1379 :
1380 : #else
1381 :
1382 : #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1383 8875 : if (ioctl_works != 0 && raise != 0) {
1384 : /* fast-path: ioctl() only requires one syscall */
1385 : /* caveat: raise=0 is an indicator that we must be async-signal-safe
1386 : * thus avoid using ioctl() so we skip the fast-path. */
1387 2558 : if (inheritable)
1388 57 : request = FIONCLEX;
1389 : else
1390 2501 : request = FIOCLEX;
1391 2558 : err = ioctl(fd, request, NULL);
1392 2558 : if (!err) {
1393 2553 : ioctl_works = 1;
1394 2553 : return 0;
1395 : }
1396 :
1397 : #ifdef O_PATH
1398 5 : if (errno == EBADF) {
1399 : // bpo-44849: On Linux and FreeBSD, ioctl(FIOCLEX) fails with EBADF
1400 : // on O_PATH file descriptors. Fall through to the fcntl()
1401 : // implementation.
1402 : }
1403 : else
1404 : #endif
1405 0 : if (errno != ENOTTY && errno != EACCES) {
1406 0 : if (raise)
1407 0 : PyErr_SetFromErrno(PyExc_OSError);
1408 0 : return -1;
1409 : }
1410 : else {
1411 : /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1412 : device". The ioctl is declared but not supported by the kernel.
1413 : Remember that ioctl() doesn't work. It is the case on
1414 : Illumos-based OS for example.
1415 :
1416 : Issue #27057: When SELinux policy disallows ioctl it will fail
1417 : with EACCES. While FIOCLEX is safe operation it may be
1418 : unavailable because ioctl was denied altogether.
1419 : This can be the case on Android. */
1420 0 : ioctl_works = 0;
1421 : }
1422 : /* fallback to fcntl() if ioctl() does not work */
1423 : }
1424 : #endif
1425 :
1426 : /* slow-path: fcntl() requires two syscalls */
1427 6322 : flags = fcntl(fd, F_GETFD);
1428 6322 : if (flags < 0) {
1429 3 : if (raise)
1430 3 : PyErr_SetFromErrno(PyExc_OSError);
1431 3 : return -1;
1432 : }
1433 :
1434 6319 : if (inheritable) {
1435 3111 : new_flags = flags & ~FD_CLOEXEC;
1436 : }
1437 : else {
1438 3208 : new_flags = flags | FD_CLOEXEC;
1439 : }
1440 :
1441 6319 : if (new_flags == flags) {
1442 : /* FD_CLOEXEC flag already set/cleared: nothing to do */
1443 143 : return 0;
1444 : }
1445 :
1446 6176 : res = fcntl(fd, F_SETFD, new_flags);
1447 6176 : if (res < 0) {
1448 0 : if (raise)
1449 0 : PyErr_SetFromErrno(PyExc_OSError);
1450 0 : return -1;
1451 : }
1452 6176 : return 0;
1453 : #endif
1454 : }
1455 :
/* Make the file descriptor non-inheritable.
   Return 0 on success, set errno and return -1 on error.
   No exception is raised and no atomic-flag cache is consulted. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise = 0;
    return set_inheritable(fd, inheritable, raise, NULL);
}
1463 :
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   If atomic_flag_works is not NULL:

    * if *atomic_flag_works==-1, check if the inheritable is set on the file
      descriptor: if yes, set *atomic_flag_works to 1, otherwise set to 0 and
      set the inheritable flag
    * if *atomic_flag_works==1: do nothing
    * if *atomic_flag_works==0: set inheritable flag to False

   Set atomic_flag_works to NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise = 1;
    return set_inheritable(fd, inheritable, raise, atomic_flag_works);
}
1485 :
/* Same as _Py_set_inheritable() but on error, set errno and
   don't raise an exception.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise = 0;
    return set_inheritable(fd, inheritable, raise, atomic_flag_works);
}
1494 :
1495 : static int
1496 5636 : _Py_open_impl(const char *pathname, int flags, int gil_held)
1497 : {
1498 : int fd;
1499 5636 : int async_err = 0;
1500 : #ifndef MS_WINDOWS
1501 : int *atomic_flag_works;
1502 : #endif
1503 :
1504 : #ifdef MS_WINDOWS
1505 : flags |= O_NOINHERIT;
1506 : #elif defined(O_CLOEXEC)
1507 5636 : atomic_flag_works = &_Py_open_cloexec_works;
1508 5636 : flags |= O_CLOEXEC;
1509 : #else
1510 : atomic_flag_works = NULL;
1511 : #endif
1512 :
1513 5636 : if (gil_held) {
1514 0 : PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1515 0 : if (pathname_obj == NULL) {
1516 0 : return -1;
1517 : }
1518 0 : if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1519 0 : Py_DECREF(pathname_obj);
1520 0 : return -1;
1521 : }
1522 :
1523 : do {
1524 0 : Py_BEGIN_ALLOW_THREADS
1525 0 : fd = open(pathname, flags);
1526 0 : Py_END_ALLOW_THREADS
1527 : } while (fd < 0
1528 0 : && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1529 0 : if (async_err) {
1530 0 : Py_DECREF(pathname_obj);
1531 0 : return -1;
1532 : }
1533 0 : if (fd < 0) {
1534 0 : PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1535 0 : Py_DECREF(pathname_obj);
1536 0 : return -1;
1537 : }
1538 0 : Py_DECREF(pathname_obj);
1539 : }
1540 : else {
1541 5636 : fd = open(pathname, flags);
1542 5636 : if (fd < 0)
1543 0 : return -1;
1544 : }
1545 :
1546 : #ifndef MS_WINDOWS
1547 5636 : if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
1548 0 : close(fd);
1549 0 : return -1;
1550 : }
1551 : #endif
1552 :
1553 5636 : return fd;
1554 : }
1555 :
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. Raise an exception and return -1 on
   error.

   The file descriptor is created non-inheritable.

   When interrupted by a signal (open() fails with EINTR), retry the syscall,
   except if the Python signal handler raises an exception.

   Release the GIL to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());
    return _Py_open_impl(pathname, flags, gil_held);
}
1573 :
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. Set errno and return -1 on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;
    return _Py_open_impl(pathname, flags, gil_held);
}
1585 :
1586 : /* Open a file. Use _wfopen() on Windows, encode the path to the locale
1587 : encoding and use fopen() otherwise.
1588 :
1589 : The file descriptor is created non-inheritable.
1590 :
1591 : If interrupted by a signal, fail with EINTR. */
1592 : FILE *
1593 15778 : _Py_wfopen(const wchar_t *path, const wchar_t *mode)
1594 : {
1595 : FILE *f;
1596 15778 : if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1597 0 : return NULL;
1598 : }
1599 : #ifndef MS_WINDOWS
1600 : char *cpath;
1601 : char cmode[10];
1602 : size_t r;
1603 15778 : r = wcstombs(cmode, mode, 10);
1604 15778 : if (r == DECODE_ERROR || r >= 10) {
1605 0 : errno = EINVAL;
1606 0 : return NULL;
1607 : }
1608 15778 : cpath = _Py_EncodeLocaleRaw(path, NULL);
1609 15778 : if (cpath == NULL) {
1610 0 : return NULL;
1611 : }
1612 15778 : f = fopen(cpath, cmode);
1613 15778 : PyMem_RawFree(cpath);
1614 : #else
1615 : f = _wfopen(path, mode);
1616 : #endif
1617 15778 : if (f == NULL)
1618 12595 : return NULL;
1619 3183 : if (make_non_inheritable(fileno(f)) < 0) {
1620 0 : fclose(f);
1621 0 : return NULL;
1622 : }
1623 3183 : return f;
1624 : }
1625 :
1626 :
1627 : /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem
1628 : encoding and call fopen() otherwise.
1629 :
1630 : Return the new file object on success. Raise an exception and return NULL
1631 : on error.
1632 :
1633 : The file descriptor is created non-inheritable.
1634 :
1635 : When interrupted by a signal (open() fails with EINTR), retry the syscall,
1636 : except if the Python signal handler raises an exception.
1637 :
1638 : Release the GIL to call _wfopen() or fopen(). The caller must hold
1639 : the GIL. */
1640 : FILE*
1641 1440 : _Py_fopen_obj(PyObject *path, const char *mode)
1642 : {
1643 : FILE *f;
1644 1440 : int async_err = 0;
1645 : #ifdef MS_WINDOWS
1646 : wchar_t wmode[10];
1647 : int usize;
1648 :
1649 : assert(PyGILState_Check());
1650 :
1651 : if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1652 : return NULL;
1653 : }
1654 : if (!PyUnicode_Check(path)) {
1655 : PyErr_Format(PyExc_TypeError,
1656 : "str file path expected under Windows, got %R",
1657 : Py_TYPE(path));
1658 : return NULL;
1659 : }
1660 :
1661 : wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1662 : if (wpath == NULL)
1663 : return NULL;
1664 :
1665 : usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1666 : wmode, Py_ARRAY_LENGTH(wmode));
1667 : if (usize == 0) {
1668 : PyErr_SetFromWindowsErr(0);
1669 : PyMem_Free(wpath);
1670 : return NULL;
1671 : }
1672 :
1673 : do {
1674 : Py_BEGIN_ALLOW_THREADS
1675 : f = _wfopen(wpath, wmode);
1676 : Py_END_ALLOW_THREADS
1677 : } while (f == NULL
1678 : && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1679 : PyMem_Free(wpath);
1680 : #else
1681 : PyObject *bytes;
1682 : const char *path_bytes;
1683 :
1684 1440 : assert(PyGILState_Check());
1685 :
1686 1440 : if (!PyUnicode_FSConverter(path, &bytes))
1687 2 : return NULL;
1688 1438 : path_bytes = PyBytes_AS_STRING(bytes);
1689 :
1690 1438 : if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1691 1 : Py_DECREF(bytes);
1692 1 : return NULL;
1693 : }
1694 :
1695 : do {
1696 1437 : Py_BEGIN_ALLOW_THREADS
1697 1437 : f = fopen(path_bytes, mode);
1698 1437 : Py_END_ALLOW_THREADS
1699 : } while (f == NULL
1700 1437 : && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1701 :
1702 1437 : Py_DECREF(bytes);
1703 : #endif
1704 1437 : if (async_err)
1705 0 : return NULL;
1706 :
1707 1437 : if (f == NULL) {
1708 968 : PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1709 968 : return NULL;
1710 : }
1711 :
1712 469 : if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
1713 0 : fclose(f);
1714 0 : return NULL;
1715 : }
1716 469 : return f;
1717 : }
1718 :
1719 : /* Read count bytes from fd into buf.
1720 :
1721 : On success, return the number of read bytes, it can be lower than count.
1722 : If the current file offset is at or past the end of file, no bytes are read,
1723 : and read() returns zero.
1724 :
1725 : On error, raise an exception, set errno and return -1.
1726 :
1727 : When interrupted by a signal (read() fails with EINTR), retry the syscall.
1728 : If the Python signal handler raises an exception, the function returns -1
1729 : (the syscall is not retried).
1730 :
1731 : Release the GIL to call read(). The caller must hold the GIL. */
1732 : Py_ssize_t
1733 793158 : _Py_read(int fd, void *buf, size_t count)
1734 : {
1735 : Py_ssize_t n;
1736 : int err;
1737 793158 : int async_err = 0;
1738 :
1739 793158 : assert(PyGILState_Check());
1740 :
1741 : /* _Py_read() must not be called with an exception set, otherwise the
1742 : * caller may think that read() was interrupted by a signal and the signal
1743 : * handler raised an exception. */
1744 793158 : assert(!PyErr_Occurred());
1745 :
1746 793158 : if (count > _PY_READ_MAX) {
1747 0 : count = _PY_READ_MAX;
1748 : }
1749 :
1750 : _Py_BEGIN_SUPPRESS_IPH
1751 : do {
1752 793197 : Py_BEGIN_ALLOW_THREADS
1753 793197 : errno = 0;
1754 : #ifdef MS_WINDOWS
1755 : n = read(fd, buf, (int)count);
1756 : #else
1757 793197 : n = read(fd, buf, count);
1758 : #endif
1759 : /* save/restore errno because PyErr_CheckSignals()
1760 : * and PyErr_SetFromErrno() can modify it */
1761 793195 : err = errno;
1762 793195 : Py_END_ALLOW_THREADS
1763 125 : } while (n < 0 && err == EINTR &&
1764 793238 : !(async_err = PyErr_CheckSignals()));
1765 : _Py_END_SUPPRESS_IPH
1766 :
1767 793156 : if (async_err) {
1768 : /* read() was interrupted by a signal (failed with EINTR)
1769 : * and the Python signal handler raised an exception */
1770 4 : errno = err;
1771 4 : assert(errno == EINTR && PyErr_Occurred());
1772 4 : return -1;
1773 : }
1774 793152 : if (n < 0) {
1775 39 : PyErr_SetFromErrno(PyExc_OSError);
1776 39 : errno = err;
1777 39 : return -1;
1778 : }
1779 :
1780 793113 : return n;
1781 : }
1782 :
1783 : static Py_ssize_t
1784 317226 : _Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
1785 : {
1786 : Py_ssize_t n;
1787 : int err;
1788 317226 : int async_err = 0;
1789 :
1790 : _Py_BEGIN_SUPPRESS_IPH
1791 : #ifdef MS_WINDOWS
1792 : if (count > 32767) {
1793 : /* Issue #11395: the Windows console returns an error (12: not
1794 : enough space error) on writing into stdout if stdout mode is
1795 : binary and the length is greater than 66,000 bytes (or less,
1796 : depending on heap usage). */
1797 : if (gil_held) {
1798 : Py_BEGIN_ALLOW_THREADS
1799 : if (isatty(fd)) {
1800 : count = 32767;
1801 : }
1802 : Py_END_ALLOW_THREADS
1803 : } else {
1804 : if (isatty(fd)) {
1805 : count = 32767;
1806 : }
1807 : }
1808 : }
1809 : #endif
1810 317226 : if (count > _PY_WRITE_MAX) {
1811 0 : count = _PY_WRITE_MAX;
1812 : }
1813 :
1814 317226 : if (gil_held) {
1815 : do {
1816 309941 : Py_BEGIN_ALLOW_THREADS
1817 309941 : errno = 0;
1818 : #ifdef MS_WINDOWS
1819 : n = write(fd, buf, (int)count);
1820 : #else
1821 309941 : n = write(fd, buf, count);
1822 : #endif
1823 : /* save/restore errno because PyErr_CheckSignals()
1824 : * and PyErr_SetFromErrno() can modify it */
1825 309941 : err = errno;
1826 309941 : Py_END_ALLOW_THREADS
1827 76 : } while (n < 0 && err == EINTR &&
1828 309917 : !(async_err = PyErr_CheckSignals()));
1829 : }
1830 : else {
1831 : do {
1832 7295 : errno = 0;
1833 : #ifdef MS_WINDOWS
1834 : n = write(fd, buf, (int)count);
1835 : #else
1836 7295 : n = write(fd, buf, count);
1837 : #endif
1838 7295 : err = errno;
1839 7295 : } while (n < 0 && err == EINTR);
1840 : }
1841 : _Py_END_SUPPRESS_IPH
1842 :
1843 317192 : if (async_err) {
1844 : /* write() was interrupted by a signal (failed with EINTR)
1845 : and the Python signal handler raised an exception (if gil_held is
1846 : nonzero). */
1847 0 : errno = err;
1848 0 : assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
1849 0 : return -1;
1850 : }
1851 317192 : if (n < 0) {
1852 62 : if (gil_held)
1853 56 : PyErr_SetFromErrno(PyExc_OSError);
1854 62 : errno = err;
1855 62 : return -1;
1856 : }
1857 :
1858 317130 : return n;
1859 : }
1860 :
1861 : /* Write count bytes of buf into fd.
1862 :
1863 : On success, return the number of written bytes, it can be lower than count
1864 : including 0. On error, raise an exception, set errno and return -1.
1865 :
1866 : When interrupted by a signal (write() fails with EINTR), retry the syscall.
1867 : If the Python signal handler raises an exception, the function returns -1
1868 : (the syscall is not retried).
1869 :
1870 : Release the GIL to call write(). The caller must hold the GIL. */
1871 : Py_ssize_t
1872 309931 : _Py_write(int fd, const void *buf, size_t count)
1873 : {
1874 309931 : assert(PyGILState_Check());
1875 :
1876 : /* _Py_write() must not be called with an exception set, otherwise the
1877 : * caller may think that write() was interrupted by a signal and the signal
1878 : * handler raised an exception. */
1879 309931 : assert(!PyErr_Occurred());
1880 :
1881 309931 : return _Py_write_impl(fd, buf, count, 1);
1882 : }
1883 :
1884 : /* Write count bytes of buf into fd.
1885 : *
1886 : * On success, return the number of written bytes, it can be lower than count
1887 : * including 0. On error, set errno and return -1.
1888 : *
1889 : * When interrupted by a signal (write() fails with EINTR), retry the syscall
1890 : * without calling the Python signal handler. */
1891 : Py_ssize_t
1892 7295 : _Py_write_noraise(int fd, const void *buf, size_t count)
1893 : {
1894 7295 : return _Py_write_impl(fd, buf, count, 0);
1895 : }
1896 :
1897 : #ifdef HAVE_READLINK
1898 :
1899 : /* Read value of symbolic link. Encode the path to the locale encoding, decode
1900 : the result from the locale encoding.
1901 :
1902 : Return -1 on encoding error, on readlink() error, if the internal buffer is
1903 : too short, on decoding error, or if 'buf' is too short. */
1904 : int
1905 3470 : _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
1906 : {
1907 : char *cpath;
1908 : char cbuf[MAXPATHLEN];
1909 3470 : size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
1910 : wchar_t *wbuf;
1911 : Py_ssize_t res;
1912 : size_t r1;
1913 :
1914 3470 : cpath = _Py_EncodeLocaleRaw(path, NULL);
1915 3470 : if (cpath == NULL) {
1916 0 : errno = EINVAL;
1917 0 : return -1;
1918 : }
1919 3470 : res = readlink(cpath, cbuf, cbuf_len);
1920 3470 : PyMem_RawFree(cpath);
1921 3470 : if (res == -1) {
1922 3437 : return -1;
1923 : }
1924 33 : if ((size_t)res == cbuf_len) {
1925 0 : errno = EINVAL;
1926 0 : return -1;
1927 : }
1928 33 : cbuf[res] = '\0'; /* buf will be null terminated */
1929 33 : wbuf = Py_DecodeLocale(cbuf, &r1);
1930 33 : if (wbuf == NULL) {
1931 0 : errno = EINVAL;
1932 0 : return -1;
1933 : }
1934 : /* wbuf must have space to store the trailing NUL character */
1935 33 : if (buflen <= r1) {
1936 0 : PyMem_RawFree(wbuf);
1937 0 : errno = EINVAL;
1938 0 : return -1;
1939 : }
1940 33 : wcsncpy(buf, wbuf, buflen);
1941 33 : PyMem_RawFree(wbuf);
1942 33 : return (int)r1;
1943 : }
1944 : #endif
1945 :
1946 : #ifdef HAVE_REALPATH
1947 :
1948 : /* Return the canonicalized absolute pathname. Encode path to the locale
1949 : encoding, decode the result from the locale encoding.
1950 :
1951 : Return NULL on encoding error, realpath() error, decoding error
1952 : or if 'resolved_path' is too short. */
1953 : wchar_t*
1954 228 : _Py_wrealpath(const wchar_t *path,
1955 : wchar_t *resolved_path, size_t resolved_path_len)
1956 : {
1957 : char *cpath;
1958 : char cresolved_path[MAXPATHLEN];
1959 : wchar_t *wresolved_path;
1960 : char *res;
1961 : size_t r;
1962 228 : cpath = _Py_EncodeLocaleRaw(path, NULL);
1963 228 : if (cpath == NULL) {
1964 0 : errno = EINVAL;
1965 0 : return NULL;
1966 : }
1967 228 : res = realpath(cpath, cresolved_path);
1968 228 : PyMem_RawFree(cpath);
1969 228 : if (res == NULL)
1970 20 : return NULL;
1971 :
1972 208 : wresolved_path = Py_DecodeLocale(cresolved_path, &r);
1973 208 : if (wresolved_path == NULL) {
1974 0 : errno = EINVAL;
1975 0 : return NULL;
1976 : }
1977 : /* wresolved_path must have space to store the trailing NUL character */
1978 208 : if (resolved_path_len <= r) {
1979 0 : PyMem_RawFree(wresolved_path);
1980 0 : errno = EINVAL;
1981 0 : return NULL;
1982 : }
1983 208 : wcsncpy(resolved_path, wresolved_path, resolved_path_len);
1984 208 : PyMem_RawFree(wresolved_path);
1985 208 : return resolved_path;
1986 : }
1987 : #endif
1988 :
1989 :
1990 : int
1991 73848 : _Py_isabs(const wchar_t *path)
1992 : {
1993 : #ifdef MS_WINDOWS
1994 : const wchar_t *tail;
1995 : HRESULT hr = PathCchSkipRoot(path, &tail);
1996 : if (FAILED(hr) || path == tail) {
1997 : return 0;
1998 : }
1999 : if (tail == &path[1] && (path[0] == SEP || path[0] == ALTSEP)) {
2000 : // Exclude paths with leading SEP
2001 : return 0;
2002 : }
2003 : if (tail == &path[2] && path[1] == L':') {
2004 : // Exclude drive-relative paths (e.g. C:filename.ext)
2005 : return 0;
2006 : }
2007 : return 1;
2008 : #else
2009 73848 : return (path[0] == SEP);
2010 : #endif
2011 : }
2012 :
2013 :
2014 : /* Get an absolute path.
2015 : On error (ex: fail to get the current directory), return -1.
2016 : On memory allocation failure, set *abspath_p to NULL and return 0.
2017 : On success, return a newly allocated to *abspath_p to and return 0.
2018 : The string must be freed by PyMem_RawFree(). */
2019 : int
2020 3073 : _Py_abspath(const wchar_t *path, wchar_t **abspath_p)
2021 : {
2022 3073 : if (path[0] == '\0' || !wcscmp(path, L".")) {
2023 : wchar_t cwd[MAXPATHLEN + 1];
2024 6 : cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2025 6 : if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2026 : /* unable to get the current directory */
2027 0 : return -1;
2028 : }
2029 6 : *abspath_p = _PyMem_RawWcsdup(cwd);
2030 6 : return 0;
2031 : }
2032 :
2033 3067 : if (_Py_isabs(path)) {
2034 2878 : *abspath_p = _PyMem_RawWcsdup(path);
2035 2878 : return 0;
2036 : }
2037 :
2038 : #ifdef MS_WINDOWS
2039 : return _PyOS_getfullpathname(path, abspath_p);
2040 : #else
2041 : wchar_t cwd[MAXPATHLEN + 1];
2042 189 : cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2043 189 : if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2044 : /* unable to get the current directory */
2045 0 : return -1;
2046 : }
2047 :
2048 189 : size_t cwd_len = wcslen(cwd);
2049 189 : size_t path_len = wcslen(path);
2050 189 : size_t len = cwd_len + 1 + path_len + 1;
2051 189 : if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2052 189 : *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
2053 : }
2054 : else {
2055 0 : *abspath_p = NULL;
2056 : }
2057 189 : if (*abspath_p == NULL) {
2058 0 : return 0;
2059 : }
2060 :
2061 189 : wchar_t *abspath = *abspath_p;
2062 189 : memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
2063 189 : abspath += cwd_len;
2064 :
2065 189 : *abspath = (wchar_t)SEP;
2066 189 : abspath++;
2067 :
2068 189 : memcpy(abspath, path, path_len * sizeof(wchar_t));
2069 189 : abspath += path_len;
2070 :
2071 189 : *abspath = 0;
2072 189 : return 0;
2073 : #endif
2074 : }
2075 :
2076 :
2077 : // The caller must ensure "buffer" is big enough.
2078 : static int
2079 22124 : join_relfile(wchar_t *buffer, size_t bufsize,
2080 : const wchar_t *dirname, const wchar_t *relfile)
2081 : {
2082 : #ifdef MS_WINDOWS
2083 : if (FAILED(PathCchCombineEx(buffer, bufsize, dirname, relfile,
2084 : PATHCCH_ALLOW_LONG_PATHS))) {
2085 : return -1;
2086 : }
2087 : #else
2088 22124 : assert(!_Py_isabs(relfile));
2089 22124 : size_t dirlen = wcslen(dirname);
2090 22124 : size_t rellen = wcslen(relfile);
2091 22124 : size_t maxlen = bufsize - 1;
2092 22124 : if (maxlen > MAXPATHLEN || dirlen >= maxlen || rellen >= maxlen - dirlen) {
2093 0 : return -1;
2094 : }
2095 22124 : if (dirlen == 0) {
2096 : // We do not add a leading separator.
2097 0 : wcscpy(buffer, relfile);
2098 : }
2099 : else {
2100 22124 : if (dirname != buffer) {
2101 2 : wcscpy(buffer, dirname);
2102 : }
2103 22124 : size_t relstart = dirlen;
2104 22124 : if (dirlen > 1 && dirname[dirlen - 1] != SEP) {
2105 22124 : buffer[dirlen] = SEP;
2106 22124 : relstart += 1;
2107 : }
2108 22124 : wcscpy(&buffer[relstart], relfile);
2109 : }
2110 : #endif
2111 22124 : return 0;
2112 : }
2113 :
2114 : /* Join the two paths together, like os.path.join(). Return NULL
2115 : if memory could not be allocated. The caller is responsible
2116 : for calling PyMem_RawFree() on the result. */
2117 : wchar_t *
2118 2 : _Py_join_relfile(const wchar_t *dirname, const wchar_t *relfile)
2119 : {
2120 2 : assert(dirname != NULL && relfile != NULL);
2121 : #ifndef MS_WINDOWS
2122 2 : assert(!_Py_isabs(relfile));
2123 : #endif
2124 2 : size_t maxlen = wcslen(dirname) + 1 + wcslen(relfile);
2125 2 : size_t bufsize = maxlen + 1;
2126 2 : wchar_t *filename = PyMem_RawMalloc(bufsize * sizeof(wchar_t));
2127 2 : if (filename == NULL) {
2128 0 : return NULL;
2129 : }
2130 2 : assert(wcslen(dirname) < MAXPATHLEN);
2131 2 : assert(wcslen(relfile) < MAXPATHLEN - wcslen(dirname));
2132 2 : join_relfile(filename, bufsize, dirname, relfile);
2133 2 : return filename;
2134 : }
2135 :
/* Append a relative path to a directory name in place, like os.path.join().

   dirname: buffer that already holds the NUL-terminated directory name;
            it also receives the joined result
   relfile: the path to append; it must be relative
   bufsize: total allocated size of the "dirname" buffer

   Return 0 on success, -1 if anything is wrong with the path lengths. */
int
_Py_add_relfile(wchar_t *dirname, const wchar_t *relfile, size_t bufsize)
{
    assert(dirname != NULL && relfile != NULL);
    assert(bufsize > 0);

    /* Source directory and destination buffer are the same memory. */
    wchar_t *buffer = dirname;
    int status = join_relfile(buffer, bufsize, dirname, relfile);
    return status;
}
2149 :
2150 :
2151 : size_t
2152 0 : _Py_find_basename(const wchar_t *filename)
2153 : {
2154 0 : for (size_t i = wcslen(filename); i > 0; --i) {
2155 0 : if (filename[i] == SEP) {
2156 0 : return i + 1;
2157 : }
2158 : }
2159 0 : return 0;
2160 : }
2161 :
2162 : /* In-place path normalisation. Returns the start of the normalized
2163 : path, which will be within the original buffer. Guaranteed to not
2164 : make the path longer, and will not fail. 'size' is the length of
2165 : the path, if known. If -1, the first null character will be assumed
2166 : to be the end of the path.

   The returned pointer is not always 'path' itself: a leading "." followed
   by a separator (and any separators directly after it) is skipped by
   advancing the start of the result. An empty path, or size == 0, is
   returned untouched.

   In the main loop, repeated separators are collapsed, "." segments are
   dropped, a ".." segment removes the previous output segment when there is
   one to remove, and trailing separators are stripped at the end. When
   ALTSEP is defined, it is rewritten to SEP. The result is always
   NUL-terminated.

   NOTE(review): the first test below reads path[0] before looking at
   'size', and the leading-prefix checks use IS_SEP() on p1[1]/p1[2]
   without consulting pEnd; this looks like it assumes those characters
   are readable even when an explicit size is passed -- confirm. */
2167 : wchar_t *
2168 181686 : _Py_normpath(wchar_t *path, Py_ssize_t size)
2169 : {
// Nothing to normalise for an empty path or a zero length.
2170 181686 : if (!path[0] || size == 0) {
2171 44 : return path;
2172 : }
// Explicit end pointer when 'size' is given; NULL means "stop at the NUL".
2173 181642 : wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
2174 181642 : wchar_t *p1 = path; // sequentially scanned address in the path
2175 181642 : wchar_t *p2 = path; // destination of a scanned character to be ljusted
2176 181642 : wchar_t *minP2 = path; // the beginning of the destination range
2177 181642 : wchar_t lastC = L'\0'; // the last ljusted character, p2[-1] in most cases
2178 :
// IS_END: at the explicit end pointer, or at the NUL when no size was given.
2179 : #define IS_END(x) (pEnd ? (x) == pEnd : !*(x))
2180 : #ifdef ALTSEP
2181 : #define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP)
2182 : #else
2183 : #define IS_SEP(x) (*(x) == SEP)
2184 : #endif
2185 : #define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))
2186 :
2187 : // Skip leading '.\'
2188 181642 : if (p1[0] == L'.' && IS_SEP(&p1[1])) {
2189 138 : path = &path[2];
2190 140 : while (IS_SEP(path) && !IS_END(path)) {
2191 2 : path++;
2192 : }
// Source, destination and lower bound all restart after the skipped prefix.
2193 138 : p1 = p2 = minP2 = path;
2194 138 : lastC = SEP;
2195 : }
2196 : #ifdef MS_WINDOWS
2197 : // Skip past drive segment and update minP2
2198 : else if (p1[0] && p1[1] == L':') {
2199 : *p2++ = *p1++;
2200 : *p2++ = *p1++;
2201 : minP2 = p2;
2202 : lastC = L':';
2203 : }
2204 : // Skip past all \\-prefixed paths, including \\?\, \\.\,
2205 : // and network paths, including the first segment.
2206 : else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1])) {
2207 : int sepCount = 2;
2208 : *p2++ = SEP;
2209 : *p2++ = SEP;
2210 : p1 += 2;
// Copy up to and including the next two separators, rewriting them to SEP.
2211 : for (; !IS_END(p1) && sepCount; ++p1) {
2212 : if (IS_SEP(p1)) {
2213 : --sepCount;
2214 : *p2++ = lastC = SEP;
2215 : } else {
2216 : *p2++ = lastC = *p1;
2217 : }
2218 : }
2219 : if (sepCount) {
2220 : minP2 = p2; // Invalid path
2221 : } else {
2222 : minP2 = p2 - 1; // Absolute path has SEP at minP2
2223 : }
2224 : }
2225 : #else
2226 : // Skip past two leading SEPs
2227 181504 : else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1]) && !IS_SEP(&p1[2])) {
2228 30 : *p2++ = *p1++;
2229 30 : *p2++ = *p1++;
2230 30 : minP2 = p2 - 1; // Absolute path has SEP at minP2
2231 30 : lastC = SEP;
2232 : }
2233 : #endif /* MS_WINDOWS */
2234 :
2235 : /* if pEnd is specified, check that. Else, check for null terminator */
2236 10833200 : for (; !IS_END(p1); ++p1) {
2237 10651600 : wchar_t c = *p1;
2238 : #ifdef ALTSEP
2239 : if (c == ALTSEP) {
2240 : c = SEP;
2241 : }
2242 : #endif
// Only a character that directly follows a separator can start a ".",
// ".." or empty segment; everything else is copied through unchanged.
2243 10651600 : if (lastC == SEP) {
2244 1267500 : if (c == L'.') {
// sep_at_1: the segment is exactly "."; sep_at_2: it is two characters long.
2245 4380 : int sep_at_1 = SEP_OR_END(&p1[1]);
2246 4380 : int sep_at_2 = !sep_at_1 && SEP_OR_END(&p1[2]);
2247 4380 : if (sep_at_2 && p1[1] == L'.') {
// ".." segment: walk p3 back over the separators that end the output so
// far, then to the start of the previous output segment.
2248 444 : wchar_t *p3 = p2;
2249 879 : while (p3 != minP2 && *--p3 == SEP) { }
2250 1321 : while (p3 != minP2 && *(p3 - 1) != SEP) { --p3; }
2251 444 : if (p2 == minP2
2252 435 : || (p3[0] == L'.' && p3[1] == L'.' && IS_SEP(&p3[2])))
2253 : {
2254 : // Previous segment is also ../, so append instead.
2255 : // Relative path does not absorb ../ at minP2 as well.
2256 19 : *p2++ = L'.';
2257 19 : *p2++ = L'.';
2258 19 : lastC = L'.';
2259 425 : } else if (p3[0] == SEP) {
2260 : // Absolute path, so absorb segment
2261 263 : p2 = p3 + 1;
2262 : } else {
// Drop the previous segment by rewinding the write position to its start.
2263 162 : p2 = p3;
2264 : }
// Step over the second '.'; the loop increment steps over the first one.
2265 444 : p1 += 1;
2266 3936 : } else if (sep_at_1) {
// Lone "." segment: emit nothing.
2267 : } else {
2268 3495 : *p2++ = lastC = c;
2269 : }
2270 1263120 : } else if (c == SEP) {
// Repeated separator: emit nothing.
2271 : } else {
2272 1262390 : *p2++ = lastC = c;
2273 : }
2274 : } else {
2275 9384080 : *p2++ = lastC = c;
2276 : }
2277 : }
// Terminate the result, then strip trailing separators without going
// back past minP2.
2278 181642 : *p2 = L'\0';
2279 181642 : if (p2 != minP2) {
2280 182190 : while (--p2 != minP2 && *p2 == SEP) {
2281 550 : *p2 = L'\0';
2282 : }
2283 : }
2284 : #undef SEP_OR_END
2285 : #undef IS_SEP
2286 : #undef IS_END
2287 181642 : return path;
2288 : }
2289 :
2290 :
2291 : /* Get the current directory. buflen is the buffer size in wide characters
2292 : including the null character. Decode the path from the locale encoding.
2293 :
2294 : Return NULL on getcwd() error, on decoding error, or if 'buf' is
2295 : too short. */
2296 : wchar_t*
2297 819 : _Py_wgetcwd(wchar_t *buf, size_t buflen)
2298 : {
2299 : #ifdef MS_WINDOWS
2300 : int ibuflen = (int)Py_MIN(buflen, INT_MAX);
2301 : return _wgetcwd(buf, ibuflen);
2302 : #else
2303 : char fname[MAXPATHLEN];
2304 : wchar_t *wname;
2305 : size_t len;
2306 :
2307 819 : if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
2308 0 : return NULL;
2309 819 : wname = Py_DecodeLocale(fname, &len);
2310 819 : if (wname == NULL)
2311 0 : return NULL;
2312 : /* wname must have space to store the trailing NUL character */
2313 819 : if (buflen <= len) {
2314 0 : PyMem_RawFree(wname);
2315 0 : return NULL;
2316 : }
2317 819 : wcsncpy(buf, wname, buflen);
2318 819 : PyMem_RawFree(wname);
2319 819 : return buf;
2320 : #endif
2321 : }
2322 :
2323 : /* Duplicate a file descriptor. The new file descriptor is created as
2324 : non-inheritable. Return a new file descriptor on success, raise an OSError
2325 : exception and return -1 on error.
2326 :
2327 : The GIL is released to call dup(). The caller must hold the GIL. */
2328 : int
2329 15795 : _Py_dup(int fd)
2330 : {
2331 : #ifdef MS_WINDOWS
2332 : HANDLE handle;
2333 : #endif
2334 :
2335 15795 : assert(PyGILState_Check());
2336 :
2337 : #ifdef MS_WINDOWS
2338 : handle = _Py_get_osfhandle(fd);
2339 : if (handle == INVALID_HANDLE_VALUE)
2340 : return -1;
2341 :
2342 : Py_BEGIN_ALLOW_THREADS
2343 : _Py_BEGIN_SUPPRESS_IPH
2344 : fd = dup(fd);
2345 : _Py_END_SUPPRESS_IPH
2346 : Py_END_ALLOW_THREADS
2347 : if (fd < 0) {
2348 : PyErr_SetFromErrno(PyExc_OSError);
2349 : return -1;
2350 : }
2351 :
2352 : if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2353 : _Py_BEGIN_SUPPRESS_IPH
2354 : close(fd);
2355 : _Py_END_SUPPRESS_IPH
2356 : return -1;
2357 : }
2358 : #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2359 15795 : Py_BEGIN_ALLOW_THREADS
2360 : _Py_BEGIN_SUPPRESS_IPH
2361 15795 : fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
2362 : _Py_END_SUPPRESS_IPH
2363 15795 : Py_END_ALLOW_THREADS
2364 15795 : if (fd < 0) {
2365 3 : PyErr_SetFromErrno(PyExc_OSError);
2366 3 : return -1;
2367 : }
2368 :
2369 : #else
2370 : Py_BEGIN_ALLOW_THREADS
2371 : _Py_BEGIN_SUPPRESS_IPH
2372 : fd = dup(fd);
2373 : _Py_END_SUPPRESS_IPH
2374 : Py_END_ALLOW_THREADS
2375 : if (fd < 0) {
2376 : PyErr_SetFromErrno(PyExc_OSError);
2377 : return -1;
2378 : }
2379 :
2380 : if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2381 : _Py_BEGIN_SUPPRESS_IPH
2382 : close(fd);
2383 : _Py_END_SUPPRESS_IPH
2384 : return -1;
2385 : }
2386 : #endif
2387 15792 : return fd;
2388 : }
2389 :
2390 : #ifndef MS_WINDOWS
2391 : /* Get the blocking mode of the file descriptor.
2392 : Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2393 : raise an exception and return -1 on error. */
2394 : int
2395 344 : _Py_get_blocking(int fd)
2396 : {
2397 : int flags;
2398 : _Py_BEGIN_SUPPRESS_IPH
2399 344 : flags = fcntl(fd, F_GETFL, 0);
2400 : _Py_END_SUPPRESS_IPH
2401 344 : if (flags < 0) {
2402 1 : PyErr_SetFromErrno(PyExc_OSError);
2403 1 : return -1;
2404 : }
2405 :
2406 343 : return !(flags & O_NONBLOCK);
2407 : }
2408 :
2409 : /* Set the blocking mode of the specified file descriptor.
2410 :
2411 : Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2412 : otherwise.
2413 :
2414 : Return 0 on success, raise an exception and return -1 on error. */
2415 : int
2416 327 : _Py_set_blocking(int fd, int blocking)
2417 : {
2418 : /* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
2419 : Use fcntl() instead. */
2420 : #if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
2421 327 : int arg = !blocking;
2422 327 : if (ioctl(fd, FIONBIO, &arg) < 0)
2423 1 : goto error;
2424 : #else
2425 : int flags, res;
2426 :
2427 : _Py_BEGIN_SUPPRESS_IPH
2428 : flags = fcntl(fd, F_GETFL, 0);
2429 : if (flags >= 0) {
2430 : if (blocking)
2431 : flags = flags & (~O_NONBLOCK);
2432 : else
2433 : flags = flags | O_NONBLOCK;
2434 :
2435 : res = fcntl(fd, F_SETFL, flags);
2436 : } else {
2437 : res = -1;
2438 : }
2439 : _Py_END_SUPPRESS_IPH
2440 :
2441 : if (res < 0)
2442 : goto error;
2443 : #endif
2444 326 : return 0;
2445 :
2446 1 : error:
2447 1 : PyErr_SetFromErrno(PyExc_OSError);
2448 1 : return -1;
2449 : }
2450 : #else /* MS_WINDOWS */
2451 : void*
2452 : _Py_get_osfhandle_noraise(int fd)
2453 : {
2454 : void *handle;
2455 : _Py_BEGIN_SUPPRESS_IPH
2456 : handle = (void*)_get_osfhandle(fd);
2457 : _Py_END_SUPPRESS_IPH
2458 : return handle;
2459 : }
2460 :
2461 : void*
2462 : _Py_get_osfhandle(int fd)
2463 : {
2464 : void *handle = _Py_get_osfhandle_noraise(fd);
2465 : if (handle == INVALID_HANDLE_VALUE)
2466 : PyErr_SetFromErrno(PyExc_OSError);
2467 :
2468 : return handle;
2469 : }
2470 :
2471 : int
2472 : _Py_open_osfhandle_noraise(void *handle, int flags)
2473 : {
2474 : int fd;
2475 : _Py_BEGIN_SUPPRESS_IPH
2476 : fd = _open_osfhandle((intptr_t)handle, flags);
2477 : _Py_END_SUPPRESS_IPH
2478 : return fd;
2479 : }
2480 :
2481 : int
2482 : _Py_open_osfhandle(void *handle, int flags)
2483 : {
2484 : int fd = _Py_open_osfhandle_noraise(handle, flags);
2485 : if (fd == -1)
2486 : PyErr_SetFromErrno(PyExc_OSError);
2487 :
2488 : return fd;
2489 : }
2490 : #endif /* MS_WINDOWS */
2491 :
2492 : int
2493 389 : _Py_GetLocaleconvNumeric(struct lconv *lc,
2494 : PyObject **decimal_point, PyObject **thousands_sep)
2495 : {
2496 389 : assert(decimal_point != NULL);
2497 389 : assert(thousands_sep != NULL);
2498 :
2499 : #ifndef MS_WINDOWS
2500 389 : int change_locale = 0;
2501 389 : if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
2502 0 : change_locale = 1;
2503 : }
2504 389 : if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
2505 0 : change_locale = 1;
2506 : }
2507 :
2508 : /* Keep a copy of the LC_CTYPE locale */
2509 389 : char *oldloc = NULL, *loc = NULL;
2510 389 : if (change_locale) {
2511 0 : oldloc = setlocale(LC_CTYPE, NULL);
2512 0 : if (!oldloc) {
2513 0 : PyErr_SetString(PyExc_RuntimeWarning,
2514 : "failed to get LC_CTYPE locale");
2515 0 : return -1;
2516 : }
2517 :
2518 0 : oldloc = _PyMem_Strdup(oldloc);
2519 0 : if (!oldloc) {
2520 0 : PyErr_NoMemory();
2521 0 : return -1;
2522 : }
2523 :
2524 0 : loc = setlocale(LC_NUMERIC, NULL);
2525 0 : if (loc != NULL && strcmp(loc, oldloc) == 0) {
2526 0 : loc = NULL;
2527 : }
2528 :
2529 0 : if (loc != NULL) {
2530 : /* Only set the locale temporarily the LC_CTYPE locale
2531 : if LC_NUMERIC locale is different than LC_CTYPE locale and
2532 : decimal_point and/or thousands_sep are non-ASCII or longer than
2533 : 1 byte */
2534 0 : setlocale(LC_CTYPE, loc);
2535 : }
2536 : }
2537 :
2538 : #define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2539 : #else /* MS_WINDOWS */
2540 : /* Use _W_* fields of Windows strcut lconv */
2541 : #define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2542 : #endif /* MS_WINDOWS */
2543 :
2544 389 : int res = -1;
2545 :
2546 389 : *decimal_point = GET_LOCALE_STRING(decimal_point);
2547 389 : if (*decimal_point == NULL) {
2548 0 : goto done;
2549 : }
2550 :
2551 389 : *thousands_sep = GET_LOCALE_STRING(thousands_sep);
2552 389 : if (*thousands_sep == NULL) {
2553 0 : goto done;
2554 : }
2555 :
2556 389 : res = 0;
2557 :
2558 389 : done:
2559 : #ifndef MS_WINDOWS
2560 389 : if (loc != NULL) {
2561 0 : setlocale(LC_CTYPE, oldloc);
2562 : }
2563 389 : PyMem_Free(oldloc);
2564 : #endif
2565 389 : return res;
2566 :
2567 : #undef GET_LOCALE_STRING
2568 : }
2569 :
2570 : /* Our selection logic for which function to use is as follows:
2571 : * 1. If close_range(2) is available, always prefer that; it's better for
2572 : * contiguous ranges like this than fdwalk(3) which entails iterating over
2573 : * the entire fd space and simply doing nothing for those outside the range.
2574 : * 2. If closefrom(2) is available, we'll attempt to use that next if we're
2575 : * closing up to sysconf(_SC_OPEN_MAX).
2576 : * 2a. Fallback to fdwalk(3) if we're not closing up to sysconf(_SC_OPEN_MAX),
2577 : * as that will be more performant if the range happens to have any chunk of
2578 : * non-opened fd in the middle.
2579 : * 2b. If fdwalk(3) isn't available, just do a plain close(2) loop.
2580 : */
2581 : #ifdef __FreeBSD__
2582 : # define USE_CLOSEFROM
2583 : #endif /* __FreeBSD__ */
2584 :
2585 : #ifdef HAVE_FDWALK
2586 : # define USE_FDWALK
2587 : #endif /* HAVE_FDWALK */
2588 :
2589 : #ifdef USE_FDWALK
/* Callback for fdwalk(): close "fd" when it lies in the range [lo, hi).

   lohi points to two ints: lohi[0] is the inclusive lower bound and
   lohi[1] the exclusive upper bound.

   Return 1 once fd has reached the upper bound, 0 otherwise. Errors from
   close() are ignored. */
static int
_fdwalk_close_func(void *lohi, int fd)
{
    const int *bounds = (const int *)lohi;
    const int lower = bounds[0];
    const int upper = bounds[1];

    if (fd >= upper) {
        return 1;
    }
    if (fd >= lower) {
        /* Ignore errors */
        (void)close(fd);
    }
    return 0;
}
2605 : #endif /* USE_FDWALK */
2606 :
2607 : /* Closes all file descriptors in [first, last], ignoring errors.

   A negative 'first' is treated as 0. Nothing is returned and nothing is
   reported to the caller.

   The mechanism is picked at compile time and tried in this order:
   close_range() when HAVE_CLOSE_RANGE is defined; if that call does not
   return 0, or is not compiled in, closefrom() when USE_CLOSEFROM is
   defined and 'last' is at least sysconf(_SC_OPEN_MAX); otherwise fdwalk()
   when USE_FDWALK is defined, else a plain close() loop.

   NOTE(review): the fdwalk branch computes last + 1 and the loop iterates
   while i <= last; both look like they assume last < INT_MAX -- confirm
   against callers. */
2608 : void
2609 2 : _Py_closerange(int first, int last)
2610 : {
// Clamp the lower bound: there are no negative file descriptors to close.
2611 2 : first = Py_MAX(first, 0);
2612 : _Py_BEGIN_SUPPRESS_IPH
2613 : #ifdef HAVE_CLOSE_RANGE
2614 : if (close_range(first, last, 0) == 0) {
2615 : /* close_range() ignores errors when it closes file descriptors.
2616 : * Possible reasons of an error return are lack of kernel support
2617 : * or denial of the underlying syscall by a seccomp sandbox on Linux.
2618 : * Fallback to other methods in case of any error. */
2619 : }
// This 'else' binds to whichever statement the preprocessor keeps below.
2620 : else
2621 : #endif /* HAVE_CLOSE_RANGE */
2622 : #ifdef USE_CLOSEFROM
2623 : if (last >= sysconf(_SC_OPEN_MAX)) {
2624 : /* Any errors encountered while closing file descriptors are ignored */
2625 : closefrom(first);
2626 : }
2627 : else
2628 : #endif /* USE_CLOSEFROM */
2629 : #ifdef USE_FDWALK
2630 : {
// _fdwalk_close_func() treats lohi[1] as an exclusive bound, hence last + 1.
2631 : int lohi[2];
2632 : lohi[0] = first;
2633 : lohi[1] = last + 1;
2634 : fdwalk(_fdwalk_close_func, lohi);
2635 : }
2636 : #else
2637 : {
// Last resort: try to close every descriptor number in the range.
2638 12 : for (int i = first; i <= last; i++) {
2639 : /* Ignore errors */
2640 10 : (void)close(i);
2641 : }
2642 : }
2643 : #endif /* USE_FDWALK */
2644 : _Py_END_SUPPRESS_IPH
2645 2 : }
|