Line data Source code
1 : /* -*- Mode: C; c-file-style: "python" -*- */
2 :
3 : #include <Python.h>
4 : #include "pycore_dtoa.h" // _Py_dg_strtod()
5 : #include "pycore_pymath.h" // _PY_SHORT_FLOAT_REPR
6 : #include <locale.h>
7 :
/* Compare the beginning of s against the pattern t, ignoring the case of s.
   Used for nan and inf detection; t is expected to be lower-case already.
   Returns 1 if every character of t was matched, 0 otherwise. */

static int
case_insensitive_match(const char *s, const char *t)
{
    for (; *t != '\0'; s++, t++) {
        if (Py_TOLOWER(*s) != *t) {
            /* Mismatch before the pattern was exhausted. */
            return 0;
        }
    }
    return 1;
}
20 :
21 : /* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
22 : "infinity", with an optional leading sign of "+" or "-". On success,
23 : return the NaN or Infinity as a double and set *endptr to point just beyond
24 : the successfully parsed portion of the string. On failure, return -1.0 and
25 : set *endptr to point to the start of the string. */
26 :
27 : #if _PY_SHORT_FLOAT_REPR == 1
28 :
/* Variant compiled when _PY_SHORT_FLOAT_REPR == 1: the special values are
   obtained from _Py_dg_infinity() and _Py_dg_stdnan(). */
double
_Py_parse_inf_or_nan(const char *p, char **endptr)
{
    const char *cur = p;
    int is_negative = 0;
    double value;

    /* Consume an optional leading sign. */
    switch (*cur) {
    case '-':
        is_negative = 1;
        cur++;
        break;
    case '+':
        cur++;
        break;
    default:
        break;
    }

    if (case_insensitive_match(cur, "inf")) {
        cur += 3;
        /* Accept the long spelling "infinity" as well. */
        if (case_insensitive_match(cur, "inity")) {
            cur += 5;
        }
        value = _Py_dg_infinity(is_negative);
    }
    else if (case_insensitive_match(cur, "nan")) {
        cur += 3;
        value = _Py_dg_stdnan(is_negative);
    }
    else {
        /* Nothing recognised: signal failure by leaving *endptr at p. */
        cur = p;
        value = -1.0;
    }

    *endptr = (char *)cur;
    return value;
}
61 :
62 : #else
63 :
64 : double
65 : _Py_parse_inf_or_nan(const char *p, char **endptr)
66 : {
67 : double retval;
68 : const char *s;
69 : int negate = 0;
70 :
71 : s = p;
72 : if (*s == '-') {
73 : negate = 1;
74 : s++;
75 : }
76 : else if (*s == '+') {
77 : s++;
78 : }
79 : if (case_insensitive_match(s, "inf")) {
80 : s += 3;
81 : if (case_insensitive_match(s, "inity"))
82 : s += 5;
83 : retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
84 : }
85 : else if (case_insensitive_match(s, "nan")) {
86 : s += 3;
87 : retval = negate ? -Py_NAN : Py_NAN;
88 : }
89 : else {
90 : s = p;
91 : retval = -1.0;
92 : }
93 : *endptr = (char *)s;
94 : return retval;
95 : }
96 :
97 : #endif
98 :
99 : /**
100 : * _PyOS_ascii_strtod:
101 : * @nptr: the string to convert to a numeric value.
102 : * @endptr: if non-%NULL, it returns the character after
103 : * the last character used in the conversion.
104 : *
 * Converts a string to a double value.
106 : * This function behaves like the standard strtod() function
107 : * does in the C locale. It does this without actually
108 : * changing the current locale, since that would not be
109 : * thread-safe.
110 : *
111 : * This function is typically used when reading configuration
112 : * files or other non-user input that should be locale independent.
113 : * To handle input from the user you should normally use the
114 : * locale-sensitive system strtod() function.
115 : *
116 : * If the correct value would cause overflow, plus or minus %HUGE_VAL
117 : * is returned (according to the sign of the value), and %ERANGE is
118 : * stored in %errno. If the correct value would cause underflow,
119 : * zero is returned and %ERANGE is stored in %errno.
120 : * If memory allocation fails, %ENOMEM is stored in %errno.
121 : *
122 : * This function resets %errno before calling strtod() so that
123 : * you can reliably detect overflow and underflow.
124 : *
 * Return value: the double value.
126 : **/
127 :
128 : #if _PY_SHORT_FLOAT_REPR == 1
129 :
130 : static double
131 118681 : _PyOS_ascii_strtod(const char *nptr, char **endptr)
132 : {
133 : double result;
134 : _Py_SET_53BIT_PRECISION_HEADER;
135 :
136 118681 : assert(nptr != NULL);
137 : /* Set errno to zero, so that we can distinguish zero results
138 : and underflows */
139 118681 : errno = 0;
140 :
141 118681 : _Py_SET_53BIT_PRECISION_START;
142 118681 : result = _Py_dg_strtod(nptr, endptr);
143 118681 : _Py_SET_53BIT_PRECISION_END;
144 :
145 118681 : if (*endptr == nptr)
146 : /* string might represent an inf or nan */
147 8041 : result = _Py_parse_inf_or_nan(nptr, endptr);
148 :
149 118681 : return result;
150 :
151 : }
152 :
153 : #else
154 :
/*
   Use system strtod; since strtod is locale aware, we may
   have to first fix the decimal separator.

   Note that unlike _Py_dg_strtod, the system strtod may not always give
   correctly rounded results.

   On success the parsed value is returned and *endptr points just past the
   consumed text.  If the input is not a valid number, -1.0 is returned,
   *endptr is set to nptr and errno is set to EINVAL.  If the temporary copy
   of the input cannot be allocated, *endptr is set to nptr and errno is set
   to ENOMEM.
*/

static double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr)
        return val;

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725) */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
        goto invalid_string;

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.')
        goto invalid_string;

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (Py_ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
            /* Python bug #1417699 */
            goto invalid_string;
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_Malloc(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            /* val still holds the value returned by the failed
               _Py_parse_inf_or_nan() call above */
            return val;
        }

        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* Translate fail_pos, which points into copy, back to the
               matching position in the caller's string.  fail_pos and
               decimal_point_pos point into different buffers, so their
               offsets (not the raw pointers) have to be compared: if
               strtod stopped beyond the decimal point, the extra bytes
               of a multi-byte locale decimal point must be discounted. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_Free(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    if (fail_pos == digits_pos)
        goto invalid_string;

    if (negate && fail_pos != nptr)
        val = -val;
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
309 :
310 : #endif
311 :
312 : /* PyOS_string_to_double converts a null-terminated byte string s (interpreted
313 : as a string of ASCII characters) to a float. The string should not have
314 : leading or trailing whitespace. The conversion is independent of the
315 : current locale.
316 :
317 : If endptr is NULL, try to convert the whole string. Raise ValueError and
318 : return -1.0 if the string is not a valid representation of a floating-point
319 : number.
320 :
321 : If endptr is non-NULL, try to convert as much of the string as possible.
322 : If no initial segment of the string is the valid representation of a
323 : floating-point number then *endptr is set to point to the beginning of the
324 : string, -1.0 is returned and again ValueError is raised.
325 :
326 : On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
327 : if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
328 : exception is raised. Otherwise, overflow_exception should point to
329 : a Python exception, this exception will be raised, -1.0 will be returned,
330 : and *endptr will point just past the end of the converted value.
331 :
332 : If any other failure occurs (for example lack of memory), -1.0 is returned
333 : and the appropriate Python exception will have been set.
334 : */
335 :
336 : double
337 118681 : PyOS_string_to_double(const char *s,
338 : char **endptr,
339 : PyObject *overflow_exception)
340 : {
341 118681 : double x, result=-1.0;
342 : char *fail_pos;
343 :
344 118681 : errno = 0;
345 118681 : x = _PyOS_ascii_strtod(s, &fail_pos);
346 :
347 118681 : if (errno == ENOMEM) {
348 0 : PyErr_NoMemory();
349 0 : fail_pos = (char *)s;
350 : }
351 118681 : else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
352 5 : PyErr_Format(PyExc_ValueError,
353 : "could not convert string to float: "
354 : "'%.200s'", s);
355 118676 : else if (fail_pos == s)
356 1328 : PyErr_Format(PyExc_ValueError,
357 : "could not convert string to float: "
358 : "'%.200s'", s);
359 117348 : else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
360 0 : PyErr_Format(overflow_exception,
361 : "value too large to convert to float: "
362 : "'%.200s'", s);
363 : else
364 117348 : result = x;
365 :
366 118681 : if (endptr != NULL)
367 61949 : *endptr = fail_pos;
368 118681 : return result;
369 : }
370 :
371 : /* Remove underscores that follow the underscore placement rule from
372 : the string and then call the `innerfunc` function on the result.
373 : It should return a new object or NULL on exception.
374 :
375 : `what` is used for the error message emitted when underscores are detected
376 : that don't follow the rule. `arg` is an opaque pointer passed to the inner
377 : function.
378 :
379 : This is used to implement underscore-agnostic conversion for floats
380 : and complex numbers.
381 : */
382 : PyObject *
383 57125 : _Py_string_to_number_with_underscores(
384 : const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
385 : PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
386 : {
387 : char prev;
388 : const char *p, *last;
389 : char *dup, *end;
390 : PyObject *result;
391 :
392 57125 : assert(s[orig_len] == '\0');
393 :
394 57125 : if (strchr(s, '_') == NULL) {
395 57029 : return innerfunc(s, orig_len, arg);
396 : }
397 :
398 96 : dup = PyMem_Malloc(orig_len + 1);
399 96 : if (dup == NULL) {
400 0 : return PyErr_NoMemory();
401 : }
402 96 : end = dup;
403 96 : prev = '\0';
404 96 : last = s + orig_len;
405 620 : for (p = s; *p; p++) {
406 567 : if (*p == '_') {
407 : /* Underscores are only allowed after digits. */
408 138 : if (!(prev >= '0' && prev <= '9')) {
409 28 : goto error;
410 : }
411 : }
412 : else {
413 429 : *end++ = *p;
414 : /* Underscores are only allowed before digits. */
415 429 : if (prev == '_' && !(*p >= '0' && *p <= '9')) {
416 15 : goto error;
417 : }
418 : }
419 524 : prev = *p;
420 : }
421 : /* Underscores are not allowed at the end. */
422 53 : if (prev == '_') {
423 4 : goto error;
424 : }
425 : /* No embedded NULs allowed. */
426 49 : if (p != last) {
427 1 : goto error;
428 : }
429 48 : *end = '\0';
430 48 : result = innerfunc(dup, end - dup, arg);
431 48 : PyMem_Free(dup);
432 48 : return result;
433 :
434 48 : error:
435 48 : PyMem_Free(dup);
436 48 : PyErr_Format(PyExc_ValueError,
437 : "could not convert string to %s: "
438 : "%R", what, obj);
439 48 : return NULL;
440 : }
441 :
442 : #if _PY_SHORT_FLOAT_REPR == 0
443 :
444 : /* Given a string that may have a decimal point in the current
445 : locale, change it back to a dot. Since the string cannot get
446 : longer, no need for a maximum buffer size parameter. */
447 : Py_LOCAL_INLINE(void)
448 : change_decimal_from_locale_to_dot(char* buffer)
449 : {
450 : struct lconv *locale_data = localeconv();
451 : const char *decimal_point = locale_data->decimal_point;
452 :
453 : if (decimal_point[0] != '.' || decimal_point[1] != 0) {
454 : size_t decimal_point_len = strlen(decimal_point);
455 :
456 : if (*buffer == '+' || *buffer == '-')
457 : buffer++;
458 : while (Py_ISDIGIT(*buffer))
459 : buffer++;
460 : if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
461 : *buffer = '.';
462 : buffer++;
463 : if (decimal_point_len > 1) {
464 : /* buffer needs to get smaller */
465 : size_t rest_len = strlen(buffer +
466 : (decimal_point_len - 1));
467 : memmove(buffer,
468 : buffer + (decimal_point_len - 1),
469 : rest_len);
470 : buffer[rest_len] = 0;
471 : }
472 : }
473 : }
474 : }
475 :
476 :
477 : /* From the C99 standard, section 7.19.6:
478 : The exponent always contains at least two digits, and only as many more digits
479 : as necessary to represent the exponent.
480 : */
481 : #define MIN_EXPONENT_DIGITS 2
482 :
483 : /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
484 : in length. */
485 : Py_LOCAL_INLINE(void)
486 : ensure_minimum_exponent_length(char* buffer, size_t buf_size)
487 : {
488 : char *p = strpbrk(buffer, "eE");
489 : if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
490 : char *start = p + 2;
491 : int exponent_digit_cnt = 0;
492 : int leading_zero_cnt = 0;
493 : int in_leading_zeros = 1;
494 : int significant_digit_cnt;
495 :
496 : /* Skip over the exponent and the sign. */
497 : p += 2;
498 :
499 : /* Find the end of the exponent, keeping track of leading
500 : zeros. */
501 : while (*p && Py_ISDIGIT(*p)) {
502 : if (in_leading_zeros && *p == '0')
503 : ++leading_zero_cnt;
504 : if (*p != '0')
505 : in_leading_zeros = 0;
506 : ++p;
507 : ++exponent_digit_cnt;
508 : }
509 :
510 : significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
511 : if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
512 : /* If there are 2 exactly digits, we're done,
513 : regardless of what they contain */
514 : }
515 : else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
516 : int extra_zeros_cnt;
517 :
518 : /* There are more than 2 digits in the exponent. See
519 : if we can delete some of the leading zeros */
520 : if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
521 : significant_digit_cnt = MIN_EXPONENT_DIGITS;
522 : extra_zeros_cnt = exponent_digit_cnt -
523 : significant_digit_cnt;
524 :
525 : /* Delete extra_zeros_cnt worth of characters from the
526 : front of the exponent */
527 : assert(extra_zeros_cnt >= 0);
528 :
529 : /* Add one to significant_digit_cnt to copy the
530 : trailing 0 byte, thus setting the length */
531 : memmove(start,
532 : start + extra_zeros_cnt,
533 : significant_digit_cnt + 1);
534 : }
535 : else {
536 : /* If there are fewer than 2 digits, add zeros
537 : until there are 2, if there's enough room */
538 : int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
539 : if (start + zeros + exponent_digit_cnt + 1
540 : < buffer + buf_size) {
541 : memmove(start + zeros, start,
542 : exponent_digit_cnt + 1);
543 : memset(start, '0', zeros);
544 : }
545 : }
546 : }
547 : }
548 :
549 : /* Remove trailing zeros after the decimal point from a numeric string; also
550 : remove the decimal point if all digits following it are zero. The numeric
551 : string must end in '\0', and should not have any leading or trailing
552 : whitespace. Assumes that the decimal point is '.'. */
553 : Py_LOCAL_INLINE(void)
554 : remove_trailing_zeros(char *buffer)
555 : {
556 : char *old_fraction_end, *new_fraction_end, *end, *p;
557 :
558 : p = buffer;
559 : if (*p == '-' || *p == '+')
560 : /* Skip leading sign, if present */
561 : ++p;
562 : while (Py_ISDIGIT(*p))
563 : ++p;
564 :
565 : /* if there's no decimal point there's nothing to do */
566 : if (*p++ != '.')
567 : return;
568 :
569 : /* scan any digits after the point */
570 : while (Py_ISDIGIT(*p))
571 : ++p;
572 : old_fraction_end = p;
573 :
574 : /* scan up to ending '\0' */
575 : while (*p != '\0')
576 : p++;
577 : /* +1 to make sure that we move the null byte as well */
578 : end = p+1;
579 :
580 : /* scan back from fraction_end, looking for removable zeros */
581 : p = old_fraction_end;
582 : while (*(p-1) == '0')
583 : --p;
584 : /* and remove point if we've got that far */
585 : if (*(p-1) == '.')
586 : --p;
587 : new_fraction_end = p;
588 :
589 : memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
590 : }
591 :
592 : /* Ensure that buffer has a decimal point in it. The decimal point will not
593 : be in the current locale, it will always be '.'. Don't add a decimal point
594 : if an exponent is present. Also, convert to exponential notation where
595 : adding a '.0' would produce too many significant digits (see issue 5864).
596 :
597 : Returns a pointer to the fixed buffer, or NULL on failure.
598 : */
599 : Py_LOCAL_INLINE(char *)
600 : ensure_decimal_point(char* buffer, size_t buf_size, int precision)
601 : {
602 : int digit_count, insert_count = 0, convert_to_exp = 0;
603 : const char *chars_to_insert;
604 : char *digits_start;
605 :
606 : /* search for the first non-digit character */
607 : char *p = buffer;
608 : if (*p == '-' || *p == '+')
609 : /* Skip leading sign, if present. I think this could only
610 : ever be '-', but it can't hurt to check for both. */
611 : ++p;
612 : digits_start = p;
613 : while (*p && Py_ISDIGIT(*p))
614 : ++p;
615 : digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
616 :
617 : if (*p == '.') {
618 : if (Py_ISDIGIT(*(p+1))) {
619 : /* Nothing to do, we already have a decimal
620 : point and a digit after it */
621 : }
622 : else {
623 : /* We have a decimal point, but no following
624 : digit. Insert a zero after the decimal. */
625 : /* can't ever get here via PyOS_double_to_string */
626 : assert(precision == -1);
627 : ++p;
628 : chars_to_insert = "0";
629 : insert_count = 1;
630 : }
631 : }
632 : else if (!(*p == 'e' || *p == 'E')) {
633 : /* Don't add ".0" if we have an exponent. */
634 : if (digit_count == precision) {
635 : /* issue 5864: don't add a trailing .0 in the case
636 : where the '%g'-formatted result already has as many
637 : significant digits as were requested. Switch to
638 : exponential notation instead. */
639 : convert_to_exp = 1;
640 : /* no exponent, no point, and we shouldn't land here
641 : for infs and nans, so we must be at the end of the
642 : string. */
643 : assert(*p == '\0');
644 : }
645 : else {
646 : assert(precision == -1 || digit_count < precision);
647 : chars_to_insert = ".0";
648 : insert_count = 2;
649 : }
650 : }
651 : if (insert_count) {
652 : size_t buf_len = strlen(buffer);
653 : if (buf_len + insert_count + 1 >= buf_size) {
654 : /* If there is not enough room in the buffer
655 : for the additional text, just skip it. It's
656 : not worth generating an error over. */
657 : }
658 : else {
659 : memmove(p + insert_count, p,
660 : buffer + strlen(buffer) - p + 1);
661 : memcpy(p, chars_to_insert, insert_count);
662 : }
663 : }
664 : if (convert_to_exp) {
665 : int written;
666 : size_t buf_avail;
667 : p = digits_start;
668 : /* insert decimal point */
669 : assert(digit_count >= 1);
670 : memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
671 : p[1] = '.';
672 : p += digit_count+1;
673 : assert(p <= buf_size+buffer);
674 : buf_avail = buf_size+buffer-p;
675 : if (buf_avail == 0)
676 : return NULL;
677 : /* Add exponent. It's okay to use lower case 'e': we only
678 : arrive here as a result of using the empty format code or
679 : repr/str builtins and those never want an upper case 'E' */
680 : written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
681 : if (!(0 <= written &&
682 : written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
683 : /* output truncated, or something else bad happened */
684 : return NULL;
685 : remove_trailing_zeros(buffer);
686 : }
687 : return buffer;
688 : }
689 :
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/**
 * _PyOS_ascii_formatd:
 * @buffer: A buffer to place the resulting string in
 * @buf_size: The length of the buffer.
 * @format: The printf()-style format to use for converting.  It must
 *          start with '%' and end with the conversion specifier.
 * @d: The double to convert
 * @precision: The precision that was requested; only consulted for the
 *             'Z' conversion, where it is passed to ensure_decimal_point().
 *
 * Converts a double to a string, using '.' as the decimal point.
 * To format the number you pass in a printf()-style format string.
 * Allowed conversion specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
 *
 * 'Z' is the same as 'g', except it always has a decimal and
 *     at least one digit after the decimal.
 *
 * Return value: The pointer to the buffer with the converted string.
 * On failure (including an empty or otherwise unacceptable format)
 * returns NULL but does not set any Python exception.
 **/
static char *
_PyOS_ascii_formatd(char       *buffer,
                    size_t      buf_size,
                    const char *format,
                    double      d,
                    int         precision)
{
    char format_char;
    size_t format_len = strlen(format);

    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
       also with at least one character past the decimal. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* Validate the start of the format before indexing from its end, so
       that an empty format is rejected instead of reading format[-1]. */
    if (format_len == 0 || format[0] != '%')
        return NULL;

    /* The last character in the format string must be the format char */
    format_char = format[format_len - 1];

    /* Reject formats that contain a single quote, a lowercase 'l' or a
       further '%' anywhere after the leading '%'. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    /* Also curious about this function is that it accepts format strings
       like "%xg", which are invalid for floats.  In general, the
       interface to this function is not very good, but changing it is
       difficult because it's a public API. */

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'Z'))
        return NULL;

    /* Map 'Z' format_char to 'g', by copying the format string and
       replacing the final char with a 'g' */
    if (format_char == 'Z') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be
               detected by returning NULL */
            return NULL;
        }
        strcpy(tmp_format, format);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }


    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_size, format, d);

    /* Do various fixups on the return string */

    /* Get the current locale, and find the decimal point string.
       Convert that string back to a dot. */
    change_decimal_from_locale_to_dot(buffer);

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    ensure_minimum_exponent_length(buffer, buf_size);

    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point);
       also switch to exponential notation in some edge cases where the
       extra character would produce more significant digits than we
       really want. */
    if (format_char == 'Z')
        buffer = ensure_decimal_point(buffer, buf_size, precision);

    return buffer;
}
792 :
793 : /* The fallback code to use if _Py_dg_dtoa is not available. */
794 :
795 : char * PyOS_double_to_string(double val,
796 : char format_code,
797 : int precision,
798 : int flags,
799 : int *type)
800 : {
801 : char format[32];
802 : Py_ssize_t bufsize;
803 : char *buf;
804 : int t, exp;
805 : int upper = 0;
806 :
807 : /* Validate format_code, and map upper and lower case */
808 : switch (format_code) {
809 : case 'e': /* exponent */
810 : case 'f': /* fixed */
811 : case 'g': /* general */
812 : break;
813 : case 'E':
814 : upper = 1;
815 : format_code = 'e';
816 : break;
817 : case 'F':
818 : upper = 1;
819 : format_code = 'f';
820 : break;
821 : case 'G':
822 : upper = 1;
823 : format_code = 'g';
824 : break;
825 : case 'r': /* repr format */
826 : /* Supplied precision is unused, must be 0. */
827 : if (precision != 0) {
828 : PyErr_BadInternalCall();
829 : return NULL;
830 : }
831 : /* The repr() precision (17 significant decimal digits) is the
832 : minimal number that is guaranteed to have enough precision
833 : so that if the number is read back in the exact same binary
834 : value is recreated. This is true for IEEE floating point
835 : by design, and also happens to work for all other modern
836 : hardware. */
837 : precision = 17;
838 : format_code = 'g';
839 : break;
840 : default:
841 : PyErr_BadInternalCall();
842 : return NULL;
843 : }
844 :
845 : /* Here's a quick-and-dirty calculation to figure out how big a buffer
846 : we need. In general, for a finite float we need:
847 :
848 : 1 byte for each digit of the decimal significand, and
849 :
850 : 1 for a possible sign
851 : 1 for a possible decimal point
852 : 2 for a possible [eE][+-]
853 : 1 for each digit of the exponent; if we allow 19 digits
854 : total then we're safe up to exponents of 2**63.
855 : 1 for the trailing nul byte
856 :
857 : This gives a total of 24 + the number of digits in the significand,
858 : and the number of digits in the significand is:
859 :
860 : for 'g' format: at most precision, except possibly
861 : when precision == 0, when it's 1.
862 : for 'e' format: precision+1
863 : for 'f' format: precision digits after the point, at least 1
864 : before. To figure out how many digits appear before the point
865 : we have to examine the size of the number. If fabs(val) < 1.0
866 : then there will be only one digit before the point. If
867 : fabs(val) >= 1.0, then there are at most
868 :
869 : 1+floor(log10(ceiling(fabs(val))))
870 :
871 : digits before the point (where the 'ceiling' allows for the
872 : possibility that the rounding rounds the integer part of val
873 : up). A safe upper bound for the above quantity is
874 : 1+floor(exp/3), where exp is the unique integer such that 0.5
875 : <= fabs(val)/2**exp < 1.0. This exp can be obtained from
876 : frexp.
877 :
878 : So we allow room for precision+1 digits for all formats, plus an
879 : extra floor(exp/3) digits for 'f' format.
880 :
881 : */
882 :
883 : if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
884 : /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
885 : bufsize = 5;
886 : else {
887 : bufsize = 25 + precision;
888 : if (format_code == 'f' && fabs(val) >= 1.0) {
889 : frexp(val, &exp);
890 : bufsize += exp/3;
891 : }
892 : }
893 :
894 : buf = PyMem_Malloc(bufsize);
895 : if (buf == NULL) {
896 : PyErr_NoMemory();
897 : return NULL;
898 : }
899 :
900 : /* Handle nan and inf. */
901 : if (Py_IS_NAN(val)) {
902 : strcpy(buf, "nan");
903 : t = Py_DTST_NAN;
904 : } else if (Py_IS_INFINITY(val)) {
905 : if (copysign(1., val) == 1.)
906 : strcpy(buf, "inf");
907 : else
908 : strcpy(buf, "-inf");
909 : t = Py_DTST_INFINITE;
910 : } else {
911 : t = Py_DTST_FINITE;
912 : if (flags & Py_DTSF_ADD_DOT_0)
913 : format_code = 'Z';
914 :
915 : PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
916 : (flags & Py_DTSF_ALT ? "#" : ""), precision,
917 : format_code);
918 : _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
919 :
920 : if (flags & Py_DTSF_NO_NEG_0 && buf[0] == '-') {
921 : char *buf2 = buf + 1;
922 : while (*buf2 == '0' || *buf2 == '.') {
923 : ++buf2;
924 : }
925 : if (*buf2 == 0 || *buf2 == 'e') {
926 : size_t len = buf2 - buf + strlen(buf2);
927 : assert(buf[len] == 0);
928 : memmove(buf, buf+1, len);
929 : }
930 : }
931 : }
932 :
933 : /* Add sign when requested. It's convenient (esp. when formatting
934 : complex numbers) to include a sign even for inf and nan. */
935 : if (flags & Py_DTSF_SIGN && buf[0] != '-') {
936 : size_t len = strlen(buf);
937 : /* the bufsize calculations above should ensure that we've got
938 : space to add a sign */
939 : assert((size_t)bufsize >= len+2);
940 : memmove(buf+1, buf, len+1);
941 : buf[0] = '+';
942 : }
943 : if (upper) {
944 : /* Convert to upper case. */
945 : char *p1;
946 : for (p1 = buf; *p1; p1++)
947 : *p1 = Py_TOUPPER(*p1);
948 : }
949 :
950 : if (type)
951 : *type = t;
952 : return buf;
953 : }
954 :
955 : #else // _PY_SHORT_FLOAT_REPR == 1
956 :
957 : /* _Py_dg_dtoa is available. */
958 :
/* A pair of lookup tables (lower-case and upper-case spellings) is used so
   that no non-locale-specific way of converting to upper case has to be
   invented.  The OFS_* constants index both tables. */
#define OFS_INF 0
#define OFS_NAN 1
#define OFS_E 2

/* The lengths of these are known to the code below, so don't change them */
static const char * const lc_float_strings[] = {
    [OFS_INF] = "inf",
    [OFS_NAN] = "nan",
    [OFS_E]   = "e",
};
static const char * const uc_float_strings[] = {
    [OFS_INF] = "INF",
    [OFS_NAN] = "NAN",
    [OFS_E]   = "E",
};
976 :
977 :
/* Convert a double d to a string, and return a PyMem_Malloc'd block of
   memory containing the resulting string.
980 :
981 : Arguments:
982 : d is the double to be converted
983 : format_code is one of 'e', 'f', 'g', 'r'. 'e', 'f' and 'g'
984 : correspond to '%e', '%f' and '%g'; 'r' corresponds to repr.
985 : mode is one of '0', '2' or '3', and is completely determined by
986 : format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
987 : precision is the desired precision
988 : always_add_sign is nonzero if a '+' sign should be included for positive
989 : numbers
990 : add_dot_0_if_integer is nonzero if integers in non-exponential form
991 : should have ".0" added. Only applies to format codes 'r' and 'g'.
992 : use_alt_formatting is nonzero if alternative formatting should be
993 : used. Only applies to format codes 'e', 'f' and 'g'. For code 'g',
994 : at most one of use_alt_formatting and add_dot_0_if_integer should
995 : be nonzero.
996 : type, if non-NULL, will be set to one of these constants to identify
997 : the type of the 'd' argument:
998 : Py_DTST_FINITE
999 : Py_DTST_INFINITE
1000 : Py_DTST_NAN
1001 :
1002 : Returns a PyMem_Malloc'd block of memory containing the resulting string,
1003 : or NULL on error. If NULL is returned, the Python error has been set.
1004 : */
1005 :
1006 : static char *
1007 3554970 : format_float_short(double d, char format_code,
1008 : int mode, int precision,
1009 : int always_add_sign, int add_dot_0_if_integer,
1010 : int use_alt_formatting, int no_negative_zero,
1011 : const char * const *float_strings, int *type)
1012 : {
1013 3554970 : char *buf = NULL;
1014 3554970 : char *p = NULL;
1015 3554970 : Py_ssize_t bufsize = 0;
1016 : char *digits, *digits_end;
1017 3554970 : int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
1018 : Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
1019 : _Py_SET_53BIT_PRECISION_HEADER;
1020 :
1021 : /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
1022 : Must be matched by a call to _Py_dg_freedtoa. */
1023 3554970 : _Py_SET_53BIT_PRECISION_START;
1024 3554970 : digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
1025 : &digits_end);
1026 3554970 : _Py_SET_53BIT_PRECISION_END;
1027 :
1028 3554970 : decpt = (Py_ssize_t)decpt_as_int;
1029 3554970 : if (digits == NULL) {
1030 : /* The only failure mode is no memory. */
1031 0 : PyErr_NoMemory();
1032 0 : goto exit;
1033 : }
1034 3554970 : assert(digits_end != NULL && digits_end >= digits);
1035 3554970 : digits_len = digits_end - digits;
1036 :
1037 3554970 : if (no_negative_zero && sign == 1 &&
1038 25 : (digits_len == 0 || (digits_len == 1 && digits[0] == '0'))) {
1039 22 : sign = 0;
1040 : }
1041 :
1042 3554970 : if (digits_len && !Py_ISDIGIT(digits[0])) {
1043 : /* Infinities and nans here; adapt Gay's output,
1044 : so convert Infinity to inf and NaN to nan, and
1045 : ignore sign of nan. Then return. */
1046 :
1047 : /* ignore the actual sign of a nan */
1048 15435 : if (digits[0] == 'n' || digits[0] == 'N')
1049 1535 : sign = 0;
1050 :
1051 : /* We only need 5 bytes to hold the result "+inf\0" . */
1052 15435 : bufsize = 5; /* Used later in an assert. */
1053 15435 : buf = (char *)PyMem_Malloc(bufsize);
1054 15435 : if (buf == NULL) {
1055 0 : PyErr_NoMemory();
1056 0 : goto exit;
1057 : }
1058 15435 : p = buf;
1059 :
1060 15435 : if (sign == 1) {
1061 6283 : *p++ = '-';
1062 : }
1063 9152 : else if (always_add_sign) {
1064 192 : *p++ = '+';
1065 : }
1066 15435 : if (digits[0] == 'i' || digits[0] == 'I') {
1067 13900 : strncpy(p, float_strings[OFS_INF], 3);
1068 13900 : p += 3;
1069 :
1070 13900 : if (type)
1071 84 : *type = Py_DTST_INFINITE;
1072 : }
1073 1535 : else if (digits[0] == 'n' || digits[0] == 'N') {
1074 1535 : strncpy(p, float_strings[OFS_NAN], 3);
1075 1535 : p += 3;
1076 :
1077 1535 : if (type)
1078 84 : *type = Py_DTST_NAN;
1079 : }
1080 : else {
1081 : /* shouldn't get here: Gay's code should always return
1082 : something starting with a digit, an 'I', or 'N' */
1083 0 : Py_UNREACHABLE();
1084 : }
1085 15435 : goto exit;
1086 : }
1087 :
1088 : /* The result must be finite (not inf or nan). */
1089 3539530 : if (type)
1090 3349940 : *type = Py_DTST_FINITE;
1091 :
1092 :
1093 : /* We got digits back, format them. We may need to pad 'digits'
1094 : either on the left or right (or both) with extra zeros, so in
1095 : general the resulting string has the form
1096 :
1097 : [<sign>]<zeros><digits><zeros>[<exponent>]
1098 :
1099 : where either of the <zeros> pieces could be empty, and there's a
1100 : decimal point that could appear either in <digits> or in the
1101 : leading or trailing <zeros>.
1102 :
1103 : Imagine an infinite 'virtual' string vdigits, consisting of the
1104 : string 'digits' (starting at index 0) padded on both the left and
1105 : right with infinite strings of zeros. We want to output a slice
1106 :
1107 : vdigits[vdigits_start : vdigits_end]
1108 :
1109 : of this virtual string. Thus if vdigits_start < 0 then we'll end
1110 : up producing some leading zeros; if vdigits_end > digits_len there
1111 : will be trailing zeros in the output. The next section of code
1112 : determines whether to use an exponent or not, figures out the
1113 : position 'decpt' of the decimal point, and computes 'vdigits_start'
1114 : and 'vdigits_end'. */
1115 3539530 : vdigits_end = digits_len;
1116 3539530 : switch (format_code) {
1117 4303 : case 'e':
1118 4303 : use_exp = 1;
1119 4303 : vdigits_end = precision;
1120 4303 : break;
1121 32689 : case 'f':
1122 32689 : vdigits_end = decpt + precision;
1123 32689 : break;
1124 3346880 : case 'g':
1125 6693330 : if (decpt <= -4 || decpt >
1126 3346450 : (add_dot_0_if_integer ? precision-1 : precision))
1127 2850 : use_exp = 1;
1128 3346880 : if (use_alt_formatting)
1129 1878 : vdigits_end = precision;
1130 3346880 : break;
1131 155663 : case 'r':
1132 : /* convert to exponential format at 1e16. We used to convert
1133 : at 1e17, but that gives odd-looking results for some values
1134 : when a 16-digit 'shortest' repr is padded with bogus zeros.
1135 : For example, repr(2e16+8) would give 20000000000000010.0;
1136 : the true value is 20000000000000008.0. */
1137 155663 : if (decpt <= -4 || decpt > 16)
1138 89092 : use_exp = 1;
1139 155663 : break;
1140 0 : default:
1141 0 : PyErr_BadInternalCall();
1142 0 : goto exit;
1143 : }
1144 :
1145 : /* if using an exponent, reset decimal point position to 1 and adjust
1146 : exponent accordingly.*/
1147 3539530 : if (use_exp) {
1148 96245 : exp = (int)decpt - 1;
1149 96245 : decpt = 1;
1150 : }
1151 : /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1152 : decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1153 3539530 : vdigits_start = decpt <= 0 ? decpt-1 : 0;
1154 3539530 : if (!use_exp && add_dot_0_if_integer)
1155 64723 : vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1156 : else
1157 3474810 : vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
1158 :
1159 : /* double check inequalities */
1160 3539530 : assert(vdigits_start <= 0 &&
1161 : 0 <= digits_len &&
1162 : digits_len <= vdigits_end);
1163 : /* decimal point should be in (vdigits_start, vdigits_end] */
1164 3539530 : assert(vdigits_start < decpt && decpt <= vdigits_end);
1165 :
1166 : /* Compute an upper bound how much memory we need. This might be a few
1167 : chars too long, but no big deal. */
1168 3539530 : bufsize =
1169 : /* sign, decimal point and trailing 0 byte */
1170 3539530 : 3 +
1171 :
1172 : /* total digit count (including zero padding on both sides) */
1173 3539530 : (vdigits_end - vdigits_start) +
1174 :
1175 : /* exponent "e+100", max 3 numerical digits */
1176 3539530 : (use_exp ? 5 : 0);
1177 :
1178 : /* Now allocate the memory and initialize p to point to the start of
1179 : it. */
1180 3539530 : buf = (char *)PyMem_Malloc(bufsize);
1181 3539530 : if (buf == NULL) {
1182 0 : PyErr_NoMemory();
1183 0 : goto exit;
1184 : }
1185 3539530 : p = buf;
1186 :
1187 : /* Add a negative sign if negative, and a plus sign if non-negative
1188 : and always_add_sign is true. */
1189 3539530 : if (sign == 1)
1190 3408270 : *p++ = '-';
1191 131266 : else if (always_add_sign)
1192 506 : *p++ = '+';
1193 :
1194 : /* note that exactly one of the three 'if' conditions is true,
1195 : so we include exactly one decimal point */
1196 : /* Zero padding on left of digit string */
1197 3539530 : if (decpt <= 0) {
1198 35878 : memset(p, '0', decpt-vdigits_start);
1199 35878 : p += decpt - vdigits_start;
1200 35878 : *p++ = '.';
1201 35878 : memset(p, '0', 0-decpt);
1202 35878 : p += 0-decpt;
1203 : }
1204 : else {
1205 3503660 : memset(p, '0', 0-vdigits_start);
1206 3503660 : p += 0 - vdigits_start;
1207 : }
1208 :
1209 : /* Digits, with included decimal point */
1210 3539530 : if (0 < decpt && decpt <= digits_len) {
1211 3497050 : strncpy(p, digits, decpt-0);
1212 3497050 : p += decpt-0;
1213 3497050 : *p++ = '.';
1214 3497050 : strncpy(p, digits+decpt, digits_len-decpt);
1215 3497050 : p += digits_len-decpt;
1216 : }
1217 : else {
1218 42488 : strncpy(p, digits, digits_len);
1219 42488 : p += digits_len;
1220 : }
1221 :
1222 : /* And zeros on the right */
1223 3539530 : if (digits_len < decpt) {
1224 6610 : memset(p, '0', decpt-digits_len);
1225 6610 : p += decpt-digits_len;
1226 6610 : *p++ = '.';
1227 6610 : memset(p, '0', vdigits_end-decpt);
1228 6610 : p += vdigits_end-decpt;
1229 : }
1230 : else {
1231 3532920 : memset(p, '0', vdigits_end-digits_len);
1232 3532920 : p += vdigits_end-digits_len;
1233 : }
1234 :
1235 : /* Delete a trailing decimal pt unless using alternative formatting. */
1236 3539530 : if (p[-1] == '.' && !use_alt_formatting)
1237 3349880 : p--;
1238 :
1239 : /* Now that we've done zero padding, add an exponent if needed. */
1240 3539530 : if (use_exp) {
1241 96245 : *p++ = float_strings[OFS_E][0];
1242 96245 : exp_len = sprintf(p, "%+.02d", exp);
1243 96245 : p += exp_len;
1244 : }
1245 3443290 : exit:
1246 3554970 : if (buf) {
1247 3554970 : *p = '\0';
1248 : /* It's too late if this fails, as we've already stepped on
1249 : memory that isn't ours. But it's an okay debugging test. */
1250 3554970 : assert(p-buf < bufsize);
1251 : }
1252 3554970 : if (digits)
1253 3554970 : _Py_dg_freedtoa(digits);
1254 :
1255 3554970 : return buf;
1256 : }
1257 :
1258 :
1259 3554970 : char * PyOS_double_to_string(double val,
1260 : char format_code,
1261 : int precision,
1262 : int flags,
1263 : int *type)
1264 : {
1265 3554970 : const char * const *float_strings = lc_float_strings;
1266 : int mode;
1267 :
1268 : /* Validate format_code, and map upper and lower case. Compute the
1269 : mode and make any adjustments as needed. */
1270 3554970 : switch (format_code) {
1271 : /* exponent */
1272 3622 : case 'E':
1273 3622 : float_strings = uc_float_strings;
1274 3622 : format_code = 'e';
1275 : /* Fall through. */
1276 7983 : case 'e':
1277 7983 : mode = 2;
1278 7983 : precision++;
1279 7983 : break;
1280 :
1281 : /* fixed */
1282 3641 : case 'F':
1283 3641 : float_strings = uc_float_strings;
1284 3641 : format_code = 'f';
1285 : /* Fall through. */
1286 36397 : case 'f':
1287 36397 : mode = 3;
1288 36397 : break;
1289 :
1290 : /* general */
1291 3619 : case 'G':
1292 3619 : float_strings = uc_float_strings;
1293 3619 : format_code = 'g';
1294 : /* Fall through. */
1295 3350560 : case 'g':
1296 3350560 : mode = 2;
1297 : /* precision 0 makes no sense for 'g' format; interpret as 1 */
1298 3350560 : if (precision == 0)
1299 3126 : precision = 1;
1300 3350560 : break;
1301 :
1302 : /* repr format */
1303 160030 : case 'r':
1304 160030 : mode = 0;
1305 : /* Supplied precision is unused, must be 0. */
1306 160030 : if (precision != 0) {
1307 0 : PyErr_BadInternalCall();
1308 0 : return NULL;
1309 : }
1310 160030 : break;
1311 :
1312 0 : default:
1313 0 : PyErr_BadInternalCall();
1314 0 : return NULL;
1315 : }
1316 :
1317 3554970 : return format_float_short(val, format_code, mode, precision,
1318 : flags & Py_DTSF_SIGN,
1319 : flags & Py_DTSF_ADD_DOT_0,
1320 : flags & Py_DTSF_ALT,
1321 : flags & Py_DTSF_NO_NEG_0,
1322 : float_strings, type);
1323 : }
1324 : #endif // _PY_SHORT_FLOAT_REPR == 1
|