LCOV - code coverage report
Current view: top level - Python - formatter_unicode.c (source / functions) Hit Total Coverage
Test: CPython lcov report Lines: 650 700 92.9 %
Date: 2022-07-07 18:19:46 Functions: 23 23 100.0 %

          Line data    Source code
       1             : /* Implements the unicode (as opposed to string) version of the
       2             :    built-in formatters for string, int, float.  That is, the versions
       3             :    of int.__format__, etc., that take and return unicode objects. */
       4             : 
       5             : #include "Python.h"
       6             : #include "pycore_fileutils.h"     // _Py_GetLocaleconvNumeric()
       7             : #include "pycore_long.h"          // _PyLong_FormatWriter()
       8             : #include <locale.h>
       9             : 
      10             : /* Raises an exception about an unknown presentation type for this
      11             :  * type. */
      12             : 
      13             : static void
      14         411 : unknown_presentation_type(Py_UCS4 presentation_type,
      15             :                           const char* type_name)
      16             : {
      17             :     /* %c might be out-of-range, hence the two cases. */
      18         411 :     if (presentation_type > 32 && presentation_type < 128)
      19         411 :         PyErr_Format(PyExc_ValueError,
      20             :                      "Unknown format code '%c' "
      21             :                      "for object of type '%.200s'",
      22         411 :                      (char)presentation_type,
      23             :                      type_name);
      24             :     else
      25           0 :         PyErr_Format(PyExc_ValueError,
      26             :                      "Unknown format code '\\x%x' "
      27             :                      "for object of type '%.200s'",
      28             :                      (unsigned int)presentation_type,
      29             :                      type_name);
      30         411 : }
      31             : 
      32             : static void
      33          19 : invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
      34             : {
      35          19 :     assert(specifier == ',' || specifier == '_');
      36          19 :     if (presentation_type > 32 && presentation_type < 128)
      37          19 :         PyErr_Format(PyExc_ValueError,
      38             :                      "Cannot specify '%c' with '%c'.",
      39          19 :                      specifier, (char)presentation_type);
      40             :     else
      41           0 :         PyErr_Format(PyExc_ValueError,
      42             :                      "Cannot specify '%c' with '\\x%x'.",
      43             :                      specifier, (unsigned int)presentation_type);
      44          19 : }
      45             : 
      46             : static void
      47           8 : invalid_comma_and_underscore(void)
      48             : {
      49           8 :     PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
      50           8 : }
      51             : 
      52             : /*
      53             :     get_integer consumes 0 or more decimal digit characters from an
      54             :     input string starting at *ppos, stores the corresponding
      55             :     non-negative integer in *result, advances *ppos past the digits
      56             :     consumed, and returns the number of digits consumed.
      57             :     returns -1, with ValueError set, if the value would overflow.
      58             : */
      59             : static int
      60     7204230 : get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
      61             :                   Py_ssize_t *result)
      62             : {
      63     7204230 :     Py_ssize_t accumulator, digitval, pos = *ppos;
      64             :     int numdigits;
      65     7204230 :     int kind = PyUnicode_KIND(str);
      66     7204230 :     const void *data = PyUnicode_DATA(str);
      67             : 
      68     7204230 :     accumulator = numdigits = 0;
      69    14386400 :     for (; pos < end; pos++, numdigits++) {
      70    14377700 :         digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
      71    14377700 :         if (digitval < 0)
      72     7195580 :             break;
      73             :         /*
      74             :            Detect possible overflow before it happens:
      75             : 
      76             :               accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
      77             :               accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
      78             :         */
      79     7182170 :         if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
      80           7 :             PyErr_Format(PyExc_ValueError,
      81             :                          "Too many decimal digits in format string");
      82           7 :             *ppos = pos;
      83           7 :             return -1;
      84             :         }
      85     7182160 :         accumulator = accumulator * 10 + digitval;
      86             :     }
      87     7204220 :     *ppos = pos;
      88     7204220 :     *result = accumulator;
      89     7204220 :     return numdigits;
      90             : }
      91             : 
      92             : /************************************************************************/
      93             : /*********** standard format specifier parsing **************************/
      94             : /************************************************************************/
      95             : 
      96             : /* returns true if this character is a specifier alignment token */
      97             : Py_LOCAL_INLINE(int)
      98     7682440 : is_alignment_token(Py_UCS4 c)
      99             : {
     100     7682440 :     switch (c) {
     101        1595 :     case '<': case '>': case '=': case '^':
     102        1595 :         return 1;
     103     7680850 :     default:
     104     7680850 :         return 0;
     105             :     }
     106             : }
     107             : 
     108             : /* returns true if this character is a sign element */
     109             : Py_LOCAL_INLINE(int)
     110     3851700 : is_sign_element(Py_UCS4 c)
     111             : {
     112     3851700 :     switch (c) {
     113       18670 :     case ' ': case '+': case '-':
     114       18670 :         return 1;
     115     3833040 :     default:
     116     3833040 :         return 0;
     117             :     }
     118             : }
     119             : 
     120             : /* Locale type codes. LT_NO_LOCALE must be zero. */
     121             : enum LocaleType {
     122             :     LT_NO_LOCALE = 0,
     123             :     LT_DEFAULT_LOCALE = ',',
     124             :     LT_UNDERSCORE_LOCALE = '_',
     125             :     LT_UNDER_FOUR_LOCALE,
     126             :     LT_CURRENT_LOCALE
     127             : };
     128             : 
     129             : typedef struct {
     130             :     Py_UCS4 fill_char;
     131             :     Py_UCS4 align;
     132             :     int alternate;
     133             :     int no_neg_0;
     134             :     Py_UCS4 sign;
     135             :     Py_ssize_t width;
     136             :     enum LocaleType thousands_separators;
     137             :     Py_ssize_t precision;
     138             :     Py_UCS4 type;
     139             : } InternalFormatSpec;
     140             : 
     141             : 
     142             : /*
     143             :   start is the index in format_spec where the spec begins, end is the
     144             :   index just past its end.  fills in format with the parsed information.
     145             :   returns 1 on success, 0 on failure.
     146             :   if failure, sets the exception
     147             : */
     148             : static int
     149     3851710 : parse_internal_render_format_spec(PyObject *obj,
     150             :                                   PyObject *format_spec,
     151             :                                   Py_ssize_t start, Py_ssize_t end,
     152             :                                   InternalFormatSpec *format,
     153             :                                   char default_type,
     154             :                                   char default_align)
     155             : {
     156     3851710 :     Py_ssize_t pos = start;
     157     3851710 :     int kind = PyUnicode_KIND(format_spec);
     158     3851710 :     const void *data = PyUnicode_DATA(format_spec);
     159             :     /* end-pos is used throughout this code to specify the length of
     160             :        the input string */
     161             : #define READ_spec(index) PyUnicode_READ(kind, data, index)
     162             : 
     163             :     Py_ssize_t consumed;
     164     3851710 :     int align_specified = 0;
     165     3851710 :     int fill_char_specified = 0;
     166             : 
     167     3851710 :     format->fill_char = ' ';
     168     3851710 :     format->align = default_align;
     169     3851710 :     format->alternate = 0;
     170     3851710 :     format->no_neg_0 = 0;
     171     3851710 :     format->sign = '\0';
     172     3851710 :     format->width = -1;
     173     3851710 :     format->thousands_separators = LT_NO_LOCALE;
     174     3851710 :     format->precision = -1;
     175     3851710 :     format->type = default_type;
     176             : 
     177             :     /* If the second char is an alignment token,
     178             :        then parse the fill char */
     179     3851710 :     if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
     180          74 :         format->align = READ_spec(pos+1);
     181          74 :         format->fill_char = READ_spec(pos);
     182          74 :         fill_char_specified = 1;
     183          74 :         align_specified = 1;
     184          74 :         pos += 2;
     185             :     }
     186     3851640 :     else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
     187        1521 :         format->align = READ_spec(pos);
     188        1521 :         align_specified = 1;
     189        1521 :         ++pos;
     190             :     }
     191             : 
     192             :     /* Parse the various sign options */
     193     3851710 :     if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
     194       18670 :         format->sign = READ_spec(pos);
     195       18670 :         ++pos;
     196             :     }
     197             : 
     198             :     /* If the next character is z, request coercion of negative 0.
     199             :        Applies only to floats. */
     200     3851710 :     if (end-pos >= 1 && READ_spec(pos) == 'z') {
     201          50 :         format->no_neg_0 = 1;
     202          50 :         ++pos;
     203             :     }
     204             : 
     205             :     /* If the next character is #, we're in alternate mode.  This only
     206             :        applies to integers. */
     207     3851710 :     if (end-pos >= 1 && READ_spec(pos) == '#') {
     208         194 :         format->alternate = 1;
     209         194 :         ++pos;
     210             :     }
     211             : 
     212             :     /* The special case for 0-padding (backwards compat) */
     213     3851710 :     if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
     214      455219 :         format->fill_char = '0';
     215      455219 :         if (!align_specified && default_align == '>') {
     216      455214 :             format->align = '=';
     217             :         }
     218      455219 :         ++pos;
     219             :     }
     220             : 
     221     3851710 :     consumed = get_integer(format_spec, &pos, end, &format->width);
     222     3851710 :     if (consumed == -1)
     223             :         /* Overflow error. Exception already set. */
     224           3 :         return 0;
     225             : 
     226             :     /* If consumed is 0, we didn't consume any characters for the
     227             :        width. In that case, reset the width to -1, because
     228             :        get_integer() will have set it to zero. -1 is how we record
     229             :        that the width wasn't specified. */
     230     3851710 :     if (consumed == 0)
     231     3388970 :         format->width = -1;
     232             : 
     233             :     /* Comma signifies add thousands separators */
     234     3851710 :     if (end-pos && READ_spec(pos) == ',') {
     235          56 :         format->thousands_separators = LT_DEFAULT_LOCALE;
     236          56 :         ++pos;
     237             :     }
     238             :     /* Underscore signifies add thousands separators */
     239     3851710 :     if (end-pos && READ_spec(pos) == '_') {
     240         102 :         if (format->thousands_separators != LT_NO_LOCALE) {
     241           4 :             invalid_comma_and_underscore();
     242           4 :             return 0;
     243             :         }
     244          98 :         format->thousands_separators = LT_UNDERSCORE_LOCALE;
     245          98 :         ++pos;
     246             :     }
     247     3851700 :     if (end-pos && READ_spec(pos) == ',') {
     248           6 :         if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
     249           4 :             invalid_comma_and_underscore();
     250           4 :             return 0;
     251             :         }
     252             :     }
     253             : 
     254             :     /* Parse field precision */
     255     3851700 :     if (end-pos && READ_spec(pos) == '.') {
     256     3352520 :         ++pos;
     257             : 
     258     3352520 :         consumed = get_integer(format_spec, &pos, end, &format->precision);
     259     3352520 :         if (consumed == -1)
     260             :             /* Overflow error. Exception already set. */
     261           4 :             return 0;
     262             : 
     263             :         /* Not having a precision after a dot is an error. */
     264     3352510 :         if (consumed == 0) {
     265           0 :             PyErr_Format(PyExc_ValueError,
     266             :                          "Format specifier missing precision");
     267           0 :             return 0;
     268             :         }
     269             : 
     270             :     }
     271             : 
     272             :     /* Finally, parse the type field. */
     273             : 
     274     3851700 :     if (end-pos > 1) {
     275             :         /* More than one char remains, so this is an invalid format
     276             :            specifier. */
     277             :         /* Create a temporary object that contains the format spec we're
     278             :            operating on.  It's format_spec[start:end] (in Python syntax). */
     279          15 :         PyObject* actual_format_spec = PyUnicode_FromKindAndData(kind,
     280          15 :                                          (char*)data + kind*start,
     281             :                                          end-start);
     282          15 :         if (actual_format_spec != NULL) {
     283          15 :             PyErr_Format(PyExc_ValueError,
     284             :                 "Invalid format specifier '%U' for object of type '%.200s'",
     285          15 :                 actual_format_spec, Py_TYPE(obj)->tp_name);
     286          15 :             Py_DECREF(actual_format_spec);
     287             :         }
     288          15 :         return 0;
     289             :     }
     290             : 
     291     3851680 :     if (end-pos == 1) {
     292     3843030 :         format->type = READ_spec(pos);
     293     3843030 :         ++pos;
     294             :     }
     295             : 
     296             :     /* Do as much validating as we can, just by looking at the format
     297             :        specifier.  Do not take into account what type of formatting
     298             :        we're doing (int, float, string). */
     299             : 
     300     3851680 :     if (format->thousands_separators) {
     301         146 :         switch (format->type) {
     302         107 :         case 'd':
     303             :         case 'e':
     304             :         case 'f':
     305             :         case 'g':
     306             :         case 'E':
     307             :         case 'G':
     308             :         case '%':
     309             :         case 'F':
     310             :         case '\0':
     311             :             /* These are allowed. See PEP 378.*/
     312         107 :             break;
     313          27 :         case 'b':
     314             :         case 'o':
     315             :         case 'x':
     316             :         case 'X':
     317             :             /* Underscores are allowed in bin/oct/hex. See PEP 515. */
     318          27 :             if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
     319             :                 /* Every four digits, not every three, in bin/oct/hex. */
     320          20 :                 format->thousands_separators = LT_UNDER_FOUR_LOCALE;
     321          20 :                 break;
     322             :             }
     323             :             /* fall through */
     324             :         default:
     325          19 :             invalid_thousands_separator_type(format->thousands_separators, format->type);
     326          19 :             return 0;
     327             :         }
     328     3851530 :     }
     329             : 
     330     3851660 :     assert (format->align <= 127);
     331     3851660 :     assert (format->sign <= 127);
     332     3851660 :     return 1;
     333             : }
     334             : 
     335             : /* Calculate the padding needed. */
     336             : static void
     337        7200 : calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
     338             :              Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
     339             :              Py_ssize_t *n_total)
     340             : {
     341        7200 :     if (width >= 0) {
     342        6422 :         if (nchars > width)
     343         195 :             *n_total = nchars;
     344             :         else
     345        6227 :             *n_total = width;
     346             :     }
     347             :     else {
     348             :         /* not specified, use all of the chars and no more */
     349         778 :         *n_total = nchars;
     350             :     }
     351             : 
     352             :     /* Figure out how much leading space we need, based on the
     353             :        aligning */
     354        7200 :     if (align == '>')
     355        1290 :         *n_lpadding = *n_total - nchars;
     356        5910 :     else if (align == '^')
     357          48 :         *n_lpadding = (*n_total - nchars) / 2;
     358        5862 :     else if (align == '<' || align == '=')
     359        5862 :         *n_lpadding = 0;
     360             :     else {
     361             :         /* We should never have an unspecified alignment. */
     362           0 :         Py_UNREACHABLE();
     363             :     }
     364             : 
     365        7200 :     *n_rpadding = *n_total - nchars - *n_lpadding;
     366        7200 : }
     367             : 
     368             : /* Do the padding, and advance writer->pos to where the caller-supplied
     369             :    content goes.  Always returns 0. */
     370             : static int
     371        7200 : fill_padding(_PyUnicodeWriter *writer,
     372             :              Py_ssize_t nchars,
     373             :              Py_UCS4 fill_char, Py_ssize_t n_lpadding,
     374             :              Py_ssize_t n_rpadding)
     375             : {
     376             :     Py_ssize_t pos;
     377             : 
     378             :     /* Pad on left. */
     379        7200 :     if (n_lpadding) {
     380        1177 :         pos = writer->pos;
     381        1177 :         _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
     382             :     }
     383             : 
     384             :     /* Pad on right. */
     385        7200 :     if (n_rpadding) {
     386        5095 :         pos = writer->pos + nchars + n_lpadding;
     387        5095 :         _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
     388             :     }
     389             : 
     390             :     /* Advance to the position of the user content. */
     391        7200 :     writer->pos += n_lpadding;
     392        7200 :     return 0;
     393             : }
     394             : 
     395             : /************************************************************************/
     396             : /*********** common routines for numeric formatting *********************/
     397             : /************************************************************************/
     398             : 
     399             : /* Locale info needed for formatting integers and the part of floats
     400             :    before and including the decimal. Note that locales only support
     401             :    8-bit chars, not unicode. */
     402             : typedef struct {
     403             :     PyObject *decimal_point;
     404             :     PyObject *thousands_sep;
     405             :     const char *grouping;
     406             :     char *grouping_buffer;
     407             : } LocaleInfo;
     408             : 
     409             : #define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
     410             : 
     411             : /* describes the layout for an integer, see the comment in
     412             :    calc_number_widths() for details */
     413             : typedef struct {
     414             :     Py_ssize_t n_lpadding;
     415             :     Py_ssize_t n_prefix;
     416             :     Py_ssize_t n_spadding;
     417             :     Py_ssize_t n_rpadding;
     418             :     char sign;
     419             :     Py_ssize_t n_sign;      /* number of digits needed for sign (0/1) */
     420             :     Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
     421             :                                     any grouping chars. */
     422             :     Py_ssize_t n_decimal;   /* 0 if only an integer */
     423             :     Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
     424             :                                excluding the decimal itself, if
     425             :                                present. */
     426             : 
     427             :     /* These 2 are not the widths of fields, but are needed by
     428             :        STRINGLIB_GROUPING. */
     429             :     Py_ssize_t n_digits;    /* The number of digits before a decimal
     430             :                                or exponent. */
     431             :     Py_ssize_t n_min_width; /* The min_width we used when we computed
     432             :                                the n_grouped_digits width. */
     433             : } NumberFieldWidths;
     434             : 
     435             : 
     436             : /* Given a number of the form:
     437             :    digits[remainder]
     438             :    where pos is the index of the start and end is the index just past
     439             :     the end, find where the integer part ends. This could be a decimal,
     440             :     an exponent, both, or neither.
     441             :    If a decimal point is present, set *has_decimal and increment
     442             :     remainder beyond it.
     443             :    Results are undefined (but shouldn't crash) for improperly
     444             :     formatted strings.
     445             : */
     446             : static void
     447         467 : parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
     448             :              Py_ssize_t *n_remainder, int *has_decimal)
     449             : {
     450             :     Py_ssize_t remainder;
     451         467 :     int kind = PyUnicode_KIND(s);
     452         467 :     const void *data = PyUnicode_DATA(s);
     453             : 
     454        1225 :     while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
     455         758 :         ++pos;
     456         467 :     remainder = pos;
     457             : 
     458             :     /* Does remainder start with a decimal point? */
     459         467 :     *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
     460             : 
     461             :     /* Skip the decimal point. */
     462         467 :     if (*has_decimal)
     463         280 :         remainder++;
     464             : 
     465         467 :     *n_remainder = end - remainder;
     466         467 : }
     467             : 
     468             : /* not all fields of format are used.  for example, precision is
     469             :    unused.  should this take discrete params in order to be more clear
     470             :    about what it does?  or is passing a single format parameter easier
     471             :    and efficient enough to justify a little obfuscation?
     472             :    Return the total number of characters needed, or -1 on error. */
     473             : static Py_ssize_t
     474      474734 : calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
     475             :                    Py_UCS4 sign_char, Py_ssize_t n_start,
     476             :                    Py_ssize_t n_end, Py_ssize_t n_remainder,
     477             :                    int has_decimal, const LocaleInfo *locale,
     478             :                    const InternalFormatSpec *format, Py_UCS4 *maxchar)
     479             : {
     480             :     Py_ssize_t n_non_digit_non_padding;
     481             :     Py_ssize_t n_padding;
     482             : 
     483      474734 :     spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
     484      474734 :     spec->n_lpadding = 0;
     485      474734 :     spec->n_prefix = n_prefix;
     486      474734 :     spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
     487      474734 :     spec->n_remainder = n_remainder;
     488      474734 :     spec->n_spadding = 0;
     489      474734 :     spec->n_rpadding = 0;
     490      474734 :     spec->sign = '\0';
     491      474734 :     spec->n_sign = 0;
     492             : 
     493             :     /* the output will look like:
     494             :        |                                                                                         |
     495             :        | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
     496             :        |                                                                                         |
     497             : 
     498             :        sign is computed from format->sign and the actual
     499             :        sign of the number
     500             : 
     501             :        prefix is given (it's for the '0x' prefix)
     502             : 
     503             :        digits is already known
     504             : 
     505             :        the total width is either given, or computed from the
     506             :        actual digits
     507             : 
     508             :        at most one of lpadding, spadding, and rpadding is non-zero, except
     509             :        for '^' alignment, which splits the padding between lpadding and rpadding
     510             :     */
     511             : 
     512             :     /* compute the various parts we're going to write */
     513      474734 :     switch (format->sign) {
     514       18631 :     case '+':
     515             :         /* always put a + or - */
     516       18631 :         spec->n_sign = 1;
     517       18631 :         spec->sign = (sign_char == '-' ? '-' : '+');
     518       18631 :         break;
     519          80 :     case ' ':
     520          80 :         spec->n_sign = 1;
     521          80 :         spec->sign = (sign_char == '-' ? '-' : ' ');
     522          80 :         break;
     523      456023 :     default:
     524             :         /* Not specified, or the default (-) */
     525      456023 :         if (sign_char == '-') {
     526          88 :             spec->n_sign = 1;
     527          88 :             spec->sign = '-';
     528             :         }
     529             :     }
     530             : 
     531             :     /* The number of chars used for non-digits and non-padding. */
     532      474734 :     n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
     533      474734 :         spec->n_remainder;
     534             : 
     535             :     /* min_width can go negative, that's okay. format->width == -1 means
     536             :        we don't care. */
     537      474734 :     if (format->fill_char == '0' && format->align == '=')
     538      455205 :         spec->n_min_width = format->width - n_non_digit_non_padding;
     539             :     else
     540       19529 :         spec->n_min_width = 0;
     541             : 
     542      474734 :     if (spec->n_digits == 0)
     543             :         /* This case only occurs when using 'c' formatting, we need
     544             :            to special case it because the grouping code always wants
     545             :            to have at least one character. */
     546         117 :         spec->n_grouped_digits = 0;
     547             :     else {
     548             :         Py_UCS4 grouping_maxchar;
     549      474617 :         spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
     550             :             NULL, 0,
     551             :             NULL, 0, spec->n_digits,
     552             :             spec->n_min_width,
     553             :             locale->grouping, locale->thousands_sep, &grouping_maxchar);
     554      474617 :         if (spec->n_grouped_digits == -1) {
     555           0 :             return -1;
     556             :         }
     557      474617 :         *maxchar = Py_MAX(*maxchar, grouping_maxchar);
     558             :     }
     559             : 
     560             :     /* Given the desired width and the total of digit and non-digit
     561             :        space we consume, see if we need any padding. format->width can
     562             :        be negative (meaning no padding), but this code still works in
     563             :        that case. */
     564      474734 :     n_padding = format->width -
     565      474734 :                         (n_non_digit_non_padding + spec->n_grouped_digits);
     566      474734 :     if (n_padding > 0) {
     567             :         /* Some padding is needed. Determine if it's left, space, or right. */
     568         268 :         switch (format->align) {
     569          39 :         case '<':
     570          39 :             spec->n_rpadding = n_padding;
     571          39 :             break;
     572          27 :         case '^':
     573          27 :             spec->n_lpadding = n_padding / 2;
     574          27 :             spec->n_rpadding = n_padding - spec->n_lpadding;
     575          27 :             break;
     576           8 :         case '=':
     577           8 :             spec->n_spadding = n_padding;
     578           8 :             break;
     579         194 :         case '>':
     580         194 :             spec->n_lpadding = n_padding;
     581         194 :             break;
     582           0 :         default:
     583             :             /* Shouldn't get here */
     584           0 :             Py_UNREACHABLE();
     585             :         }
     586      474466 :     }
     587             : 
     588      474734 :     if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
     589         268 :         *maxchar = Py_MAX(*maxchar, format->fill_char);
     590             : 
     591      474734 :     if (spec->n_decimal)
     592         280 :         *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
     593             : 
     594      474734 :     return spec->n_lpadding + spec->n_sign + spec->n_prefix +
     595      474734 :         spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
     596      474734 :         spec->n_remainder + spec->n_rpadding;
     597             : }
     598             : 
     599             : /* Write a number's string representation into writer at writer->pos, advancing pos,
     600             :    using the widths determined in calc_number_widths(): left padding, sign, prefix, '=' padding, grouped digits, decimal point, remainder, right padding.
     601             :    If toupper is set, the prefix and grouped digits are upper-cased.  Return -1 on error, or 0 on success. */
     602             : static int
     603      474728 : fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
     604             :             PyObject *digits, Py_ssize_t d_start,
     605             :             PyObject *prefix, Py_ssize_t p_start,
     606             :             Py_UCS4 fill_char,
     607             :             LocaleInfo *locale, int toupper)
     608             : {
     609             :     /* Read position in 'digits'; moves past the digits and the decimal point. */
     610      474728 :     Py_ssize_t d_pos = d_start;
     611      474728 :     const int kind = writer->kind;
     612      474728 :     const void *data = writer->data;
     613             :     Py_ssize_t r;
     614             : 
     615      474728 :     if (spec->n_lpadding) {
     616         221 :         _PyUnicode_FastFill(writer->buffer,
     617             :                             writer->pos, spec->n_lpadding, fill_char);
     618         221 :         writer->pos += spec->n_lpadding;
     619             :     }
     620      474728 :     if (spec->n_sign == 1) {
     621       18799 :         PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
     622       18799 :         writer->pos++;
     623             :     }
     624      474728 :     if (spec->n_prefix) {
     625          49 :         _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
     626             :                                       prefix, p_start,
     627             :                                       spec->n_prefix);
     628          49 :         if (toupper) {
     629             :             Py_ssize_t t;
     630          45 :             for (t = 0; t < spec->n_prefix; t++) {
     631          30 :                 Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
     632          30 :                 c = Py_TOUPPER(c);
     633          30 :                 assert (c <= 127);
     634          30 :                 PyUnicode_WRITE(kind, data, writer->pos + t, c);
     635             :             }
     636             :         }
     637          49 :         writer->pos += spec->n_prefix;
     638             :     }
     639      474728 :     if (spec->n_spadding) {
     640           8 :         _PyUnicode_FastFill(writer->buffer,
     641             :                             writer->pos, spec->n_spadding, fill_char);
     642           8 :         writer->pos += spec->n_spadding;
     643             :     }
     644             : 
     645             :     /* n_digits is 0 only for the type 'c' special case, which has no digits to group. */
     646      474728 :     if (spec->n_digits != 0) {
     647             :         /* Copy the digits into the writer with InsertThousandsGrouping. */
     648      474611 :         r = _PyUnicode_InsertThousandsGrouping(
     649             :                 writer, spec->n_grouped_digits,
     650             :                 digits, d_pos, spec->n_digits,
     651             :                 spec->n_min_width,
     652             :                 locale->grouping, locale->thousands_sep, NULL);
     653      474611 :         if (r == -1)
     654           0 :             return -1;
     655      474611 :         assert(r == spec->n_grouped_digits);
     656      474611 :         d_pos += spec->n_digits;
     657             :     }
     658      474728 :     if (toupper) {
     659             :         Py_ssize_t t;
     660        2577 :         for (t = 0; t < spec->n_grouped_digits; t++) {
     661        1863 :             Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
     662        1863 :             c = Py_TOUPPER(c);
     663        1863 :             if (c > 127) {
     664           0 :                 PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
     665           0 :                 return -1;
     666             :             }
     667        1863 :             PyUnicode_WRITE(kind, data, writer->pos + t, c);
     668             :         }
     669             :     }
     670      474728 :     writer->pos += spec->n_grouped_digits;
     671             : 
     672      474728 :     if (spec->n_decimal) {
     673         280 :         _PyUnicode_FastCopyCharacters(
     674             :             writer->buffer, writer->pos,
     675             :             locale->decimal_point, 0, spec->n_decimal);
     676         280 :         writer->pos += spec->n_decimal;
     677         280 :         d_pos += 1;  /* step over the decimal point in 'digits' */
     678             :     }
     679             : 
     680      474728 :     if (spec->n_remainder) {
     681         398 :         _PyUnicode_FastCopyCharacters(
     682             :             writer->buffer, writer->pos,
     683             :             digits, d_pos, spec->n_remainder);
     684         398 :         writer->pos += spec->n_remainder;
     685             :         /* d_pos is deliberately not advanced here: nothing reads it after the remainder. */
     686             :     }
     687             : 
     688      474728 :     if (spec->n_rpadding) {
     689          66 :         _PyUnicode_FastFill(writer->buffer,
     690             :                             writer->pos, spec->n_rpadding,
     691             :                             fill_char);
     692          66 :         writer->pos += spec->n_rpadding;
     693             :     }
     694      474728 :     return 0;
     695             : }
     696             : 
     697             : static const char no_grouping[1] = {CHAR_MAX};  /* lconv-style grouping string; CHAR_MAX means "perform no (further) grouping" */
     698             : 
     699             : /* Fill locale_info with the decimal point and thousands separator (as str
     700             :    objects) and the grouping description: taken from the current locale if type is
     701             :    LT_CURRENT_LOCALE, hard-coded if LT_DEFAULT_LOCALE or LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE,
     702             :    or no separator and no grouping if LT_NO_LOCALE.  Return 0 on success, -1 on failure (locale_info may then be partly filled). */
     703             : static int
     704      474640 : get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
     705             : {
     706      474640 :     switch (type) {
     707         122 :     case LT_CURRENT_LOCALE: {
     708         122 :         struct lconv *lc = localeconv();
     709         122 :         if (_Py_GetLocaleconvNumeric(lc,
     710             :                                      &locale_info->decimal_point,
     711             :                                      &locale_info->thousands_sep) < 0) {
     712           0 :             return -1;
     713             :         }
     714             : 
     715             :         /* localeconv() grouping can become a dangling pointer or point
     716             :            to a different string if another thread calls localeconv() during
     717             :            the string formatting. Copy the string to avoid this risk. */
     718         122 :         locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
     719         122 :         if (locale_info->grouping_buffer == NULL) {
     720           0 :             PyErr_NoMemory();
     721           0 :             return -1;
     722             :         }
     723         122 :         locale_info->grouping = locale_info->grouping_buffer;
     724         122 :         break;
     725             :     }
     726         127 :     case LT_DEFAULT_LOCALE:
     727             :     case LT_UNDERSCORE_LOCALE:
     728             :     case LT_UNDER_FOUR_LOCALE:
     729         127 :         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
     730         127 :         locale_info->thousands_sep = PyUnicode_FromOrdinal(
     731             :             type == LT_DEFAULT_LOCALE ? ',' : '_');
     732         127 :         if (!locale_info->decimal_point || !locale_info->thousands_sep)
     733           0 :             return -1;
     734         127 :         if (type != LT_UNDER_FOUR_LOCALE)
     735         107 :             locale_info->grouping = "\3"; /* Group every 3 characters.  The
     736             :                                          (implicit) trailing 0 means repeat
     737             :                                          infinitely. */
     738             :         else
     739          20 :             locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
     740         127 :         break;
     741      474391 :     case LT_NO_LOCALE:
     742      474391 :         locale_info->decimal_point = PyUnicode_FromOrdinal('.');
     743      474391 :         locale_info->thousands_sep = PyUnicode_New(0, 0);  /* empty separator */
     744      474391 :         if (!locale_info->decimal_point || !locale_info->thousands_sep)
     745           0 :             return -1;
     746      474391 :         locale_info->grouping = no_grouping;
     747      474391 :         break;
     748             :     }
     749      474640 :     return 0;
     750             : }
     751             : /* Release the two object references and the grouping buffer held by locale_info.  Py_XDECREF tolerates members that are NULL. */
     752             : static void
     753      474658 : free_locale_info(LocaleInfo *locale_info)
     754             : {
     755      474658 :     Py_XDECREF(locale_info->decimal_point);
     756      474658 :     Py_XDECREF(locale_info->thousands_sep);
     757      474658 :     PyMem_Free(locale_info->grouping_buffer);
     758      474658 : }
     759             : 
     760             : /************************************************************************/
     761             : /*********** string formatting ******************************************/
     762             : /************************************************************************/
     763             : /* Append str 'value' to writer, formatted per 'format' (fill, align, width, precision).  Sign, 'z', '#' and '=' are rejected with ValueError.  Return 0 on success, -1 on error. */
     764             : static int
     765        7969 : format_string_internal(PyObject *value, const InternalFormatSpec *format,
     766             :                        _PyUnicodeWriter *writer)
     767             : {
     768             :     Py_ssize_t lpad;
     769             :     Py_ssize_t rpad;
     770             :     Py_ssize_t total;
     771             :     Py_ssize_t len;
     772        7969 :     int result = -1;
     773             :     Py_UCS4 maxchar;
     774             : 
     775        7969 :     assert(PyUnicode_IS_READY(value));
     776        7969 :     len = PyUnicode_GET_LENGTH(value);
     777             : 
     778             :     /* sign is not allowed on strings */
     779        7969 :     if (format->sign != '\0') {
     780           3 :         if (format->sign == ' ') {
     781           1 :             PyErr_SetString(PyExc_ValueError,
     782             :                 "Space not allowed in string format specifier");
     783             :         }
     784             :         else {
     785           2 :             PyErr_SetString(PyExc_ValueError,
     786             :                 "Sign not allowed in string format specifier");
     787             :         }
     788           3 :         goto done;
     789             :     }
     790             : 
     791             :     /* negative 0 coercion is not allowed on strings */
     792        7966 :     if (format->no_neg_0) {
     793           1 :         PyErr_SetString(PyExc_ValueError,
     794             :                         "Negative zero coercion (z) not allowed in string format "
     795             :                         "specifier");
     796           1 :         goto done;
     797             :     }
     798             : 
     799             :     /* alternate is not allowed on strings */
     800        7965 :     if (format->alternate) {
     801           2 :         PyErr_SetString(PyExc_ValueError,
     802             :                         "Alternate form (#) not allowed in string format "
     803             :                         "specifier");
     804           2 :         goto done;
     805             :     }
     806             : 
     807             :     /* '=' alignment not allowed on strings */
     808        7963 :     if (format->align == '=') {
     809           1 :         PyErr_SetString(PyExc_ValueError,
     810             :                         "'=' alignment not allowed "
     811             :                         "in string format specifier");
     812           1 :         goto done;
     813             :     }
     814             : 
     815        7962 :     if ((format->width == -1 || format->width <= len)
     816        2291 :         && (format->precision == -1 || format->precision >= len)) {
     817             :         /* Fast path: neither padding nor truncation is needed, so write value unchanged. */
     818         856 :         return _PyUnicodeWriter_WriteStr(writer, value);
     819             :     }
     820             : 
     821             :     /* if precision is specified, output no more than format.precision
     822             :        characters */
     823        7106 :     if (format->precision >= 0 && len >= format->precision) {
     824        2331 :         len = format->precision;
     825             :     }
     826             : 
     827        7106 :     calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
     828             : 
     829        7106 :     maxchar = writer->maxchar;
     830        7106 :     if (lpad != 0 || rpad != 0)
     831        6200 :         maxchar = Py_MAX(maxchar, format->fill_char);
     832        7106 :     if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
     833        7102 :         Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
     834        7102 :         maxchar = Py_MAX(maxchar, valmaxchar);
     835             :     }
     836             : 
     837             :     /* reserve room in the writer for the padded result */
     838        7106 :     if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
     839           0 :         goto done;
     840             : 
     841             :     /* Write into that space. First the padding. */
     842        7106 :     result = fill_padding(writer, len, format->fill_char, lpad, rpad);
     843        7106 :     if (result == -1)
     844           0 :         goto done;
     845             : 
     846             :     /* Then the source string. */
     847        7106 :     if (len) {
     848        5278 :         _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
     849             :                                       value, 0, len);
     850             :     }
     851        7106 :     writer->pos += (len + rpad);  /* NOTE(review): lpad is presumably already consumed by fill_padding() -- confirm */
     852        7106 :     result = 0;
     853             : 
     854        7113 : done:
     855        7113 :     return result;
     856             : }
     857             : 
     858             : 
     859             : /************************************************************************/
     860             : /*********** long formatting ********************************************/
     861             : /************************************************************************/
     862             : /* Append int 'value' to writer, formatted per 'format'; precision and 'z' are rejected with ValueError.  Return 0 on success, -1 on error. */
     863             : static int
     864      493265 : format_long_internal(PyObject *value, const InternalFormatSpec *format,
     865             :                      _PyUnicodeWriter *writer)
     866             : {
     867      493265 :     int result = -1;
     868      493265 :     Py_UCS4 maxchar = 127;
     869      493265 :     PyObject *tmp = NULL;
     870             :     Py_ssize_t inumeric_chars;
     871      493265 :     Py_UCS4 sign_char = '\0';
     872             :     Py_ssize_t n_digits;       /* count of digits need from the computed
     873             :                                   string */
     874      493265 :     Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
     875             :                                    produces non-digits */
     876      493265 :     Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */
     877             :     Py_ssize_t n_total;
     878      493265 :     Py_ssize_t prefix = 0;
     879             :     NumberFieldWidths spec;
     880             :     long x;
     881             : 
     882             :     /* Locale settings, either from the actual locale or
     883             :        from a hard-coded pseudo-locale */
     884      493265 :     LocaleInfo locale = LocaleInfo_STATIC_INIT;
     885             : 
     886             :     /* no precision allowed on integers */
     887      493265 :     if (format->precision != -1) {
     888           2 :         PyErr_SetString(PyExc_ValueError,
     889             :                         "Precision not allowed in integer format specifier");
     890           2 :         goto done;
     891             :     }
     892             :     /* no negative zero coercion on integers */
     893      493263 :     if (format->no_neg_0) {
     894           9 :         PyErr_SetString(PyExc_ValueError,
     895             :                         "Negative zero coercion (z) not allowed in integer"
     896             :                         " format specifier");
     897           9 :         goto done;
     898             :     }
     899             : 
     900             :     /* special case for character formatting */
     901      493254 :     if (format->type == 'c') {
     902             :         /* error to specify a sign */
     903           4 :         if (format->sign != '\0') {
     904           2 :             PyErr_SetString(PyExc_ValueError,
     905             :                             "Sign not allowed with integer"
     906             :                             " format specifier 'c'");
     907           2 :             goto done;
     908             :         }
     909             :         /* error to request alternate format */
     910           2 :         if (format->alternate) {
     911           1 :             PyErr_SetString(PyExc_ValueError,
     912             :                             "Alternate form (#) not allowed with integer"
     913             :                             " format specifier 'c'");
     914           1 :             goto done;
     915             :         }
     916             : 
     917             :         /* taken from unicodeobject.c formatchar() */
     918             :         /* Integer input truncated to a character */
     919           1 :         x = PyLong_AsLong(value);
     920           1 :         if (x == -1 && PyErr_Occurred())
     921           0 :             goto done;
     922           1 :         if (x < 0 || x > 0x10ffff) {
     923           0 :             PyErr_SetString(PyExc_OverflowError,
     924             :                             "%c arg not in range(0x110000)");
     925           0 :             goto done;
     926             :         }
     927           1 :         tmp = PyUnicode_FromOrdinal(x);  /* NOTE(review): result is not checked for NULL before fill_number() reads it */
     928           1 :         inumeric_chars = 0;
     929           1 :         n_digits = 1;
     930           1 :         maxchar = Py_MAX(maxchar, (Py_UCS4)x);
     931             : 
     932             :         /* As a sort-of hack, we tell calc_number_widths that we only
     933             :            have "remainder" characters. calc_number_widths thinks
     934             :            these are characters that don't get formatted, only copied
     935             :            into the output string. We do this for 'c' formatting,
     936             :            because the characters are likely to be non-digits. */
     937           1 :         n_remainder = 1;
     938             :     }
     939             :     else {
     940             :         int base;
     941      493250 :         int leading_chars_to_skip = 0;  /* Number of characters added by
     942             :                                            PyNumber_ToBase that we want to
     943             :                                            skip over. */
     944             : 
     945             :         /* Compute the base and how many characters will be added by
     946             :            PyNumber_ToBase */
     947      493250 :         switch (format->type) {
     948          42 :         case 'b':
     949          42 :             base = 2;
     950          42 :             leading_chars_to_skip = 2; /* 0b */
     951          42 :             break;
     952          40 :         case 'o':
     953          40 :             base = 8;
     954          40 :             leading_chars_to_skip = 2; /* 0o */
     955          40 :             break;
     956      466132 :         case 'x':
     957             :         case 'X':
     958      466132 :             base = 16;
     959      466132 :             leading_chars_to_skip = 2; /* 0x */
     960      466132 :             break;
     961       27036 :         default:  /* shouldn't be needed, but stops a compiler warning */
     962             :         case 'd':
     963             :         case 'n':
     964       27036 :             base = 10;
     965       27036 :             break;
     966             :         }
     967             : 
     968      493250 :         if (format->sign != '+' && format->sign != ' '
     969      474761 :             && format->width == -1
     970       19153 :             && format->type != 'X' && format->type != 'n'
     971       19072 :             && !format->thousands_separators
     972       18990 :             && PyLong_CheckExact(value))
     973             :         {
     974             :             /* Fast path: plain conversion; nothing has been allocated yet, so return directly instead of going through 'done'. */
     975       18984 :             return _PyLong_FormatWriter(writer, value, base, format->alternate);
     976             :         }
     977             : 
     978             :         /* The number of prefix chars is the same as the leading
     979             :            chars to skip */
     980      474266 :         if (format->alternate)
     981          49 :             n_prefix = leading_chars_to_skip;
     982             : 
     983             :         /* Do the hard part, converting to a string in a given base */
     984      474266 :         tmp = _PyLong_Format(value, base);
     985      474266 :         if (tmp == NULL || PyUnicode_READY(tmp) == -1)
     986           0 :             goto done;
     987             : 
     988      474266 :         inumeric_chars = 0;
     989      474266 :         n_digits = PyUnicode_GET_LENGTH(tmp);
     990             : 
     991      474266 :         prefix = inumeric_chars;
     992             : 
     993             :         /* Is a sign character present in the output?  If so, remember it
     994             :            and skip it */
     995      474266 :         if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
     996        5285 :             sign_char = '-';
     997        5285 :             ++prefix;
     998        5285 :             ++leading_chars_to_skip;
     999             :         }
    1000             : 
    1001             :         /* Skip over the leading chars (0x, 0b, etc.) */
    1002      474266 :         n_digits -= leading_chars_to_skip;
    1003      474266 :         inumeric_chars += leading_chars_to_skip;
    1004             :     }
    1005             : 
    1006             :     /* Determine the grouping, separator, and decimal point, if any. */
    1007      474267 :     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
    1008             :                         format->thousands_separators,
    1009             :                         &locale) == -1)
    1010           0 :         goto done;
    1011             : 
    1012             :     /* Calculate how much memory we'll need. */
    1013      474267 :     n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars,
    1014             :                                  inumeric_chars + n_digits, n_remainder, 0,
    1015             :                                  &locale, format, &maxchar);
    1016      474267 :     if (n_total == -1) {
    1017           0 :         goto done;
    1018             :     }
    1019             : 
    1020             :     /* Allocate the memory. */
    1021      474267 :     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
    1022           0 :         goto done;
    1023             : 
    1024             :     /* Populate the memory. */
    1025      474267 :     result = fill_number(writer, &spec,
    1026             :                          tmp, inumeric_chars,
    1027             :                          tmp, prefix, format->fill_char,
    1028      474267 :                          &locale, format->type == 'X');
    1029             : 
    1030      474281 : done:
    1031      474281 :     Py_XDECREF(tmp);
    1032      474281 :     free_locale_info(&locale);
    1033      474281 :     return result;
    1034             : }
    1035             : 
    1036             : /************************************************************************/
    1037             : /*********** float formatting *******************************************/
    1038             : /************************************************************************/
    1039             : 
    1040             : /* much of this is taken from unicodeobject.c */
    1041             : static int
    1042     3349920 : format_float_internal(PyObject *value,
    1043             :                       const InternalFormatSpec *format,
    1044             :                       _PyUnicodeWriter *writer)
    1045             : {
    1046     3349920 :     char *buf = NULL;       /* buffer returned from PyOS_double_to_string */
    1047             :     Py_ssize_t n_digits;
    1048             :     Py_ssize_t n_remainder;
    1049             :     Py_ssize_t n_total;
    1050             :     int has_decimal;
    1051             :     double val;
    1052     3349920 :     int precision, default_precision = 6;
    1053     3349920 :     Py_UCS4 type = format->type;
    1054     3349920 :     int add_pct = 0;
    1055             :     Py_ssize_t index;
    1056             :     NumberFieldWidths spec;
    1057     3349920 :     int flags = 0;
    1058     3349920 :     int result = -1;
    1059     3349920 :     Py_UCS4 maxchar = 127;
    1060     3349920 :     Py_UCS4 sign_char = '\0';
    1061             :     int float_type; /* Used to see if we have a nan, inf, or regular float. */
    1062     3349920 :     PyObject *unicode_tmp = NULL;
    1063             : 
    1064             :     /* Locale settings, either from the actual locale or
    1065             :        from a hard-code pseudo-locale */
    1066     3349920 :     LocaleInfo locale = LocaleInfo_STATIC_INIT;
    1067             : 
    1068     3349920 :     if (format->precision > INT_MAX) {
    1069           1 :         PyErr_SetString(PyExc_ValueError, "precision too big");
    1070           1 :         goto done;
    1071             :     }
    1072     3349920 :     precision = (int)format->precision;
    1073             : 
    1074     3349920 :     if (format->alternate)
    1075          92 :         flags |= Py_DTSF_ALT;
    1076     3349920 :     if (format->no_neg_0)
    1077          35 :         flags |= Py_DTSF_NO_NEG_0;
    1078             : 
    1079     3349920 :     if (type == '\0') {
    1080             :         /* Omitted type specifier.  Behaves in the same way as repr(x)
    1081             :            and str(x) if no precision is given, else like 'g', but with
    1082             :            at least one digit after the decimal point. */
    1083          55 :         flags |= Py_DTSF_ADD_DOT_0;
    1084          55 :         type = 'r';
    1085          55 :         default_precision = 0;
    1086             :     }
    1087             : 
    1088     3349920 :     if (type == 'n')
    1089             :         /* 'n' is the same as 'g', except for the locale used to
    1090             :            format the result. We take care of that later. */
    1091          43 :         type = 'g';
    1092             : 
    1093     3349920 :     val = PyFloat_AsDouble(value);
    1094     3349920 :     if (val == -1.0 && PyErr_Occurred())
    1095           0 :         goto done;
    1096             : 
    1097     3349920 :     if (type == '%') {
    1098          40 :         type = 'f';
    1099          40 :         val *= 100;
    1100          40 :         add_pct = 1;
    1101             :     }
    1102             : 
    1103     3349920 :     if (precision < 0)
    1104         662 :         precision = default_precision;
    1105     3349260 :     else if (type == 'r')
    1106           9 :         type = 'g';
    1107             : 
    1108             :     /* Cast "type", because if we're in unicode we need to pass an
    1109             :        8-bit char. This is safe, because we've restricted what "type"
    1110             :        can be. */
    1111     3349920 :     buf = PyOS_double_to_string(val, (char)type, precision, flags,
    1112             :                                 &float_type);
    1113     3349920 :     if (buf == NULL)
    1114           0 :         goto done;
    1115     3349920 :     n_digits = strlen(buf);
    1116             : 
    1117     3349920 :     if (add_pct) {
    1118             :         /* We know that buf has a trailing zero (since we just called
    1119             :            strlen() on it), and we don't use that fact any more. So we
    1120             :            can just write over the trailing zero. */
    1121          40 :         buf[n_digits] = '%';
    1122          40 :         n_digits += 1;
    1123             :     }
    1124             : 
    1125     3349920 :     if (format->sign != '+' && format->sign != ' '
    1126     3349790 :         && format->width == -1
    1127     3349680 :         && format->type != 'n'
    1128     3349640 :         && !format->thousands_separators)
    1129             :     {
    1130             :         /* Fast path */
    1131     3349640 :         result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
    1132     3349640 :         PyMem_Free(buf);
    1133     3349640 :         return result;
    1134             :     }
    1135             : 
    1136             :     /* Since there is no unicode version of PyOS_double_to_string,
    1137             :        just use the 8 bit version and then convert to unicode. */
    1138         279 :     unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
    1139         279 :     PyMem_Free(buf);
    1140         279 :     if (unicode_tmp == NULL)
    1141           0 :         goto done;
    1142             : 
    1143             :     /* Is a sign character present in the output?  If so, remember it
    1144             :        and skip it */
    1145         279 :     index = 0;
    1146         279 :     if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
    1147          76 :         sign_char = '-';
    1148          76 :         ++index;
    1149          76 :         --n_digits;
    1150             :     }
    1151             : 
    1152             :     /* Determine if we have any "remainder" (after the digits, might include
    1153             :        decimal or exponent or both (or neither)) */
    1154         279 :     parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
    1155             : 
    1156             :     /* Determine the grouping, separator, and decimal point, if any. */
    1157         279 :     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
    1158             :                         format->thousands_separators,
    1159             :                         &locale) == -1)
    1160           0 :         goto done;
    1161             : 
    1162             :     /* Calculate how much memory we'll need. */
    1163         279 :     n_total = calc_number_widths(&spec, 0, sign_char, index,
    1164             :                                  index + n_digits, n_remainder, has_decimal,
    1165             :                                  &locale, format, &maxchar);
    1166         279 :     if (n_total == -1) {
    1167           0 :         goto done;
    1168             :     }
    1169             : 
    1170             :     /* Allocate the memory. */
    1171         279 :     if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
    1172           0 :         goto done;
    1173             : 
    1174             :     /* Populate the memory. */
    1175         279 :     result = fill_number(writer, &spec,
    1176             :                          unicode_tmp, index,
    1177             :                          NULL, 0, format->fill_char,
    1178             :                          &locale, 0);
    1179             : 
    1180         280 : done:
    1181         280 :     Py_XDECREF(unicode_tmp);
    1182         280 :     free_locale_info(&locale);
    1183         280 :     return result;
    1184             : }
    1185             : 
    1186             : /************************************************************************/
    1187             : /*********** complex formatting *****************************************/
    1188             : /************************************************************************/
    1189             : /* Format the complex number `value` according to `format` and write it through `writer`.  Returns -1 on failure (the initial value of `result`), otherwise the result of the last fill_number() call. */
    1190             : static int
    1191          97 : format_complex_internal(PyObject *value,  /* read via PyComplex_RealAsDouble() / PyComplex_ImagAsDouble() */
    1192             :                         const InternalFormatSpec *format,  /* parsed spec; only a local copy of it is modified */
    1193             :                         _PyUnicodeWriter *writer)  /* destination; writer->pos is advanced as characters are written */
    1194             : {
    1195             :     double re;
    1196             :     double im;
    1197          97 :     char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */
    1198          97 :     char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */
    1199             : 
    1200          97 :     InternalFormatSpec tmp_format = *format;  /* working copy: padding is switched off in it, and sign may be forced to '+' */
    1201             :     Py_ssize_t n_re_digits;
    1202             :     Py_ssize_t n_im_digits;
    1203             :     Py_ssize_t n_re_remainder;
    1204             :     Py_ssize_t n_im_remainder;
    1205             :     Py_ssize_t n_re_total;
    1206             :     Py_ssize_t n_im_total;
    1207             :     int re_has_decimal;
    1208             :     int im_has_decimal;
    1209          97 :     int precision, default_precision = 6;  /* the default drops to 0 when the type code is omitted */
    1210          97 :     Py_UCS4 type = format->type;
    1211             :     Py_ssize_t i_re;
    1212             :     Py_ssize_t i_im;
    1213             :     NumberFieldWidths re_spec;
    1214             :     NumberFieldWidths im_spec;
    1215          97 :     int flags = 0;
    1216          97 :     int result = -1;
    1217          97 :     Py_UCS4 maxchar = 127;
    1218             :     int rkind;
    1219             :     void *rdata;
    1220          97 :     Py_UCS4 re_sign_char = '\0';
    1221          97 :     Py_UCS4 im_sign_char = '\0';
    1222             :     int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
    1223             :     int im_float_type;
    1224          97 :     int add_parens = 0;  /* nonzero: wrap the output in '(' and ')' */
    1225          97 :     int skip_re = 0;  /* nonzero: leave the real part out of the output */
    1226             :     Py_ssize_t lpad;
    1227             :     Py_ssize_t rpad;
    1228             :     Py_ssize_t total;
    1229          97 :     PyObject *re_unicode_tmp = NULL;
    1230          97 :     PyObject *im_unicode_tmp = NULL;
    1231             : 
    1232             :     /* Locale settings, either from the actual locale or
    1233             :        from a hard-coded pseudo-locale */
    1234          97 :     LocaleInfo locale = LocaleInfo_STATIC_INIT;
    1235             :     /* precision is narrowed to int just below, so reject values that would not fit. */
    1236          97 :     if (format->precision > INT_MAX) {
    1237           1 :         PyErr_SetString(PyExc_ValueError, "precision too big");
    1238           1 :         goto done;
    1239             :     }
    1240          96 :     precision = (int)format->precision;
    1241             : 
    1242             :     /* Zero padding is not allowed. */
    1243          96 :     if (format->fill_char == '0') {
    1244           1 :         PyErr_SetString(PyExc_ValueError,
    1245             :                         "Zero padding is not allowed in complex format "
    1246             :                         "specifier");
    1247           1 :         goto done;
    1248             :     }
    1249             : 
    1250             :     /* Neither is '=' alignment. */
    1251          95 :     if (format->align == '=') {
    1252           1 :         PyErr_SetString(PyExc_ValueError,
    1253             :                         "'=' alignment flag is not allowed in complex format "
    1254             :                         "specifier");
    1255           1 :         goto done;
    1256             :     }
    1257             :     /* A component may legitimately equal -1.0, so an error is only assumed when PyErr_Occurred() confirms it. */
    1258          94 :     re = PyComplex_RealAsDouble(value);
    1259          94 :     if (re == -1.0 && PyErr_Occurred())
    1260           0 :         goto done;
    1261          94 :     im = PyComplex_ImagAsDouble(value);
    1262          94 :     if (im == -1.0 && PyErr_Occurred())
    1263           0 :         goto done;
    1264             : 
    1265          94 :     if (format->alternate)
    1266          11 :         flags |= Py_DTSF_ALT;
    1267          94 :     if (format->no_neg_0)
    1268           4 :         flags |= Py_DTSF_NO_NEG_0;
    1269             : 
    1270          94 :     if (type == '\0') {
    1271             :         /* Omitted type specifier. Should be like str(self). */
    1272          25 :         type = 'r';
    1273          25 :         default_precision = 0;
    1274          25 :         if (re == 0.0 && copysign(1.0, re) == 1.0)  /* real part is +0.0, not -0.0 */
    1275           6 :             skip_re = 1;
    1276             :         else
    1277          19 :             add_parens = 1;
    1278             :     }
    1279             : 
    1280          94 :     if (type == 'n')
    1281             :         /* 'n' is the same as 'g', except for the locale used to
    1282             :            format the result. We take care of that later. */
    1283           0 :         type = 'g';
    1284             : 
    1285          94 :     if (precision < 0)
    1286          63 :         precision = default_precision;
    1287          31 :     else if (type == 'r')
    1288           1 :         type = 'g';  /* an explicit precision was given with no type code: format with 'g' instead of 'r' */
    1289             : 
    1290             :     /* Cast "type", because if we're in unicode we need to pass an
    1291             :        8-bit char. This is safe, because we've restricted what "type"
    1292             :        can be. */
    1293          94 :     re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
    1294             :                                    &re_float_type);
    1295          94 :     if (re_buf == NULL)
    1296           0 :         goto done;
    1297          94 :     im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
    1298             :                                    &im_float_type);
    1299          94 :     if (im_buf == NULL)
    1300           0 :         goto done;
    1301             : 
    1302          94 :     n_re_digits = strlen(re_buf);
    1303          94 :     n_im_digits = strlen(im_buf);
    1304             : 
    1305             :     /* Since there is no unicode version of PyOS_double_to_string,
    1306             :        just use the 8 bit version and then convert to unicode. */
    1307          94 :     re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
    1308          94 :     if (re_unicode_tmp == NULL)
    1309           0 :         goto done;
    1310          94 :     i_re = 0;
    1311             : 
    1312          94 :     im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
    1313          94 :     if (im_unicode_tmp == NULL)
    1314           0 :         goto done;
    1315          94 :     i_im = 0;
    1316             : 
    1317             :     /* Is a sign character present in the output?  If so, remember it
    1318             :        and skip it */
    1319          94 :     if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
    1320          19 :         re_sign_char = '-';
    1321          19 :         ++i_re;
    1322          19 :         --n_re_digits;
    1323             :     }
    1324          94 :     if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
    1325          19 :         im_sign_char = '-';
    1326          19 :         ++i_im;
    1327          19 :         --n_im_digits;
    1328             :     }
    1329             : 
    1330             :     /* Determine if we have any "remainder" (after the digits, might include
    1331             :        decimal or exponent or both (or neither)) */
    1332          94 :     parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
    1333             :                  &n_re_remainder, &re_has_decimal);
    1334          94 :     parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
    1335             :                  &n_im_remainder, &im_has_decimal);
    1336             : 
    1337             :     /* Determine the grouping, separator, and decimal point, if any. */
    1338          94 :     if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
    1339             :                         format->thousands_separators,
    1340             :                         &locale) == -1)
    1341           0 :         goto done;
    1342             : 
    1343             :     /* Turn off any padding. We'll do it later after we've composed
    1344             :        the numbers without padding. */
    1345          94 :     tmp_format.fill_char = '\0';
    1346          94 :     tmp_format.align = '<';
    1347          94 :     tmp_format.width = -1;
    1348             : 
    1349             :     /* Calculate how much memory we'll need. */
    1350          94 :     n_re_total = calc_number_widths(&re_spec, 0, re_sign_char,
    1351             :                                     i_re, i_re + n_re_digits, n_re_remainder,
    1352             :                                     re_has_decimal, &locale, &tmp_format,
    1353             :                                     &maxchar);
    1354          94 :     if (n_re_total == -1) {
    1355           0 :         goto done;
    1356             :     }
    1357             : 
    1358             :     /* Same formatting, but always include a sign, unless the real part is
    1359             :      * going to be omitted, in which case we use whatever sign convention was
    1360             :      * requested by the original format. */
    1361          94 :     if (!skip_re)
    1362          88 :         tmp_format.sign = '+';
    1363          94 :     n_im_total = calc_number_widths(&im_spec, 0, im_sign_char,
    1364             :                                     i_im, i_im + n_im_digits, n_im_remainder,
    1365             :                                     im_has_decimal, &locale, &tmp_format,
    1366             :                                     &maxchar);
    1367          94 :     if (n_im_total == -1) {
    1368           0 :         goto done;
    1369             :     }
    1370             : 
    1371          94 :     if (skip_re)
    1372           6 :         n_re_total = 0;  /* the real part is not written below, so it must not count towards the width */
    1373             : 
    1374             :     /* Add 1 for the 'j', and optionally 2 for parens. */
    1375          94 :     calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
    1376             :                  format->width, format->align, &lpad, &rpad, &total);
    1377             : 
    1378          94 :     if (lpad || rpad)
    1379          24 :         maxchar = Py_MAX(maxchar, format->fill_char);  /* fill_char only widens maxchar when some padding is requested */
    1380             : 
    1381          94 :     if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
    1382           0 :         goto done;
    1383          94 :     rkind = writer->kind;  /* kind/data are read after Prepare and reused by the PyUnicode_WRITE calls below */
    1384          94 :     rdata = writer->data;
    1385             : 
    1386             :     /* Populate the memory. First, the padding. */
    1387          94 :     result = fill_padding(writer,
    1388          94 :                           n_re_total + n_im_total + 1 + add_parens * 2,
    1389             :                           format->fill_char, lpad, rpad);
    1390          94 :     if (result == -1)
    1391           0 :         goto done;
    1392             : 
    1393          94 :     if (add_parens) {
    1394          19 :         PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
    1395          19 :         writer->pos++;
    1396             :     }
    1397             : 
    1398          94 :     if (!skip_re) {
    1399          88 :         result = fill_number(writer, &re_spec,
    1400             :                              re_unicode_tmp, i_re,
    1401             :                              NULL, 0,
    1402             :                              0,
    1403             :                              &locale, 0);
    1404          88 :         if (result == -1)
    1405           0 :             goto done;
    1406             :     }
    1407          94 :     result = fill_number(writer, &im_spec,
    1408             :                          im_unicode_tmp, i_im,
    1409             :                          NULL, 0,
    1410             :                          0,
    1411             :                          &locale, 0);
    1412          94 :     if (result == -1)
    1413           0 :         goto done;
    1414          94 :     PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
    1415          94 :     writer->pos++;
    1416             : 
    1417          94 :     if (add_parens) {
    1418          19 :         PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
    1419          19 :         writer->pos++;
    1420             :     }
    1421             : 
    1422          94 :     writer->pos += rpad;  /* step over the right-hand padding; presumably fill_padding() already wrote it -- TODO confirm */
    1423             : 
    1424          97 : done:
    1425          97 :     PyMem_Free(re_buf);  /* the early error exits reach this label with both buffers and both temporaries still NULL */
    1426          97 :     PyMem_Free(im_buf);
    1427          97 :     Py_XDECREF(re_unicode_tmp);
    1428          97 :     Py_XDECREF(im_unicode_tmp);
    1429          97 :     free_locale_info(&locale);
    1430          97 :     return result;
    1431             : }
    1432             : 
    1433             : /************************************************************************/
    1434             : /*********** built in formatters ****************************************/
    1435             : /************************************************************************/
    1436             : static int
    1437       16546 : format_obj(PyObject *obj, _PyUnicodeWriter *writer)
    1438             : {
    1439             :     PyObject *str;  /* result of PyObject_Str(obj); released with Py_DECREF() before returning */
    1440             :     int err;  /* value returned by _PyUnicodeWriter_WriteStr(), passed back unchanged */
    1441             :     /* Write str(obj) through the writer.  Returns -1 if the conversion to str fails. */
    1442       16546 :     str = PyObject_Str(obj);
    1443       16546 :     if (str == NULL)
    1444           0 :         return -1;
    1445       16546 :     err = _PyUnicodeWriter_WriteStr(writer, str);
    1446       16546 :     Py_DECREF(str);
    1447       16546 :     return err;
    1448             : }
    1449             : /* Format the str object `obj` using the part of `format_spec` delimited by `start` and `end`, writing through `writer`.  Returns the helper's result, or -1 if the spec does not parse or its type code is not 's'. */
    1450             : int
    1451      559571 : _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
    1452             :                                 PyObject *obj,
    1453             :                                 PyObject *format_spec,
    1454             :                                 Py_ssize_t start, Py_ssize_t end)
    1455             : {
    1456             :     InternalFormatSpec format;
    1457             : 
    1458      559571 :     assert(PyUnicode_Check(obj));
    1459             : 
    1460             :     /* check for the special case of zero length format spec, make
    1461             :        it equivalent to str(obj) */
    1462      559571 :     if (start == end) {
    1463      551598 :         if (PyUnicode_CheckExact(obj))  /* exact str: write the object itself */
    1464      551587 :             return _PyUnicodeWriter_WriteStr(writer, obj);
    1465             :         else
    1466          11 :             return format_obj(obj, writer);  /* str subclass: go through PyObject_Str() */
    1467             :     }
    1468             : 
    1469             :     /* parse the format_spec */
    1470        7973 :     if (!parse_internal_render_format_spec(obj, format_spec, start, end,
    1471             :                                            &format, 's', '<'))  /* 's' and '<' are presumably the default type and alignment -- TODO confirm against the parser */
    1472           4 :         return -1;
    1473             : 
    1474             :     /* type conversion? */
    1475        7969 :     switch (format.type) {
    1476        7969 :     case 's':
    1477             :         /* no type conversion needed, already a string.  do the formatting */
    1478        7969 :         return format_string_internal(obj, &format, writer);
    1479           0 :     default:
    1480             :         /* unknown */
    1481           0 :         unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
    1482           0 :         return -1;
    1483             :     }
    1484             : }
    1485             : /* Format the int `obj` using the part of `format_spec` delimited by `start` and `end`, writing through `writer`.  Integer type codes are formatted directly; float type codes are applied to PyNumber_Float(obj).  Returns -1 on failure. */
    1486             : int
    1487     4122990 : _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
    1488             :                              PyObject *obj,
    1489             :                              PyObject *format_spec,
    1490             :                              Py_ssize_t start, Py_ssize_t end)
    1491             : {
    1492     4122990 :     PyObject *tmp = NULL;  /* float made from obj for the float type codes; released at `done` */
    1493             :     InternalFormatSpec format;
    1494     4122990 :     int result = -1;  /* stays -1 on every path that jumps to `done` without formatting */
    1495             : 
    1496             :     /* check for the special case of zero length format spec, make
    1497             :        it equivalent to str(obj) */
    1498     4122990 :     if (start == end) {
    1499      288813 :         if (PyLong_CheckExact(obj))
    1500      288502 :             return _PyLong_FormatWriter(writer, obj, 10, 0);  /* 10 is presumably the base -- TODO confirm */
    1501             :         else
    1502         311 :             return format_obj(obj, writer);  /* not an exact int: go through PyObject_Str() */
    1503             :     }
    1504             : 
    1505             :     /* parse the format_spec */
    1506     3834180 :     if (!parse_internal_render_format_spec(obj, format_spec, start, end,
    1507             :                                            &format, 'd', '>'))  /* 'd' and '>' are presumably the default type and alignment -- TODO confirm against the parser */
    1508          35 :         goto done;
    1509             : 
    1510             :     /* type conversion? */
    1511     3834140 :     switch (format.type) {
    1512      493265 :     case 'b':
    1513             :     case 'c':
    1514             :     case 'd':
    1515             :     case 'o':
    1516             :     case 'x':
    1517             :     case 'X':
    1518             :     case 'n':
    1519             :         /* no type conversion needed, already an int.  do the formatting */
    1520      493265 :         result = format_long_internal(obj, &format, writer);
    1521      493265 :         break;
    1522             : 
    1523     3340570 :     case 'e':
    1524             :     case 'E':
    1525             :     case 'f':
    1526             :     case 'F':
    1527             :     case 'g':
    1528             :     case 'G':
    1529             :     case '%':
    1530             :         /* convert to float */
    1531     3340570 :         tmp = PyNumber_Float(obj);
    1532     3340570 :         if (tmp == NULL)
    1533           0 :             goto done;
    1534     3340570 :         result = format_float_internal(tmp, &format, writer);
    1535     3340570 :         break;
    1536             : 
    1537         306 :     default:
    1538             :         /* unknown */
    1539         306 :         unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
    1540         306 :         goto done;  /* result is still -1 here */
    1541             :     }
    1542             : 
    1543     3834180 : done:
    1544     3834180 :     Py_XDECREF(tmp);  /* tmp is NULL unless the float branch ran */
    1545     3834180 :     return result;
    1546             : }
    1547             : /* Format the float `obj` using the part of `format_spec` delimited by `start` and `end`, writing through `writer`.  Returns the helper's result, or -1 if the spec does not parse or its type code is not one of the cases below. */
    1548             : int
    1549       25667 : _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
    1550             :                               PyObject *obj,
    1551             :                               PyObject *format_spec,
    1552             :                               Py_ssize_t start, Py_ssize_t end)
    1553             : {
    1554             :     InternalFormatSpec format;
    1555             : 
    1556             :     /* check for the special case of zero length format spec, make
    1557             :        it equivalent to str(obj) */
    1558       25667 :     if (start == end)
    1559       16213 :         return format_obj(obj, writer);  /* always via PyObject_Str(); there is no exact-type shortcut as in the int writer */
    1560             : 
    1561             :     /* parse the format_spec */
    1562        9454 :     if (!parse_internal_render_format_spec(obj, format_spec, start, end,
    1563             :                                            &format, '\0', '>'))  /* '\0' and '>' are presumably the default type and alignment -- TODO confirm against the parser */
    1564           6 :         return -1;
    1565             : 
    1566             :     /* type conversion? */
    1567        9448 :     switch (format.type) {
    1568        9349 :     case '\0': /* No format code: like 'g', but with at least one decimal. */
    1569             :     case 'e':
    1570             :     case 'E':
    1571             :     case 'f':
    1572             :     case 'F':
    1573             :     case 'g':
    1574             :     case 'G':
    1575             :     case 'n':
    1576             :     case '%':
    1577             :         /* no conversion, already a float.  do the formatting */
    1578        9349 :         return format_float_internal(obj, &format, writer);
    1579             : 
    1580          99 :     default:
    1581             :         /* unknown */
    1582          99 :         unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
    1583          99 :         return -1;
    1584             :     }
    1585             : }
    1586             : /* Format the complex `obj` using the part of `format_spec` delimited by `start` and `end`, writing through `writer`.  Returns the helper's result, or -1 if the spec does not parse or its type code is not one of the cases below. */
    1587             : int
    1588         118 : _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
    1589             :                                 PyObject *obj,
    1590             :                                 PyObject *format_spec,
    1591             :                                 Py_ssize_t start, Py_ssize_t end)
    1592             : {
    1593             :     InternalFormatSpec format;
    1594             : 
    1595             :     /* check for the special case of zero length format spec, make
    1596             :        it equivalent to str(obj) */
    1597         118 :     if (start == end)
    1598          11 :         return format_obj(obj, writer);
    1599             : 
    1600             :     /* parse the format_spec */
    1601         107 :     if (!parse_internal_render_format_spec(obj, format_spec, start, end,
    1602             :                                            &format, '\0', '>'))  /* '\0' and '>' are presumably the default type and alignment -- TODO confirm against the parser */
    1603           4 :         return -1;
    1604             : 
    1605             :     /* type conversion? */
    1606         103 :     switch (format.type) {  /* unlike the float writer, '%' is not an accepted code here */
    1607          97 :     case '\0': /* No format code: format_complex_internal() formats it like str(self). */
    1608             :     case 'e':
    1609             :     case 'E':
    1610             :     case 'f':
    1611             :     case 'F':
    1612             :     case 'g':
    1613             :     case 'G':
    1614             :     case 'n':
    1615             :         /* no conversion, already a complex.  do the formatting */
    1616          97 :         return format_complex_internal(obj, &format, writer);
    1617             : 
    1618           6 :     default:
    1619             :         /* unknown */
    1620           6 :         unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
    1621           6 :         return -1;
    1622             :     }
    1623             : }

Generated by: LCOV version 1.14