Line data Source code
/* implements the unicode (as opposed to string) version of the
   built-in formatters for string, int, float.  that is, the versions
   of int.__format__, etc., that take and return unicode objects */
4 :
5 : #include "Python.h"
6 : #include "pycore_fileutils.h" // _Py_GetLocaleconvNumeric()
7 : #include "pycore_long.h" // _PyLong_FormatWriter()
8 : #include <locale.h>
9 :
10 : /* Raises an exception about an unknown presentation type for this
11 : * type. */
12 :
13 : static void
14 411 : unknown_presentation_type(Py_UCS4 presentation_type,
15 : const char* type_name)
16 : {
17 : /* %c might be out-of-range, hence the two cases. */
18 411 : if (presentation_type > 32 && presentation_type < 128)
19 411 : PyErr_Format(PyExc_ValueError,
20 : "Unknown format code '%c' "
21 : "for object of type '%.200s'",
22 411 : (char)presentation_type,
23 : type_name);
24 : else
25 0 : PyErr_Format(PyExc_ValueError,
26 : "Unknown format code '\\x%x' "
27 : "for object of type '%.200s'",
28 : (unsigned int)presentation_type,
29 : type_name);
30 411 : }
31 :
32 : static void
33 19 : invalid_thousands_separator_type(char specifier, Py_UCS4 presentation_type)
34 : {
35 19 : assert(specifier == ',' || specifier == '_');
36 19 : if (presentation_type > 32 && presentation_type < 128)
37 19 : PyErr_Format(PyExc_ValueError,
38 : "Cannot specify '%c' with '%c'.",
39 19 : specifier, (char)presentation_type);
40 : else
41 0 : PyErr_Format(PyExc_ValueError,
42 : "Cannot specify '%c' with '\\x%x'.",
43 : specifier, (unsigned int)presentation_type);
44 19 : }
45 :
46 : static void
47 8 : invalid_comma_and_underscore(void)
48 : {
49 8 : PyErr_Format(PyExc_ValueError, "Cannot specify both ',' and '_'.");
50 8 : }
51 :
52 : /*
53 : get_integer consumes 0 or more decimal digit characters from an
54 : input string, updates *result with the corresponding positive
55 : integer, and returns the number of digits consumed.
56 :
57 : returns -1 on error.
58 : */
59 : static int
60 7204230 : get_integer(PyObject *str, Py_ssize_t *ppos, Py_ssize_t end,
61 : Py_ssize_t *result)
62 : {
63 7204230 : Py_ssize_t accumulator, digitval, pos = *ppos;
64 : int numdigits;
65 7204230 : int kind = PyUnicode_KIND(str);
66 7204230 : const void *data = PyUnicode_DATA(str);
67 :
68 7204230 : accumulator = numdigits = 0;
69 14386400 : for (; pos < end; pos++, numdigits++) {
70 14377700 : digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ(kind, data, pos));
71 14377700 : if (digitval < 0)
72 7195580 : break;
73 : /*
74 : Detect possible overflow before it happens:
75 :
76 : accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
77 : accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
78 : */
79 7182170 : if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
80 7 : PyErr_Format(PyExc_ValueError,
81 : "Too many decimal digits in format string");
82 7 : *ppos = pos;
83 7 : return -1;
84 : }
85 7182160 : accumulator = accumulator * 10 + digitval;
86 : }
87 7204220 : *ppos = pos;
88 7204220 : *result = accumulator;
89 7204220 : return numdigits;
90 : }
91 :
92 : /************************************************************************/
93 : /*********** standard format specifier parsing **************************/
94 : /************************************************************************/
95 :
96 : /* returns true if this character is a specifier alignment token */
97 : Py_LOCAL_INLINE(int)
98 7682440 : is_alignment_token(Py_UCS4 c)
99 : {
100 7682440 : switch (c) {
101 1595 : case '<': case '>': case '=': case '^':
102 1595 : return 1;
103 7680850 : default:
104 7680850 : return 0;
105 : }
106 : }
107 :
108 : /* returns true if this character is a sign element */
109 : Py_LOCAL_INLINE(int)
110 3851700 : is_sign_element(Py_UCS4 c)
111 : {
112 3851700 : switch (c) {
113 18670 : case ' ': case '+': case '-':
114 18670 : return 1;
115 3833040 : default:
116 3833040 : return 0;
117 : }
118 : }
119 :
/* Selects where the decimal point, thousands separator and grouping
   come from.  LT_NO_LOCALE must be zero, because the value is tested
   for truth to mean "thousands separators requested".  The two
   character-valued members double as the separator character that
   appeared in the format specifier. */
enum LocaleType {
    LT_NO_LOCALE = 0,               /* no separators */
    LT_DEFAULT_LOCALE = ',',        /* ',' given in the format spec */
    LT_UNDERSCORE_LOCALE = '_',     /* '_' given in the format spec */
    LT_UNDER_FOUR_LOCALE,           /* '_' with bin/oct/hex: groups of four */
    LT_CURRENT_LOCALE               /* take the settings from localeconv() */
};
128 :
129 : typedef struct {
130 : Py_UCS4 fill_char;
131 : Py_UCS4 align;
132 : int alternate;
133 : int no_neg_0;
134 : Py_UCS4 sign;
135 : Py_ssize_t width;
136 : enum LocaleType thousands_separators;
137 : Py_ssize_t precision;
138 : Py_UCS4 type;
139 : } InternalFormatSpec;
140 :
141 :
142 : /*
143 : ptr points to the start of the format_spec, end points just past its end.
144 : fills in format with the parsed information.
145 : returns 1 on success, 0 on failure.
146 : if failure, sets the exception
147 : */
148 : static int
149 3851710 : parse_internal_render_format_spec(PyObject *obj,
150 : PyObject *format_spec,
151 : Py_ssize_t start, Py_ssize_t end,
152 : InternalFormatSpec *format,
153 : char default_type,
154 : char default_align)
155 : {
156 3851710 : Py_ssize_t pos = start;
157 3851710 : int kind = PyUnicode_KIND(format_spec);
158 3851710 : const void *data = PyUnicode_DATA(format_spec);
159 : /* end-pos is used throughout this code to specify the length of
160 : the input string */
161 : #define READ_spec(index) PyUnicode_READ(kind, data, index)
162 :
163 : Py_ssize_t consumed;
164 3851710 : int align_specified = 0;
165 3851710 : int fill_char_specified = 0;
166 :
167 3851710 : format->fill_char = ' ';
168 3851710 : format->align = default_align;
169 3851710 : format->alternate = 0;
170 3851710 : format->no_neg_0 = 0;
171 3851710 : format->sign = '\0';
172 3851710 : format->width = -1;
173 3851710 : format->thousands_separators = LT_NO_LOCALE;
174 3851710 : format->precision = -1;
175 3851710 : format->type = default_type;
176 :
177 : /* If the second char is an alignment token,
178 : then parse the fill char */
179 3851710 : if (end-pos >= 2 && is_alignment_token(READ_spec(pos+1))) {
180 74 : format->align = READ_spec(pos+1);
181 74 : format->fill_char = READ_spec(pos);
182 74 : fill_char_specified = 1;
183 74 : align_specified = 1;
184 74 : pos += 2;
185 : }
186 3851640 : else if (end-pos >= 1 && is_alignment_token(READ_spec(pos))) {
187 1521 : format->align = READ_spec(pos);
188 1521 : align_specified = 1;
189 1521 : ++pos;
190 : }
191 :
192 : /* Parse the various sign options */
193 3851710 : if (end-pos >= 1 && is_sign_element(READ_spec(pos))) {
194 18670 : format->sign = READ_spec(pos);
195 18670 : ++pos;
196 : }
197 :
198 : /* If the next character is z, request coercion of negative 0.
199 : Applies only to floats. */
200 3851710 : if (end-pos >= 1 && READ_spec(pos) == 'z') {
201 50 : format->no_neg_0 = 1;
202 50 : ++pos;
203 : }
204 :
205 : /* If the next character is #, we're in alternate mode. This only
206 : applies to integers. */
207 3851710 : if (end-pos >= 1 && READ_spec(pos) == '#') {
208 194 : format->alternate = 1;
209 194 : ++pos;
210 : }
211 :
212 : /* The special case for 0-padding (backwards compat) */
213 3851710 : if (!fill_char_specified && end-pos >= 1 && READ_spec(pos) == '0') {
214 455219 : format->fill_char = '0';
215 455219 : if (!align_specified && default_align == '>') {
216 455214 : format->align = '=';
217 : }
218 455219 : ++pos;
219 : }
220 :
221 3851710 : consumed = get_integer(format_spec, &pos, end, &format->width);
222 3851710 : if (consumed == -1)
223 : /* Overflow error. Exception already set. */
224 3 : return 0;
225 :
226 : /* If consumed is 0, we didn't consume any characters for the
227 : width. In that case, reset the width to -1, because
228 : get_integer() will have set it to zero. -1 is how we record
229 : that the width wasn't specified. */
230 3851710 : if (consumed == 0)
231 3388970 : format->width = -1;
232 :
233 : /* Comma signifies add thousands separators */
234 3851710 : if (end-pos && READ_spec(pos) == ',') {
235 56 : format->thousands_separators = LT_DEFAULT_LOCALE;
236 56 : ++pos;
237 : }
238 : /* Underscore signifies add thousands separators */
239 3851710 : if (end-pos && READ_spec(pos) == '_') {
240 102 : if (format->thousands_separators != LT_NO_LOCALE) {
241 4 : invalid_comma_and_underscore();
242 4 : return 0;
243 : }
244 98 : format->thousands_separators = LT_UNDERSCORE_LOCALE;
245 98 : ++pos;
246 : }
247 3851700 : if (end-pos && READ_spec(pos) == ',') {
248 6 : if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
249 4 : invalid_comma_and_underscore();
250 4 : return 0;
251 : }
252 : }
253 :
254 : /* Parse field precision */
255 3851700 : if (end-pos && READ_spec(pos) == '.') {
256 3352520 : ++pos;
257 :
258 3352520 : consumed = get_integer(format_spec, &pos, end, &format->precision);
259 3352520 : if (consumed == -1)
260 : /* Overflow error. Exception already set. */
261 4 : return 0;
262 :
263 : /* Not having a precision after a dot is an error. */
264 3352510 : if (consumed == 0) {
265 0 : PyErr_Format(PyExc_ValueError,
266 : "Format specifier missing precision");
267 0 : return 0;
268 : }
269 :
270 : }
271 :
272 : /* Finally, parse the type field. */
273 :
274 3851700 : if (end-pos > 1) {
275 : /* More than one char remains, so this is an invalid format
276 : specifier. */
277 : /* Create a temporary object that contains the format spec we're
278 : operating on. It's format_spec[start:end] (in Python syntax). */
279 15 : PyObject* actual_format_spec = PyUnicode_FromKindAndData(kind,
280 15 : (char*)data + kind*start,
281 : end-start);
282 15 : if (actual_format_spec != NULL) {
283 15 : PyErr_Format(PyExc_ValueError,
284 : "Invalid format specifier '%U' for object of type '%.200s'",
285 15 : actual_format_spec, Py_TYPE(obj)->tp_name);
286 15 : Py_DECREF(actual_format_spec);
287 : }
288 15 : return 0;
289 : }
290 :
291 3851680 : if (end-pos == 1) {
292 3843030 : format->type = READ_spec(pos);
293 3843030 : ++pos;
294 : }
295 :
296 : /* Do as much validating as we can, just by looking at the format
297 : specifier. Do not take into account what type of formatting
298 : we're doing (int, float, string). */
299 :
300 3851680 : if (format->thousands_separators) {
301 146 : switch (format->type) {
302 107 : case 'd':
303 : case 'e':
304 : case 'f':
305 : case 'g':
306 : case 'E':
307 : case 'G':
308 : case '%':
309 : case 'F':
310 : case '\0':
311 : /* These are allowed. See PEP 378.*/
312 107 : break;
313 27 : case 'b':
314 : case 'o':
315 : case 'x':
316 : case 'X':
317 : /* Underscores are allowed in bin/oct/hex. See PEP 515. */
318 27 : if (format->thousands_separators == LT_UNDERSCORE_LOCALE) {
319 : /* Every four digits, not every three, in bin/oct/hex. */
320 20 : format->thousands_separators = LT_UNDER_FOUR_LOCALE;
321 20 : break;
322 : }
323 : /* fall through */
324 : default:
325 19 : invalid_thousands_separator_type(format->thousands_separators, format->type);
326 19 : return 0;
327 : }
328 3851530 : }
329 :
330 3851660 : assert (format->align <= 127);
331 3851660 : assert (format->sign <= 127);
332 3851660 : return 1;
333 : }
334 :
335 : /* Calculate the padding needed. */
336 : static void
337 7200 : calc_padding(Py_ssize_t nchars, Py_ssize_t width, Py_UCS4 align,
338 : Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
339 : Py_ssize_t *n_total)
340 : {
341 7200 : if (width >= 0) {
342 6422 : if (nchars > width)
343 195 : *n_total = nchars;
344 : else
345 6227 : *n_total = width;
346 : }
347 : else {
348 : /* not specified, use all of the chars and no more */
349 778 : *n_total = nchars;
350 : }
351 :
352 : /* Figure out how much leading space we need, based on the
353 : aligning */
354 7200 : if (align == '>')
355 1290 : *n_lpadding = *n_total - nchars;
356 5910 : else if (align == '^')
357 48 : *n_lpadding = (*n_total - nchars) / 2;
358 5862 : else if (align == '<' || align == '=')
359 5862 : *n_lpadding = 0;
360 : else {
361 : /* We should never have an unspecified alignment. */
362 0 : Py_UNREACHABLE();
363 : }
364 :
365 7200 : *n_rpadding = *n_total - nchars - *n_lpadding;
366 7200 : }
367 :
368 : /* Do the padding, and return a pointer to where the caller-supplied
369 : content goes. */
370 : static int
371 7200 : fill_padding(_PyUnicodeWriter *writer,
372 : Py_ssize_t nchars,
373 : Py_UCS4 fill_char, Py_ssize_t n_lpadding,
374 : Py_ssize_t n_rpadding)
375 : {
376 : Py_ssize_t pos;
377 :
378 : /* Pad on left. */
379 7200 : if (n_lpadding) {
380 1177 : pos = writer->pos;
381 1177 : _PyUnicode_FastFill(writer->buffer, pos, n_lpadding, fill_char);
382 : }
383 :
384 : /* Pad on right. */
385 7200 : if (n_rpadding) {
386 5095 : pos = writer->pos + nchars + n_lpadding;
387 5095 : _PyUnicode_FastFill(writer->buffer, pos, n_rpadding, fill_char);
388 : }
389 :
390 : /* Pointer to the user content. */
391 7200 : writer->pos += n_lpadding;
392 7200 : return 0;
393 : }
394 :
395 : /************************************************************************/
396 : /*********** common routines for numeric formatting *********************/
397 : /************************************************************************/
398 :
399 : /* Locale info needed for formatting integers and the part of floats
400 : before and including the decimal. Note that locales only support
401 : 8-bit chars, not unicode. */
402 : typedef struct {
403 : PyObject *decimal_point;
404 : PyObject *thousands_sep;
405 : const char *grouping;
406 : char *grouping_buffer;
407 : } LocaleInfo;
408 :
409 : #define LocaleInfo_STATIC_INIT {0, 0, 0, 0}
410 :
411 : /* describes the layout for an integer, see the comment in
412 : calc_number_widths() for details */
413 : typedef struct {
414 : Py_ssize_t n_lpadding;
415 : Py_ssize_t n_prefix;
416 : Py_ssize_t n_spadding;
417 : Py_ssize_t n_rpadding;
418 : char sign;
419 : Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */
420 : Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
421 : any grouping chars. */
422 : Py_ssize_t n_decimal; /* 0 if only an integer */
423 : Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
424 : excluding the decimal itself, if
425 : present. */
426 :
427 : /* These 2 are not the widths of fields, but are needed by
428 : STRINGLIB_GROUPING. */
429 : Py_ssize_t n_digits; /* The number of digits before a decimal
430 : or exponent. */
431 : Py_ssize_t n_min_width; /* The min_width we used when we computed
432 : the n_grouped_digits width. */
433 : } NumberFieldWidths;
434 :
435 :
436 : /* Given a number of the form:
437 : digits[remainder]
438 : where ptr points to the start and end points to the end, find where
439 : the integer part ends. This could be a decimal, an exponent, both,
440 : or neither.
441 : If a decimal point is present, set *has_decimal and increment
442 : remainder beyond it.
443 : Results are undefined (but shouldn't crash) for improperly
444 : formatted strings.
445 : */
446 : static void
447 467 : parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
448 : Py_ssize_t *n_remainder, int *has_decimal)
449 : {
450 : Py_ssize_t remainder;
451 467 : int kind = PyUnicode_KIND(s);
452 467 : const void *data = PyUnicode_DATA(s);
453 :
454 1225 : while (pos<end && Py_ISDIGIT(PyUnicode_READ(kind, data, pos)))
455 758 : ++pos;
456 467 : remainder = pos;
457 :
458 : /* Does remainder start with a decimal point? */
459 467 : *has_decimal = pos<end && PyUnicode_READ(kind, data, remainder) == '.';
460 :
461 : /* Skip the decimal point. */
462 467 : if (*has_decimal)
463 280 : remainder++;
464 :
465 467 : *n_remainder = end - remainder;
466 467 : }
467 :
468 : /* not all fields of format are used. for example, precision is
469 : unused. should this take discrete params in order to be more clear
470 : about what it does? or is passing a single format parameter easier
471 : and more efficient enough to justify a little obfuscation?
472 : Return -1 on error. */
473 : static Py_ssize_t
474 474734 : calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
475 : Py_UCS4 sign_char, Py_ssize_t n_start,
476 : Py_ssize_t n_end, Py_ssize_t n_remainder,
477 : int has_decimal, const LocaleInfo *locale,
478 : const InternalFormatSpec *format, Py_UCS4 *maxchar)
479 : {
480 : Py_ssize_t n_non_digit_non_padding;
481 : Py_ssize_t n_padding;
482 :
483 474734 : spec->n_digits = n_end - n_start - n_remainder - (has_decimal?1:0);
484 474734 : spec->n_lpadding = 0;
485 474734 : spec->n_prefix = n_prefix;
486 474734 : spec->n_decimal = has_decimal ? PyUnicode_GET_LENGTH(locale->decimal_point) : 0;
487 474734 : spec->n_remainder = n_remainder;
488 474734 : spec->n_spadding = 0;
489 474734 : spec->n_rpadding = 0;
490 474734 : spec->sign = '\0';
491 474734 : spec->n_sign = 0;
492 :
493 : /* the output will look like:
494 : | |
495 : | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
496 : | |
497 :
498 : sign is computed from format->sign and the actual
499 : sign of the number
500 :
501 : prefix is given (it's for the '0x' prefix)
502 :
503 : digits is already known
504 :
505 : the total width is either given, or computed from the
506 : actual digits
507 :
508 : only one of lpadding, spadding, and rpadding can be non-zero,
509 : and it's calculated from the width and other fields
510 : */
511 :
512 : /* compute the various parts we're going to write */
513 474734 : switch (format->sign) {
514 18631 : case '+':
515 : /* always put a + or - */
516 18631 : spec->n_sign = 1;
517 18631 : spec->sign = (sign_char == '-' ? '-' : '+');
518 18631 : break;
519 80 : case ' ':
520 80 : spec->n_sign = 1;
521 80 : spec->sign = (sign_char == '-' ? '-' : ' ');
522 80 : break;
523 456023 : default:
524 : /* Not specified, or the default (-) */
525 456023 : if (sign_char == '-') {
526 88 : spec->n_sign = 1;
527 88 : spec->sign = '-';
528 : }
529 : }
530 :
531 : /* The number of chars used for non-digits and non-padding. */
532 474734 : n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
533 474734 : spec->n_remainder;
534 :
535 : /* min_width can go negative, that's okay. format->width == -1 means
536 : we don't care. */
537 474734 : if (format->fill_char == '0' && format->align == '=')
538 455205 : spec->n_min_width = format->width - n_non_digit_non_padding;
539 : else
540 19529 : spec->n_min_width = 0;
541 :
542 474734 : if (spec->n_digits == 0)
543 : /* This case only occurs when using 'c' formatting, we need
544 : to special case it because the grouping code always wants
545 : to have at least one character. */
546 117 : spec->n_grouped_digits = 0;
547 : else {
548 : Py_UCS4 grouping_maxchar;
549 474617 : spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
550 : NULL, 0,
551 : NULL, 0, spec->n_digits,
552 : spec->n_min_width,
553 : locale->grouping, locale->thousands_sep, &grouping_maxchar);
554 474617 : if (spec->n_grouped_digits == -1) {
555 0 : return -1;
556 : }
557 474617 : *maxchar = Py_MAX(*maxchar, grouping_maxchar);
558 : }
559 :
560 : /* Given the desired width and the total of digit and non-digit
561 : space we consume, see if we need any padding. format->width can
562 : be negative (meaning no padding), but this code still works in
563 : that case. */
564 474734 : n_padding = format->width -
565 474734 : (n_non_digit_non_padding + spec->n_grouped_digits);
566 474734 : if (n_padding > 0) {
567 : /* Some padding is needed. Determine if it's left, space, or right. */
568 268 : switch (format->align) {
569 39 : case '<':
570 39 : spec->n_rpadding = n_padding;
571 39 : break;
572 27 : case '^':
573 27 : spec->n_lpadding = n_padding / 2;
574 27 : spec->n_rpadding = n_padding - spec->n_lpadding;
575 27 : break;
576 8 : case '=':
577 8 : spec->n_spadding = n_padding;
578 8 : break;
579 194 : case '>':
580 194 : spec->n_lpadding = n_padding;
581 194 : break;
582 0 : default:
583 : /* Shouldn't get here */
584 0 : Py_UNREACHABLE();
585 : }
586 474466 : }
587 :
588 474734 : if (spec->n_lpadding || spec->n_spadding || spec->n_rpadding)
589 268 : *maxchar = Py_MAX(*maxchar, format->fill_char);
590 :
591 474734 : if (spec->n_decimal)
592 280 : *maxchar = Py_MAX(*maxchar, PyUnicode_MAX_CHAR_VALUE(locale->decimal_point));
593 :
594 474734 : return spec->n_lpadding + spec->n_sign + spec->n_prefix +
595 474734 : spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
596 474734 : spec->n_remainder + spec->n_rpadding;
597 : }
598 :
599 : /* Fill in the digit parts of a number's string representation,
600 : as determined in calc_number_widths().
601 : Return -1 on error, or 0 on success. */
602 : static int
603 474728 : fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
604 : PyObject *digits, Py_ssize_t d_start,
605 : PyObject *prefix, Py_ssize_t p_start,
606 : Py_UCS4 fill_char,
607 : LocaleInfo *locale, int toupper)
608 : {
609 : /* Used to keep track of digits, decimal, and remainder. */
610 474728 : Py_ssize_t d_pos = d_start;
611 474728 : const int kind = writer->kind;
612 474728 : const void *data = writer->data;
613 : Py_ssize_t r;
614 :
615 474728 : if (spec->n_lpadding) {
616 221 : _PyUnicode_FastFill(writer->buffer,
617 : writer->pos, spec->n_lpadding, fill_char);
618 221 : writer->pos += spec->n_lpadding;
619 : }
620 474728 : if (spec->n_sign == 1) {
621 18799 : PyUnicode_WRITE(kind, data, writer->pos, spec->sign);
622 18799 : writer->pos++;
623 : }
624 474728 : if (spec->n_prefix) {
625 49 : _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
626 : prefix, p_start,
627 : spec->n_prefix);
628 49 : if (toupper) {
629 : Py_ssize_t t;
630 45 : for (t = 0; t < spec->n_prefix; t++) {
631 30 : Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
632 30 : c = Py_TOUPPER(c);
633 30 : assert (c <= 127);
634 30 : PyUnicode_WRITE(kind, data, writer->pos + t, c);
635 : }
636 : }
637 49 : writer->pos += spec->n_prefix;
638 : }
639 474728 : if (spec->n_spadding) {
640 8 : _PyUnicode_FastFill(writer->buffer,
641 : writer->pos, spec->n_spadding, fill_char);
642 8 : writer->pos += spec->n_spadding;
643 : }
644 :
645 : /* Only for type 'c' special case, it has no digits. */
646 474728 : if (spec->n_digits != 0) {
647 : /* Fill the digits with InsertThousandsGrouping. */
648 474611 : r = _PyUnicode_InsertThousandsGrouping(
649 : writer, spec->n_grouped_digits,
650 : digits, d_pos, spec->n_digits,
651 : spec->n_min_width,
652 : locale->grouping, locale->thousands_sep, NULL);
653 474611 : if (r == -1)
654 0 : return -1;
655 474611 : assert(r == spec->n_grouped_digits);
656 474611 : d_pos += spec->n_digits;
657 : }
658 474728 : if (toupper) {
659 : Py_ssize_t t;
660 2577 : for (t = 0; t < spec->n_grouped_digits; t++) {
661 1863 : Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t);
662 1863 : c = Py_TOUPPER(c);
663 1863 : if (c > 127) {
664 0 : PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit");
665 0 : return -1;
666 : }
667 1863 : PyUnicode_WRITE(kind, data, writer->pos + t, c);
668 : }
669 : }
670 474728 : writer->pos += spec->n_grouped_digits;
671 :
672 474728 : if (spec->n_decimal) {
673 280 : _PyUnicode_FastCopyCharacters(
674 : writer->buffer, writer->pos,
675 : locale->decimal_point, 0, spec->n_decimal);
676 280 : writer->pos += spec->n_decimal;
677 280 : d_pos += 1;
678 : }
679 :
680 474728 : if (spec->n_remainder) {
681 398 : _PyUnicode_FastCopyCharacters(
682 : writer->buffer, writer->pos,
683 : digits, d_pos, spec->n_remainder);
684 398 : writer->pos += spec->n_remainder;
685 : /* d_pos += spec->n_remainder; */
686 : }
687 :
688 474728 : if (spec->n_rpadding) {
689 66 : _PyUnicode_FastFill(writer->buffer,
690 : writer->pos, spec->n_rpadding,
691 : fill_char);
692 66 : writer->pos += spec->n_rpadding;
693 : }
694 474728 : return 0;
695 : }
696 :
/* Grouping description used when no thousands separators are wanted.
   In the lconv.grouping convention a CHAR_MAX element means that no
   further grouping is performed. */
static const char no_grouping[1] = { CHAR_MAX };
698 :
699 : /* Find the decimal point character(s?), thousands_separator(s?), and
700 : grouping description, either for the current locale if type is
701 : LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE or
702 : LT_UNDERSCORE_LOCALE/LT_UNDER_FOUR_LOCALE, or none if LT_NO_LOCALE. */
703 : static int
704 474640 : get_locale_info(enum LocaleType type, LocaleInfo *locale_info)
705 : {
706 474640 : switch (type) {
707 122 : case LT_CURRENT_LOCALE: {
708 122 : struct lconv *lc = localeconv();
709 122 : if (_Py_GetLocaleconvNumeric(lc,
710 : &locale_info->decimal_point,
711 : &locale_info->thousands_sep) < 0) {
712 0 : return -1;
713 : }
714 :
715 : /* localeconv() grouping can become a dangling pointer or point
716 : to a different string if another thread calls localeconv() during
717 : the string formatting. Copy the string to avoid this risk. */
718 122 : locale_info->grouping_buffer = _PyMem_Strdup(lc->grouping);
719 122 : if (locale_info->grouping_buffer == NULL) {
720 0 : PyErr_NoMemory();
721 0 : return -1;
722 : }
723 122 : locale_info->grouping = locale_info->grouping_buffer;
724 122 : break;
725 : }
726 127 : case LT_DEFAULT_LOCALE:
727 : case LT_UNDERSCORE_LOCALE:
728 : case LT_UNDER_FOUR_LOCALE:
729 127 : locale_info->decimal_point = PyUnicode_FromOrdinal('.');
730 127 : locale_info->thousands_sep = PyUnicode_FromOrdinal(
731 : type == LT_DEFAULT_LOCALE ? ',' : '_');
732 127 : if (!locale_info->decimal_point || !locale_info->thousands_sep)
733 0 : return -1;
734 127 : if (type != LT_UNDER_FOUR_LOCALE)
735 107 : locale_info->grouping = "\3"; /* Group every 3 characters. The
736 : (implicit) trailing 0 means repeat
737 : infinitely. */
738 : else
739 20 : locale_info->grouping = "\4"; /* Bin/oct/hex group every four. */
740 127 : break;
741 474391 : case LT_NO_LOCALE:
742 474391 : locale_info->decimal_point = PyUnicode_FromOrdinal('.');
743 474391 : locale_info->thousands_sep = PyUnicode_New(0, 0);
744 474391 : if (!locale_info->decimal_point || !locale_info->thousands_sep)
745 0 : return -1;
746 474391 : locale_info->grouping = no_grouping;
747 474391 : break;
748 : }
749 474640 : return 0;
750 : }
751 :
752 : static void
753 474658 : free_locale_info(LocaleInfo *locale_info)
754 : {
755 474658 : Py_XDECREF(locale_info->decimal_point);
756 474658 : Py_XDECREF(locale_info->thousands_sep);
757 474658 : PyMem_Free(locale_info->grouping_buffer);
758 474658 : }
759 :
760 : /************************************************************************/
761 : /*********** string formatting ******************************************/
762 : /************************************************************************/
763 :
764 : static int
765 7969 : format_string_internal(PyObject *value, const InternalFormatSpec *format,
766 : _PyUnicodeWriter *writer)
767 : {
768 : Py_ssize_t lpad;
769 : Py_ssize_t rpad;
770 : Py_ssize_t total;
771 : Py_ssize_t len;
772 7969 : int result = -1;
773 : Py_UCS4 maxchar;
774 :
775 7969 : assert(PyUnicode_IS_READY(value));
776 7969 : len = PyUnicode_GET_LENGTH(value);
777 :
778 : /* sign is not allowed on strings */
779 7969 : if (format->sign != '\0') {
780 3 : if (format->sign == ' ') {
781 1 : PyErr_SetString(PyExc_ValueError,
782 : "Space not allowed in string format specifier");
783 : }
784 : else {
785 2 : PyErr_SetString(PyExc_ValueError,
786 : "Sign not allowed in string format specifier");
787 : }
788 3 : goto done;
789 : }
790 :
791 : /* negative 0 coercion is not allowed on strings */
792 7966 : if (format->no_neg_0) {
793 1 : PyErr_SetString(PyExc_ValueError,
794 : "Negative zero coercion (z) not allowed in string format "
795 : "specifier");
796 1 : goto done;
797 : }
798 :
799 : /* alternate is not allowed on strings */
800 7965 : if (format->alternate) {
801 2 : PyErr_SetString(PyExc_ValueError,
802 : "Alternate form (#) not allowed in string format "
803 : "specifier");
804 2 : goto done;
805 : }
806 :
807 : /* '=' alignment not allowed on strings */
808 7963 : if (format->align == '=') {
809 1 : PyErr_SetString(PyExc_ValueError,
810 : "'=' alignment not allowed "
811 : "in string format specifier");
812 1 : goto done;
813 : }
814 :
815 7962 : if ((format->width == -1 || format->width <= len)
816 2291 : && (format->precision == -1 || format->precision >= len)) {
817 : /* Fast path */
818 856 : return _PyUnicodeWriter_WriteStr(writer, value);
819 : }
820 :
821 : /* if precision is specified, output no more that format.precision
822 : characters */
823 7106 : if (format->precision >= 0 && len >= format->precision) {
824 2331 : len = format->precision;
825 : }
826 :
827 7106 : calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
828 :
829 7106 : maxchar = writer->maxchar;
830 7106 : if (lpad != 0 || rpad != 0)
831 6200 : maxchar = Py_MAX(maxchar, format->fill_char);
832 7106 : if (PyUnicode_MAX_CHAR_VALUE(value) > maxchar) {
833 7102 : Py_UCS4 valmaxchar = _PyUnicode_FindMaxChar(value, 0, len);
834 7102 : maxchar = Py_MAX(maxchar, valmaxchar);
835 : }
836 :
837 : /* allocate the resulting string */
838 7106 : if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
839 0 : goto done;
840 :
841 : /* Write into that space. First the padding. */
842 7106 : result = fill_padding(writer, len, format->fill_char, lpad, rpad);
843 7106 : if (result == -1)
844 0 : goto done;
845 :
846 : /* Then the source string. */
847 7106 : if (len) {
848 5278 : _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
849 : value, 0, len);
850 : }
851 7106 : writer->pos += (len + rpad);
852 7106 : result = 0;
853 :
854 7113 : done:
855 7113 : return result;
856 : }
857 :
858 :
859 : /************************************************************************/
860 : /*********** long formatting ********************************************/
861 : /************************************************************************/
862 :
863 : static int
864 493265 : format_long_internal(PyObject *value, const InternalFormatSpec *format,
865 : _PyUnicodeWriter *writer)
866 : {
867 493265 : int result = -1;
868 493265 : Py_UCS4 maxchar = 127;
869 493265 : PyObject *tmp = NULL;
870 : Py_ssize_t inumeric_chars;
871 493265 : Py_UCS4 sign_char = '\0';
872 : Py_ssize_t n_digits; /* count of digits need from the computed
873 : string */
874 493265 : Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
875 : produces non-digits */
876 493265 : Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */
877 : Py_ssize_t n_total;
878 493265 : Py_ssize_t prefix = 0;
879 : NumberFieldWidths spec;
880 : long x;
881 :
882 : /* Locale settings, either from the actual locale or
883 : from a hard-code pseudo-locale */
884 493265 : LocaleInfo locale = LocaleInfo_STATIC_INIT;
885 :
886 : /* no precision allowed on integers */
887 493265 : if (format->precision != -1) {
888 2 : PyErr_SetString(PyExc_ValueError,
889 : "Precision not allowed in integer format specifier");
890 2 : goto done;
891 : }
892 : /* no negative zero coercion on integers */
893 493263 : if (format->no_neg_0) {
894 9 : PyErr_SetString(PyExc_ValueError,
895 : "Negative zero coercion (z) not allowed in integer"
896 : " format specifier");
897 9 : goto done;
898 : }
899 :
900 : /* special case for character formatting */
901 493254 : if (format->type == 'c') {
902 : /* error to specify a sign */
903 4 : if (format->sign != '\0') {
904 2 : PyErr_SetString(PyExc_ValueError,
905 : "Sign not allowed with integer"
906 : " format specifier 'c'");
907 2 : goto done;
908 : }
909 : /* error to request alternate format */
910 2 : if (format->alternate) {
911 1 : PyErr_SetString(PyExc_ValueError,
912 : "Alternate form (#) not allowed with integer"
913 : " format specifier 'c'");
914 1 : goto done;
915 : }
916 :
917 : /* taken from unicodeobject.c formatchar() */
918 : /* Integer input truncated to a character */
919 1 : x = PyLong_AsLong(value);
920 1 : if (x == -1 && PyErr_Occurred())
921 0 : goto done;
922 1 : if (x < 0 || x > 0x10ffff) {
923 0 : PyErr_SetString(PyExc_OverflowError,
924 : "%c arg not in range(0x110000)");
925 0 : goto done;
926 : }
927 1 : tmp = PyUnicode_FromOrdinal(x);
928 1 : inumeric_chars = 0;
929 1 : n_digits = 1;
930 1 : maxchar = Py_MAX(maxchar, (Py_UCS4)x);
931 :
932 : /* As a sort-of hack, we tell calc_number_widths that we only
933 : have "remainder" characters. calc_number_widths thinks
934 : these are characters that don't get formatted, only copied
935 : into the output string. We do this for 'c' formatting,
936 : because the characters are likely to be non-digits. */
937 1 : n_remainder = 1;
938 : }
939 : else {
940 : int base;
941 493250 : int leading_chars_to_skip = 0; /* Number of characters added by
942 : PyNumber_ToBase that we want to
943 : skip over. */
944 :
945 : /* Compute the base and how many characters will be added by
946 : PyNumber_ToBase */
947 493250 : switch (format->type) {
948 42 : case 'b':
949 42 : base = 2;
950 42 : leading_chars_to_skip = 2; /* 0b */
951 42 : break;
952 40 : case 'o':
953 40 : base = 8;
954 40 : leading_chars_to_skip = 2; /* 0o */
955 40 : break;
956 466132 : case 'x':
957 : case 'X':
958 466132 : base = 16;
959 466132 : leading_chars_to_skip = 2; /* 0x */
960 466132 : break;
961 27036 : default: /* shouldn't be needed, but stops a compiler warning */
962 : case 'd':
963 : case 'n':
964 27036 : base = 10;
965 27036 : break;
966 : }
967 :
968 493250 : if (format->sign != '+' && format->sign != ' '
969 474761 : && format->width == -1
970 19153 : && format->type != 'X' && format->type != 'n'
971 19072 : && !format->thousands_separators
972 18990 : && PyLong_CheckExact(value))
973 : {
974 : /* Fast path */
975 18984 : return _PyLong_FormatWriter(writer, value, base, format->alternate);
976 : }
977 :
978 : /* The number of prefix chars is the same as the leading
979 : chars to skip */
980 474266 : if (format->alternate)
981 49 : n_prefix = leading_chars_to_skip;
982 :
983 : /* Do the hard part, converting to a string in a given base */
984 474266 : tmp = _PyLong_Format(value, base);
985 474266 : if (tmp == NULL || PyUnicode_READY(tmp) == -1)
986 0 : goto done;
987 :
988 474266 : inumeric_chars = 0;
989 474266 : n_digits = PyUnicode_GET_LENGTH(tmp);
990 :
991 474266 : prefix = inumeric_chars;
992 :
993 : /* Is a sign character present in the output? If so, remember it
994 : and skip it */
995 474266 : if (PyUnicode_READ_CHAR(tmp, inumeric_chars) == '-') {
996 5285 : sign_char = '-';
997 5285 : ++prefix;
998 5285 : ++leading_chars_to_skip;
999 : }
1000 :
1001 : /* Skip over the leading chars (0x, 0b, etc.) */
1002 474266 : n_digits -= leading_chars_to_skip;
1003 474266 : inumeric_chars += leading_chars_to_skip;
1004 : }
1005 :
1006 : /* Determine the grouping, separator, and decimal point, if any. */
1007 474267 : if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1008 : format->thousands_separators,
1009 : &locale) == -1)
1010 0 : goto done;
1011 :
1012 : /* Calculate how much memory we'll need. */
1013 474267 : n_total = calc_number_widths(&spec, n_prefix, sign_char, inumeric_chars,
1014 : inumeric_chars + n_digits, n_remainder, 0,
1015 : &locale, format, &maxchar);
1016 474267 : if (n_total == -1) {
1017 0 : goto done;
1018 : }
1019 :
1020 : /* Allocate the memory. */
1021 474267 : if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1022 0 : goto done;
1023 :
1024 : /* Populate the memory. */
1025 474267 : result = fill_number(writer, &spec,
1026 : tmp, inumeric_chars,
1027 : tmp, prefix, format->fill_char,
1028 474267 : &locale, format->type == 'X');
1029 :
1030 474281 : done:
1031 474281 : Py_XDECREF(tmp);
1032 474281 : free_locale_info(&locale);
1033 474281 : return result;
1034 : }
1035 :
1036 : /************************************************************************/
1037 : /*********** float formatting *******************************************/
1038 : /************************************************************************/
1039 :
1040 : /* much of this is taken from unicodeobject.c */
/* Format the float `value` according to `format` and append the text to
   `writer`.

   Outline:
     - a precision larger than INT_MAX is rejected with ValueError;
     - the presentation type is normalised: an omitted type becomes 'r'
       (with Py_DTSF_ADD_DOT_0 and a default precision of 0), 'n' becomes
       'g', and '%' becomes 'f' applied to value * 100 with a '%' appended;
     - PyOS_double_to_string() produces the ASCII digits;
     - if no '+'/' ' sign option, no width, no 'n' type and no thousands
       separator were requested, the ASCII text is written to `writer`
       unchanged (fast path);
     - otherwise the text goes through parse_number(), get_locale_info(),
       calc_number_widths() and fill_number().

   Returns -1 for every error detected here (`result` starts out as -1);
   otherwise returns whatever _PyUnicodeWriter_WriteASCIIString() (fast
   path) or fill_number() (general path) returned. */
1041 : static int
1042 3349920 : format_float_internal(PyObject *value,
1043 : const InternalFormatSpec *format,
1044 : _PyUnicodeWriter *writer)
1045 : {
1046 3349920 : char *buf = NULL; /* buffer returned from PyOS_double_to_string */
1047 : Py_ssize_t n_digits;
1048 : Py_ssize_t n_remainder;
1049 : Py_ssize_t n_total;
1050 : int has_decimal;
1051 : double val;
1052 3349920 : int precision, default_precision = 6;
1053 3349920 : Py_UCS4 type = format->type;
1054 3349920 : int add_pct = 0;
1055 : Py_ssize_t index;
1056 : NumberFieldWidths spec;
1057 3349920 : int flags = 0;
1058 3349920 : int result = -1;
1059 3349920 : Py_UCS4 maxchar = 127;
1060 3349920 : Py_UCS4 sign_char = '\0';
1061 : int float_type; /* Used to see if we have a nan, inf, or regular float. */
1062 3349920 : PyObject *unicode_tmp = NULL;
1063 :
1064 : /* Locale settings, either from the actual locale or
1065 : from a hard-coded pseudo-locale */
1066 3349920 : LocaleInfo locale = LocaleInfo_STATIC_INIT;
1067 :
/* format->precision is wider than int; reject values that the (int)
   cast below could not represent. */
1068 3349920 : if (format->precision > INT_MAX) {
1069 1 : PyErr_SetString(PyExc_ValueError, "precision too big");
1070 1 : goto done;
1071 : }
1072 3349920 : precision = (int)format->precision;
1073 :
1074 3349920 : if (format->alternate)
1075 92 : flags |= Py_DTSF_ALT;
1076 3349920 : if (format->no_neg_0)
1077 35 : flags |= Py_DTSF_NO_NEG_0;
1078 :
1079 3349920 : if (type == '\0') {
1080 : /* Omitted type specifier. Behaves in the same way as repr(x)
1081 : and str(x) if no precision is given, else like 'g', but with
1082 : at least one digit after the decimal point. */
1083 55 : flags |= Py_DTSF_ADD_DOT_0;
1084 55 : type = 'r';
1085 55 : default_precision = 0;
1086 : }
1087 :
1088 3349920 : if (type == 'n')
1089 : /* 'n' is the same as 'g', except for the locale used to
1090 : format the result. We take care of that later. */
1091 43 : type = 'g';
1092 :
/* -1.0 is also a valid float value, so it only signals failure when an
   exception is pending. */
1093 3349920 : val = PyFloat_AsDouble(value);
1094 3349920 : if (val == -1.0 && PyErr_Occurred())
1095 0 : goto done;
1096 :
/* '%' is rendered as fixed-point of value * 100; the '%' character itself
   is appended to the buffer further down. */
1097 3349920 : if (type == '%') {
1098 40 : type = 'f';
1099 40 : val *= 100;
1100 40 : add_pct = 1;
1101 : }
1102 :
/* A negative precision selects the default for the type. An explicit
   precision combined with the implicit 'r' type is formatted as 'g'. */
1103 3349920 : if (precision < 0)
1104 662 : precision = default_precision;
1105 3349260 : else if (type == 'r')
1106 9 : type = 'g';
1107 :
1108 : /* Cast "type", because if we're in unicode we need to pass an
1109 : 8-bit char. This is safe, because we've restricted what "type"
1110 : can be. */
1111 3349920 : buf = PyOS_double_to_string(val, (char)type, precision, flags,
1112 : &float_type);
1113 3349920 : if (buf == NULL)
1114 0 : goto done;
1115 3349920 : n_digits = strlen(buf);
1116 :
1117 3349920 : if (add_pct) {
1118 : /* We know that buf has a trailing zero (since we just called
1119 : strlen() on it), and we don't use that fact any more. So we
1120 : can just write over the trailing zero. */
/* From here on buf is no longer NUL-terminated; every later use passes
   n_digits explicitly. */
1121 40 : buf[n_digits] = '%';
1122 40 : n_digits += 1;
1123 : }
1124 :
/* The checks below use format->type (the caller's original type), not the
   normalised local `type`, which has already turned 'n' into 'g'. */
1125 3349920 : if (format->sign != '+' && format->sign != ' '
1126 3349790 : && format->width == -1
1127 3349680 : && format->type != 'n'
1128 3349640 : && !format->thousands_separators)
1129 : {
1130 : /* Fast path */
/* Returns directly: unicode_tmp is still NULL and get_locale_info() has
   not been called for `locale`, so the `done` cleanup is skipped. */
1131 3349640 : result = _PyUnicodeWriter_WriteASCIIString(writer, buf, n_digits);
1132 3349640 : PyMem_Free(buf);
1133 3349640 : return result;
1134 : }
1135 :
1136 : /* Since there is no unicode version of PyOS_double_to_string,
1137 : just use the 8 bit version and then convert to unicode. */
/* buf is released as soon as it has been copied, which is why the `done`
   label does not free it. */
1138 279 : unicode_tmp = _PyUnicode_FromASCII(buf, n_digits);
1139 279 : PyMem_Free(buf);
1140 279 : if (unicode_tmp == NULL)
1141 0 : goto done;
1142 :
1143 : /* Is a sign character present in the output? If so, remember it
1144 : and skip it */
1145 279 : index = 0;
1146 279 : if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') {
1147 76 : sign_char = '-';
1148 76 : ++index;
1149 76 : --n_digits;
1150 : }
1151 :
1152 : /* Determine if we have any "remainder" (after the digits, might include
1153 : decimal or exponent or both (or neither)) */
1154 279 : parse_number(unicode_tmp, index, index + n_digits, &n_remainder, &has_decimal);
1155 :
1156 : /* Determine the grouping, separator, and decimal point, if any. */
1157 279 : if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1158 : format->thousands_separators,
1159 : &locale) == -1)
1160 0 : goto done;
1161 :
1162 : /* Calculate how much memory we'll need. */
1163 279 : n_total = calc_number_widths(&spec, 0, sign_char, index,
1164 : index + n_digits, n_remainder, has_decimal,
1165 : &locale, format, &maxchar);
1166 279 : if (n_total == -1) {
1167 0 : goto done;
1168 : }
1169 :
1170 : /* Allocate the memory. */
1171 279 : if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
1172 0 : goto done;
1173 :
1174 : /* Populate the memory. */
1175 279 : result = fill_number(writer, &spec,
1176 : unicode_tmp, index,
1177 : NULL, 0, format->fill_char,
1178 : &locale, 0);
1179 :
/* Shared cleanup for the general path and for every error exit. */
1180 280 : done:
1181 280 : Py_XDECREF(unicode_tmp);
1182 280 : free_locale_info(&locale);
1183 280 : return result;
1184 : }
1185 :
1186 : /************************************************************************/
1187 : /*********** complex formatting *****************************************/
1188 : /************************************************************************/
1189 :
/* Format the complex `value` according to `format` and append the text to
   `writer`.

   Outline:
     - a precision larger than INT_MAX, a '0' fill character and '='
       alignment are each rejected with ValueError;
     - with an omitted type the parts are formatted as 'r' with a default
       precision of 0; the real part is left out when it is positive zero,
       otherwise the result is wrapped in parentheses;
     - real and imaginary parts are converted separately with
       PyOS_double_to_string() and sized with calc_number_widths() using a
       copy of the format spec (`tmp_format`) that has padding disabled;
     - the imaginary part always carries a sign unless the real part is
       omitted;
     - padding for the whole "(re+imj)" text is computed by calc_padding()
       from the caller's width and alignment and written by fill_padding().

   Returns -1 for every error detected here (`result` starts out as -1);
   otherwise returns the value of the final fill_number() call. */
1190 : static int
1191 97 : format_complex_internal(PyObject *value,
1192 : const InternalFormatSpec *format,
1193 : _PyUnicodeWriter *writer)
1194 : {
1195 : double re;
1196 : double im;
1197 97 : char *re_buf = NULL; /* buffer returned from PyOS_double_to_string */
1198 97 : char *im_buf = NULL; /* buffer returned from PyOS_double_to_string */
1199 :
/* Private, modifiable copy of the caller's spec; see where padding is
   turned off below. */
1200 97 : InternalFormatSpec tmp_format = *format;
1201 : Py_ssize_t n_re_digits;
1202 : Py_ssize_t n_im_digits;
1203 : Py_ssize_t n_re_remainder;
1204 : Py_ssize_t n_im_remainder;
1205 : Py_ssize_t n_re_total;
1206 : Py_ssize_t n_im_total;
1207 : int re_has_decimal;
1208 : int im_has_decimal;
1209 97 : int precision, default_precision = 6;
1210 97 : Py_UCS4 type = format->type;
1211 : Py_ssize_t i_re;
1212 : Py_ssize_t i_im;
1213 : NumberFieldWidths re_spec;
1214 : NumberFieldWidths im_spec;
1215 97 : int flags = 0;
1216 97 : int result = -1;
1217 97 : Py_UCS4 maxchar = 127;
1218 : int rkind;
1219 : void *rdata;
1220 97 : Py_UCS4 re_sign_char = '\0';
1221 97 : Py_UCS4 im_sign_char = '\0';
1222 : int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
1223 : int im_float_type;
1224 97 : int add_parens = 0;
1225 97 : int skip_re = 0;
1226 : Py_ssize_t lpad;
1227 : Py_ssize_t rpad;
1228 : Py_ssize_t total;
1229 97 : PyObject *re_unicode_tmp = NULL;
1230 97 : PyObject *im_unicode_tmp = NULL;
1231 :
1232 : /* Locale settings, either from the actual locale or
1233 : from a hard-coded pseudo-locale */
1234 97 : LocaleInfo locale = LocaleInfo_STATIC_INIT;
1235 :
/* format->precision is wider than int; reject values that the (int)
   cast below could not represent. */
1236 97 : if (format->precision > INT_MAX) {
1237 1 : PyErr_SetString(PyExc_ValueError, "precision too big");
1238 1 : goto done;
1239 : }
1240 96 : precision = (int)format->precision;
1241 :
1242 : /* Zero padding is not allowed. */
1243 96 : if (format->fill_char == '0') {
1244 1 : PyErr_SetString(PyExc_ValueError,
1245 : "Zero padding is not allowed in complex format "
1246 : "specifier");
1247 1 : goto done;
1248 : }
1249 :
1250 : /* Neither is '=' alignment. */
1251 95 : if (format->align == '=') {
1252 1 : PyErr_SetString(PyExc_ValueError,
1253 : "'=' alignment flag is not allowed in complex format "
1254 : "specifier");
1255 1 : goto done;
1256 : }
1257 :
/* -1.0 is also a valid component value, so it only signals failure when
   an exception is pending. */
1258 94 : re = PyComplex_RealAsDouble(value);
1259 94 : if (re == -1.0 && PyErr_Occurred())
1260 0 : goto done;
1261 94 : im = PyComplex_ImagAsDouble(value);
1262 94 : if (im == -1.0 && PyErr_Occurred())
1263 0 : goto done;
1264 :
1265 94 : if (format->alternate)
1266 11 : flags |= Py_DTSF_ALT;
1267 94 : if (format->no_neg_0)
1268 4 : flags |= Py_DTSF_NO_NEG_0;
1269 :
1270 94 : if (type == '\0') {
1271 : /* Omitted type specifier. Should be like str(self). */
1272 25 : type = 'r';
1273 25 : default_precision = 0;
/* copysign() distinguishes +0.0 from -0.0: only a positive-zero real part
   is dropped; every other value keeps both parts, inside parentheses. */
1274 25 : if (re == 0.0 && copysign(1.0, re) == 1.0)
1275 6 : skip_re = 1;
1276 : else
1277 19 : add_parens = 1;
1278 : }
1279 :
1280 94 : if (type == 'n')
1281 : /* 'n' is the same as 'g', except for the locale used to
1282 : format the result. We take care of that later. */
1283 0 : type = 'g';
1284 :
/* A negative precision selects the default for the type. An explicit
   precision combined with the implicit 'r' type is formatted as 'g'. */
1285 94 : if (precision < 0)
1286 63 : precision = default_precision;
1287 31 : else if (type == 'r')
1288 1 : type = 'g';
1289 :
1290 : /* Cast "type", because if we're in unicode we need to pass an
1291 : 8-bit char. This is safe, because we've restricted what "type"
1292 : can be. */
1293 94 : re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
1294 : &re_float_type);
1295 94 : if (re_buf == NULL)
1296 0 : goto done;
1297 94 : im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
1298 : &im_float_type);
1299 94 : if (im_buf == NULL)
1300 0 : goto done;
1301 :
1302 94 : n_re_digits = strlen(re_buf);
1303 94 : n_im_digits = strlen(im_buf);
1304 :
1305 : /* Since there is no unicode version of PyOS_double_to_string,
1306 : just use the 8 bit version and then convert to unicode. */
1307 94 : re_unicode_tmp = _PyUnicode_FromASCII(re_buf, n_re_digits);
1308 94 : if (re_unicode_tmp == NULL)
1309 0 : goto done;
1310 94 : i_re = 0;
1311 :
1312 94 : im_unicode_tmp = _PyUnicode_FromASCII(im_buf, n_im_digits);
1313 94 : if (im_unicode_tmp == NULL)
1314 0 : goto done;
1315 94 : i_im = 0;
1316 :
1317 : /* Is a sign character present in the output? If so, remember it
1318 : and skip it */
1319 94 : if (PyUnicode_READ_CHAR(re_unicode_tmp, i_re) == '-') {
1320 19 : re_sign_char = '-';
1321 19 : ++i_re;
1322 19 : --n_re_digits;
1323 : }
1324 94 : if (PyUnicode_READ_CHAR(im_unicode_tmp, i_im) == '-') {
1325 19 : im_sign_char = '-';
1326 19 : ++i_im;
1327 19 : --n_im_digits;
1328 : }
1329 :
1330 : /* Determine if we have any "remainder" (after the digits, might include
1331 : decimal or exponent or both (or neither)) */
1332 94 : parse_number(re_unicode_tmp, i_re, i_re + n_re_digits,
1333 : &n_re_remainder, &re_has_decimal);
1334 94 : parse_number(im_unicode_tmp, i_im, i_im + n_im_digits,
1335 : &n_im_remainder, &im_has_decimal);
1336 :
1337 : /* Determine the grouping, separator, and decimal point, if any. */
/* Uses format->type (the caller's original type), because the local `type`
   has already turned 'n' into 'g'. */
1338 94 : if (get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
1339 : format->thousands_separators,
1340 : &locale) == -1)
1341 0 : goto done;
1342 :
1343 : /* Turn off any padding. We'll do it later after we've composed
1344 : the numbers without padding. */
1345 94 : tmp_format.fill_char = '\0';
1346 94 : tmp_format.align = '<';
1347 94 : tmp_format.width = -1;
1348 :
1349 : /* Calculate how much memory we'll need. */
1350 94 : n_re_total = calc_number_widths(&re_spec, 0, re_sign_char,
1351 : i_re, i_re + n_re_digits, n_re_remainder,
1352 : re_has_decimal, &locale, &tmp_format,
1353 : &maxchar);
1354 94 : if (n_re_total == -1) {
1355 0 : goto done;
1356 : }
1357 :
1358 : /* Same formatting, but always include a sign, unless the real part is
1359 : * going to be omitted, in which case we use whatever sign convention was
1360 : * requested by the original format. */
/* Order matters: tmp_format.sign is changed only after the real part's
   widths have been computed with the caller's sign option. */
1361 94 : if (!skip_re)
1362 88 : tmp_format.sign = '+';
1363 94 : n_im_total = calc_number_widths(&im_spec, 0, im_sign_char,
1364 : i_im, i_im + n_im_digits, n_im_remainder,
1365 : im_has_decimal, &locale, &tmp_format,
1366 : &maxchar);
1367 94 : if (n_im_total == -1) {
1368 0 : goto done;
1369 : }
1370 :
/* An omitted real part contributes no characters to the total. */
1371 94 : if (skip_re)
1372 6 : n_re_total = 0;
1373 :
1374 : /* Add 1 for the 'j', and optionally 2 for parens. */
1375 94 : calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
1376 : format->width, format->align, &lpad, &rpad, &total);
1377 :
/* The fill character only affects the required character width when some
   padding is actually emitted. */
1378 94 : if (lpad || rpad)
1379 24 : maxchar = Py_MAX(maxchar, format->fill_char);
1380 :
1381 94 : if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1)
1382 0 : goto done;
/* Read kind/data only after the Prepare call above.
   NOTE(review): presumably Prepare may reallocate or widen the writer's
   buffer, which would make earlier copies stale - confirm. */
1383 94 : rkind = writer->kind;
1384 94 : rdata = writer->data;
1385 :
1386 : /* Populate the memory. First, the padding. */
1387 94 : result = fill_padding(writer,
1388 94 : n_re_total + n_im_total + 1 + add_parens * 2,
1389 : format->fill_char, lpad, rpad);
1390 94 : if (result == -1)
1391 0 : goto done;
1392 :
1393 94 : if (add_parens) {
1394 19 : PyUnicode_WRITE(rkind, rdata, writer->pos, '(');
1395 19 : writer->pos++;
1396 : }
1397 :
1398 94 : if (!skip_re) {
1399 88 : result = fill_number(writer, &re_spec,
1400 : re_unicode_tmp, i_re,
1401 : NULL, 0,
1402 : 0,
1403 : &locale, 0);
1404 88 : if (result == -1)
1405 0 : goto done;
1406 : }
1407 94 : result = fill_number(writer, &im_spec,
1408 : im_unicode_tmp, i_im,
1409 : NULL, 0,
1410 : 0,
1411 : &locale, 0);
1412 94 : if (result == -1)
1413 0 : goto done;
1414 94 : PyUnicode_WRITE(rkind, rdata, writer->pos, 'j');
1415 94 : writer->pos++;
1416 :
1417 94 : if (add_parens) {
1418 19 : PyUnicode_WRITE(rkind, rdata, writer->pos, ')');
1419 19 : writer->pos++;
1420 : }
1421 :
/* NOTE(review): presumably fill_padding() has already written the
   right-hand fill characters, so the position only needs to be moved past
   them here - confirm against fill_padding(). */
1422 94 : writer->pos += rpad;
1423 :
/* Shared cleanup for the success path and for every error exit; the
   buffers and temporaries may still be NULL when an early check failed. */
1424 97 : done:
1425 97 : PyMem_Free(re_buf);
1426 97 : PyMem_Free(im_buf);
1427 97 : Py_XDECREF(re_unicode_tmp);
1428 97 : Py_XDECREF(im_unicode_tmp);
1429 97 : free_locale_info(&locale);
1430 97 : return result;
1431 : }
1432 :
1433 : /************************************************************************/
1434 : /*********** built in formatters ****************************************/
1435 : /************************************************************************/
/* Append str(obj) to `writer`.

   Returns -1 if PyObject_Str() fails; otherwise returns the result of
   _PyUnicodeWriter_WriteStr(). The temporary string is released before
   returning. */
1436 : static int
1437 16546 : format_obj(PyObject *obj, _PyUnicodeWriter *writer)
1438 : {
1439 : PyObject *str;
1440 : int err;
1441 :
1442 16546 : str = PyObject_Str(obj);
1443 16546 : if (str == NULL)
1444 0 : return -1;
1445 16546 : err = _PyUnicodeWriter_WriteStr(writer, str);
1446 16546 : Py_DECREF(str);
1447 16546 : return err;
1448 : }
1449 :
/* Format the str object `obj` using the slice [start, end) of `format_spec`
   and append the result to `writer`.

   An empty slice (start == end) is treated like str(obj): an exact str is
   written as-is, a subclass instance goes through format_obj().
   The only presentation type handled is 's'; any other type raises
   ValueError via unknown_presentation_type().

   Returns -1 when parsing the spec fails or the type is unknown; otherwise
   returns the result of the writer/formatting call that was made. */
1450 : int
1451 559571 : _PyUnicode_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1452 : PyObject *obj,
1453 : PyObject *format_spec,
1454 : Py_ssize_t start, Py_ssize_t end)
1455 : {
1456 : InternalFormatSpec format;
1457 :
1458 559571 : assert(PyUnicode_Check(obj));
1459 :
1460 : /* check for the special case of zero length format spec, make
1461 : it equivalent to str(obj) */
1462 559571 : if (start == end) {
1463 551598 : if (PyUnicode_CheckExact(obj))
1464 551587 : return _PyUnicodeWriter_WriteStr(writer, obj);
1465 : else
1466 11 : return format_obj(obj, writer);
1467 : }
1468 :
1469 : /* parse the format_spec */
/* NOTE(review): the trailing 's' and '<' arguments are presumably the
   default presentation type and default alignment - confirm against
   parse_internal_render_format_spec(). */
1470 7973 : if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1471 : &format, 's', '<'))
1472 4 : return -1;
1473 :
1474 : /* type conversion? */
1475 7969 : switch (format.type) {
1476 7969 : case 's':
1477 : /* no type conversion needed, already a string. do the formatting */
1478 7969 : return format_string_internal(obj, &format, writer);
1479 0 : default:
1480 : /* unknown */
1481 0 : unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1482 0 : return -1;
1483 : }
1484 : }
1485 :
/* Format the int object `obj` using the slice [start, end) of `format_spec`
   and append the result to `writer`.

   An empty slice (start == end) is treated like str(obj): an exact int is
   written in base 10 by _PyLong_FormatWriter(), a subclass instance goes
   through format_obj().
   Integer presentation types ('b', 'c', 'd', 'o', 'x', 'X', 'n') are
   handled by format_long_internal(). Float presentation types ('e', 'E',
   'f', 'F', 'g', 'G', '%') first convert `obj` with PyNumber_Float() and
   then use format_float_internal(). Any other type raises ValueError via
   unknown_presentation_type().

   Returns -1 when parsing, conversion or type lookup fails (`result` starts
   out as -1); otherwise returns the result of the formatting call. */
1486 : int
1487 4122990 : _PyLong_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1488 : PyObject *obj,
1489 : PyObject *format_spec,
1490 : Py_ssize_t start, Py_ssize_t end)
1491 : {
/* Holds the float produced for the float presentation types; stays NULL
   on every other path and is released at `done`. */
1492 4122990 : PyObject *tmp = NULL;
1493 : InternalFormatSpec format;
1494 4122990 : int result = -1;
1495 :
1496 : /* check for the special case of zero length format spec, make
1497 : it equivalent to str(obj) */
1498 4122990 : if (start == end) {
1499 288813 : if (PyLong_CheckExact(obj))
1500 288502 : return _PyLong_FormatWriter(writer, obj, 10, 0);
1501 : else
1502 311 : return format_obj(obj, writer);
1503 : }
1504 :
1505 : /* parse the format_spec */
/* NOTE(review): the trailing 'd' and '>' arguments are presumably the
   default presentation type and default alignment - confirm against
   parse_internal_render_format_spec(). */
1506 3834180 : if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1507 : &format, 'd', '>'))
1508 35 : goto done;
1509 :
1510 : /* type conversion? */
1511 3834140 : switch (format.type) {
1512 493265 : case 'b':
1513 : case 'c':
1514 : case 'd':
1515 : case 'o':
1516 : case 'x':
1517 : case 'X':
1518 : case 'n':
1519 : /* no type conversion needed, already an int. do the formatting */
1520 493265 : result = format_long_internal(obj, &format, writer);
1521 493265 : break;
1522 :
1523 3340570 : case 'e':
1524 : case 'E':
1525 : case 'f':
1526 : case 'F':
1527 : case 'g':
1528 : case 'G':
1529 : case '%':
1530 : /* convert to float */
1531 3340570 : tmp = PyNumber_Float(obj);
1532 3340570 : if (tmp == NULL)
1533 0 : goto done;
1534 3340570 : result = format_float_internal(tmp, &format, writer);
1535 3340570 : break;
1536 :
1537 306 : default:
1538 : /* unknown */
1539 306 : unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1540 306 : goto done;
1541 : }
1542 :
1543 3834180 : done:
1544 3834180 : Py_XDECREF(tmp);
1545 3834180 : return result;
1546 : }
1547 :
/* Format the float object `obj` using the slice [start, end) of
   `format_spec` and append the result to `writer`.

   An empty slice (start == end) is treated like str(obj) via format_obj().
   The presentation types '\0' (omitted), 'e', 'E', 'f', 'F', 'g', 'G', 'n'
   and '%' are handled by format_float_internal(); any other type raises
   ValueError via unknown_presentation_type().

   Returns -1 when parsing the spec fails or the type is unknown; otherwise
   returns the result of the formatting call. */
1548 : int
1549 25667 : _PyFloat_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1550 : PyObject *obj,
1551 : PyObject *format_spec,
1552 : Py_ssize_t start, Py_ssize_t end)
1553 : {
1554 : InternalFormatSpec format;
1555 :
1556 : /* check for the special case of zero length format spec, make
1557 : it equivalent to str(obj) */
1558 25667 : if (start == end)
1559 16213 : return format_obj(obj, writer);
1560 :
1561 : /* parse the format_spec */
/* NOTE(review): the trailing '\0' and '>' arguments are presumably the
   default presentation type and default alignment - confirm against
   parse_internal_render_format_spec(). */
1562 9454 : if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1563 : &format, '\0', '>'))
1564 6 : return -1;
1565 :
1566 : /* type conversion? */
1567 9448 : switch (format.type) {
1568 9349 : case '\0': /* No format code: like 'g', but with at least one decimal. */
1569 : case 'e':
1570 : case 'E':
1571 : case 'f':
1572 : case 'F':
1573 : case 'g':
1574 : case 'G':
1575 : case 'n':
1576 : case '%':
1577 : /* no conversion, already a float. do the formatting */
1578 9349 : return format_float_internal(obj, &format, writer);
1579 :
1580 99 : default:
1581 : /* unknown */
1582 99 : unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1583 99 : return -1;
1584 : }
1585 : }
1586 :
/* Format the complex object `obj` using the slice [start, end) of
   `format_spec` and append the result to `writer`.

   An empty slice (start == end) is treated like str(obj) via format_obj().
   The presentation types '\0' (omitted), 'e', 'E', 'f', 'F', 'g', 'G' and
   'n' are handled by format_complex_internal(); any other type (including
   '%', which is not listed here) raises ValueError via
   unknown_presentation_type().

   Returns -1 when parsing the spec fails or the type is unknown; otherwise
   returns the result of the formatting call. */
1587 : int
1588 118 : _PyComplex_FormatAdvancedWriter(_PyUnicodeWriter *writer,
1589 : PyObject *obj,
1590 : PyObject *format_spec,
1591 : Py_ssize_t start, Py_ssize_t end)
1592 : {
1593 : InternalFormatSpec format;
1594 :
1595 : /* check for the special case of zero length format spec, make
1596 : it equivalent to str(obj) */
1597 118 : if (start == end)
1598 11 : return format_obj(obj, writer);
1599 :
1600 : /* parse the format_spec */
/* NOTE(review): the trailing '\0' and '>' arguments are presumably the
   default presentation type and default alignment - confirm against
   parse_internal_render_format_spec(). */
1601 107 : if (!parse_internal_render_format_spec(obj, format_spec, start, end,
1602 : &format, '\0', '>'))
1603 4 : return -1;
1604 :
1605 : /* type conversion? */
1606 103 : switch (format.type) {
1607 97 : case '\0': /* No format code: formatted like str(self), see format_complex_internal. */
1608 : case 'e':
1609 : case 'E':
1610 : case 'f':
1611 : case 'F':
1612 : case 'g':
1613 : case 'G':
1614 : case 'n':
1615 : /* no conversion, already a complex. do the formatting */
1616 97 : return format_complex_internal(obj, &format, writer);
1617 :
1618 6 : default:
1619 : /* unknown */
1620 6 : unknown_presentation_type(format.type, Py_TYPE(obj)->tp_name);
1621 6 : return -1;
1622 : }
1623 : }
|