Line data Source code
1 : /*
2 : unicode_format.h -- implementation of str.format().
3 : */
4 :
5 : #include "pycore_floatobject.h" // _PyFloat_FormatAdvancedWriter()
6 :
7 : /************************************************************************/
8 : /*********** Global data structures and forward declarations *********/
9 : /************************************************************************/
10 :
11 : /*
12 : A SubString consists of the characters between two string or
13 : unicode pointers.
14 : */
15 : typedef struct {
16 : PyObject *str; /* borrowed reference */
17 : Py_ssize_t start, end;
18 : } SubString;
19 :
20 :
/* Which field-numbering mode a format string has committed to. */
typedef enum {
    ANS_INIT = 0,   /* no numerically indexed field has been seen yet */
    ANS_AUTO,       /* field names are omitted; indices are assigned */
    ANS_MANUAL      /* field names carry explicit numeric indices */
} AutoNumberState;
26 :
27 : /* Keeps track of our auto-numbering state, and which number field we're on */
28 : typedef struct {
29 : AutoNumberState an_state;
30 : int an_field_number;
31 : } AutoNumber;
32 :
33 :
34 : /* forward declaration for recursion */
35 : static PyObject *
36 : build_string(SubString *input, PyObject *args, PyObject *kwargs,
37 : int recursion_depth, AutoNumber *auto_number);
38 :
39 :
40 :
41 : /************************************************************************/
42 : /************************** Utility functions ************************/
43 : /************************************************************************/
44 :
45 : static void
46 495261 : AutoNumber_Init(AutoNumber *auto_number)
47 : {
48 495261 : auto_number->an_state = ANS_INIT;
49 495261 : auto_number->an_field_number = 0;
50 495261 : }
51 :
52 : /* fill in a SubString from a pointer and length */
53 : Py_LOCAL_INLINE(void)
54 9602700 : SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
55 : {
56 9602700 : str->str = s;
57 9602700 : str->start = start;
58 9602700 : str->end = end;
59 9602700 : }
60 :
61 : /* return a new string. if str->str is NULL, return None */
62 : Py_LOCAL_INLINE(PyObject *)
63 27402 : SubString_new_object(SubString *str)
64 : {
65 27402 : if (str->str == NULL)
66 298 : Py_RETURN_NONE;
67 27104 : return PyUnicode_Substring(str->str, str->start, str->end);
68 : }
69 :
70 : /* return a new string. if str->str is NULL, return a new empty string */
71 : Py_LOCAL_INLINE(PyObject *)
72 290 : SubString_new_object_or_empty(SubString *str)
73 : {
74 290 : if (str->str == NULL) {
75 261 : return PyUnicode_New(0, 0);
76 : }
77 29 : return SubString_new_object(str);
78 : }
79 :
80 : /* Return 1 if an error has been detected switching between automatic
81 : field numbering and manual field specification, else return 0. Set
82 : ValueError on error. */
83 : static int
84 1027530 : autonumber_state_error(AutoNumberState state, int field_name_is_empty)
85 : {
86 1027530 : if (state == ANS_MANUAL) {
87 152428 : if (field_name_is_empty) {
88 2 : PyErr_SetString(PyExc_ValueError, "cannot switch from "
89 : "manual field specification to "
90 : "automatic field numbering");
91 2 : return 1;
92 : }
93 : }
94 : else {
95 875099 : if (!field_name_is_empty) {
96 2 : PyErr_SetString(PyExc_ValueError, "cannot switch from "
97 : "automatic field numbering to "
98 : "manual field specification");
99 2 : return 1;
100 : }
101 : }
102 1027520 : return 0;
103 : }
104 :
105 :
106 : /************************************************************************/
107 : /*********** Format string parsing -- integers and identifiers *********/
108 : /************************************************************************/
109 :
110 : static Py_ssize_t
111 1050350 : get_integer(const SubString *str)
112 : {
113 1050350 : Py_ssize_t accumulator = 0;
114 : Py_ssize_t digitval;
115 : Py_ssize_t i;
116 :
117 : /* empty string is an error */
118 1050350 : if (str->start >= str->end)
119 875099 : return -1;
120 :
121 328281 : for (i = str->start; i < str->end; i++) {
122 175311 : digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
123 175311 : if (digitval < 0)
124 22281 : return -1;
125 : /*
126 : Detect possible overflow before it happens:
127 :
128 : accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
129 : accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
130 : */
131 153030 : if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
132 3 : PyErr_Format(PyExc_ValueError,
133 : "Too many decimal digits in format string");
134 3 : return -1;
135 : }
136 153027 : accumulator = accumulator * 10 + digitval;
137 : }
138 152970 : return accumulator;
139 : }
140 :
141 : /************************************************************************/
142 : /******** Functions to get field objects and specification strings ******/
143 : /************************************************************************/
144 :
145 : /* do the equivalent of obj.name */
146 : static PyObject *
147 4060 : getattr(PyObject *obj, SubString *name)
148 : {
149 : PyObject *newobj;
150 4060 : PyObject *str = SubString_new_object(name);
151 4060 : if (str == NULL)
152 0 : return NULL;
153 4060 : newobj = PyObject_GetAttr(obj, str);
154 4060 : Py_DECREF(str);
155 4060 : return newobj;
156 : }
157 :
158 : /* do the equivalent of obj[idx], where obj is a sequence */
159 : static PyObject *
160 495 : getitem_sequence(PyObject *obj, Py_ssize_t idx)
161 : {
162 495 : return PySequence_GetItem(obj, idx);
163 : }
164 :
165 : /* do the equivalent of obj[idx], where obj is not a sequence */
166 : static PyObject *
167 1 : getitem_idx(PyObject *obj, Py_ssize_t idx)
168 : {
169 : PyObject *newobj;
170 1 : PyObject *idx_obj = PyLong_FromSsize_t(idx);
171 1 : if (idx_obj == NULL)
172 0 : return NULL;
173 1 : newobj = PyObject_GetItem(obj, idx_obj);
174 1 : Py_DECREF(idx_obj);
175 1 : return newobj;
176 : }
177 :
178 : /* do the equivalent of obj[name] */
179 : static PyObject *
180 37 : getitem_str(PyObject *obj, SubString *name)
181 : {
182 : PyObject *newobj;
183 37 : PyObject *str = SubString_new_object(name);
184 37 : if (str == NULL)
185 0 : return NULL;
186 37 : newobj = PyObject_GetItem(obj, str);
187 37 : Py_DECREF(str);
188 37 : return newobj;
189 : }
190 :
191 : typedef struct {
192 : /* the entire string we're parsing. we assume that someone else
193 : is managing its lifetime, and that it will exist for the
194 : lifetime of the iterator. can be empty */
195 : SubString str;
196 :
197 : /* index to where we are inside field_name */
198 : Py_ssize_t index;
199 : } FieldNameIterator;
200 :
201 :
202 : static int
203 1049810 : FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
204 : Py_ssize_t start, Py_ssize_t end)
205 : {
206 1049810 : SubString_init(&self->str, s, start, end);
207 1049810 : self->index = start;
208 1049810 : return 1;
209 : }
210 :
211 : static int
212 4068 : _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
213 : {
214 : Py_UCS4 c;
215 :
216 4068 : name->str = self->str.str;
217 4068 : name->start = self->index;
218 :
219 : /* return everything until '.' or '[' */
220 24408 : while (self->index < self->str.end) {
221 20348 : c = PyUnicode_READ_CHAR(self->str.str, self->index++);
222 20348 : switch (c) {
223 8 : case '[':
224 : case '.':
225 : /* backup so that we this character will be seen next time */
226 8 : self->index--;
227 8 : break;
228 20340 : default:
229 20340 : continue;
230 : }
231 8 : break;
232 : }
233 : /* end of string is okay */
234 4068 : name->end = self->index;
235 4068 : return 1;
236 : }
237 :
238 : static int
239 541 : _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
240 : {
241 541 : int bracket_seen = 0;
242 : Py_UCS4 c;
243 :
244 541 : name->str = self->str.str;
245 541 : name->start = self->index;
246 :
247 : /* return everything until ']' */
248 1302 : while (self->index < self->str.end) {
249 1302 : c = PyUnicode_READ_CHAR(self->str.str, self->index++);
250 1302 : switch (c) {
251 541 : case ']':
252 541 : bracket_seen = 1;
253 541 : break;
254 761 : default:
255 761 : continue;
256 : }
257 541 : break;
258 : }
259 : /* make sure we ended with a ']' */
260 541 : if (!bracket_seen) {
261 0 : PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
262 0 : return 0;
263 : }
264 :
265 : /* end of string is okay */
266 : /* don't include the ']' */
267 541 : name->end = self->index-1;
268 541 : return 1;
269 : }
270 :
271 : /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
272 : static int
273 1054380 : FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
274 : Py_ssize_t *name_idx, SubString *name)
275 : {
276 : /* check at end of input */
277 1054380 : if (self->index >= self->str.end)
278 1049770 : return 1;
279 :
280 4611 : switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
281 4068 : case '.':
282 4068 : *is_attribute = 1;
283 4068 : if (_FieldNameIterator_attr(self, name) == 0)
284 0 : return 0;
285 4068 : *name_idx = -1;
286 4068 : break;
287 541 : case '[':
288 541 : *is_attribute = 0;
289 541 : if (_FieldNameIterator_item(self, name) == 0)
290 0 : return 0;
291 541 : *name_idx = get_integer(name);
292 541 : if (*name_idx == -1 && PyErr_Occurred())
293 1 : return 0;
294 540 : break;
295 2 : default:
296 : /* Invalid character follows ']' */
297 2 : PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
298 : "follow ']' in format field specifier");
299 2 : return 0;
300 : }
301 :
302 : /* empty string is an error */
303 4608 : if (name->start == name->end) {
304 3 : PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
305 3 : return 0;
306 : }
307 :
308 4605 : return 2;
309 : }
310 :
311 :
312 : /* input: field_name
313 : output: 'first' points to the part before the first '[' or '.'
314 : 'first_idx' is -1 if 'first' is not an integer, otherwise
315 : it's the value of first converted to an integer
316 : 'rest' is an iterator to return the rest
317 : */
318 : static int
319 1049810 : field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
320 : Py_ssize_t *first_idx, FieldNameIterator *rest,
321 : AutoNumber *auto_number)
322 : {
323 : Py_UCS4 c;
324 1049810 : Py_ssize_t i = start;
325 : int field_name_is_empty;
326 : int using_numeric_index;
327 :
328 : /* find the part up until the first '.' or '[' */
329 1363210 : while (i < end) {
330 317985 : switch (c = PyUnicode_READ_CHAR(str, i++)) {
331 4588 : case '[':
332 : case '.':
333 : /* backup so that we this character is available to the
334 : "rest" iterator */
335 4588 : i--;
336 4588 : break;
337 313397 : default:
338 313397 : continue;
339 : }
340 4588 : break;
341 : }
342 :
343 : /* set up the return values */
344 1049810 : SubString_init(first, str, start, i);
345 1049810 : FieldNameIterator_init(rest, str, i, end);
346 :
347 : /* see if "first" is an integer, in which case it's used as an index */
348 1049810 : *first_idx = get_integer(first);
349 1049810 : if (*first_idx == -1 && PyErr_Occurred())
350 2 : return 0;
351 :
352 1049810 : field_name_is_empty = first->start >= first->end;
353 :
354 : /* If the field name is omitted or if we have a numeric index
355 : specified, then we're doing numeric indexing into args. */
356 1049810 : using_numeric_index = field_name_is_empty || *first_idx != -1;
357 :
358 : /* We always get here exactly one time for each field we're
359 : processing. And we get here in field order (counting by left
360 : braces). So this is the perfect place to handle automatic field
361 : numbering if the field name is omitted. */
362 :
363 : /* Check if we need to do the auto-numbering. It's not needed if
364 : we're called from string.Format routines, because it's handled
365 : in that class by itself. */
366 1049810 : if (auto_number) {
367 : /* Initialize our auto numbering state if this is the first
368 : time we're either auto-numbering or manually numbering. */
369 1049570 : if (auto_number->an_state == ANS_INIT && using_numeric_index)
370 483605 : auto_number->an_state = field_name_is_empty ?
371 483605 : ANS_AUTO : ANS_MANUAL;
372 :
373 : /* Make sure our state is consistent with what we're doing
374 : this time through. Only check if we're using a numeric
375 : index. */
376 1049570 : if (using_numeric_index)
377 1027530 : if (autonumber_state_error(auto_number->an_state,
378 : field_name_is_empty))
379 4 : return 0;
380 : /* Zero length field means we want to do auto-numbering of the
381 : fields. */
382 1049570 : if (field_name_is_empty)
383 875097 : *first_idx = (auto_number->an_field_number)++;
384 : }
385 :
386 1049810 : return 1;
387 : }
388 :
389 :
390 : /*
391 : get_field_object returns the object inside {}, before the
392 : format_spec. It handles getindex and getattr lookups and consumes
393 : the entire input string.
394 : */
395 : static PyObject *
396 1049570 : get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
397 : AutoNumber *auto_number)
398 : {
399 1049570 : PyObject *obj = NULL;
400 : int ok;
401 : int is_attribute;
402 : SubString name;
403 : SubString first;
404 : Py_ssize_t index;
405 : FieldNameIterator rest;
406 :
407 1049570 : if (!field_name_split(input->str, input->start, input->end, &first,
408 : &index, &rest, auto_number)) {
409 6 : goto error;
410 : }
411 :
412 1049570 : if (index == -1) {
413 : /* look up in kwargs */
414 22043 : PyObject *key = SubString_new_object(&first);
415 22043 : if (key == NULL) {
416 0 : goto error;
417 : }
418 22043 : if (kwargs == NULL) {
419 0 : PyErr_SetObject(PyExc_KeyError, key);
420 0 : Py_DECREF(key);
421 0 : goto error;
422 : }
423 : /* Use PyObject_GetItem instead of PyDict_GetItem because this
424 : code is no longer just used with kwargs. It might be passed
425 : a non-dict when called through format_map. */
426 22043 : obj = PyObject_GetItem(kwargs, key);
427 22043 : Py_DECREF(key);
428 22043 : if (obj == NULL) {
429 10 : goto error;
430 : }
431 : }
432 : else {
433 : /* If args is NULL, we have a format string with a positional field
434 : with only kwargs to retrieve it from. This can only happen when
435 : used with format_map(), where positional arguments are not
436 : allowed. */
437 1027520 : if (args == NULL) {
438 3 : PyErr_SetString(PyExc_ValueError, "Format string contains "
439 : "positional fields");
440 3 : goto error;
441 : }
442 :
443 : /* look up in args */
444 1027520 : obj = PySequence_GetItem(args, index);
445 1027520 : if (obj == NULL) {
446 6 : PyErr_Format(PyExc_IndexError,
447 : "Replacement index %zd out of range for positional "
448 : "args tuple",
449 : index);
450 6 : goto error;
451 : }
452 : }
453 :
454 : /* iterate over the rest of the field_name */
455 1054140 : while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
456 : &name)) == 2) {
457 : PyObject *tmp;
458 :
459 4593 : if (is_attribute)
460 : /* getattr lookup "." */
461 4060 : tmp = getattr(obj, &name);
462 : else
463 : /* getitem lookup "[]" */
464 533 : if (index == -1)
465 37 : tmp = getitem_str(obj, &name);
466 : else
467 496 : if (PySequence_Check(obj))
468 495 : tmp = getitem_sequence(obj, index);
469 : else
470 : /* not a sequence */
471 1 : tmp = getitem_idx(obj, index);
472 4593 : if (tmp == NULL)
473 1 : goto error;
474 :
475 : /* assign to obj */
476 4592 : Py_DECREF(obj);
477 4592 : obj = tmp;
478 : }
479 : /* end of iterator, this is the non-error case */
480 1049550 : if (ok == 1)
481 1049540 : return obj;
482 6 : error:
483 32 : Py_XDECREF(obj);
484 32 : return NULL;
485 : }
486 :
487 : /************************************************************************/
488 : /***************** Field rendering functions **************************/
489 : /************************************************************************/
490 :
491 : /*
492 : render_field() is the main function in this section. It takes the
493 : field object and field specification string generated by
494 : get_field_and_spec, and renders the field into the output string.
495 :
496 : render_field calls fieldobj.__format__(format_spec) method, and
497 : appends to the output.
498 : */
499 : static int
500 1048240 : render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
501 : {
502 1048240 : int ok = 0;
503 1048240 : PyObject *result = NULL;
504 1048240 : PyObject *format_spec_object = NULL;
505 1048240 : int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
506 : int err;
507 :
508 : /* If we know the type exactly, skip the lookup of __format__ and just
509 : call the formatter directly. */
510 1048240 : if (PyUnicode_CheckExact(fieldobj))
511 551640 : formatter = _PyUnicode_FormatAdvancedWriter;
512 496604 : else if (PyLong_CheckExact(fieldobj))
513 479743 : formatter = _PyLong_FormatAdvancedWriter;
514 16861 : else if (PyFloat_CheckExact(fieldobj))
515 16011 : formatter = _PyFloat_FormatAdvancedWriter;
516 850 : else if (PyComplex_CheckExact(fieldobj))
517 6 : formatter = _PyComplex_FormatAdvancedWriter;
518 :
519 1048240 : if (formatter) {
520 : /* we know exactly which formatter will be called when __format__ is
521 : looked up, so call it directly, instead. */
522 1047400 : err = formatter(writer, fieldobj, format_spec->str,
523 : format_spec->start, format_spec->end);
524 1047400 : return (err == 0);
525 : }
526 : else {
527 : /* We need to create an object out of the pointers we have, because
528 : __format__ takes a string/unicode object for format_spec. */
529 844 : if (format_spec->str)
530 88 : format_spec_object = PyUnicode_Substring(format_spec->str,
531 : format_spec->start,
532 : format_spec->end);
533 : else
534 756 : format_spec_object = PyUnicode_New(0, 0);
535 844 : if (format_spec_object == NULL)
536 0 : goto done;
537 :
538 844 : result = PyObject_Format(fieldobj, format_spec_object);
539 : }
540 844 : if (result == NULL)
541 5 : goto done;
542 :
543 839 : if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
544 0 : goto done;
545 839 : ok = 1;
546 :
547 844 : done:
548 844 : Py_XDECREF(format_spec_object);
549 844 : Py_XDECREF(result);
550 844 : return ok;
551 : }
552 :
553 : static int
554 1049880 : parse_field(SubString *str, SubString *field_name, SubString *format_spec,
555 : int *format_spec_needs_expanding, Py_UCS4 *conversion)
556 : {
557 : /* Note this function works if the field name is zero length,
558 : which is good. Zero length field names are handled later, in
559 : field_name_split. */
560 :
561 1049880 : Py_UCS4 c = 0;
562 :
563 : /* initialize these, as they may be empty */
564 1049880 : *conversion = '\0';
565 1049880 : SubString_init(format_spec, NULL, 0, 0);
566 :
567 : /* Search for the field name. it's terminated by the end of
568 : the string, or a ':' or '!' */
569 1049880 : field_name->str = str->str;
570 1049880 : field_name->start = str->start;
571 1389120 : while (str->start < str->end) {
572 1389110 : switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
573 2 : case '{':
574 2 : PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
575 2 : return 0;
576 552 : case '[':
577 1329 : for (; str->start < str->end; str->start++)
578 1324 : if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
579 547 : break;
580 552 : continue;
581 1049870 : case '}':
582 : case ':':
583 : case '!':
584 1049870 : break;
585 338691 : default:
586 338691 : continue;
587 : }
588 1049870 : break;
589 : }
590 :
591 1049880 : field_name->end = str->start - 1;
592 1049880 : if (c == '!' || c == ':') {
593 : Py_ssize_t count;
594 : /* we have a format specifier and/or a conversion */
595 : /* don't include the last character */
596 :
597 : /* see if there's a conversion specifier */
598 339129 : if (c == '!') {
599 : /* there must be another character present */
600 147618 : if (str->start >= str->end) {
601 0 : PyErr_SetString(PyExc_ValueError,
602 : "end of string while looking for conversion "
603 : "specifier");
604 0 : return 0;
605 : }
606 147618 : *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
607 :
608 147618 : if (str->start < str->end) {
609 147616 : c = PyUnicode_READ_CHAR(str->str, str->start++);
610 147616 : if (c == '}')
611 147593 : return 1;
612 23 : if (c != ':') {
613 2 : PyErr_SetString(PyExc_ValueError,
614 : "expected ':' after conversion specifier");
615 2 : return 0;
616 : }
617 : }
618 : }
619 191534 : format_spec->str = str->str;
620 191534 : format_spec->start = str->start;
621 191534 : count = 1;
622 728578 : while (str->start < str->end) {
623 728574 : switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
624 18283 : case '{':
625 18283 : *format_spec_needs_expanding = 1;
626 18283 : count++;
627 18283 : break;
628 209813 : case '}':
629 209813 : count--;
630 209813 : if (count == 0) {
631 191530 : format_spec->end = str->start - 1;
632 191530 : return 1;
633 : }
634 18283 : break;
635 500478 : default:
636 500478 : break;
637 : }
638 : }
639 :
640 4 : PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
641 4 : return 0;
642 : }
643 710748 : else if (c != '}') {
644 9 : PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
645 9 : return 0;
646 : }
647 :
648 710739 : return 1;
649 : }
650 :
651 : /************************************************************************/
652 : /******* Output string allocation and escape-to-markup processing ******/
653 : /************************************************************************/
654 :
655 : /* MarkupIterator breaks the string into pieces of either literal
656 : text, or things inside {} that need to be marked up. it is
657 : designed to make it easy to wrap a Python iterator around it, for
658 : use with the Formatter class */
659 :
660 : typedef struct {
661 : SubString str;
662 : } MarkupIterator;
663 :
664 : static int
665 513958 : MarkupIterator_init(MarkupIterator *self, PyObject *str,
666 : Py_ssize_t start, Py_ssize_t end)
667 : {
668 513958 : SubString_init(&self->str, str, start, end);
669 513958 : return 1;
670 : }
671 :
672 : /* returns 0 on error, 1 on non-error termination, and 2 if it got a
673 : string (or something to be expanded) */
674 : static int
675 1808580 : MarkupIterator_next(MarkupIterator *self, SubString *literal,
676 : int *field_present, SubString *field_name,
677 : SubString *format_spec, Py_UCS4 *conversion,
678 : int *format_spec_needs_expanding)
679 : {
680 : int at_end;
681 1808580 : Py_UCS4 c = 0;
682 : Py_ssize_t start;
683 : Py_ssize_t len;
684 1808580 : int markup_follows = 0;
685 :
686 : /* initialize all of the output variables */
687 1808580 : SubString_init(literal, NULL, 0, 0);
688 1808580 : SubString_init(field_name, NULL, 0, 0);
689 1808580 : SubString_init(format_spec, NULL, 0, 0);
690 1808580 : *conversion = '\0';
691 1808580 : *format_spec_needs_expanding = 0;
692 1808580 : *field_present = 0;
693 :
694 : /* No more input, end of iterator. This is the normal exit
695 : path. */
696 1808580 : if (self->str.start >= self->str.end)
697 512559 : return 1;
698 :
699 1296020 : start = self->str.start;
700 :
701 : /* First read any literal text. Read until the end of string, an
702 : escaped '{' or '}', or an unescaped '{'. In order to never
703 : allocate memory and so I can just pass pointers around, if
704 : there's an escaped '{' or '}' then we'll return the literal
705 : including the brace, but no format object. The next time
706 : through, we'll return the rest of the literal, skipping past
707 : the second consecutive brace. */
708 10181000 : while (self->str.start < self->str.end) {
709 9938720 : switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
710 1053720 : case '{':
711 : case '}':
712 1053720 : markup_follows = 1;
713 1053720 : break;
714 8884990 : default:
715 8884990 : continue;
716 : }
717 1053720 : break;
718 : }
719 :
720 1296020 : at_end = self->str.start >= self->str.end;
721 1296020 : len = self->str.start - start;
722 :
723 1297940 : if ((c == '}') && (at_end ||
724 1916 : (c != PyUnicode_READ_CHAR(self->str.str,
725 : self->str.start)))) {
726 11 : PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
727 : "in format string");
728 11 : return 0;
729 : }
730 1296010 : if (at_end && c == '{') {
731 4 : PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
732 : "in format string");
733 4 : return 0;
734 : }
735 1296010 : if (!at_end) {
736 1053710 : if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
737 : /* escaped } or {, skip it in the input. there is no
738 : markup object following us, just this literal text */
739 3831 : self->str.start++;
740 3831 : markup_follows = 0;
741 : }
742 : else
743 1049880 : len--;
744 : }
745 :
746 : /* record the literal text */
747 1296010 : literal->str = self->str.str;
748 1296010 : literal->start = start;
749 1296010 : literal->end = start + len;
750 :
751 1296010 : if (!markup_follows)
752 246127 : return 2;
753 :
754 : /* this is markup; parse the field */
755 1049880 : *field_present = 1;
756 1049880 : if (!parse_field(&self->str, field_name, format_spec,
757 : format_spec_needs_expanding, conversion))
758 17 : return 0;
759 1049860 : return 2;
760 : }
761 :
762 :
763 : /* do the !r or !s conversion on obj */
764 : static PyObject *
765 147584 : do_conversion(PyObject *obj, Py_UCS4 conversion)
766 : {
767 : /* XXX in pre-3.0, do we need to convert this to unicode, since it
768 : might have returned a string? */
769 147584 : switch (conversion) {
770 147501 : case 'r':
771 147501 : return PyObject_Repr(obj);
772 8 : case 's':
773 8 : return PyObject_Str(obj);
774 74 : case 'a':
775 74 : return PyObject_ASCII(obj);
776 1 : default:
777 1 : if (conversion > 32 && conversion < 127) {
778 : /* It's the ASCII subrange; casting to char is safe
779 : (assuming the execution character set is an ASCII
780 : superset). */
781 1 : PyErr_Format(PyExc_ValueError,
782 : "Unknown conversion specifier %c",
783 1 : (char)conversion);
784 : } else
785 0 : PyErr_Format(PyExc_ValueError,
786 : "Unknown conversion specifier \\x%x",
787 : (unsigned int)conversion);
788 1 : return NULL;
789 : }
790 : }
791 :
792 : /* given:
793 :
794 : {field_name!conversion:format_spec}
795 :
796 : compute the result and write it to output.
797 : format_spec_needs_expanding is an optimization. if it's false,
798 : just output the string directly, otherwise recursively expand the
799 : format_spec string.
800 :
801 : field_name is allowed to be zero length, in which case we
802 : are doing auto field numbering.
803 : */
804 :
805 : static int
806 1049570 : output_markup(SubString *field_name, SubString *format_spec,
807 : int format_spec_needs_expanding, Py_UCS4 conversion,
808 : _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
809 : int recursion_depth, AutoNumber *auto_number)
810 : {
811 1049570 : PyObject *tmp = NULL;
812 1049570 : PyObject *fieldobj = NULL;
813 : SubString expanded_format_spec;
814 : SubString *actual_format_spec;
815 1049570 : int result = 0;
816 :
817 : /* convert field_name to an object */
818 1049570 : fieldobj = get_field_object(field_name, args, kwargs, auto_number);
819 1049570 : if (fieldobj == NULL)
820 32 : goto done;
821 :
822 1049540 : if (conversion != '\0') {
823 147584 : tmp = do_conversion(fieldobj, conversion);
824 147584 : if (tmp == NULL || PyUnicode_READY(tmp) == -1)
825 1290 : goto done;
826 :
827 : /* do the assignment, transferring ownership: fieldobj = tmp */
828 146294 : Py_DECREF(fieldobj);
829 146294 : fieldobj = tmp;
830 146294 : tmp = NULL;
831 : }
832 :
833 : /* if needed, recursively compute the format_spec */
834 1048250 : if (format_spec_needs_expanding) {
835 18249 : tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
836 : auto_number);
837 18249 : if (tmp == NULL || PyUnicode_READY(tmp) == -1)
838 6 : goto done;
839 :
840 : /* note that in the case we're expanding the format string,
841 : tmp must be kept around until after the call to
842 : render_field. */
843 18243 : SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
844 18243 : actual_format_spec = &expanded_format_spec;
845 : }
846 : else
847 1030000 : actual_format_spec = format_spec;
848 :
849 1048240 : if (render_field(fieldobj, actual_format_spec, writer) == 0)
850 17 : goto done;
851 :
852 1048230 : result = 1;
853 :
854 1049570 : done:
855 1049570 : Py_XDECREF(fieldobj);
856 1049570 : Py_XDECREF(tmp);
857 :
858 1049570 : return result;
859 : }
860 :
861 : /*
862 : do_markup is the top-level loop for the format() method. It
863 : searches through the format string for escapes to markup codes, and
864 : calls other functions to move non-markup text to the output,
865 : and to perform the markup to the output.
866 : */
867 : static int
868 513508 : do_markup(SubString *input, PyObject *args, PyObject *kwargs,
869 : _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
870 : {
871 : MarkupIterator iter;
872 : int format_spec_needs_expanding;
873 : int result;
874 : int field_present;
875 : SubString literal;
876 : SubString field_name;
877 : SubString format_spec;
878 : Py_UCS4 conversion;
879 :
880 513508 : MarkupIterator_init(&iter, input->str, input->start, input->end);
881 1807710 : while ((result = MarkupIterator_next(&iter, &literal, &field_present,
882 : &field_name, &format_spec,
883 : &conversion,
884 : &format_spec_needs_expanding)) == 2) {
885 1295550 : if (literal.end != literal.start) {
886 1114230 : if (!field_present && iter.str.start == iter.str.end)
887 243186 : writer->overallocate = 0;
888 1114230 : if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
889 : literal.start, literal.end) < 0)
890 0 : return 0;
891 : }
892 :
893 1295550 : if (field_present) {
894 1049570 : if (iter.str.start == iter.str.end)
895 269039 : writer->overallocate = 0;
896 1049570 : if (!output_markup(&field_name, &format_spec,
897 : format_spec_needs_expanding, conversion, writer,
898 : args, kwargs, recursion_depth, auto_number))
899 1345 : return 0;
900 : }
901 : }
902 512163 : return result;
903 : }
904 :
905 :
906 : /*
907 : build_string allocates the output string and then
908 : calls do_markup to do the heavy lifting.
909 : */
910 : static PyObject *
911 513510 : build_string(SubString *input, PyObject *args, PyObject *kwargs,
912 : int recursion_depth, AutoNumber *auto_number)
913 : {
914 : _PyUnicodeWriter writer;
915 :
916 : /* check the recursion level */
917 513510 : if (recursion_depth <= 0) {
918 2 : PyErr_SetString(PyExc_ValueError,
919 : "Max string recursion exceeded");
920 2 : return NULL;
921 : }
922 :
923 513508 : _PyUnicodeWriter_Init(&writer);
924 513508 : writer.overallocate = 1;
925 513508 : writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
926 :
927 513508 : if (!do_markup(input, args, kwargs, &writer, recursion_depth,
928 : auto_number)) {
929 1372 : _PyUnicodeWriter_Dealloc(&writer);
930 1372 : return NULL;
931 : }
932 :
933 512136 : return _PyUnicodeWriter_Finish(&writer);
934 : }
935 :
936 : /************************************************************************/
937 : /*********** main routine ***********************************************/
938 : /************************************************************************/
939 :
940 : /* this is the main entry point */
941 : static PyObject *
942 495261 : do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
943 : {
944 : SubString input;
945 :
946 : /* PEP 3101 says only 2 levels, so that
947 : "{0:{1}}".format('abc', 's') # works
948 : "{0:{1:{2}}}".format('abc', 's', '') # fails
949 : */
950 495261 : int recursion_depth = 2;
951 :
952 : AutoNumber auto_number;
953 :
954 495261 : if (PyUnicode_READY(self) == -1)
955 0 : return NULL;
956 :
957 495261 : AutoNumber_Init(&auto_number);
958 495261 : SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));
959 495261 : return build_string(&input, args, kwargs, recursion_depth, &auto_number);
960 : }
961 :
962 : static PyObject *
963 615 : do_string_format_map(PyObject *self, PyObject *obj)
964 : {
965 615 : return do_string_format(self, NULL, obj);
966 : }
967 :
968 :
969 : /************************************************************************/
970 : /*********** formatteriterator ******************************************/
971 : /************************************************************************/
972 :
/* This is used to implement string.Formatter.parse().  It exists so
   Formatter can share code with the built-in str.format() method.
   It's really just a wrapper around MarkupIterator that is callable
   from Python. */
977 :
978 : typedef struct {
979 : PyObject_HEAD
980 : PyObject *str;
981 : MarkupIterator it_markup;
982 : } formatteriterobject;
983 :
984 : static void
985 450 : formatteriter_dealloc(formatteriterobject *it)
986 : {
987 450 : Py_XDECREF(it->str);
988 450 : PyObject_Free(it);
989 450 : }
990 :
991 : /* returns a tuple:
992 : (literal, field_name, format_spec, conversion)
993 :
994 : literal is any literal text to output. might be zero length
995 : field_name is the string before the ':'. might be None
996 : format_spec is the string after the ':'. mibht be None
997 : conversion is either None, or the string after the '!'
998 : */
999 : static PyObject *
1000 867 : formatteriter_next(formatteriterobject *it)
1001 : {
1002 : SubString literal;
1003 : SubString field_name;
1004 : SubString format_spec;
1005 : Py_UCS4 conversion;
1006 : int format_spec_needs_expanding;
1007 : int field_present;
1008 867 : int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1009 : &field_name, &format_spec, &conversion,
1010 : &format_spec_needs_expanding);
1011 :
1012 : /* all of the SubString objects point into it->str, so no
1013 : memory management needs to be done on them */
1014 867 : assert(0 <= result && result <= 2);
1015 867 : if (result == 0 || result == 1)
1016 : /* if 0, error has already been set, if 1, iterator is empty */
1017 428 : return NULL;
1018 : else {
1019 439 : PyObject *literal_str = NULL;
1020 439 : PyObject *field_name_str = NULL;
1021 439 : PyObject *format_spec_str = NULL;
1022 439 : PyObject *conversion_str = NULL;
1023 439 : PyObject *tuple = NULL;
1024 :
1025 439 : literal_str = SubString_new_object(&literal);
1026 439 : if (literal_str == NULL)
1027 0 : goto done;
1028 :
1029 439 : field_name_str = SubString_new_object(&field_name);
1030 439 : if (field_name_str == NULL)
1031 0 : goto done;
1032 :
1033 : /* if field_name is non-zero length, return a string for
1034 : format_spec (even if zero length), else return None */
1035 439 : format_spec_str = (field_present ?
1036 439 : SubString_new_object_or_empty :
1037 : SubString_new_object)(&format_spec);
1038 439 : if (format_spec_str == NULL)
1039 0 : goto done;
1040 :
1041 : /* if the conversion is not specified, return a None,
1042 : otherwise create a one length string with the conversion
1043 : character */
1044 439 : if (conversion == '\0') {
1045 410 : conversion_str = Py_None;
1046 410 : Py_INCREF(conversion_str);
1047 : }
1048 : else
1049 29 : conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1050 : &conversion, 1);
1051 439 : if (conversion_str == NULL)
1052 0 : goto done;
1053 :
1054 439 : tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1055 : conversion_str);
1056 439 : done:
1057 439 : Py_XDECREF(literal_str);
1058 439 : Py_XDECREF(field_name_str);
1059 439 : Py_XDECREF(format_spec_str);
1060 439 : Py_XDECREF(conversion_str);
1061 439 : return tuple;
1062 : }
1063 : }
1064 :
1065 : static PyMethodDef formatteriter_methods[] = {
1066 : {NULL, NULL} /* sentinel */
1067 : };
1068 :
1069 : static PyTypeObject PyFormatterIter_Type = {
1070 : PyVarObject_HEAD_INIT(&PyType_Type, 0)
1071 : "formatteriterator", /* tp_name */
1072 : sizeof(formatteriterobject), /* tp_basicsize */
1073 : 0, /* tp_itemsize */
1074 : /* methods */
1075 : (destructor)formatteriter_dealloc, /* tp_dealloc */
1076 : 0, /* tp_vectorcall_offset */
1077 : 0, /* tp_getattr */
1078 : 0, /* tp_setattr */
1079 : 0, /* tp_as_async */
1080 : 0, /* tp_repr */
1081 : 0, /* tp_as_number */
1082 : 0, /* tp_as_sequence */
1083 : 0, /* tp_as_mapping */
1084 : 0, /* tp_hash */
1085 : 0, /* tp_call */
1086 : 0, /* tp_str */
1087 : PyObject_GenericGetAttr, /* tp_getattro */
1088 : 0, /* tp_setattro */
1089 : 0, /* tp_as_buffer */
1090 : Py_TPFLAGS_DEFAULT, /* tp_flags */
1091 : 0, /* tp_doc */
1092 : 0, /* tp_traverse */
1093 : 0, /* tp_clear */
1094 : 0, /* tp_richcompare */
1095 : 0, /* tp_weaklistoffset */
1096 : PyObject_SelfIter, /* tp_iter */
1097 : (iternextfunc)formatteriter_next, /* tp_iternext */
1098 : formatteriter_methods, /* tp_methods */
1099 : 0,
1100 : };
1101 :
1102 : /* unicode_formatter_parser is used to implement
1103 : string.Formatter.vformat. it parses a string and returns tuples
1104 : describing the parsed elements. It's a wrapper around
1105 : stringlib/string_format.h's MarkupIterator */
1106 : static PyObject *
1107 451 : formatter_parser(PyObject *ignored, PyObject *self)
1108 : {
1109 : formatteriterobject *it;
1110 :
1111 451 : if (!PyUnicode_Check(self)) {
1112 1 : PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1113 1 : return NULL;
1114 : }
1115 :
1116 450 : if (PyUnicode_READY(self) == -1)
1117 0 : return NULL;
1118 :
1119 450 : it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1120 450 : if (it == NULL)
1121 0 : return NULL;
1122 :
1123 : /* take ownership, give the object to the iterator */
1124 450 : Py_INCREF(self);
1125 450 : it->str = self;
1126 :
1127 : /* initialize the contained MarkupIterator */
1128 450 : MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
1129 450 : return (PyObject *)it;
1130 : }
1131 :
1132 :
1133 : /************************************************************************/
1134 : /*********** fieldnameiterator ******************************************/
1135 : /************************************************************************/
1136 :
1137 :
/* This is used to implement string.Formatter.get_field().  It parses the
   field name into attribute and item values.  It's a Python-callable
   wrapper around FieldNameIterator. */
1141 :
1142 : typedef struct {
1143 : PyObject_HEAD
1144 : PyObject *str;
1145 : FieldNameIterator it_field;
1146 : } fieldnameiterobject;
1147 :
1148 : static void
1149 240 : fieldnameiter_dealloc(fieldnameiterobject *it)
1150 : {
1151 240 : Py_XDECREF(it->str);
1152 240 : PyObject_Free(it);
1153 240 : }
1154 :
1155 : /* returns a tuple:
1156 : (is_attr, value)
1157 : is_attr is true if we used attribute syntax (e.g., '.foo')
1158 : false if we used index syntax (e.g., '[foo]')
1159 : value is an integer or string
1160 : */
1161 : static PyObject *
1162 246 : fieldnameiter_next(fieldnameiterobject *it)
1163 : {
1164 : int result;
1165 : int is_attr;
1166 : Py_ssize_t idx;
1167 : SubString name;
1168 :
1169 246 : result = FieldNameIterator_next(&it->it_field, &is_attr,
1170 : &idx, &name);
1171 246 : if (result == 0 || result == 1)
1172 : /* if 0, error has already been set, if 1, iterator is empty */
1173 234 : return NULL;
1174 : else {
1175 12 : PyObject* result = NULL;
1176 12 : PyObject* is_attr_obj = NULL;
1177 12 : PyObject* obj = NULL;
1178 :
1179 12 : is_attr_obj = PyBool_FromLong(is_attr);
1180 12 : if (is_attr_obj == NULL)
1181 0 : goto done;
1182 :
1183 : /* either an integer or a string */
1184 12 : if (idx != -1)
1185 4 : obj = PyLong_FromSsize_t(idx);
1186 : else
1187 8 : obj = SubString_new_object(&name);
1188 12 : if (obj == NULL)
1189 0 : goto done;
1190 :
1191 : /* return a tuple of values */
1192 12 : result = PyTuple_Pack(2, is_attr_obj, obj);
1193 :
1194 12 : done:
1195 12 : Py_XDECREF(is_attr_obj);
1196 12 : Py_XDECREF(obj);
1197 12 : return result;
1198 : }
1199 : }
1200 :
1201 : static PyMethodDef fieldnameiter_methods[] = {
1202 : {NULL, NULL} /* sentinel */
1203 : };
1204 :
1205 : static PyTypeObject PyFieldNameIter_Type = {
1206 : PyVarObject_HEAD_INIT(&PyType_Type, 0)
1207 : "fieldnameiterator", /* tp_name */
1208 : sizeof(fieldnameiterobject), /* tp_basicsize */
1209 : 0, /* tp_itemsize */
1210 : /* methods */
1211 : (destructor)fieldnameiter_dealloc, /* tp_dealloc */
1212 : 0, /* tp_vectorcall_offset */
1213 : 0, /* tp_getattr */
1214 : 0, /* tp_setattr */
1215 : 0, /* tp_as_async */
1216 : 0, /* tp_repr */
1217 : 0, /* tp_as_number */
1218 : 0, /* tp_as_sequence */
1219 : 0, /* tp_as_mapping */
1220 : 0, /* tp_hash */
1221 : 0, /* tp_call */
1222 : 0, /* tp_str */
1223 : PyObject_GenericGetAttr, /* tp_getattro */
1224 : 0, /* tp_setattro */
1225 : 0, /* tp_as_buffer */
1226 : Py_TPFLAGS_DEFAULT, /* tp_flags */
1227 : 0, /* tp_doc */
1228 : 0, /* tp_traverse */
1229 : 0, /* tp_clear */
1230 : 0, /* tp_richcompare */
1231 : 0, /* tp_weaklistoffset */
1232 : PyObject_SelfIter, /* tp_iter */
1233 : (iternextfunc)fieldnameiter_next, /* tp_iternext */
1234 : fieldnameiter_methods, /* tp_methods */
1235 : 0};
1236 :
1237 : /* unicode_formatter_field_name_split is used to implement
1238 : string.Formatter.vformat. it takes a PEP 3101 "field name", and
1239 : returns a tuple of (first, rest): "first", the part before the
1240 : first '.' or '['; and "rest", an iterator for the rest of the field
1241 : name. it's a wrapper around stringlib/string_format.h's
1242 : field_name_split. The iterator it returns is a
1243 : FieldNameIterator */
1244 : static PyObject *
1245 241 : formatter_field_name_split(PyObject *ignored, PyObject *self)
1246 : {
1247 : SubString first;
1248 : Py_ssize_t first_idx;
1249 : fieldnameiterobject *it;
1250 :
1251 241 : PyObject *first_obj = NULL;
1252 241 : PyObject *result = NULL;
1253 :
1254 241 : if (!PyUnicode_Check(self)) {
1255 1 : PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);
1256 1 : return NULL;
1257 : }
1258 :
1259 240 : if (PyUnicode_READY(self) == -1)
1260 0 : return NULL;
1261 :
1262 240 : it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1263 240 : if (it == NULL)
1264 0 : return NULL;
1265 :
1266 : /* take ownership, give the object to the iterator. this is
1267 : just to keep the field_name alive */
1268 240 : Py_INCREF(self);
1269 240 : it->str = self;
1270 :
1271 : /* Pass in auto_number = NULL. We'll return an empty string for
1272 : first_obj in that case. */
1273 240 : if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
1274 : &first, &first_idx, &it->it_field, NULL))
1275 0 : goto done;
1276 :
1277 : /* first becomes an integer, if possible; else a string */
1278 240 : if (first_idx != -1)
1279 42 : first_obj = PyLong_FromSsize_t(first_idx);
1280 : else
1281 : /* convert "first" into a string object */
1282 198 : first_obj = SubString_new_object(&first);
1283 240 : if (first_obj == NULL)
1284 0 : goto done;
1285 :
1286 : /* return a tuple of values */
1287 240 : result = PyTuple_Pack(2, first_obj, it);
1288 :
1289 240 : done:
1290 240 : Py_XDECREF(it);
1291 240 : Py_XDECREF(first_obj);
1292 240 : return result;
1293 : }
|