Line data Source code
1 : /* stringlib: bytes joining implementation */ 2 : 3 : #if STRINGLIB_IS_UNICODE 4 : #error join.h only compatible with byte-wise strings 5 : #endif 6 : 7 : Py_LOCAL_INLINE(PyObject *) 8 73480 : STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) 9 : { 10 73480 : const char *sepstr = STRINGLIB_STR(sep); 11 73480 : Py_ssize_t seplen = STRINGLIB_LEN(sep); 12 73480 : PyObject *res = NULL; 13 : char *p; 14 73480 : Py_ssize_t seqlen = 0; 15 73480 : Py_ssize_t sz = 0; 16 : Py_ssize_t i, nbufs; 17 : PyObject *seq, *item; 18 73480 : Py_buffer *buffers = NULL; 19 : #define NB_STATIC_BUFFERS 10 20 : Py_buffer static_buffers[NB_STATIC_BUFFERS]; 21 : #define GIL_THRESHOLD 1048576 22 73480 : int drop_gil = 1; 23 73480 : PyThreadState *save = NULL; 24 : 25 73480 : seq = PySequence_Fast(iterable, "can only join an iterable"); 26 73480 : if (seq == NULL) { 27 2 : return NULL; 28 : } 29 : 30 73478 : seqlen = PySequence_Fast_GET_SIZE(seq); 31 73478 : if (seqlen == 0) { 32 3806 : Py_DECREF(seq); 33 3806 : return STRINGLIB_NEW(NULL, 0); 34 : } 35 : #if !STRINGLIB_MUTABLE 36 69626 : if (seqlen == 1) { 37 31941 : item = PySequence_Fast_GET_ITEM(seq, 0); 38 31941 : if (STRINGLIB_CHECK_EXACT(item)) { 39 31939 : Py_INCREF(item); 40 31939 : Py_DECREF(seq); 41 31939 : return item; 42 : } 43 : } 44 : #endif 45 37733 : if (seqlen > NB_STATIC_BUFFERS) { 46 2365 : buffers = PyMem_NEW(Py_buffer, seqlen); 47 2365 : if (buffers == NULL) { 48 0 : Py_DECREF(seq); 49 0 : PyErr_NoMemory(); 50 0 : return NULL; 51 : } 52 : } 53 : else { 54 35368 : buffers = static_buffers; 55 : } 56 : 57 : /* Here is the general case. Do a pre-pass to figure out the total 58 : * amount of space we'll need (sz), and see whether all arguments are 59 : * bytes-like. 60 : */ 61 1339212 : for (i = 0, nbufs = 0; i < seqlen; i++) { 62 : Py_ssize_t itemlen; 63 1301488 : item = PySequence_Fast_GET_ITEM(seq, i); 64 1301488 : if (PyBytes_CheckExact(item)) { 65 : /* Fast path. */ 66 1301373 : Py_INCREF(item); 67 1301373 : buffers[i].obj = item; 68 1301373 : buffers[i].buf = PyBytes_AS_STRING(item); 69 1301373 : buffers[i].len = PyBytes_GET_SIZE(item); 70 : } 71 : else { 72 110 : if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) { 73 5 : PyErr_Format(PyExc_TypeError, 74 : "sequence item %zd: expected a bytes-like object, " 75 : "%.80s found", 76 5 : i, Py_TYPE(item)->tp_name); 77 5 : goto error; 78 : } 79 : /* If the backing objects are mutable, then dropping the GIL 80 : * opens up race conditions where another thread tries to modify 81 : * the object which we hold a buffer on it. Such code has data 82 : * races anyway, but this is a conservative approach that avoids 83 : * changing the behaviour of that data race. 84 : */ 85 105 : drop_gil = 0; 86 : } 87 1301486 : nbufs = i + 1; /* for error cleanup */ 88 1301486 : itemlen = buffers[i].len; 89 1301486 : if (itemlen > PY_SSIZE_T_MAX - sz) { 90 0 : PyErr_SetString(PyExc_OverflowError, 91 : "join() result is too long"); 92 0 : goto error; 93 : } 94 1301486 : sz += itemlen; 95 1301486 : if (i != 0) { 96 1263750 : if (seplen > PY_SSIZE_T_MAX - sz) { 97 0 : PyErr_SetString(PyExc_OverflowError, 98 : "join() result is too long"); 99 0 : goto error; 100 : } 101 1263750 : sz += seplen; 102 : } 103 1301486 : if (seqlen != PySequence_Fast_GET_SIZE(seq)) { 104 0 : PyErr_SetString(PyExc_RuntimeError, 105 : "sequence changed size during iteration"); 106 0 : goto error; 107 : } 108 : } 109 : 110 : /* Allocate result space. */ 111 37728 : res = STRINGLIB_NEW(NULL, sz); 112 37728 : if (res == NULL) 113 0 : goto error; 114 : 115 : /* Catenate everything. */ 116 37728 : p = STRINGLIB_STR(res); 117 37728 : if (sz < GIL_THRESHOLD) { 118 37710 : drop_gil = 0; /* Benefits are likely outweighed by the overheads */ 119 : } 120 37728 : if (drop_gil) { 121 18 : save = PyEval_SaveThread(); 122 : } 123 37728 : if (!seplen) { 124 : /* fast path */ 125 1129925 : for (i = 0; i < nbufs; i++) { 126 1092882 : Py_ssize_t n = buffers[i].len; 127 1092882 : char *q = buffers[i].buf; 128 1092882 : memcpy(p, q, n); 129 1092882 : p += n; 130 : } 131 : } 132 : else { 133 209282 : for (i = 0; i < nbufs; i++) { 134 : Py_ssize_t n; 135 : char *q; 136 208596 : if (i) { 137 207910 : memcpy(p, sepstr, seplen); 138 207910 : p += seplen; 139 : } 140 208596 : n = buffers[i].len; 141 208596 : q = buffers[i].buf; 142 208596 : memcpy(p, q, n); 143 208596 : p += n; 144 : } 145 : } 146 37728 : if (drop_gil) { 147 18 : PyEval_RestoreThread(save); 148 : } 149 37728 : goto done; 150 : 151 5 : error: 152 5 : res = NULL; 153 37733 : done: 154 37733 : Py_DECREF(seq); 155 1339212 : for (i = 0; i < nbufs; i++) 156 1301486 : PyBuffer_Release(&buffers[i]); 157 37733 : if (buffers != static_buffers) 158 2365 : PyMem_Free(buffers); 159 37733 : return res; 160 : } 161 : 162 : #undef NB_STATIC_BUFFERS 163 : #undef GIL_THRESHOLD