LCOV - code coverage report
Current view: top level - Modules - binascii.c (source / functions) Hit Total Coverage
Test: CPython lcov report Lines: 420 468 89.7 %
Date: 2022-07-07 18:19:46 Functions: 20 20 100.0 %

          Line data    Source code
       1             : /*
       2             : ** Routines to represent binary data in ASCII and vice-versa
       3             : **
       4             : ** This module currently supports the following encodings:
       5             : ** uuencode:
       6             : **      each line encodes 45 bytes (except possibly the last)
       7             : **      First char encodes (binary) length, rest data
       8             : **      each char encodes 6 bits, as follows:
       9             : **      binary: 01234567 abcdefgh ijklmnop
      10             : **      ascii:  012345 67abcd efghij klmnop
      11             : **      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
      12             : **      short binary data is zero-extended (so the bits are always in the
      13             : **      right place), this does *not* reflect in the length.
      14             : ** base64:
      15             : **      Line breaks are insignificant, but lines are at most 76 chars
      16             : **      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
      17             : **      is done via a table.
      18             : **      Short binary data is filled (in ASCII) with '='.
      19             : ** hqx:
      20             : **      File starts with introductory text, real data starts and ends
      21             : **      with colons.
      22             : **      Data consists of three similar parts: info, datafork, resourcefork.
      23             : **      Each part is protected (at the end) with a 16-bit crc
      24             : **      The binary data is run-length encoded, and then ascii-fied:
      25             : **      binary: 01234567 abcdefgh ijklmnop
      26             : **      ascii:  012345 67abcd efghij klmnop
      27             : **      ASCII encoding is table-driven, see the code.
      28             : **      Short binary data results in the runt ascii-byte being output with
      29             : **      the bits in the right place.
      30             : **
      31             : ** While I was reading dozens of programs that encode or decode the formats
      32             : ** here (documentation? hihi:-) I have formulated Jansen's Observation:
      33             : **
      34             : **      Programs that encode binary data in ASCII are written in
      35             : **      such a style that they are as unreadable as possible. Devices used
      36             : **      include unnecessary global variables, burying important tables
      37             : **      in unrelated sourcefiles, putting functions in include files,
      38             : **      using seemingly-descriptive variable names for different purposes,
      39             : **      calls to empty subroutines and a host of others.
      40             : **
      41             : ** I have attempted to break with this tradition, but I guess that that
      42             : ** does make the performance sub-optimal. Oh well, too bad...
      43             : **
      44             : ** Jack Jansen, CWI, July 1995.
      45             : **
      46             : ** Added support for quoted-printable encoding, based on rfc 1521 et al
      47             : ** quoted-printable encoding specifies that non printable characters (anything
      48             : ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
      49             : ** of the character.  It also specifies some other behavior to enable 8bit data
      50             : ** in a mail message with little difficulty (maximum line sizes, protecting
      51             : ** some cases of whitespace, etc).
      52             : **
      53             : ** Brandon Long, September 2001.
      54             : */
      55             : 
      56             : #ifndef Py_BUILD_CORE_BUILTIN
      57             : #  define Py_BUILD_CORE_MODULE 1
      58             : #endif
      59             : 
      60             : #define PY_SSIZE_T_CLEAN
      61             : 
      62             : #include "Python.h"
      63             : #include "pycore_long.h"          // _PyLong_DigitValue
      64             : #include "pycore_strhex.h"        // _Py_strhex_bytes_with_sep()
      65             : #ifdef USE_ZLIB_CRC32
      66             : #  include "zlib.h"
      67             : #endif
      68             : 
/* Per-module state, retrieved through get_binascii_state(). */
typedef struct binascii_state {
    /* Exception type handed to PyErr_SetString()/PyErr_Format() when data
       cannot be encoded or decoded. */
    PyObject *Error;
    /* NOTE(review): not referenced in this part of the file; presumably a
       second exception type for incomplete input -- confirm against the
       module initialisation code. */
    PyObject *Incomplete;
} binascii_state;
      73             : 
      74             : static inline binascii_state *
      75       24735 : get_binascii_state(PyObject *module)
      76             : {
      77       24735 :     return (binascii_state *)PyModule_GetState(module);
      78             : }
      79             : 
      80             : 
/* Decoding table for base64: indexed by an input byte (0..255), yields the
   6-bit value of that character.  Entries written as -1 are stored in an
   unsigned char and therefore read back as 255; the decoder treats any
   entry >= 64 as "not a base64 character".  The pad character '=' maps
   to 0 (see the note on its row). */
static const unsigned char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,

    /* Bytes 128..255 are never valid base64 characters. */
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
};
     100             : 
/* Character used to fill out short base64 data (see the file header). */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory.
   b2a_base64 sizes its output buffer as bin_len*2 + 2, plus one more for
   the optional newline, so this bound keeps that sum within
   PY_SSIZE_T_MAX. */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)
     105             : 
/* Encoding table for base64: indexed by a 6-bit value (0..63), yields the
   ASCII character that represents it. */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
     108             : 
     109             : 
/* 256-entry lookup table of 16-bit values, one per byte value, for the
   16-bit CRC that protects each part of an hqx file (see the file header).
   NOTE(review): entry 1 is 0x1021, which suggests the CRC polynomial
   x^16 + x^12 + x^5 + 1 -- confirm against the code that consumes this
   table, which is outside this part of the file. */
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
     144             : 
     145             : /*[clinic input]
     146             : module binascii
     147             : [clinic start generated code]*/
     148             : /*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
     149             : 
     150             : /*[python input]
     151             : 
     152             : class ascii_buffer_converter(CConverter):
     153             :     type = 'Py_buffer'
     154             :     converter = 'ascii_buffer_converter'
     155             :     impl_by_reference = True
     156             :     c_default = "{NULL, NULL}"
     157             : 
     158             :     def cleanup(self):
     159             :         name = self.name
     160             :         return "".join(["if (", name, ".obj)\n   PyBuffer_Release(&", name, ");\n"])
     161             : 
     162             : [python start generated code]*/
     163             : /*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
     164             : 
     165             : static int
     166       18662 : ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
     167             : {
     168       18662 :     if (arg == NULL) {
     169           0 :         PyBuffer_Release(buf);
     170           0 :         return 1;
     171             :     }
     172       18662 :     if (PyUnicode_Check(arg)) {
     173         386 :         if (PyUnicode_READY(arg) < 0)
     174           0 :             return 0;
     175         386 :         if (!PyUnicode_IS_ASCII(arg)) {
     176          20 :             PyErr_SetString(PyExc_ValueError,
     177             :                             "string argument should contain only ASCII characters");
     178          20 :             return 0;
     179             :         }
     180         366 :         assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
     181         366 :         buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
     182         366 :         buf->len = PyUnicode_GET_LENGTH(arg);
     183         366 :         buf->obj = NULL;
     184         366 :         return 1;
     185             :     }
     186       18276 :     if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
     187           0 :         PyErr_Format(PyExc_TypeError,
     188             :                      "argument should be bytes, buffer or ASCII string, "
     189           0 :                      "not '%.100s'", Py_TYPE(arg)->tp_name);
     190           0 :         return 0;
     191             :     }
     192       18276 :     if (!PyBuffer_IsContiguous(buf, 'C')) {
     193           0 :         PyErr_Format(PyExc_TypeError,
     194             :                      "argument should be a contiguous buffer, "
     195           0 :                      "not '%.100s'", Py_TYPE(arg)->tp_name);
     196           0 :         PyBuffer_Release(buf);
     197           0 :         return 0;
     198             :     }
     199       18276 :     return Py_CLEANUP_SUPPORTED;
     200             : }
     201             : 
     202             : #include "clinic/binascii.c.h"
     203             : 
     204             : /*[clinic input]
     205             : binascii.a2b_uu
     206             : 
     207             :     data: ascii_buffer
     208             :     /
     209             : 
     210             : Decode a line of uuencoded data.
     211             : [clinic start generated code]*/
     212             : 
     213             : static PyObject *
     214         196 : binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
     215             : /*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
     216             : {
     217             :     const unsigned char *ascii_data;
     218             :     unsigned char *bin_data;
     219         196 :     int leftbits = 0;
     220             :     unsigned char this_ch;
     221         196 :     unsigned int leftchar = 0;
     222             :     PyObject *rv;
     223             :     Py_ssize_t ascii_len, bin_len;
     224             :     binascii_state *state;
     225             : 
     226         196 :     ascii_data = data->buf;
     227         196 :     ascii_len = data->len;
     228             : 
     229         196 :     assert(ascii_len >= 0);
     230             : 
     231             :     /* First byte: binary data length (in bytes) */
     232         196 :     bin_len = (*ascii_data++ - ' ') & 077;
     233         196 :     ascii_len--;
     234             : 
     235             :     /* Allocate the buffer */
     236         196 :     if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
     237           0 :         return NULL;
     238         196 :     bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
     239             : 
     240        6310 :     for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
     241             :         /* XXX is it really best to add NULs if there's no more data */
     242        6118 :         this_ch = (ascii_len > 0) ? *ascii_data : 0;
     243        6118 :         if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
     244             :             /*
     245             :             ** Whitespace. Assume some spaces got eaten at
     246             :             ** end-of-line. (We check this later)
     247             :             */
     248         680 :             this_ch = 0;
     249             :         } else {
     250             :             /* Check the character for legality
     251             :             ** The 64 in stead of the expected 63 is because
     252             :             ** there are a few uuencodes out there that use
     253             :             ** '`' as zero instead of space.
     254             :             */
     255        5438 :             if ( this_ch < ' ' || this_ch > (' ' + 64)) {
     256           4 :                 state = get_binascii_state(module);
     257           4 :                 if (state == NULL) {
     258           0 :                     return NULL;
     259             :                 }
     260           4 :                 PyErr_SetString(state->Error, "Illegal char");
     261           4 :                 Py_DECREF(rv);
     262           4 :                 return NULL;
     263             :             }
     264        5434 :             this_ch = (this_ch - ' ') & 077;
     265             :         }
     266             :         /*
     267             :         ** Shift it in on the low end, and see if there's
     268             :         ** a byte ready for output.
     269             :         */
     270        6114 :         leftchar = (leftchar << 6) | (this_ch);
     271        6114 :         leftbits += 6;
     272        6114 :         if ( leftbits >= 8 ) {
     273        4560 :             leftbits -= 8;
     274        4560 :             *bin_data++ = (leftchar >> leftbits) & 0xff;
     275        4560 :             leftchar &= ((1 << leftbits) - 1);
     276        4560 :             bin_len--;
     277             :         }
     278             :     }
     279             :     /*
     280             :     ** Finally, check that if there's anything left on the line
     281             :     ** that it's whitespace only.
     282             :     */
     283         385 :     while( ascii_len-- > 0 ) {
     284         201 :         this_ch = *ascii_data++;
     285             :         /* Extra '`' may be written as padding in some cases */
     286         201 :         if ( this_ch != ' ' && this_ch != ' '+64 &&
     287           8 :              this_ch != '\n' && this_ch != '\r' ) {
     288           8 :             state = get_binascii_state(module);
     289           8 :             if (state == NULL) {
     290           0 :                 return NULL;
     291             :             }
     292           8 :             PyErr_SetString(state->Error, "Trailing garbage");
     293           8 :             Py_DECREF(rv);
     294           8 :             return NULL;
     295             :         }
     296             :     }
     297         184 :     return rv;
     298             : }
     299             : 
     300             : /*[clinic input]
     301             : binascii.b2a_uu
     302             : 
     303             :     data: Py_buffer
     304             :     /
     305             :     *
     306             :     backtick: bool(accept={int}) = False
     307             : 
     308             : Uuencode line of data.
     309             : [clinic start generated code]*/
     310             : 
     311             : static PyObject *
     312         126 : binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
     313             : /*[clinic end generated code: output=b1b99de62d9bbeb8 input=b26bc8d32b6ed2f6]*/
     314             : {
     315             :     unsigned char *ascii_data;
     316             :     const unsigned char *bin_data;
     317         126 :     int leftbits = 0;
     318             :     unsigned char this_ch;
     319         126 :     unsigned int leftchar = 0;
     320             :     binascii_state *state;
     321             :     Py_ssize_t bin_len, out_len;
     322             :     _PyBytesWriter writer;
     323             : 
     324         126 :     _PyBytesWriter_Init(&writer);
     325         126 :     bin_data = data->buf;
     326         126 :     bin_len = data->len;
     327         126 :     if ( bin_len > 45 ) {
     328             :         /* The 45 is a limit that appears in all uuencode's */
     329           4 :         state = get_binascii_state(module);
     330           4 :         if (state == NULL) {
     331           0 :             return NULL;
     332             :         }
     333           4 :         PyErr_SetString(state->Error, "At most 45 bytes at once");
     334           4 :         return NULL;
     335             :     }
     336             : 
     337             :     /* We're lazy and allocate to much (fixed up later) */
     338         122 :     out_len = 2 + (bin_len + 2) / 3 * 4;
     339         122 :     ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
     340         122 :     if (ascii_data == NULL)
     341           0 :         return NULL;
     342             : 
     343             :     /* Store the length */
     344         122 :     if (backtick && !bin_len)
     345           4 :         *ascii_data++ = '`';
     346             :     else
     347         118 :         *ascii_data++ = ' ' + (unsigned char)bin_len;
     348             : 
     349        3815 :     for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
     350             :         /* Shift the data (or padding) into our buffer */
     351        3693 :         if ( bin_len > 0 )              /* Data */
     352        3641 :             leftchar = (leftchar << 8) | *bin_data;
     353             :         else                            /* Padding */
     354          52 :             leftchar <<= 8;
     355        3693 :         leftbits += 8;
     356             : 
     357             :         /* See if there are 6-bit groups ready */
     358        8617 :         while ( leftbits >= 6 ) {
     359        4924 :             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
     360        4924 :             leftbits -= 6;
     361        4924 :             if (backtick && !this_ch)
     362          43 :                 *ascii_data++ = '`';
     363             :             else
     364        4881 :                 *ascii_data++ = this_ch + ' ';
     365             :         }
     366             :     }
     367         122 :     *ascii_data++ = '\n';       /* Append a courtesy newline */
     368             : 
     369         122 :     return _PyBytesWriter_Finish(&writer, ascii_data);
     370             : }
     371             : 
     372             : /*[clinic input]
     373             : binascii.a2b_base64
     374             : 
     375             :     data: ascii_buffer
     376             :     /
     377             :     *
     378             :     strict_mode: bool(accept={int}) = False
     379             : 
     380             : Decode a line of base64 data.
     381             : 
     382             :   strict_mode
     383             :     When set to True, bytes that are not part of the base64 standard are not allowed.
     384             :     The same applies to excess data after padding (= / ==).
     385             : [clinic start generated code]*/
     386             : 
     387             : static PyObject *
     388        1003 : binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
     389             : /*[clinic end generated code: output=5409557788d4f975 input=3a30c4e3528317c6]*/
     390             : {
     391        1003 :     assert(data->len >= 0);
     392             : 
     393        1003 :     const unsigned char *ascii_data = data->buf;
     394        1003 :     size_t ascii_len = data->len;
     395        1003 :     binascii_state *state = NULL;
     396        1003 :     char padding_started = 0;
     397             : 
     398             :     /* Allocate the buffer */
     399        1003 :     Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
     400             :     _PyBytesWriter writer;
     401        1003 :     _PyBytesWriter_Init(&writer);
     402        1003 :     unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
     403        1003 :     if (bin_data == NULL)
     404           0 :         return NULL;
     405        1003 :     unsigned char *bin_data_start = bin_data;
     406             : 
     407        1003 :     if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') {
     408          12 :         state = get_binascii_state(module);
     409          12 :         if (state) {
     410          12 :             PyErr_SetString(state->Error, "Leading padding not allowed");
     411             :         }
     412          12 :         goto error_end;
     413             :     }
     414             : 
     415         991 :     int quad_pos = 0;
     416         991 :     unsigned char leftchar = 0;
     417         991 :     int pads = 0;
     418      375872 :     for (size_t i = 0; i < ascii_len; i++) {
     419      375442 :         unsigned char this_ch = ascii_data[i];
     420             : 
     421             :         /* Check for pad sequences and ignore
     422             :         ** the invalid ones.
     423             :         */
     424      375442 :         if (this_ch == BASE64_PAD) {
     425        1032 :             padding_started = 1;
     426             : 
     427        1032 :             if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
     428             :                 /* A pad sequence means we should not parse more input.
     429             :                 ** We've already interpreted the data from the quad at this point.
     430             :                 ** in strict mode, an error should raise if there's excess data after the padding.
     431             :                 */
     432         504 :                 if (strict_mode && i + 1 < ascii_len) {
     433          24 :                     state = get_binascii_state(module);
     434          24 :                     if (state) {
     435          24 :                         PyErr_SetString(state->Error, "Excess data after padding");
     436             :                     }
     437          24 :                     goto error_end;
     438             :                 }
     439             : 
     440         480 :                 goto done;
     441             :             }
     442         528 :             continue;
     443             :         }
     444             : 
     445      374410 :         this_ch = table_a2b_base64[this_ch];
     446      374410 :         if (this_ch >= 64) {
     447       11831 :             if (strict_mode) {
     448          48 :                 state = get_binascii_state(module);
     449          48 :                 if (state) {
     450          48 :                     PyErr_SetString(state->Error, "Only base64 data is allowed");
     451             :                 }
     452          48 :                 goto error_end;
     453             :             }
     454       11783 :             continue;
     455             :         }
     456             : 
     457             :         // Characters that are not '=', in the middle of the padding, are not allowed
     458      362579 :         if (strict_mode && padding_started) {
     459           9 :             state = get_binascii_state(module);
     460           9 :             if (state) {
     461           9 :                 PyErr_SetString(state->Error, "Discontinuous padding not allowed");
     462             :             }
     463           9 :             goto error_end;
     464             :         }
     465      362570 :         pads = 0;
     466             : 
     467      362570 :         switch (quad_pos) {
     468       90921 :             case 0:
     469       90921 :                 quad_pos = 1;
     470       90921 :                 leftchar = this_ch;
     471       90921 :                 break;
     472       90867 :             case 1:
     473       90867 :                 quad_pos = 2;
     474       90867 :                 *bin_data++ = (leftchar << 2) | (this_ch >> 4);
     475       90867 :                 leftchar = this_ch & 0x0f;
     476       90867 :                 break;
     477       90489 :             case 2:
     478       90489 :                 quad_pos = 3;
     479       90489 :                 *bin_data++ = (leftchar << 4) | (this_ch >> 2);
     480       90489 :                 leftchar = this_ch & 0x03;
     481       90489 :                 break;
     482       90293 :             case 3:
     483       90293 :                 quad_pos = 0;
     484       90293 :                 *bin_data++ = (leftchar << 6) | (this_ch);
     485       90293 :                 leftchar = 0;
     486       90293 :                 break;
     487             :         }
     488      374881 :     }
     489             : 
     490         430 :     if (quad_pos != 0) {
     491          85 :         state = get_binascii_state(module);
     492          85 :         if (state == NULL) {
     493             :             /* error already set, from get_binascii_state */
     494          85 :         } else if (quad_pos == 1) {
     495             :             /*
     496             :             ** There is exactly one extra valid, non-padding, base64 character.
     497             :             ** This is an invalid length, as there is no possible input that
     498             :             ** could encoded into such a base64 string.
     499             :             */
     500          44 :             PyErr_Format(state->Error,
     501             :                          "Invalid base64-encoded string: "
     502             :                          "number of data characters (%zd) cannot be 1 more "
     503             :                          "than a multiple of 4",
     504          44 :                          (bin_data - bin_data_start) / 3 * 4 + 1);
     505             :         } else {
     506          41 :             PyErr_SetString(state->Error, "Incorrect padding");
     507             :         }
     508         178 :         error_end:
     509         178 :         _PyBytesWriter_Dealloc(&writer);
     510         178 :         return NULL;
     511             :     }
     512             : 
     513         345 : done:
     514         825 :     return _PyBytesWriter_Finish(&writer, bin_data);
     515             : }
     516             : 
     517             : 
     518             : /*[clinic input]
     519             : binascii.b2a_base64
     520             : 
     521             :     data: Py_buffer
     522             :     /
     523             :     *
     524             :     newline: bool(accept={int}) = True
     525             : 
     526             : Base64-code line of data.
     527             : [clinic start generated code]*/
     528             : 
     529             : static PyObject *
     530        4517 : binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
     531             : /*[clinic end generated code: output=4ad62c8e8485d3b3 input=6083dac5777fa45d]*/
     532             : {
     533             :     unsigned char *ascii_data;
     534             :     const unsigned char *bin_data;
     535        4517 :     int leftbits = 0;
     536             :     unsigned char this_ch;
     537        4517 :     unsigned int leftchar = 0;
     538             :     Py_ssize_t bin_len, out_len;
     539             :     _PyBytesWriter writer;
     540             :     binascii_state *state;
     541             : 
     542        4517 :     bin_data = data->buf;
     543        4517 :     bin_len = data->len;
     544        4517 :     _PyBytesWriter_Init(&writer);
     545             : 
     546        4517 :     assert(bin_len >= 0);
     547             : 
     548        4517 :     if ( bin_len > BASE64_MAXBIN ) {
     549           0 :         state = get_binascii_state(module);
     550           0 :         if (state == NULL) {
     551           0 :             return NULL;
     552             :         }
     553           0 :         PyErr_SetString(state->Error, "Too much data for base64 line");
     554           0 :         return NULL;
     555             :     }
     556             : 
     557             :     /* We're lazy and allocate too much (fixed up later).
     558             :        "+2" leaves room for up to two pad characters.
     559             :        Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
     560        4517 :     out_len = bin_len*2 + 2;
     561        4517 :     if (newline)
     562        4207 :         out_len++;
     563        4517 :     ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
     564        4517 :     if (ascii_data == NULL)
     565           0 :         return NULL;
     566             : 
     567      246676 :     for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
     568             :         /* Shift the data into our buffer */
     569      242159 :         leftchar = (leftchar << 8) | *bin_data;
     570      242159 :         leftbits += 8;
     571             : 
     572             :         /* See if there are 6-bit groups ready */
     573      564865 :         while ( leftbits >= 6 ) {
     574      322706 :             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
     575      322706 :             leftbits -= 6;
     576      322706 :             *ascii_data++ = table_b2a_base64[this_ch];
     577             :         }
     578             :     }
     579        4517 :     if ( leftbits == 2 ) {
     580         184 :         *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
     581         184 :         *ascii_data++ = BASE64_PAD;
     582         184 :         *ascii_data++ = BASE64_PAD;
     583        4333 :     } else if ( leftbits == 4 ) {
     584         167 :         *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
     585         167 :         *ascii_data++ = BASE64_PAD;
     586             :     }
     587        4517 :     if (newline)
     588        4207 :         *ascii_data++ = '\n';       /* Append a courtesy newline */
     589             : 
     590        4517 :     return _PyBytesWriter_Finish(&writer, ascii_data);
     591             : }
     592             : 
     593             : 
     594             : /*[clinic input]
     595             : binascii.crc_hqx
     596             : 
     597             :     data: Py_buffer
     598             :     crc: unsigned_int(bitwise=True)
     599             :     /
     600             : 
     601             : Compute CRC-CCITT incrementally.
     602             : [clinic start generated code]*/
     603             : 
     604             : static PyObject *
     605          40 : binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
     606             : /*[clinic end generated code: output=2fde213d0f547a98 input=56237755370a951c]*/
     607             : {
     608             :     const unsigned char *bin_data;
     609             :     Py_ssize_t len;
     610             : 
     611          40 :     crc &= 0xffff;
     612          40 :     bin_data = data->buf;
     613          40 :     len = data->len;
     614             : 
     615         344 :     while(len-- > 0) {
     616         304 :         crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
     617             :     }
     618             : 
     619          40 :     return PyLong_FromUnsignedLong(crc);
     620             : }
     621             : 
     622             : #ifndef USE_ZLIB_CRC32
     623             : /*  Crc - 32 BIT ANSI X3.66 CRC checksum files
     624             :     Also known as: ISO 3307
     625             : **********************************************************************|
     626             : *                                                                    *|
     627             : * Demonstration program to compute the 32-bit CRC used as the frame  *|
     628             : * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
     629             : * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
     630             : * protocol).  The 32-bit FCS was added via the Federal Register,     *|
     631             : * 1 June 1982, p.23798.  I presume but don't know for certain that   *|
     632             : * this polynomial is or will be included in CCITT V.41, which        *|
     633             : * defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
     634             : * PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
     635             : * errors by a factor of 10^-5 over 16-bit FCS.                       *|
     636             : *                                                                    *|
     637             : **********************************************************************|
     638             : 
     639             :  Copyright (C) 1986 Gary S. Brown.  You may use this program, or
     640             :  code or tables extracted from it, as desired without restriction.
     641             : 
     642             :  First, the polynomial itself and its table of feedback terms.  The
     643             :  polynomial is
     644             :  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
     645             :  Note that we take it "backwards" and put the highest-order term in
     646             :  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
     647             :  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
     648             :  the MSB being 1.
     649             : 
     650             :  Note that the usual hardware shift register implementation, which
     651             :  is what we're using (we're merely optimizing it by doing eight-bit
     652             :  chunks at a time) shifts bits into the lowest-order term.  In our
     653             :  implementation, that means shifting towards the right.  Why do we
     654             :  do it this way?  Because the calculated CRC must be transmitted in
     655             :  order from highest-order term to lowest-order term.  UARTs transmit
     656             :  characters in order from LSB to MSB.  By storing the CRC this way,
     657             :  we hand it to the UART in the order low-byte to high-byte; the UART
     658             :  sends each low-bit to high-bit; and the result is transmission bit
     659             :  by bit from highest- to lowest-order term without requiring any bit
     660             :  shuffling on our part.  Reception works similarly.
     661             : 
     662             :  The feedback terms table consists of 256, 32-bit entries.  Notes:
     663             : 
     664             :   1. The table can be generated at runtime if desired; code to do so
     665             :      is shown later.  It might not be obvious, but the feedback
     666             :      terms simply represent the results of eight shift/xor opera-
     667             :      tions for all combinations of data and CRC register values.
     668             : 
     669             :   2. The CRC accumulation logic is the same for all CRC polynomials,
     670             :      be they sixteen or thirty-two bits wide.  You simply choose the
     671             :      appropriate table.  Alternatively, because the table can be
     672             :      generated at runtime, you can start by generating the table for
     673             :      the polynomial in question and use exactly the same "updcrc",
     674             :      if your application needn't simultaneously handle two CRC
     675             :      polynomials.  (Note, however, that XMODEM is strange.)
     676             : 
     677             :   3. For 16-bit CRCs, the table entries need be only 16 bits wide;
     678             :      of course, 32-bit entries work OK if the high 16 bits are zero.
     679             : 
     680             :   4. The values must be right-shifted by eight bits by the "updcrc"
     681             :      logic; the shift must be unsigned (bring in zeroes).  On some
     682             :      hardware you could probably optimize the shift in assembler by
     683             :      using byte-swap instructions.
     684             : ********************************************************************/
     685             : 
     686             : static const unsigned int crc_32_tab[256] = {
     687             : 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
     688             : 0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
     689             : 0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
     690             : 0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
     691             : 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
     692             : 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
     693             : 0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
     694             : 0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
     695             : 0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
     696             : 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
     697             : 0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
     698             : 0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
     699             : 0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
     700             : 0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
     701             : 0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
     702             : 0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
     703             : 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
     704             : 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
     705             : 0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
     706             : 0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
     707             : 0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
     708             : 0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
     709             : 0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
     710             : 0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
     711             : 0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
     712             : 0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
     713             : 0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
     714             : 0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
     715             : 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
     716             : 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
     717             : 0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
     718             : 0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
     719             : 0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
     720             : 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
     721             : 0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
     722             : 0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
     723             : 0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
     724             : 0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
     725             : 0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
     726             : 0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
     727             : 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
     728             : 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
     729             : 0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
     730             : 0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
     731             : 0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
     732             : 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
     733             : 0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
     734             : 0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
     735             : 0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
     736             : 0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
     737             : 0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
     738             : 0x2d02ef8dU
     739             : };
     740             : 
     741             : static unsigned int
     742             : internal_crc32(const unsigned char *bin_data, Py_ssize_t len, unsigned int crc)
     743             : { /* By Jim Ahlstrom; All rights transferred to CNRI */
     744             :     unsigned int result;
     745             : 
     746             :     crc = ~ crc;
     747             :     while (len-- > 0) {
     748             :         crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
     749             :         /* Note:  (crc >> 8) MUST zero fill on left */
     750             :     }
     751             : 
     752             :     result = (crc ^ 0xFFFFFFFF);
     753             :     return result & 0xffffffff;
     754             : }
     755             : #endif  /* USE_ZLIB_CRC32 */
     756             : 
     757             : /*[clinic input]
     758             : binascii.crc32 -> unsigned_int
     759             : 
     760             :     data: Py_buffer
     761             :     crc: unsigned_int(bitwise=True) = 0
     762             :     /
     763             : 
     764             : Compute CRC-32 incrementally.
     765             : [clinic start generated code]*/
     766             : 
     767             : static unsigned int
     768          18 : binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
     769             : /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
     770             : 
     771             : #ifdef USE_ZLIB_CRC32
     772             : /* This is the same as zlibmodule.c zlib_crc32_impl. It exists in two
     773             :  * modules for historical reasons. */
     774             : {
     775             :     /* Releasing the GIL for very small buffers is inefficient
     776             :        and may lower performance */
     777          18 :     if (data->len > 1024*5) {
     778           0 :         unsigned char *buf = data->buf;
     779           0 :         Py_ssize_t len = data->len;
     780             : 
     781           0 :         Py_BEGIN_ALLOW_THREADS
     782             :         /* Avoid truncation of length for very large buffers. crc32() takes
     783             :            length as an unsigned int, which may be narrower than Py_ssize_t. */
     784           0 :         while ((size_t)len > UINT_MAX) {
     785           0 :             crc = crc32(crc, buf, UINT_MAX);
     786           0 :             buf += (size_t) UINT_MAX;
     787           0 :             len -= (size_t) UINT_MAX;
     788             :         }
     789           0 :         crc = crc32(crc, buf, (unsigned int)len);
     790           0 :         Py_END_ALLOW_THREADS
     791             :     } else {
     792          18 :         crc = crc32(crc, data->buf, (unsigned int)data->len);
     793             :     }
     794          18 :     return crc & 0xffffffff;
     795             : }
     796             : #else  /* USE_ZLIB_CRC32 */
     797             : {
     798             :     const unsigned char *bin_data = data->buf;
     799             :     Py_ssize_t len = data->len;
     800             : 
     801             :     /* Releasing the GIL for very small buffers is inefficient
     802             :        and may lower performance */
     803             :     if (len > 1024*5) {
     804             :         unsigned int result;
     805             :         Py_BEGIN_ALLOW_THREADS
     806             :         result = internal_crc32(bin_data, len, crc);
     807             :         Py_END_ALLOW_THREADS
     808             :         return result;
     809             :     } else {
     810             :         return internal_crc32(bin_data, len, crc);
     811             :     }
     812             : }
     813             : #endif  /* USE_ZLIB_CRC32 */
     814             : 
     815             : /*[clinic input]
     816             : binascii.b2a_hex
     817             : 
     818             :     data: Py_buffer
     819             :     sep: object = NULL
     820             :         An optional single character or byte to separate hex bytes.
     821             :     bytes_per_sep: int = 1
     822             :         How many bytes between separators.  Positive values count from the
     823             :         right, negative values count from the left.
     824             : 
     825             : Hexadecimal representation of binary data.
     826             : 
     827             : The return value is a bytes object.  This function is also
     828             : available as "hexlify()".
     829             : 
     830             : Example:
     831             : >>> binascii.b2a_hex(b'\xb9\x01\xef')
     832             : b'b901ef'
     833             : >>> binascii.hexlify(b'\xb9\x01\xef', ':')
     834             : b'b9:01:ef'
     835             : >>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
     836             : b'b9_01ef'
     837             : [clinic start generated code]*/
     838             : 
     839             : static PyObject *
     840          31 : binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
     841             :                       int bytes_per_sep)
     842             : /*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/
     843             : {
     844          31 :     return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
     845             :                                      sep, bytes_per_sep);
     846             : }
     847             : 
     848             : /*[clinic input]
     849             : binascii.hexlify = binascii.b2a_hex
     850             : 
     851             : Hexadecimal representation of binary data.
     852             : 
     853             : The return value is a bytes object.  This function is also
     854             : available as "b2a_hex()".
     855             : [clinic start generated code]*/
     856             : 
     857             : static PyObject *
     858         236 : binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
     859             :                       int bytes_per_sep)
     860             : /*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/
     861             : {
     862         236 :     return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
     863             :                                      sep, bytes_per_sep);
     864             : }
     865             : 
     866             : /*[clinic input]
     867             : binascii.a2b_hex
     868             : 
     869             :     hexstr: ascii_buffer
     870             :     /
     871             : 
     872             : Binary data of hexadecimal representation.
     873             : 
     874             : hexstr must contain an even number of hex digits (upper or lower case).
     875             : This function is also available as "unhexlify()".
     876             : [clinic start generated code]*/
     877             : 
     878             : static PyObject *
     879       17285 : binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
     880             : /*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
     881             : {
     882             :     const char* argbuf;
     883             :     Py_ssize_t arglen;
     884             :     PyObject *retval;
     885             :     char* retbuf;
     886             :     Py_ssize_t i, j;
     887             :     binascii_state *state;
     888             : 
     889       17285 :     argbuf = hexstr->buf;
     890       17285 :     arglen = hexstr->len;
     891             : 
     892       17285 :     assert(arglen >= 0);
     893             : 
     894             :     /* XXX What should we do about strings with an odd length?  Should
     895             :      * we add an implicit leading zero, or a trailing zero?  For now,
     896             :      * raise an exception.
     897             :      */
     898       17285 :     if (arglen % 2) {
     899           6 :         state = get_binascii_state(module);
     900           6 :         if (state == NULL) {
     901           0 :             return NULL;
     902             :         }
     903           6 :         PyErr_SetString(state->Error, "Odd-length string");
     904           6 :         return NULL;
     905             :     }
     906             : 
     907       17279 :     retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
     908       17279 :     if (!retval)
     909           0 :         return NULL;
     910       17279 :     retbuf = PyBytes_AS_STRING(retval);
     911             : 
     912      114141 :     for (i=j=0; i < arglen; i += 2) {
     913       96886 :         unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])];
     914       96886 :         unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])];
     915       96886 :         if (top >= 16 || bot >= 16) {
     916          24 :             state = get_binascii_state(module);
     917          24 :             if (state == NULL) {
     918           0 :                 return NULL;
     919             :             }
     920          24 :             PyErr_SetString(state->Error,
     921             :                             "Non-hexadecimal digit found");
     922          24 :             goto finally;
     923             :         }
     924       96862 :         retbuf[j++] = (top << 4) + bot;
     925             :     }
     926       17255 :     return retval;
     927             : 
     928          24 :   finally:
     929          24 :     Py_DECREF(retval);
     930          24 :     return NULL;
     931             : }
     932             : 
     933             : /*[clinic input]
     934             : binascii.unhexlify = binascii.a2b_hex
     935             : 
     936             : Binary data of hexadecimal representation.
     937             : 
     938             : hexstr must contain an even number of hex digits (upper or lower case).
     939             : [clinic start generated code]*/
     940             : 
     941             : static PyObject *
     942       17231 : binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
     943             : /*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
     944             : {
     945       17231 :     return binascii_a2b_hex_impl(module, hexstr);
     946             : }
     947             : 
     948             : #define MAXLINESIZE 76
     949             : 
     950             : 
     951             : /*[clinic input]
     952             : binascii.a2b_qp
     953             : 
     954             :     data: ascii_buffer
     955             :     header: bool(accept={int}) = False
     956             : 
     957             : Decode a string of qp-encoded data.
     958             : [clinic start generated code]*/
     959             : 
     960             : static PyObject *
     961         158 : binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
     962             : /*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/
     963             : {
     964             :     Py_ssize_t in, out;
     965             :     char ch;
     966             :     const unsigned char *ascii_data;
     967             :     unsigned char *odata;
     968         158 :     Py_ssize_t datalen = 0;
     969             :     PyObject *rv;
     970             : 
     971         158 :     ascii_data = data->buf;
     972         158 :     datalen = data->len;
     973             : 
     974             :     /* We allocate the output buffer the same size as the input; this is overkill.
     975             :      */
     976         158 :     odata = (unsigned char *) PyMem_Calloc(1, datalen);
     977         158 :     if (odata == NULL) {
     978           0 :         PyErr_NoMemory();
     979           0 :         return NULL;
     980             :     }
     981             : 
     982         158 :     in = out = 0;
     983        6538 :     while (in < datalen) {
     984        6384 :         if (ascii_data[in] == '=') {
     985        1184 :             in++;
     986        1184 :             if (in >= datalen) break;
     987             :             /* Soft line breaks */
     988        1180 :             if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
     989          77 :                 if (ascii_data[in] != '\n') {
     990          40 :                     while (in < datalen && ascii_data[in] != '\n') in++;
     991             :                 }
     992          77 :                 if (in < datalen) in++;
     993             :             }
     994        1103 :             else if (ascii_data[in] == '=') {
     995             :                 /* broken case from broken python qp */
     996           5 :                 odata[out++] = '=';
     997           5 :                 in++;
     998             :             }
     999        1098 :             else if ((in + 1 < datalen) &&
    1000        1090 :                      ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
    1001         288 :                       (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
    1002         272 :                       (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
    1003        1086 :                      ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
    1004         768 :                       (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
    1005         758 :                       (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
    1006             :                 /* hexval */
    1007        1082 :                 ch = _PyLong_DigitValue[ascii_data[in]] << 4;
    1008        1082 :                 in++;
    1009        1082 :                 ch |= _PyLong_DigitValue[ascii_data[in]];
    1010        1082 :                 in++;
    1011        1082 :                 odata[out++] = ch;
    1012             :             }
    1013             :             else {
    1014          16 :               odata[out++] = '=';
    1015             :             }
    1016             :         }
    1017        5200 :         else if (header && ascii_data[in] == '_') {
    1018           5 :             odata[out++] = ' ';
    1019           5 :             in++;
    1020             :         }
    1021             :         else {
    1022        5195 :             odata[out] = ascii_data[in];
    1023        5195 :             in++;
    1024        5195 :             out++;
    1025             :         }
    1026             :     }
    1027         158 :     rv = PyBytes_FromStringAndSize((char *)odata, out);
    1028         158 :     PyMem_Free(odata);
    1029         158 :     return rv;
    1030             : }
    1031             : 
    1032             : static int
    1033         810 : to_hex (unsigned char ch, unsigned char *s)
    1034             : {
    1035         810 :     unsigned int uvalue = ch;
    1036             : 
    1037         810 :     s[1] = "0123456789ABCDEF"[uvalue % 16];
    1038         810 :     uvalue = (uvalue / 16);
    1039         810 :     s[0] = "0123456789ABCDEF"[uvalue % 16];
    1040         810 :     return 0;
    1041             : }
    1042             : 
    1043             : /* XXX: This is ridiculously complicated to be backward compatible
    1044             :  * (mostly) with the quopri module.  It doesn't re-create the quopri
    1045             :  * module bug where text ending in CRLF has the CR encoded */
    1046             : 
    1047             : /*[clinic input]
    1048             : binascii.b2a_qp
    1049             : 
    1050             :     data: Py_buffer
    1051             :     quotetabs: bool(accept={int}) = False
    1052             :     istext: bool(accept={int}) = True
    1053             :     header: bool(accept={int}) = False
    1054             : 
    1055             : Encode a string using quoted-printable encoding.
    1056             : 
    1057             : On encoding, when istext is set, newlines are not encoded, and white
    1058             : space at end of lines is.  When istext is not set, \r and \n (CR/LF)
    1059             : are both encoded.  When quotetabs is set, space and tabs are encoded.
    1060             : [clinic start generated code]*/
    1061             : 
    1062             : static PyObject *
    1063         234 : binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
    1064             :                      int istext, int header)
    1065             : /*[clinic end generated code: output=e9884472ebb1a94c input=21fb7eea4a184ba6]*/
    1066             : {
    1067             :     Py_ssize_t in, out;
    1068             :     const unsigned char *databuf;
    1069             :     unsigned char *odata;
    1070         234 :     Py_ssize_t datalen = 0, odatalen = 0;
    1071             :     PyObject *rv;
    1072         234 :     unsigned int linelen = 0;
    1073             :     unsigned char ch;
    1074         234 :     int crlf = 0;
    1075             :     const unsigned char *p;
    1076             : 
    1077         234 :     databuf = data->buf;
    1078         234 :     datalen = data->len;
    1079             : 
    1080             :     /* See if this string is using CRLF line ends */
    1081             :     /* XXX: this function has the side effect of converting all
    1082             :      * line endings to the same style, depending on the detection
    1083             :      * here */
    1084         234 :     p = (const unsigned char *) memchr(databuf, '\n', datalen);
    1085         234 :     if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
    1086          20 :         crlf = 1;
    1087             : 
    1088             :     /* First, scan to see how many characters need to be encoded */
    1089         234 :     in = 0;
    1090        4995 :     while (in < datalen) {
    1091        4761 :         Py_ssize_t delta = 0;
    1092        4761 :         if ((databuf[in] > 126) ||
    1093        4286 :             (databuf[in] == '=') ||
    1094          70 :             (header && databuf[in] == '_') ||
    1095        4163 :             ((databuf[in] == '.') && (linelen == 0) &&
    1096          20 :              (in + 1 == datalen || databuf[in+1] == '\n' ||
    1097        4151 :               databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
    1098         239 :             (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
    1099        4101 :             ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
    1100        4076 :             ((databuf[in] < 33) &&
    1101         583 :              (databuf[in] != '\r') && (databuf[in] != '\n') &&
    1102         296 :              (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
    1103             :         {
    1104         791 :             if ((linelen + 3) >= MAXLINESIZE) {
    1105          13 :                 linelen = 0;
    1106          13 :                 if (crlf)
    1107           4 :                     delta += 3;
    1108             :                 else
    1109           9 :                     delta += 2;
    1110             :             }
    1111         791 :             linelen += 3;
    1112         791 :             delta += 3;
    1113         791 :             in++;
    1114             :         }
    1115             :         else {
    1116        3970 :             if (istext &&
    1117        3775 :                 ((databuf[in] == '\n') ||
    1118        3655 :                  ((in+1 < datalen) && (databuf[in] == '\r') &&
    1119          37 :                  (databuf[in+1] == '\n'))))
    1120             :             {
    1121         148 :                 linelen = 0;
    1122             :                 /* Protect against whitespace on end of line */
    1123         148 :                 if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
    1124          37 :                     delta += 2;
    1125         148 :                 if (crlf)
    1126          32 :                     delta += 2;
    1127             :                 else
    1128         116 :                     delta += 1;
    1129         148 :                 if (databuf[in] == '\r')
    1130          28 :                     in += 2;
    1131             :                 else
    1132         120 :                     in++;
    1133             :             }
    1134             :             else {
    1135        3822 :                 if ((in + 1 != datalen) &&
    1136        3720 :                     (databuf[in+1] != '\n') &&
    1137        3662 :                     (linelen + 1) >= MAXLINESIZE) {
    1138          11 :                     linelen = 0;
    1139          11 :                     if (crlf)
    1140           0 :                         delta += 3;
    1141             :                     else
    1142          11 :                         delta += 2;
    1143             :                 }
    1144        3822 :                 linelen++;
    1145        3822 :                 delta++;
    1146        3822 :                 in++;
    1147             :             }
    1148             :         }
    1149        4761 :         if (PY_SSIZE_T_MAX - delta < odatalen) {
    1150           0 :             PyErr_NoMemory();
    1151           0 :             return NULL;
    1152             :         }
    1153        4761 :         odatalen += delta;
    1154             :     }
    1155             : 
    1156             :     /* We allocate the output same size as input, this is overkill.
    1157             :      */
    1158         234 :     odata = (unsigned char *) PyMem_Calloc(1, odatalen);
    1159         234 :     if (odata == NULL) {
    1160           0 :         PyErr_NoMemory();
    1161           0 :         return NULL;
    1162             :     }
    1163             : 
    1164         234 :     in = out = linelen = 0;
    1165        4995 :     while (in < datalen) {
    1166        4761 :         if ((databuf[in] > 126) ||
    1167        4286 :             (databuf[in] == '=') ||
    1168          70 :             (header && databuf[in] == '_') ||
    1169        4163 :             ((databuf[in] == '.') && (linelen == 0) &&
    1170          20 :              (in + 1 == datalen || databuf[in+1] == '\n' ||
    1171        4151 :               databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
    1172         239 :             (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
    1173        4101 :             ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
    1174        4076 :             ((databuf[in] < 33) &&
    1175         583 :              (databuf[in] != '\r') && (databuf[in] != '\n') &&
    1176         296 :              (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
    1177             :         {
    1178         791 :             if ((linelen + 3 )>= MAXLINESIZE) {
    1179          13 :                 odata[out++] = '=';
    1180          13 :                 if (crlf) odata[out++] = '\r';
    1181          13 :                 odata[out++] = '\n';
    1182          13 :                 linelen = 0;
    1183             :             }
    1184         791 :             odata[out++] = '=';
    1185         791 :             to_hex(databuf[in], &odata[out]);
    1186         791 :             out += 2;
    1187         791 :             in++;
    1188         791 :             linelen += 3;
    1189             :         }
    1190             :         else {
    1191        3970 :             if (istext &&
    1192        3775 :                 ((databuf[in] == '\n') ||
    1193        3655 :                  ((in+1 < datalen) && (databuf[in] == '\r') &&
    1194          37 :                  (databuf[in+1] == '\n'))))
    1195             :             {
    1196         148 :                 linelen = 0;
    1197             :                 /* Protect against whitespace on end of line */
    1198         148 :                 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
    1199          19 :                     ch = odata[out-1];
    1200          19 :                     odata[out-1] = '=';
    1201          19 :                     to_hex(ch, &odata[out]);
    1202          19 :                     out += 2;
    1203             :                 }
    1204             : 
    1205         148 :                 if (crlf) odata[out++] = '\r';
    1206         148 :                 odata[out++] = '\n';
    1207         148 :                 if (databuf[in] == '\r')
    1208          28 :                     in += 2;
    1209             :                 else
    1210         120 :                     in++;
    1211             :             }
    1212             :             else {
    1213        3822 :                 if ((in + 1 != datalen) &&
    1214        3720 :                     (databuf[in+1] != '\n') &&
    1215        3662 :                     (linelen + 1) >= MAXLINESIZE) {
    1216          11 :                     odata[out++] = '=';
    1217          11 :                     if (crlf) odata[out++] = '\r';
    1218          11 :                     odata[out++] = '\n';
    1219          11 :                     linelen = 0;
    1220             :                 }
    1221        3822 :                 linelen++;
    1222        3822 :                 if (header && databuf[in] == ' ') {
    1223           5 :                     odata[out++] = '_';
    1224           5 :                     in++;
    1225             :                 }
    1226             :                 else {
    1227        3817 :                     odata[out++] = databuf[in++];
    1228             :                 }
    1229             :             }
    1230             :         }
    1231             :     }
    1232         234 :     rv = PyBytes_FromStringAndSize((char *)odata, out);
    1233         234 :     PyMem_Free(odata);
    1234         234 :     return rv;
    1235             : }
    1236             : 
    1237             : /* List of functions defined in the module */
    1238             : 
    1239             : static struct PyMethodDef binascii_module_methods[] = {
    1240             :     BINASCII_A2B_UU_METHODDEF
    1241             :     BINASCII_B2A_UU_METHODDEF
    1242             :     BINASCII_A2B_BASE64_METHODDEF
    1243             :     BINASCII_B2A_BASE64_METHODDEF
    1244             :     BINASCII_A2B_HEX_METHODDEF
    1245             :     BINASCII_B2A_HEX_METHODDEF
    1246             :     BINASCII_HEXLIFY_METHODDEF
    1247             :     BINASCII_UNHEXLIFY_METHODDEF
    1248             :     BINASCII_CRC_HQX_METHODDEF
    1249             :     BINASCII_CRC32_METHODDEF
    1250             :     BINASCII_A2B_QP_METHODDEF
    1251             :     BINASCII_B2A_QP_METHODDEF
    1252             :     {NULL, NULL}                             /* sentinel */
    1253             : };
    1254             : 
    1255             : 
    1256             : /* Initialization function for the module (*must* be called PyInit_binascii) */
    1257             : PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
    1258             : 
    1259             : static int
    1260         616 : binascii_exec(PyObject *module) {
    1261             :     int result;
    1262         616 :     binascii_state *state = PyModule_GetState(module);
    1263         616 :     if (state == NULL) {
    1264           0 :         return -1;
    1265             :     }
    1266             : 
    1267         616 :     state->Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
    1268         616 :     if (state->Error == NULL) {
    1269           0 :         return -1;
    1270             :     }
    1271         616 :     Py_INCREF(state->Error);
    1272         616 :     result = PyModule_AddObject(module, "Error", state->Error);
    1273         616 :     if (result == -1) {
    1274           0 :         Py_DECREF(state->Error);
    1275           0 :         return -1;
    1276             :     }
    1277             : 
    1278         616 :     state->Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
    1279         616 :     if (state->Incomplete == NULL) {
    1280           0 :         return -1;
    1281             :     }
    1282         616 :     Py_INCREF(state->Incomplete);
    1283         616 :     result = PyModule_AddObject(module, "Incomplete", state->Incomplete);
    1284         616 :     if (result == -1) {
    1285           0 :         Py_DECREF(state->Incomplete);
    1286           0 :         return -1;
    1287             :     }
    1288             : 
    1289         616 :     return 0;
    1290             : }
    1291             : 
    1292             : static PyModuleDef_Slot binascii_slots[] = {
    1293             :     {Py_mod_exec, binascii_exec},
    1294             :     {0, NULL}
    1295             : };
    1296             : 
    1297             : static int
    1298       23702 : binascii_traverse(PyObject *module, visitproc visit, void *arg)
    1299             : {
    1300       23702 :     binascii_state *state = get_binascii_state(module);
    1301       23702 :     Py_VISIT(state->Error);
    1302       23702 :     Py_VISIT(state->Incomplete);
    1303       23702 :     return 0;
    1304             : }
    1305             : 
    1306             : static int
    1307         809 : binascii_clear(PyObject *module)
    1308             : {
    1309         809 :     binascii_state *state = get_binascii_state(module);
    1310         809 :     Py_CLEAR(state->Error);
    1311         809 :     Py_CLEAR(state->Incomplete);
    1312         809 :     return 0;
    1313             : }
    1314             : 
    1315             : static void
    1316         616 : binascii_free(void *module)
    1317             : {
    1318         616 :     binascii_clear((PyObject *)module);
    1319         616 : }
    1320             : 
    1321             : static struct PyModuleDef binasciimodule = {
    1322             :     PyModuleDef_HEAD_INIT,
    1323             :     "binascii",
    1324             :     doc_binascii,
    1325             :     sizeof(binascii_state),
    1326             :     binascii_module_methods,
    1327             :     binascii_slots,
    1328             :     binascii_traverse,
    1329             :     binascii_clear,
    1330             :     binascii_free
    1331             : };
    1332             : 
    1333             : PyMODINIT_FUNC
    1334         616 : PyInit_binascii(void)
    1335             : {
    1336         616 :     return PyModuleDef_Init(&binasciimodule);
    1337             : }

Generated by: LCOV version 1.14