LCOV - CPython lcov report - Modules/mathmodule.c

LCOV - code coverage report

Current view:	top level - Modules - mathmodule.c (source / functions)		Hit	Total	Coverage
Test:	CPython lcov report	Lines:	1147	1253	91.5 %
Date:	2022-07-07 18:19:46	Functions:	89	89	100.0 %

          Line data    Source code

       1             : /* Math module -- standard C math library functions, pi and e */
       2             : 
       3             : /* Here are some comments from Tim Peters, extracted from the
       4             :    discussion attached to http://bugs.python.org/issue1640.  They
       5             :    describe the general aims of the math module with respect to
       6             :    special values, IEEE-754 floating-point exceptions, and Python
       7             :    exceptions.
       8             : 
       9             : These are the "spirit of 754" rules:
      10             : 
      11             : 1. If the mathematical result is a real number, but of magnitude too
      12             : large to approximate by a machine float, overflow is signaled and the
      13             : result is an infinity (with the appropriate sign).
      14             : 
      15             : 2. If the mathematical result is a real number, but of magnitude too
      16             : small to approximate by a machine float, underflow is signaled and the
      17             : result is a zero (with the appropriate sign).
      18             : 
      19             : 3. At a singularity (a value x such that the limit of f(y) as y
      20             : approaches x exists and is an infinity), "divide by zero" is signaled
      21             : and the result is an infinity (with the appropriate sign).  This is
      22             : complicated a little by that the left-side and right-side limits may
      23             : not be the same; e.g., 1/x approaches +inf or -inf as x approaches 0
      24             : from the positive or negative directions.  In that specific case, the
      25             : sign of the zero determines the result of 1/0.
      26             : 
      27             : 4. At a point where a function has no defined result in the extended
      28             : reals (i.e., the reals plus an infinity or two), invalid operation is
      29             : signaled and a NaN is returned.
      30             : 
      31             : And these are what Python has historically /tried/ to do (but not
      32             : always successfully, as platform libm behavior varies a lot):
      33             : 
      34             : For #1, raise OverflowError.
      35             : 
      36             : For #2, return a zero (with the appropriate sign if that happens by
      37             : accident ;-)).
      38             : 
      39             : For #3 and #4, raise ValueError.  It may have made sense to raise
      40             : Python's ZeroDivisionError in #3, but historically that's only been
      41             : raised for division by zero and mod by zero.
      42             : 
      43             : */
      44             : 
      45             : /*
      46             :    In general, on an IEEE-754 platform the aim is to follow the C99
      47             :    standard, including Annex 'F', whenever possible.  Where the
      48             :    standard recommends raising the 'divide-by-zero' or 'invalid'
      49             :    floating-point exceptions, Python should raise a ValueError.  Where
      50             :    the standard recommends raising 'overflow', Python should raise an
      51             :    OverflowError.  In all other circumstances a value should be
      52             :    returned.
      53             :  */
      54             : 
      55             : #ifndef Py_BUILD_CORE_BUILTIN
      56             : #  define Py_BUILD_CORE_MODULE 1
      57             : #endif
      58             : 
      59             : #include "Python.h"
      60             : #include "pycore_bitutils.h"      // _Py_bit_length()
      61             : #include "pycore_call.h"          // _PyObject_CallNoArgs()
      62             : #include "pycore_dtoa.h"          // _Py_dg_infinity()
      63             : #include "pycore_long.h"          // _PyLong_GetZero()
      64             : #include "pycore_moduleobject.h"  // _PyModule_GetState()
      65             : #include "pycore_object.h"        // _PyObject_LookupSpecial()
      66             : #include "pycore_pymath.h"        // _PY_SHORT_FLOAT_REPR
      67             : /* For DBL_EPSILON in _math.h */
      68             : #include <float.h>
      69             : /* For _Py_log1p with workarounds for buggy handling of zeros. */
      70             : #include "_math.h"
      71             : 
      72             : #include "clinic/mathmodule.c.h"
      73             : 
      74             : /*[clinic input]
      75             : module math
      76             : [clinic start generated code]*/
      77             : /*[clinic end generated code: output=da39a3ee5e6b4b0d input=76bc7002685dd942]*/
      78             : 
      79             : 
      80             : typedef struct {
      81             :     PyObject *str___ceil__;
      82             :     PyObject *str___floor__;
      83             :     PyObject *str___trunc__;
      84             : } math_module_state;
      85             : 
      86             : static inline math_module_state*
      87        3912 : get_math_module_state(PyObject *module)
      88             : {
      89        3912 :     void *state = _PyModule_GetState(module);
      90        3912 :     assert(state != NULL);
      91        3912 :     return (math_module_state *)state;
      92             : }
      93             : 
      94             : /*
      95             :    sin(pi*x), giving accurate results for all finite x (especially x
      96             :    integral or close to an integer).  This is here for use in the
      97             :    reflection formula for the gamma function.  It conforms to IEEE
      98             :    754-2008 for finite arguments, but not for infinities or nans.
      99             : */
     100             : 
     101             : static const double pi = 3.141592653589793238462643383279502884197;
     102             : static const double logpi = 1.144729885849400174143427351353058711647;
     103             : #if !defined(HAVE_ERF) || !defined(HAVE_ERFC)
     104             : static const double sqrtpi = 1.772453850905516027298167483341145182798;
     105             : #endif /* !defined(HAVE_ERF) || !defined(HAVE_ERFC) */
     106             : 
     107             : 
     108             : /* Version of PyFloat_AsDouble() with in-line fast paths
     109             :    for exact floats and integers.  Gives a substantial
     110             :    speed improvement for extracting float arguments.
     111             : */
     112             : 
     113             : #define ASSIGN_DOUBLE(target_var, obj, error_label)        \
     114             :     if (PyFloat_CheckExact(obj)) {                         \
     115             :         target_var = PyFloat_AS_DOUBLE(obj);               \
     116             :     }                                                      \
     117             :     else if (PyLong_CheckExact(obj)) {                     \
     118             :         target_var = PyLong_AsDouble(obj);                 \
     119             :         if (target_var == -1.0 && PyErr_Occurred()) {      \
     120             :             goto error_label;                              \
     121             :         }                                                  \
     122             :     }                                                      \
     123             :     else {                                                 \
     124             :         target_var = PyFloat_AsDouble(obj);                \
     125             :         if (target_var == -1.0 && PyErr_Occurred()) {      \
     126             :             goto error_label;                              \
     127             :         }                                                  \
     128             :     }
     129             : 
     130             : static double
     131          55 : m_sinpi(double x)
     132             : {
     133             :     double y, r;
     134             :     int n;
     135             :     /* this function should only ever be called for finite arguments */
     136          55 :     assert(Py_IS_FINITE(x));
     137          55 :     y = fmod(fabs(x), 2.0);
     138          55 :     n = (int)round(2.0*y);
     139          55 :     assert(0 <= n && n <= 4);
     140          55 :     switch (n) {
     141          12 :     case 0:
     142          12 :         r = sin(pi*y);
     143          12 :         break;
     144          20 :     case 1:
     145          20 :         r = cos(pi*(y-0.5));
     146          20 :         break;
     147           6 :     case 2:
     148             :         /* N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give
     149             :            -0.0 instead of 0.0 when y == 1.0. */
     150           6 :         r = sin(pi*(1.0-y));
     151           6 :         break;
     152          13 :     case 3:
     153          13 :         r = -cos(pi*(y-1.5));
     154          13 :         break;
     155           4 :     case 4:
     156           4 :         r = sin(pi*(y-2.0));
     157           4 :         break;
     158           0 :     default:
     159           0 :         Py_UNREACHABLE();
     160             :     }
     161          55 :     return copysign(1.0, x)*r;
     162             : }
     163             : 
     164             : /* Implementation of the real gamma function.  In extensive but non-exhaustive
     165             :    random tests, this function proved accurate to within <= 10 ulps across the
     166             :    entire float domain.  Note that accuracy may depend on the quality of the
     167             :    system math functions, the pow function in particular.  Special cases
     168             :    follow C99 annex F.  The parameters and method are tailored to platforms
     169             :    whose double format is the IEEE 754 binary64 format.
     170             : 
     171             :    Method: for x > 0.0 we use the Lanczos approximation with parameters N=13
     172             :    and g=6.024680040776729583740234375; these parameters are amongst those
     173             :    used by the Boost library.  Following Boost (again), we re-express the
     174             :    Lanczos sum as a rational function, and compute it that way.  The
     175             :    coefficients below were computed independently using MPFR, and have been
     176             :    double-checked against the coefficients in the Boost source code.
     177             : 
     178             :    For x < 0.0 we use the reflection formula.
     179             : 
     180             :    There's one minor tweak that deserves explanation: Lanczos' formula for
     181             :    Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5).  For many x
     182             :    values, x+g-0.5 can be represented exactly.  However, in cases where it
     183             :    can't be represented exactly the small error in x+g-0.5 can be magnified
     184             :    significantly by the pow and exp calls, especially for large x.  A cheap
     185             :    correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error
     186             :    involved in the computation of x+g-0.5 (that is, e = computed value of
     187             :    x+g-0.5 - exact value of x+g-0.5).  Here's the proof:
     188             : 
     189             :    Correction factor
     190             :    -----------------
     191             :    Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754
     192             :    double, and e is tiny.  Then:
     193             : 
     194             :      pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e)
     195             :      = pow(y, x-0.5)/exp(y) * C,
     196             : 
     197             :    where the correction_factor C is given by
     198             : 
     199             :      C = pow(1-e/y, x-0.5) * exp(e)
     200             : 
     201             :    Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(x) ~ 1+e, so:
     202             : 
     203             :      C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y
     204             : 
     205             :    But y-(x-0.5) = g+e, and g+e ~ g.  So we get C ~ 1 + e*g/y, and
     206             : 
     207             :      pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y),
     208             : 
     209             :    Note that for accuracy, when computing r*C it's better to do
     210             : 
     211             :      r + e*g/y*r;
     212             : 
     213             :    than
     214             : 
     215             :      r * (1 + e*g/y);
     216             : 
     217             :    since the addition in the latter throws away most of the bits of
     218             :    information in e*g/y.
     219             : */
     220             : 
     221             : #define LANCZOS_N 13
     222             : static const double lanczos_g = 6.024680040776729583740234375;
     223             : static const double lanczos_g_minus_half = 5.524680040776729583740234375;
     224             : static const double lanczos_num_coeffs[LANCZOS_N] = {
     225             :     23531376880.410759688572007674451636754734846804940,
     226             :     42919803642.649098768957899047001988850926355848959,
     227             :     35711959237.355668049440185451547166705960488635843,
     228             :     17921034426.037209699919755754458931112671403265390,
     229             :     6039542586.3520280050642916443072979210699388420708,
     230             :     1439720407.3117216736632230727949123939715485786772,
     231             :     248874557.86205415651146038641322942321632125127801,
     232             :     31426415.585400194380614231628318205362874684987640,
     233             :     2876370.6289353724412254090516208496135991145378768,
     234             :     186056.26539522349504029498971604569928220784236328,
     235             :     8071.6720023658162106380029022722506138218516325024,
     236             :     210.82427775157934587250973392071336271166969580291,
     237             :     2.5066282746310002701649081771338373386264310793408
     238             : };
     239             : 
     240             : /* denominator is x*(x+1)*...*(x+LANCZOS_N-2) */
     241             : static const double lanczos_den_coeffs[LANCZOS_N] = {
     242             :     0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0,
     243             :     13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0};
     244             : 
     245             : /* gamma values for small positive integers, 1 though NGAMMA_INTEGRAL */
     246             : #define NGAMMA_INTEGRAL 23
     247             : static const double gamma_integral[NGAMMA_INTEGRAL] = {
     248             :     1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0,
     249             :     3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0,
     250             :     1307674368000.0, 20922789888000.0, 355687428096000.0,
     251             :     6402373705728000.0, 121645100408832000.0, 2432902008176640000.0,
     252             :     51090942171709440000.0, 1124000727777607680000.0,
     253             : };
     254             : 
     255             : /* Lanczos' sum L_g(x), for positive x */
     256             : 
     257             : static double
     258          89 : lanczos_sum(double x)
     259             : {
     260          89 :     double num = 0.0, den = 0.0;
     261             :     int i;
     262          89 :     assert(x > 0.0);
     263             :     /* evaluate the rational function lanczos_sum(x).  For large
     264             :        x, the obvious algorithm risks overflow, so we instead
     265             :        rescale the denominator and numerator of the rational
     266             :        function by x**(1-LANCZOS_N) and treat this as a
     267             :        rational function in 1/x.  This also reduces the error for
     268             :        larger x values.  The choice of cutoff point (5.0 below) is
     269             :        somewhat arbitrary; in tests, smaller cutoff values than
     270             :        this resulted in lower accuracy. */
     271          89 :     if (x < 5.0) {
     272         686 :         for (i = LANCZOS_N; --i >= 0; ) {
     273         637 :             num = num * x + lanczos_num_coeffs[i];
     274         637 :             den = den * x + lanczos_den_coeffs[i];
     275             :         }
     276             :     }
     277             :     else {
     278         560 :         for (i = 0; i < LANCZOS_N; i++) {
     279         520 :             num = num / x + lanczos_num_coeffs[i];
     280         520 :             den = den / x + lanczos_den_coeffs[i];
     281             :         }
     282             :     }
     283          89 :     return num/den;
     284             : }
     285             : 
     286             : /* Constant for +infinity, generated in the same way as float('inf'). */
     287             : 
     288             : static double
     289        1325 : m_inf(void)
     290             : {
     291             : #if _PY_SHORT_FLOAT_REPR == 1
     292        1325 :     return _Py_dg_infinity(0);
     293             : #else
     294             :     return Py_HUGE_VAL;
     295             : #endif
     296             : }
     297             : 
     298             : /* Constant nan value, generated in the same way as float('nan'). */
     299             : /* We don't currently assume that Py_NAN is defined everywhere. */
     300             : 
     301             : #if _PY_SHORT_FLOAT_REPR == 1
     302             : 
     303             : static double
     304        1308 : m_nan(void)
     305             : {
     306             : #if _PY_SHORT_FLOAT_REPR == 1
     307        1308 :     return _Py_dg_stdnan(0);
     308             : #else
     309             :     return Py_NAN;
     310             : #endif
     311             : }
     312             : 
     313             : #endif
     314             : 
     315             : static double
     316          76 : m_tgamma(double x)
     317             : {
     318             :     double absx, r, y, z, sqrtpow;
     319             : 
     320             :     /* special cases */
     321          76 :     if (!Py_IS_FINITE(x)) {
     322           3 :         if (Py_IS_NAN(x) || x > 0.0)
     323           2 :             return x;  /* tgamma(nan) = nan, tgamma(inf) = inf */
     324             :         else {
     325           1 :             errno = EDOM;
     326           1 :             return Py_NAN;  /* tgamma(-inf) = nan, invalid */
     327             :         }
     328             :     }
     329          73 :     if (x == 0.0) {
     330           2 :         errno = EDOM;
     331             :         /* tgamma(+-0.0) = +-inf, divide-by-zero */
     332           2 :         return copysign(Py_HUGE_VAL, x);
     333             :     }
     334             : 
     335             :     /* integer arguments */
     336          71 :     if (x == floor(x)) {
     337          15 :         if (x < 0.0) {
     338           4 :             errno = EDOM;  /* tgamma(n) = nan, invalid for */
     339           4 :             return Py_NAN; /* negative integers n */
     340             :         }
     341          11 :         if (x <= NGAMMA_INTEGRAL)
     342           6 :             return gamma_integral[(int)x - 1];
     343             :     }
     344          61 :     absx = fabs(x);
     345             : 
     346             :     /* tiny arguments:  tgamma(x) ~ 1/x for x near 0 */
     347          61 :     if (absx < 1e-20) {
     348          16 :         r = 1.0/x;
     349          16 :         if (Py_IS_INFINITY(r))
     350           8 :             errno = ERANGE;
     351          16 :         return r;
     352             :     }
     353             : 
     354             :     /* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for
     355             :        x > 200, and underflows to +-0.0 for x < -200, not a negative
     356             :        integer. */
     357          45 :     if (absx > 200.0) {
     358           7 :         if (x < 0.0) {
     359           5 :             return 0.0/m_sinpi(x);
     360             :         }
     361             :         else {
     362           2 :             errno = ERANGE;
     363           2 :             return Py_HUGE_VAL;
     364             :         }
     365             :     }
     366             : 
     367          38 :     y = absx + lanczos_g_minus_half;
     368             :     /* compute error in sum */
     369          38 :     if (absx > lanczos_g_minus_half) {
     370             :         /* note: the correction can be foiled by an optimizing
     371             :            compiler that (incorrectly) thinks that an expression like
     372             :            a + b - a - b can be optimized to 0.0.  This shouldn't
     373             :            happen in a standards-conforming compiler. */
     374          17 :         double q = y - absx;
     375          17 :         z = q - lanczos_g_minus_half;
     376             :     }
     377             :     else {
     378          21 :         double q = y - lanczos_g_minus_half;
     379          21 :         z = q - absx;
     380             :     }
     381          38 :     z = z * lanczos_g / y;
     382          38 :     if (x < 0.0) {
     383          24 :         r = -pi / m_sinpi(absx) / absx * exp(y) / lanczos_sum(absx);
     384          24 :         r -= z * r;
     385          24 :         if (absx < 140.0) {
     386          17 :             r /= pow(y, absx - 0.5);
     387             :         }
     388             :         else {
     389           7 :             sqrtpow = pow(y, absx / 2.0 - 0.25);
     390           7 :             r /= sqrtpow;
     391           7 :             r /= sqrtpow;
     392             :         }
     393             :     }
     394             :     else {
     395          14 :         r = lanczos_sum(absx) / exp(y);
     396          14 :         r += z * r;
     397          14 :         if (absx < 140.0) {
     398           9 :             r *= pow(y, absx - 0.5);
     399             :         }
     400             :         else {
     401           5 :             sqrtpow = pow(y, absx / 2.0 - 0.25);
     402           5 :             r *= sqrtpow;
     403           5 :             r *= sqrtpow;
     404             :         }
     405             :     }
     406          38 :     if (Py_IS_INFINITY(r))
     407           2 :         errno = ERANGE;
     408          38 :     return r;
     409             : }
     410             : 
     411             : /*
     412             :    lgamma:  natural log of the absolute value of the Gamma function.
     413             :    For large arguments, Lanczos' formula works extremely well here.
     414             : */
     415             : 
     416             : static double
     417          79 : m_lgamma(double x)
     418             : {
     419             :     double r;
     420             :     double absx;
     421             : 
     422             :     /* special cases */
     423          79 :     if (!Py_IS_FINITE(x)) {
     424           3 :         if (Py_IS_NAN(x))
     425           1 :             return x;  /* lgamma(nan) = nan */
     426             :         else
     427           2 :             return Py_HUGE_VAL; /* lgamma(+-inf) = +inf */
     428             :     }
     429             : 
     430             :     /* integer arguments */
     431          76 :     if (x == floor(x) && x <= 2.0) {
     432           9 :         if (x <= 0.0) {
     433           7 :             errno = EDOM;  /* lgamma(n) = inf, divide-by-zero for */
     434           7 :             return Py_HUGE_VAL; /* integers n <= 0 */
     435             :         }
     436             :         else {
     437           2 :             return 0.0; /* lgamma(1) = lgamma(2) = 0.0 */
     438             :         }
     439             :     }
     440             : 
     441          67 :     absx = fabs(x);
     442             :     /* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */
     443          67 :     if (absx < 1e-20)
     444          16 :         return -log(absx);
     445             : 
     446             :     /* Lanczos' formula.  We could save a fraction of a ulp in accuracy by
     447             :        having a second set of numerator coefficients for lanczos_sum that
     448             :        absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
     449             :        subtraction below; it's probably not worth it. */
     450          51 :     r = log(lanczos_sum(absx)) - lanczos_g;
     451          51 :     r += (absx - 0.5) * (log(absx + lanczos_g - 0.5) - 1);
     452          51 :     if (x < 0.0)
     453             :         /* Use reflection formula to get value for negative x. */
     454          26 :         r = logpi - log(fabs(m_sinpi(absx))) - log(absx) - r;
     455          51 :     if (Py_IS_INFINITY(r))
     456           2 :         errno = ERANGE;
     457          51 :     return r;
     458             : }
     459             : 
     460             : #if !defined(HAVE_ERF) || !defined(HAVE_ERFC)
     461             : 
     462             : /*
     463             :    Implementations of the error function erf(x) and the complementary error
     464             :    function erfc(x).
     465             : 
     466             :    Method: we use a series approximation for erf for small x, and a continued
     467             :    fraction approximation for erfc(x) for larger x;
     468             :    combined with the relations erf(-x) = -erf(x) and erfc(x) = 1.0 - erf(x),
     469             :    this gives us erf(x) and erfc(x) for all x.
     470             : 
     471             :    The series expansion used is:
     472             : 
     473             :       erf(x) = x*exp(-x*x)/sqrt(pi) * [
     474             :                      2/1 + 4/3 x**2 + 8/15 x**4 + 16/105 x**6 + ...]
     475             : 
     476             :    The coefficient of x**(2k-2) here is 4**k*factorial(k)/factorial(2*k).
     477             :    This series converges well for smallish x, but slowly for larger x.
     478             : 
     479             :    The continued fraction expansion used is:
     480             : 
     481             :       erfc(x) = x*exp(-x*x)/sqrt(pi) * [1/(0.5 + x**2 -) 0.5/(2.5 + x**2 - )
     482             :                               3.0/(4.5 + x**2 - ) 7.5/(6.5 + x**2 - ) ...]
     483             : 
     484             :    after the first term, the general term has the form:
     485             : 
     486             :       k*(k-0.5)/(2*k+0.5 + x**2 - ...).
     487             : 
     488             :    This expansion converges fast for larger x, but convergence becomes
     489             :    infinitely slow as x approaches 0.0.  The (somewhat naive) continued
     490             :    fraction evaluation algorithm used below also risks overflow for large x;
     491             :    but for large x, erfc(x) == 0.0 to within machine precision.  (For
     492             :    example, erfc(30.0) is approximately 2.56e-393).
     493             : 
     494             :    Parameters: use series expansion for abs(x) < ERF_SERIES_CUTOFF and
     495             :    continued fraction expansion for ERF_SERIES_CUTOFF <= abs(x) <
     496             :    ERFC_CONTFRAC_CUTOFF.  ERFC_SERIES_TERMS and ERFC_CONTFRAC_TERMS are the
     497             :    numbers of terms to use for the relevant expansions.  */
     498             : 
     499             : #define ERF_SERIES_CUTOFF 1.5
     500             : #define ERF_SERIES_TERMS 25
     501             : #define ERFC_CONTFRAC_CUTOFF 30.0
     502             : #define ERFC_CONTFRAC_TERMS 50
     503             : 
     504             : /*
     505             :    Error function, via power series.
     506             : 
     507             :    Given a finite float x, return an approximation to erf(x).
     508             :    Converges reasonably fast for small x.
     509             : */
     510             : 
     511             : static double
     512             : m_erf_series(double x)
     513             : {
     514             :     double x2, acc, fk, result;
     515             :     int i, saved_errno;
     516             : 
     517             :     x2 = x * x;
     518             :     acc = 0.0;
     519             :     fk = (double)ERF_SERIES_TERMS + 0.5;
     520             :     for (i = 0; i < ERF_SERIES_TERMS; i++) {
     521             :         acc = 2.0 + x2 * acc / fk;
     522             :         fk -= 1.0;
     523             :     }
     524             :     /* Make sure the exp call doesn't affect errno;
     525             :        see m_erfc_contfrac for more. */
     526             :     saved_errno = errno;
     527             :     result = acc * x * exp(-x2) / sqrtpi;
     528             :     errno = saved_errno;
     529             :     return result;
     530             : }
     531             : 
     532             : /*
     533             :    Complementary error function, via continued fraction expansion.
     534             : 
     535             :    Given a positive float x, return an approximation to erfc(x).  Converges
     536             :    reasonably fast for x large (say, x > 2.0), and should be safe from
     537             :    overflow if x and nterms are not too large.  On an IEEE 754 machine, with x
     538             :    <= 30.0, we're safe up to nterms = 100.  For x >= 30.0, erfc(x) is smaller
     539             :    than the smallest representable nonzero float.  */
     540             : 
     541             : static double
     542             : m_erfc_contfrac(double x)
     543             : {
     544             :     double x2, a, da, p, p_last, q, q_last, b, result;
     545             :     int i, saved_errno;
     546             : 
     547             :     if (x >= ERFC_CONTFRAC_CUTOFF)
     548             :         return 0.0;
     549             : 
     550             :     x2 = x*x;
     551             :     a = 0.0;
     552             :     da = 0.5;
     553             :     p = 1.0; p_last = 0.0;
     554             :     q = da + x2; q_last = 1.0;
     555             :     for (i = 0; i < ERFC_CONTFRAC_TERMS; i++) {
     556             :         double temp;
     557             :         a += da;
     558             :         da += 2.0;
     559             :         b = da + x2;
     560             :         temp = p; p = b*p - a*p_last; p_last = temp;
     561             :         temp = q; q = b*q - a*q_last; q_last = temp;
     562             :     }
     563             :     /* Issue #8986: On some platforms, exp sets errno on underflow to zero;
     564             :        save the current errno value so that we can restore it later. */
     565             :     saved_errno = errno;
     566             :     result = p / q * x * exp(-x2) / sqrtpi;
     567             :     errno = saved_errno;
     568             :     return result;
     569             : }
     570             : 
     571             : #endif /* !defined(HAVE_ERF) || !defined(HAVE_ERFC) */
     572             : 
     573             : /* Error function erf(x), for general x */
     574             : 
     575             : static double
     576     2098540 : m_erf(double x)
     577             : {
     578             : #ifdef HAVE_ERF
     579     2098540 :     return erf(x);
     580             : #else
     581             :     double absx, cf;
     582             : 
     583             :     if (Py_IS_NAN(x))
     584             :         return x;
     585             :     absx = fabs(x);
     586             :     if (absx < ERF_SERIES_CUTOFF)
     587             :         return m_erf_series(x);
     588             :     else {
     589             :         cf = m_erfc_contfrac(absx);
     590             :         return x > 0.0 ? 1.0 - cf : cf - 1.0;
     591             :     }
     592             : #endif
     593             : }
     594             : 
     595             : /* Complementary error function erfc(x), for general x. */
     596             : 
     597             : static double
     598          44 : m_erfc(double x)
     599             : {
     600             : #ifdef HAVE_ERFC
     601          44 :     return erfc(x);
     602             : #else
     603             :     double absx, cf;
     604             : 
     605             :     if (Py_IS_NAN(x))
     606             :         return x;
     607             :     absx = fabs(x);
     608             :     if (absx < ERF_SERIES_CUTOFF)
     609             :         return 1.0 - m_erf_series(x);
     610             :     else {
     611             :         cf = m_erfc_contfrac(absx);
     612             :         return x > 0.0 ? cf : 2.0 - cf;
     613             :     }
     614             : #endif
     615             : }
     616             : 
     617             : /*
     618             :    wrapper for atan2 that deals directly with special cases before
     619             :    delegating to the platform libm for the remaining cases.  This
     620             :    is necessary to get consistent behaviour across platforms.
     621             :    Windows, FreeBSD and alpha Tru64 are amongst platforms that don't
     622             :    always follow C99.
     623             : */
     624             : 
     625             : static double
     626         114 : m_atan2(double y, double x)
     627             : {
     628         114 :     if (Py_IS_NAN(x) || Py_IS_NAN(y))
     629          13 :         return Py_NAN;
     630         101 :     if (Py_IS_INFINITY(y)) {
     631          12 :         if (Py_IS_INFINITY(x)) {
     632           4 :             if (copysign(1., x) == 1.)
     633             :                 /* atan2(+-inf, +inf) == +-pi/4 */
     634           2 :                 return copysign(0.25*Py_MATH_PI, y);
     635             :             else
     636             :                 /* atan2(+-inf, -inf) == +-pi*3/4 */
     637           2 :                 return copysign(0.75*Py_MATH_PI, y);
     638             :         }
     639             :         /* atan2(+-inf, x) == +-pi/2 for finite x */
     640           8 :         return copysign(0.5*Py_MATH_PI, y);
     641             :     }
     642          89 :     if (Py_IS_INFINITY(x) || y == 0.) {
     643          37 :         if (copysign(1., x) == 1.)
     644             :             /* atan2(+-y, +inf) = atan2(+-0, +x) = +-0. */
     645          14 :             return copysign(0., y);
     646             :         else
     647             :             /* atan2(+-y, -inf) = atan2(+-0., -x) = +-pi. */
     648          23 :             return copysign(Py_MATH_PI, y);
     649             :     }
     650          52 :     return atan2(y, x);
     651             : }
     652             : 
     653             : 
     654             : /* IEEE 754-style remainder operation: x - n*y where n*y is the nearest
     655             :    multiple of y to x, taking n even in the case of a tie. Assuming an IEEE 754
     656             :    binary floating-point format, the result is always exact. */
     657             : 
     658             : static double
     659        9894 : m_remainder(double x, double y)
     660             : {
     661             :     /* Deal with most common case first. */
     662        9894 :     if (Py_IS_FINITE(x) && Py_IS_FINITE(y)) {
     663             :         double absx, absy, c, m, r;
     664             : 
     665        9856 :         if (y == 0.0) {
     666           8 :             return Py_NAN;
     667             :         }
     668             : 
     669        9848 :         absx = fabs(x);
     670        9848 :         absy = fabs(y);
     671        9848 :         m = fmod(absx, absy);
     672             : 
     673             :         /*
     674             :            Warning: some subtlety here. What we *want* to know at this point is
     675             :            whether the remainder m is less than, equal to, or greater than half
     676             :            of absy. However, we can't do that comparison directly because we
     677             :            can't be sure that 0.5*absy is representable (the multiplication
     678             :            might incur precision loss due to underflow). So instead we compare
     679             :            m with the complement c = absy - m: m < 0.5*absy if and only if m <
     680             :            c, and so on. The catch is that absy - m might also not be
     681             :            representable, but it turns out that it doesn't matter:
     682             : 
     683             :            - if m > 0.5*absy then absy - m is exactly representable, by
     684             :              Sterbenz's lemma, so m > c
     685             :            - if m == 0.5*absy then again absy - m is exactly representable
     686             :              and m == c
     687             :            - if m < 0.5*absy then either (i) 0.5*absy is exactly representable,
     688             :              in which case 0.5*absy < absy - m, so 0.5*absy <= c and hence m <
     689             :              c, or (ii) absy is tiny, either subnormal or in the lowest normal
     690             :              binade. Then absy - m is exactly representable and again m < c.
     691             :         */
     692             : 
     693        9848 :         c = absy - m;
     694        9848 :         if (m < c) {
     695        5517 :             r = m;
     696             :         }
     697        4331 :         else if (m > c) {
     698        3699 :             r = -c;
     699             :         }
     700             :         else {
     701             :             /*
     702             :                Here absx is exactly halfway between two multiples of absy,
     703             :                and we need to choose the even multiple. x now has the form
     704             : 
     705             :                    absx = n * absy + m
     706             : 
     707             :                for some integer n (recalling that m = 0.5*absy at this point).
     708             :                If n is even we want to return m; if n is odd, we need to
     709             :                return -m.
     710             : 
     711             :                So
     712             : 
     713             :                    0.5 * (absx - m) = (n/2) * absy
     714             : 
     715             :                and now reducing modulo absy gives us:
     716             : 
     717             :                                                   | m, if n is odd
     718             :                    fmod(0.5 * (absx - m), absy) = |
     719             :                                                   | 0, if n is even
     720             : 
     721             :                Now m - 2.0 * fmod(...) gives the desired result: m
     722             :                if n is even, -m if m is odd.
     723             : 
     724             :                Note that all steps in fmod(0.5 * (absx - m), absy)
     725             :                will be computed exactly, with no rounding error
     726             :                introduced.
     727             :             */
     728         632 :             assert(m == c);
     729         632 :             r = m - 2.0 * fmod(0.5 * (absx - m), absy);
     730             :         }
     731        9848 :         return copysign(1.0, x) * r;
     732             :     }
     733             : 
     734             :     /* Special values. */
     735          38 :     if (Py_IS_NAN(x)) {
     736           8 :         return x;
     737             :     }
     738          30 :     if (Py_IS_NAN(y)) {
     739           6 :         return y;
     740             :     }
     741          24 :     if (Py_IS_INFINITY(x)) {
     742          16 :         return Py_NAN;
     743             :     }
     744           8 :     assert(Py_IS_INFINITY(y));
     745           8 :     return x;
     746             : }
     747             : 
     748             : 
     749             : /*
     750             :     Various platforms (Solaris, OpenBSD) do nonstandard things for log(0),
     751             :     log(-ve), log(NaN).  Here are wrappers for log and log10 that deal with
     752             :     special values directly, passing positive non-special values through to
     753             :     the system log/log10.
     754             :  */
     755             : 
     756             : static double
     757      604892 : m_log(double x)
     758             : {
     759      604892 :     if (Py_IS_FINITE(x)) {
     760      604879 :         if (x > 0.0)
     761      604874 :             return log(x);
     762           5 :         errno = EDOM;
     763           5 :         if (x == 0.0)
     764           3 :             return -Py_HUGE_VAL; /* log(0) = -inf */
     765             :         else
     766           2 :             return Py_NAN; /* log(-ve) = nan */
     767             :     }
     768          13 :     else if (Py_IS_NAN(x))
     769           4 :         return x; /* log(nan) = nan */
     770           9 :     else if (x > 0.0)
     771           7 :         return x; /* log(inf) = inf */
     772             :     else {
     773           2 :         errno = EDOM;
     774           2 :         return Py_NAN; /* log(-inf) = nan */
     775             :     }
     776             : }
     777             : 
     778             : /*
     779             :    log2: log to base 2.
     780             : 
     781             :    Uses an algorithm that should:
     782             : 
     783             :      (a) produce exact results for powers of 2, and
     784             :      (b) give a monotonic log2 (for positive finite floats),
     785             :          assuming that the system log is monotonic.
     786             : */
     787             : 
     788             : static double
     789        2200 : m_log2(double x)
     790             : {
     791        2200 :     if (!Py_IS_FINITE(x)) {
     792           5 :         if (Py_IS_NAN(x))
     793           2 :             return x; /* log2(nan) = nan */
     794           3 :         else if (x > 0.0)
     795           1 :             return x; /* log2(+inf) = +inf */
     796             :         else {
     797           2 :             errno = EDOM;
     798           2 :             return Py_NAN; /* log2(-inf) = nan, invalid-operation */
     799             :         }
     800             :     }
     801             : 
     802        2195 :     if (x > 0.0) {
     803             : #ifdef HAVE_LOG2
     804        2164 :         return log2(x);
     805             : #else
     806             :         double m;
     807             :         int e;
     808             :         m = frexp(x, &e);
     809             :         /* We want log2(m * 2**e) == log(m) / log(2) + e.  Care is needed when
     810             :          * x is just greater than 1.0: in that case e is 1, log(m) is negative,
     811             :          * and we get significant cancellation error from the addition of
     812             :          * log(m) / log(2) to e.  The slight rewrite of the expression below
     813             :          * avoids this problem.
     814             :          */
     815             :         if (x >= 1.0) {
     816             :             return log(2.0 * m) / log(2.0) + (e - 1);
     817             :         }
     818             :         else {
     819             :             return log(m) / log(2.0) + e;
     820             :         }
     821             : #endif
     822             :     }
     823          31 :     else if (x == 0.0) {
     824           2 :         errno = EDOM;
     825           2 :         return -Py_HUGE_VAL; /* log2(0) = -inf, divide-by-zero */
     826             :     }
     827             :     else {
     828          29 :         errno = EDOM;
     829          29 :         return Py_NAN; /* log2(-inf) = nan, invalid-operation */
     830             :     }
     831             : }
     832             : 
     833             : static double
     834          75 : m_log10(double x)
     835             : {
     836          75 :     if (Py_IS_FINITE(x)) {
     837          71 :         if (x > 0.0)
     838          68 :             return log10(x);
     839           3 :         errno = EDOM;
     840           3 :         if (x == 0.0)
     841           2 :             return -Py_HUGE_VAL; /* log10(0) = -inf */
     842             :         else
     843           1 :             return Py_NAN; /* log10(-ve) = nan */
     844             :     }
     845           4 :     else if (Py_IS_NAN(x))
     846           1 :         return x; /* log10(nan) = nan */
     847           3 :     else if (x > 0.0)
     848           2 :         return x; /* log10(inf) = inf */
     849             :     else {
     850           1 :         errno = EDOM;
     851           1 :         return Py_NAN; /* log10(-inf) = nan */
     852             :     }
     853             : }
     854             : 
     855             : 
     856             : static PyObject *
     857      644756 : math_gcd(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
     858             : {
     859             :     PyObject *res, *x;
     860             :     Py_ssize_t i;
     861             : 
     862      644756 :     if (nargs == 0) {
     863           1 :         return PyLong_FromLong(0);
     864             :     }
     865      644755 :     res = PyNumber_Index(args[0]);
     866      644755 :     if (res == NULL) {
     867           2 :         return NULL;
     868             :     }
     869      644753 :     if (nargs == 1) {
     870           2 :         Py_SETREF(res, PyNumber_Absolute(res));
     871           2 :         return res;
     872             :     }
     873             : 
     874      644751 :     PyObject *one = _PyLong_GetOne();  // borrowed ref
     875     1289500 :     for (i = 1; i < nargs; i++) {
     876      644754 :         x = _PyNumber_Index(args[i]);
     877      644754 :         if (x == NULL) {
     878           2 :             Py_DECREF(res);
     879           2 :             return NULL;
     880             :         }
     881      644752 :         if (res == one) {
     882             :             /* Fast path: just check arguments.
     883             :                It is okay to use identity comparison here. */
     884      152883 :             Py_DECREF(x);
     885      152883 :             continue;
     886             :         }
     887      491869 :         Py_SETREF(res, _PyLong_GCD(res, x));
     888      491869 :         Py_DECREF(x);
     889      491869 :         if (res == NULL) {
     890           0 :             return NULL;
     891             :         }
     892             :     }
     893      644749 :     return res;
     894             : }
     895             : 
     896             : PyDoc_STRVAR(math_gcd_doc,
     897             : "gcd($module, *integers)\n"
     898             : "--\n"
     899             : "\n"
     900             : "Greatest Common Divisor.");
     901             : 
     902             : 
     903             : static PyObject *
     904          29 : long_lcm(PyObject *a, PyObject *b)
     905             : {
     906             :     PyObject *g, *m, *f, *ab;
     907             : 
     908          29 :     if (Py_SIZE(a) == 0 || Py_SIZE(b) == 0) {
     909           4 :         return PyLong_FromLong(0);
     910             :     }
     911          25 :     g = _PyLong_GCD(a, b);
     912          25 :     if (g == NULL) {
     913           0 :         return NULL;
     914             :     }
     915          25 :     f = PyNumber_FloorDivide(a, g);
     916          25 :     Py_DECREF(g);
     917          25 :     if (f == NULL) {
     918           0 :         return NULL;
     919             :     }
     920          25 :     m = PyNumber_Multiply(f, b);
     921          25 :     Py_DECREF(f);
     922          25 :     if (m == NULL) {
     923           0 :         return NULL;
     924             :     }
     925          25 :     ab = PyNumber_Absolute(m);
     926          25 :     Py_DECREF(m);
     927          25 :     return ab;
     928             : }
     929             : 
     930             : 
     931             : static PyObject *
     932          37 : math_lcm(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
     933             : {
     934             :     PyObject *res, *x;
     935             :     Py_ssize_t i;
     936             : 
     937          37 :     if (nargs == 0) {
     938           1 :         return PyLong_FromLong(1);
     939             :     }
     940          36 :     res = PyNumber_Index(args[0]);
     941          36 :     if (res == NULL) {
     942           2 :         return NULL;
     943             :     }
     944          34 :     if (nargs == 1) {
     945           2 :         Py_SETREF(res, PyNumber_Absolute(res));
     946           2 :         return res;
     947             :     }
     948             : 
     949          32 :     PyObject *zero = _PyLong_GetZero();  // borrowed ref
     950          65 :     for (i = 1; i < nargs; i++) {
     951          35 :         x = PyNumber_Index(args[i]);
     952          35 :         if (x == NULL) {
     953           2 :             Py_DECREF(res);
     954           2 :             return NULL;
     955             :         }
     956          33 :         if (res == zero) {
     957             :             /* Fast path: just check arguments.
     958             :                It is okay to use identity comparison here. */
     959           4 :             Py_DECREF(x);
     960           4 :             continue;
     961             :         }
     962          29 :         Py_SETREF(res, long_lcm(res, x));
     963          29 :         Py_DECREF(x);
     964          29 :         if (res == NULL) {
     965           0 :             return NULL;
     966             :         }
     967             :     }
     968          30 :     return res;
     969             : }
     970             : 
     971             : 
     972             : PyDoc_STRVAR(math_lcm_doc,
     973             : "lcm($module, *integers)\n"
     974             : "--\n"
     975             : "\n"
     976             : "Least Common Multiple.");
     977             : 
     978             : 
     979             : /* Call is_error when errno != 0, and where x is the result libm
     980             :  * returned.  is_error will usually set up an exception and return
     981             :  * true (1), but may return false (0) without setting up an exception.
     982             :  */
     983             : static int
     984       31327 : is_error(double x)
     985             : {
     986       31327 :     int result = 1;     /* presumption of guilt */
     987       31327 :     assert(errno);      /* non-zero errno is a precondition for calling */
     988       31327 :     if (errno == EDOM)
     989          54 :         PyErr_SetString(PyExc_ValueError, "math domain error");
     990             : 
     991       31273 :     else if (errno == ERANGE) {
     992             :         /* ANSI C generally requires libm functions to set ERANGE
     993             :          * on overflow, but also generally *allows* them to set
     994             :          * ERANGE on underflow too.  There's no consistency about
     995             :          * the latter across platforms.
     996             :          * Alas, C99 never requires that errno be set.
     997             :          * Here we suppress the underflow errors (libm functions
     998             :          * should return a zero on underflow, and +- HUGE_VAL on
     999             :          * overflow, so testing the result for zero suffices to
    1000             :          * distinguish the cases).
    1001             :          *
    1002             :          * On some platforms (Ubuntu/ia64) it seems that errno can be
    1003             :          * set to ERANGE for subnormal results that do *not* underflow
    1004             :          * to zero.  So to be safe, we'll ignore ERANGE whenever the
    1005             :          * function result is less than 1.5 in absolute value.
    1006             :          *
    1007             :          * bpo-46018: Changed to 1.5 to ensure underflows in expm1()
    1008             :          * are correctly detected, since the function may underflow
    1009             :          * toward -1.0 rather than 0.0.
    1010             :          */
    1011       31273 :         if (fabs(x) < 1.5)
    1012       30556 :             result = 0;
    1013             :         else
    1014         717 :             PyErr_SetString(PyExc_OverflowError,
    1015             :                             "math range error");
    1016             :     }
    1017             :     else
    1018             :         /* Unexpected math error */
    1019           0 :         PyErr_SetFromErrno(PyExc_ValueError);
    1020       31327 :     return result;
    1021             : }
    1022             : 
    1023             : /*
    1024             :    math_1 is used to wrap a libm function f that takes a double
    1025             :    argument and returns a double.
    1026             : 
    1027             :    The error reporting follows these rules, which are designed to do
    1028             :    the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
    1029             :    platforms.
    1030             : 
    1031             :    - a NaN result from non-NaN inputs causes ValueError to be raised
    1032             :    - an infinite result from finite inputs causes OverflowError to be
    1033             :      raised if can_overflow is 1, or raises ValueError if can_overflow
    1034             :      is 0.
    1035             :    - if the result is finite and errno == EDOM then ValueError is
    1036             :      raised
    1037             :    - if the result is finite and nonzero and errno == ERANGE then
    1038             :      OverflowError is raised
    1039             : 
    1040             :    The last rule is used to catch overflow on platforms which follow
    1041             :    C89 but for which HUGE_VAL is not an infinity.
    1042             : 
    1043             :    For the majority of one-argument functions these rules are enough
    1044             :    to ensure that Python's functions behave as specified in 'Annex F'
    1045             :    of the C99 standard, with the 'invalid' and 'divide-by-zero'
    1046             :    floating-point exceptions mapping to Python's ValueError and the
    1047             :    'overflow' floating-point exception mapping to OverflowError.
    1048             :    math_1 only works for functions that don't have singularities *and*
    1049             :    the possibility of overflow; fortunately, that covers everything we
    1050             :    care about right now.
    1051             : */
    1052             : 
    1053             : static PyObject *
    1054     3696380 : math_1_to_whatever(PyObject *arg, double (*func) (double),
    1055             :                    PyObject *(*from_double_func) (double),
    1056             :                    int can_overflow)
    1057             : {
    1058             :     double x, r;
    1059     3696380 :     x = PyFloat_AsDouble(arg);
    1060     3696380 :     if (x == -1.0 && PyErr_Occurred())
    1061           5 :         return NULL;
    1062     3696370 :     errno = 0;
    1063     3696370 :     r = (*func)(x);
    1064     3696370 :     if (Py_IS_NAN(r) && !Py_IS_NAN(x)) {
    1065          85 :         PyErr_SetString(PyExc_ValueError,
    1066             :                         "math domain error"); /* invalid arg */
    1067          85 :         return NULL;
    1068             :     }
    1069     3696290 :     if (Py_IS_INFINITY(r) && Py_IS_FINITE(x)) {
    1070          22 :         if (can_overflow)
    1071           7 :             PyErr_SetString(PyExc_OverflowError,
    1072             :                             "math range error"); /* overflow */
    1073             :         else
    1074          15 :             PyErr_SetString(PyExc_ValueError,
    1075             :                             "math domain error"); /* singularity */
    1076          22 :         return NULL;
    1077             :     }
    1078     3696270 :     if (Py_IS_FINITE(r) && errno && is_error(r))
    1079             :         /* this branch unnecessary on most platforms */
    1080           0 :         return NULL;
    1081             : 
    1082     3696270 :     return (*from_double_func)(r);
    1083             : }
    1084             : 
    1085             : /* variant of math_1, to be used when the function being wrapped is known to
    1086             :    set errno properly (that is, errno = EDOM for invalid or divide-by-zero,
    1087             :    errno = ERANGE for overflow). */
    1088             : 
    1089             : static PyObject *
    1090     2098740 : math_1a(PyObject *arg, double (*func) (double))
    1091             : {
    1092             :     double x, r;
    1093     2098740 :     x = PyFloat_AsDouble(arg);
    1094     2098740 :     if (x == -1.0 && PyErr_Occurred())
    1095           0 :         return NULL;
    1096     2098740 :     errno = 0;
    1097     2098740 :     r = (*func)(x);
    1098     2098740 :     if (errno && is_error(r))
    1099          28 :         return NULL;
    1100     2098710 :     return PyFloat_FromDouble(r);
    1101             : }
    1102             : 
    1103             : /*
    1104             :    math_2 is used to wrap a libm function f that takes two double
    1105             :    arguments and returns a double.
    1106             : 
    1107             :    The error reporting follows these rules, which are designed to do
    1108             :    the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
    1109             :    platforms.
    1110             : 
    1111             :    - a NaN result from non-NaN inputs causes ValueError to be raised
    1112             :    - an infinite result from finite inputs causes OverflowError to be
    1113             :      raised.
    1114             :    - if the result is finite and errno == EDOM then ValueError is
    1115             :      raised
    1116             :    - if the result is finite and nonzero and errno == ERANGE then
    1117             :      OverflowError is raised
    1118             : 
    1119             :    The last rule is used to catch overflow on platforms which follow
    1120             :    C89 but for which HUGE_VAL is not an infinity.
    1121             : 
    1122             :    For most two-argument functions (copysign, fmod, hypot, atan2)
    1123             :    these rules are enough to ensure that Python's functions behave as
    1124             :    specified in 'Annex F' of the C99 standard, with the 'invalid' and
    1125             :    'divide-by-zero' floating-point exceptions mapping to Python's
    1126             :    ValueError and the 'overflow' floating-point exception mapping to
    1127             :    OverflowError.
    1128             : */
    1129             : 
    1130             : static PyObject *
    1131     3696380 : math_1(PyObject *arg, double (*func) (double), int can_overflow)
    1132             : {
    1133     3696380 :     return math_1_to_whatever(arg, func, PyFloat_FromDouble, can_overflow);
    1134             : }
    1135             : 
    1136             : static PyObject *
    1137       15237 : math_2(PyObject *const *args, Py_ssize_t nargs,
    1138             :        double (*func) (double, double), const char *funcname)
    1139             : {
    1140             :     double x, y, r;
    1141       15237 :     if (!_PyArg_CheckPositional(funcname, nargs, 2, 2))
    1142           2 :         return NULL;
    1143       15235 :     x = PyFloat_AsDouble(args[0]);
    1144       15235 :     if (x == -1.0 && PyErr_Occurred()) {
    1145           3 :         return NULL;
    1146             :     }
    1147       15232 :     y = PyFloat_AsDouble(args[1]);
    1148       15232 :     if (y == -1.0 && PyErr_Occurred()) {
    1149           0 :         return NULL;
    1150             :     }
    1151       15232 :     errno = 0;
    1152       15232 :     r = (*func)(x, y);
    1153       15232 :     if (Py_IS_NAN(r)) {
    1154          55 :         if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
    1155          24 :             errno = EDOM;
    1156             :         else
    1157          31 :             errno = 0;
    1158             :     }
    1159       15177 :     else if (Py_IS_INFINITY(r)) {
    1160           9 :         if (Py_IS_FINITE(x) && Py_IS_FINITE(y))
    1161           0 :             errno = ERANGE;
    1162             :         else
    1163           9 :             errno = 0;
    1164             :     }
    1165       15232 :     if (errno && is_error(r))
    1166          24 :         return NULL;
    1167             :     else
    1168       15208 :         return PyFloat_FromDouble(r);
    1169             : }
    1170             : 
    1171             : #define FUNC1(funcname, func, can_overflow, docstring)                  \
    1172             :     static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
    1173             :         return math_1(args, func, can_overflow);                            \
    1174             :     }\
    1175             :     PyDoc_STRVAR(math_##funcname##_doc, docstring);
    1176             : 
    1177             : #define FUNC1A(funcname, func, docstring)                               \
    1178             :     static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
    1179             :         return math_1a(args, func);                                     \
    1180             :     }\
    1181             :     PyDoc_STRVAR(math_##funcname##_doc, docstring);
    1182             : 
    1183             : #define FUNC2(funcname, func, docstring) \
    1184             :     static PyObject * math_##funcname(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { \
    1185             :         return math_2(args, nargs, func, #funcname); \
    1186             :     }\
    1187             :     PyDoc_STRVAR(math_##funcname##_doc, docstring);
    1188             : 
    1189         973 : FUNC1(acos, acos, 0,
    1190             :       "acos($module, x, /)\n--\n\n"
    1191             :       "Return the arc cosine (measured in radians) of x.\n\n"
    1192             :       "The result is between 0 and pi.")
    1193          30 : FUNC1(acosh, acosh, 0,
    1194             :       "acosh($module, x, /)\n--\n\n"
    1195             :       "Return the inverse hyperbolic cosine of x.")
    1196          65 : FUNC1(asin, asin, 0,
    1197             :       "asin($module, x, /)\n--\n\n"
    1198             :       "Return the arc sine (measured in radians) of x.\n\n"
    1199             :       "The result is between -pi/2 and pi/2.")
    1200          28 : FUNC1(asinh, asinh, 0,
    1201             :       "asinh($module, x, /)\n--\n\n"
    1202             :       "Return the inverse hyperbolic sine of x.")
    1203          60 : FUNC1(atan, atan, 0,
    1204             :       "atan($module, x, /)\n--\n\n"
    1205             :       "Return the arc tangent (measured in radians) of x.\n\n"
    1206             :       "The result is between -pi/2 and pi/2.")
    1207         116 : FUNC2(atan2, m_atan2,
    1208             :       "atan2($module, y, x, /)\n--\n\n"
    1209             :       "Return the arc tangent (measured in radians) of y/x.\n\n"
    1210             :       "Unlike atan(y/x), the signs of both x and y are considered.")
    1211          48 : FUNC1(atanh, atanh, 0,
    1212             :       "atanh($module, x, /)\n--\n\n"
    1213             :       "Return the inverse hyperbolic tangent of x.")
    1214          13 : FUNC1(cbrt, cbrt, 0,
    1215             :       "cbrt($module, x, /)\n--\n\n"
    1216             :       "Return the cube root of x.")
    1217             : 
    1218             : /*[clinic input]
    1219             : math.ceil
    1220             : 
    1221             :     x as number: object
    1222             :     /
    1223             : 
    1224             : Return the ceiling of x as an Integral.
    1225             : 
    1226             : This is the smallest integer >= x.
    1227             : [clinic start generated code]*/
    1228             : 
    1229             : static PyObject *
    1230        5470 : math_ceil(PyObject *module, PyObject *number)
    1231             : /*[clinic end generated code: output=6c3b8a78bc201c67 input=2725352806399cab]*/
    1232             : {
    1233             : 
    1234        5470 :     if (!PyFloat_CheckExact(number)) {
    1235          40 :         math_module_state *state = get_math_module_state(module);
    1236          40 :         PyObject *method = _PyObject_LookupSpecial(number, state->str___ceil__);
    1237          40 :         if (method != NULL) {
    1238          36 :             PyObject *result = _PyObject_CallNoArgs(method);
    1239          36 :             Py_DECREF(method);
    1240          36 :             return result;
    1241             :         }
    1242           4 :         if (PyErr_Occurred())
    1243           1 :             return NULL;
    1244             :     }
    1245        5433 :     double x = PyFloat_AsDouble(number);
    1246        5433 :     if (x == -1.0 && PyErr_Occurred())
    1247           2 :         return NULL;
    1248             : 
    1249        5431 :     return PyLong_FromDouble(ceil(x));
    1250             : }
    1251             : 
    1252        5226 : FUNC2(copysign, copysign,
    1253             :       "copysign($module, x, y, /)\n--\n\n"
    1254             :        "Return a float with the magnitude (absolute value) of x but the sign of y.\n\n"
    1255             :       "On platforms that support signed zeros, copysign(1.0, -0.0)\n"
    1256             :       "returns -1.0.\n")
    1257      102835 : FUNC1(cos, cos, 0,
    1258             :       "cos($module, x, /)\n--\n\n"
    1259             :       "Return the cosine of x (measured in radians).")
    1260          63 : FUNC1(cosh, cosh, 1,
    1261             :       "cosh($module, x, /)\n--\n\n"
    1262             :       "Return the hyperbolic cosine of x.")
    1263     2098540 : FUNC1A(erf, m_erf,
    1264             :        "erf($module, x, /)\n--\n\n"
    1265             :        "Error function at x.")
    1266          44 : FUNC1A(erfc, m_erfc,
    1267             :        "erfc($module, x, /)\n--\n\n"
    1268             :        "Complementary error function at x.")
    1269      937895 : FUNC1(exp, exp, 1,
    1270             :       "exp($module, x, /)\n--\n\n"
    1271             :       "Return e raised to the power of x.")
    1272           8 : FUNC1(exp2, exp2, 1,
    1273             :       "exp2($module, x, /)\n--\n\n"
    1274             :       "Return 2 raised to the power of x.")
    1275          52 : FUNC1(expm1, expm1, 1,
    1276             :       "expm1($module, x, /)\n--\n\n"
    1277             :       "Return exp(x)-1.\n\n"
    1278             :       "This function avoids the loss of precision involved in the direct "
    1279             :       "evaluation of exp(x)-1 for small x.")
    1280     1049110 : FUNC1(fabs, fabs, 0,
    1281             :       "fabs($module, x, /)\n--\n\n"
    1282             :       "Return the absolute value of the float x.")
    1283             : 
    1284             : /*[clinic input]
    1285             : math.floor
    1286             : 
    1287             :     x as number: object
    1288             :     /
    1289             : 
    1290             : Return the floor of x as an Integral.
    1291             : 
    1292             : This is the largest integer <= x.
    1293             : [clinic start generated code]*/
    1294             : 
    1295             : static PyObject *
    1296     8183720 : math_floor(PyObject *module, PyObject *number)
    1297             : /*[clinic end generated code: output=c6a65c4884884b8a input=63af6b5d7ebcc3d6]*/
    1298             : {
    1299             :     double x;
    1300             : 
    1301     8183720 :     if (PyFloat_CheckExact(number)) {
    1302     8183680 :         x = PyFloat_AS_DOUBLE(number);
    1303             :     }
    1304             :     else
    1305             :     {
    1306          39 :         math_module_state *state = get_math_module_state(module);
    1307          39 :         PyObject *method = _PyObject_LookupSpecial(number, state->str___floor__);
    1308          39 :         if (method != NULL) {
    1309          35 :             PyObject *result = _PyObject_CallNoArgs(method);
    1310          35 :             Py_DECREF(method);
    1311          35 :             return result;
    1312             :         }
    1313           4 :         if (PyErr_Occurred())
    1314           1 :             return NULL;
    1315           3 :         x = PyFloat_AsDouble(number);
    1316           3 :         if (x == -1.0 && PyErr_Occurred())
    1317           2 :             return NULL;
    1318             :     }
    1319     8183680 :     return PyLong_FromDouble(floor(x));
    1320             : }
    1321             : 
    1322          76 : FUNC1A(gamma, m_tgamma,
    1323             :       "gamma($module, x, /)\n--\n\n"
    1324             :       "Gamma function at x.")
    1325          79 : FUNC1A(lgamma, m_lgamma,
    1326             :       "lgamma($module, x, /)\n--\n\n"
    1327             :       "Natural logarithm of absolute value of Gamma function at x.")
    1328          60 : FUNC1(log1p, m_log1p, 0,
    1329             :       "log1p($module, x, /)\n--\n\n"
    1330             :       "Return the natural logarithm of 1+x (base e).\n\n"
    1331             :       "The result is computed in a way which is accurate for x near zero.")
    1332        9895 : FUNC2(remainder, m_remainder,
    1333             :       "remainder($module, x, y, /)\n--\n\n"
    1334             :       "Difference between x and the closest integer multiple of y.\n\n"
    1335             :       "Return x - n*y where n*y is the closest integer multiple of y.\n"
    1336             :       "In the case where x is exactly halfway between two multiples of\n"
    1337             :       "y, the nearest even value of n is used. The result is always exact.")
    1338      101577 : FUNC1(sin, sin, 0,
    1339             :       "sin($module, x, /)\n--\n\n"
    1340             :       "Return the sine of x (measured in radians).")
    1341          64 : FUNC1(sinh, sinh, 1,
    1342             :       "sinh($module, x, /)\n--\n\n"
    1343             :       "Return the hyperbolic sine of x.")
    1344     1187440 : FUNC1(sqrt, sqrt, 0,
    1345             :       "sqrt($module, x, /)\n--\n\n"
    1346             :       "Return the square root of x.")
    1347          65 : FUNC1(tan, tan, 0,
    1348             :       "tan($module, x, /)\n--\n\n"
    1349             :       "Return the tangent of x (measured in radians).")
    1350          61 : FUNC1(tanh, tanh, 0,
    1351             :       "tanh($module, x, /)\n--\n\n"
    1352             :       "Return the hyperbolic tangent of x.")
    1353             : 
    1354             : /* Precision summation function as msum() by Raymond Hettinger in
    1355             :    <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/393090>,
    1356             :    enhanced with the exact partials sum and roundoff from Mark
    1357             :    Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
    1358             :    See those links for more details, proofs and other references.
    1359             : 
    1360             :    Note 1: IEEE 754R floating point semantics are assumed,
    1361             :    but the current implementation does not re-establish special
    1362             :    value semantics across iterations (i.e. handling -Inf + Inf).
    1363             : 
    1364             :    Note 2:  No provision is made for intermediate overflow handling;
    1365             :    therefore, sum([1e+308, 1e-308, 1e+308]) returns 1e+308 while
    1366             :    sum([1e+308, 1e+308, 1e-308]) raises an OverflowError due to the
    1367             :    overflow of the first partial sum.
    1368             : 
    1369             :    Note 3: The intermediate values lo, yr, and hi are declared volatile so
    1370             :    aggressive compilers won't algebraically reduce lo to always be exactly 0.0.
    1371             :    Also, the volatile declaration forces the values to be stored in memory as
    1372             :    regular doubles instead of extended long precision (80-bit) values.  This
    1373             :    prevents double rounding because any addition or subtraction of two doubles
    1374             :    can be resolved exactly into double-sized hi and lo values.  As long as the
    1375             :    hi value gets forced into a double before yr and lo are computed, the extra
    1376             :    bits in downstream extended precision operations (x87 for example) will be
    1377             :    exactly zero and therefore can be losslessly stored back into a double,
    1378             :    thereby preventing double rounding.
    1379             : 
    1380             :    Note 4: A similar implementation is in Modules/cmathmodule.c.
    1381             :    Be sure to update both when making changes.
    1382             : 
    1383             :    Note 5: The signature of math.fsum() differs from builtins.sum()
    1384             :    because the start argument doesn't make sense in the context of
    1385             :    accurate summation.  Since the partials table is collapsed before
    1386             :    returning a result, sum(seq2, start=sum(seq1)) may not equal the
    1387             :    accurate result returned by sum(itertools.chain(seq1, seq2)).
    1388             : */
    1389             : 
    1390             : #define NUM_PARTIALS  32  /* initial partials array size, on stack */
    1391             : 
    1392             : /* Extend the partials array p[] by doubling its size. */
    1393             : static int                          /* non-zero on error */
    1394          81 : _fsum_realloc(double **p_ptr, Py_ssize_t  n,
    1395             :              double  *ps,    Py_ssize_t *m_ptr)
    1396             : {
    1397          81 :     void *v = NULL;
    1398          81 :     Py_ssize_t m = *m_ptr;
    1399             : 
    1400          81 :     m += m;  /* double */
    1401          81 :     if (n < m && (size_t)m < ((size_t)PY_SSIZE_T_MAX / sizeof(double))) {
    1402          81 :         double *p = *p_ptr;
    1403          81 :         if (p == ps) {
    1404          29 :             v = PyMem_Malloc(sizeof(double) * m);
    1405          29 :             if (v != NULL)
    1406          29 :                 memcpy(v, ps, sizeof(double) * n);
    1407             :         }
    1408             :         else
    1409          52 :             v = PyMem_Realloc(p, sizeof(double) * m);
    1410             :     }
    1411          81 :     if (v == NULL) {        /* size overflow or no memory */
    1412           0 :         PyErr_SetString(PyExc_MemoryError, "math.fsum partials");
    1413           0 :         return 1;
    1414             :     }
    1415          81 :     *p_ptr = (double*) v;
    1416          81 :     *m_ptr = m;
    1417          81 :     return 0;
    1418             : }
    1419             : 
    1420             : /* Full precision summation of a sequence of floats.
    1421             : 
    1422             :    def msum(iterable):
    1423             :        partials = []  # sorted, non-overlapping partial sums
    1424             :        for x in iterable:
    1425             :            i = 0
    1426             :            for y in partials:
    1427             :                if abs(x) < abs(y):
    1428             :                    x, y = y, x
    1429             :                hi = x + y
    1430             :                lo = y - (hi - x)
    1431             :                if lo:
    1432             :                    partials[i] = lo
    1433             :                    i += 1
    1434             :                x = hi
    1435             :            partials[i:] = [x]
    1436             :        return sum_exact(partials)
    1437             : 
    1438             :    Rounded x+y stored in hi with the roundoff stored in lo.  Together hi+lo
    1439             :    are exactly equal to x+y.  The inner loop applies hi/lo summation to each
    1440             :    partial so that the list of partial sums remains exact.
    1441             : 
    1442             :    Sum_exact() adds the partial sums exactly and correctly rounds the final
    1443             :    result (using the round-half-to-even rule).  The items in partials remain
    1444             :    non-zero, non-special, non-overlapping and strictly increasing in
    1445             :    magnitude, but possibly not all having the same sign.
    1446             : 
    1447             :    Depends on IEEE 754 arithmetic guarantees and half-even rounding.
    1448             : */
    1449             : 
    1450             : /*[clinic input]
    1451             : math.fsum
    1452             : 
    1453             :     seq: object
    1454             :     /
    1455             : 
    1456             : Return an accurate floating point sum of values in the iterable seq.
    1457             : 
    1458             : Assumes IEEE-754 floating point arithmetic.
    1459             : [clinic start generated code]*/
    1460             : 
    1461             : static PyObject *
    1462        1378 : math_fsum(PyObject *module, PyObject *seq)
    1463             : /*[clinic end generated code: output=ba5c672b87fe34fc input=c51b7d8caf6f6e82]*/
    1464             : {
    1465        1378 :     PyObject *item, *iter, *sum = NULL;
    1466        1378 :     Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
    1467        1378 :     double x, y, t, ps[NUM_PARTIALS], *p = ps;
    1468        1378 :     double xsave, special_sum = 0.0, inf_sum = 0.0;
    1469             :     volatile double hi, yr, lo;
    1470             : 
    1471        1378 :     iter = PyObject_GetIter(seq);
    1472        1378 :     if (iter == NULL)
    1473           0 :         return NULL;
    1474             : 
    1475             :     for(;;) {           /* for x in iterable */
    1476     1666870 :         assert(0 <= n && n <= m);
    1477     1666870 :         assert((m == NUM_PARTIALS && p == ps) ||
    1478             :                (m >  NUM_PARTIALS && p != NULL));
    1479             : 
    1480     1666870 :         item = PyIter_Next(iter);
    1481     1666870 :         if (item == NULL) {
    1482        1377 :             if (PyErr_Occurred())
    1483           5 :                 goto _fsum_error;
    1484        1372 :             break;
    1485             :         }
    1486     1665500 :         ASSIGN_DOUBLE(x, item, error_with_item);
    1487     1665500 :         Py_DECREF(item);
    1488             : 
    1489     1665500 :         xsave = x;
    1490    34303900 :         for (i = j = 0; j < n; j++) {       /* for y in partials */
    1491    32638400 :             y = p[j];
    1492    32638400 :             if (fabs(x) < fabs(y)) {
    1493     2516440 :                 t = x; x = y; y = t;
    1494             :             }
    1495    32638400 :             hi = x + y;
    1496    32638400 :             yr = hi - x;
    1497    32638400 :             lo = y - yr;
    1498    32638400 :             if (lo != 0.0)
    1499    31018800 :                 p[i++] = lo;
    1500    32638400 :             x = hi;
    1501             :         }
    1502             : 
    1503     1665500 :         n = i;                              /* ps[i:] = [x] */
    1504     1665500 :         if (x != 0.0) {
    1505     1628500 :             if (! Py_IS_FINITE(x)) {
    1506             :                 /* a nonfinite x could arise either as
    1507             :                    a result of intermediate overflow, or
    1508             :                    as a result of a nan or inf in the
    1509             :                    summands */
    1510          11 :                 if (Py_IS_FINITE(xsave)) {
    1511           0 :                     PyErr_SetString(PyExc_OverflowError,
    1512             :                           "intermediate overflow in fsum");
    1513           0 :                     goto _fsum_error;
    1514             :                 }
    1515          11 :                 if (Py_IS_INFINITY(xsave))
    1516           7 :                     inf_sum += xsave;
    1517          11 :                 special_sum += xsave;
    1518             :                 /* reset partials */
    1519          11 :                 n = 0;
    1520             :             }
    1521     1628490 :             else if (n >= m && _fsum_realloc(&p, n, ps, &m))
    1522           0 :                 goto _fsum_error;
    1523             :             else
    1524     1628490 :                 p[n++] = x;
    1525             :         }
    1526             :     }
    1527             : 
    1528        1372 :     if (special_sum != 0.0) {
    1529           7 :         if (Py_IS_NAN(inf_sum))
    1530           1 :             PyErr_SetString(PyExc_ValueError,
    1531             :                             "-inf + inf in fsum");
    1532             :         else
    1533           6 :             sum = PyFloat_FromDouble(special_sum);
    1534           7 :         goto _fsum_error;
    1535             :     }
    1536             : 
    1537        1365 :     hi = 0.0;
    1538        1365 :     if (n > 0) {
    1539        1349 :         hi = p[--n];
    1540             :         /* sum_exact(ps, hi) from the top, stop when the sum becomes
    1541             :            inexact. */
    1542        1944 :         while (n > 0) {
    1543        1805 :             x = hi;
    1544        1805 :             y = p[--n];
    1545        1805 :             assert(fabs(y) < fabs(x));
    1546        1805 :             hi = x + y;
    1547        1805 :             yr = hi - x;
    1548        1805 :             lo = y - yr;
    1549        1805 :             if (lo != 0.0)
    1550        1210 :                 break;
    1551             :         }
    1552             :         /* Make half-even rounding work across multiple partials.
    1553             :            Needed so that sum([1e-16, 1, 1e16]) will round-up the last
    1554             :            digit to two instead of down to zero (the 1e-16 makes the 1
    1555             :            slightly closer to two).  With a potential 1 ULP rounding
    1556             :            error fixed-up, math.fsum() can guarantee commutativity. */
    1557        1349 :         if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
    1558         817 :                       (lo > 0.0 && p[n-1] > 0.0))) {
    1559         500 :             y = lo * 2.0;
    1560         500 :             x = hi + y;
    1561         500 :             yr = x - hi;
    1562         500 :             if (y == yr)
    1563          25 :                 hi = x;
    1564             :         }
    1565             :     }
    1566        1365 :     sum = PyFloat_FromDouble(hi);
    1567             : 
    1568        1378 :   _fsum_error:
    1569        1378 :     Py_DECREF(iter);
    1570        1378 :     if (p != ps)
    1571          29 :         PyMem_Free(p);
    1572        1378 :     return sum;
    1573             : 
    1574           1 :   error_with_item:
    1575           1 :     Py_DECREF(item);
    1576           1 :     goto _fsum_error;
    1577             : }
    1578             : 
    1579             : #undef NUM_PARTIALS
    1580             : 
    1581             : 
    1582             : static unsigned long
    1583       46055 : count_set_bits(unsigned long n)
    1584             : {
    1585       46055 :     unsigned long count = 0;
    1586      233385 :     while (n != 0) {
    1587      187330 :         ++count;
    1588      187330 :         n &= n - 1; /* clear least significant bit */
    1589             :     }
    1590       46055 :     return count;
    1591             : }
    1592             : 
    1593             : /* Integer square root
    1594             : 
    1595             : Given a nonnegative integer `n`, we want to compute the largest integer
    1596             : `a` for which `a * a <= n`, or equivalently the integer part of the exact
    1597             : square root of `n`.
    1598             : 
    1599             : We use an adaptive-precision pure-integer version of Newton's iteration. Given
    1600             : a positive integer `n`, the algorithm produces at each iteration an integer
    1601             : approximation `a` to the square root of `n >> s` for some even integer `s`,
    1602             : with `s` decreasing as the iterations progress. On the final iteration, `s` is
    1603             : zero and we have an approximation to the square root of `n` itself.
    1604             : 
    1605             : At every step, the approximation `a` is strictly within 1.0 of the true square
    1606             : root, so we have
    1607             : 
    1608             :     (a - 1)**2 < (n >> s) < (a + 1)**2
    1609             : 
    1610             : After the final iteration, a check-and-correct step is needed to determine
    1611             : whether `a` or `a - 1` gives the desired integer square root of `n`.
    1612             : 
    1613             : The algorithm is remarkable in its simplicity. There's no need for a
    1614             : per-iteration check-and-correct step, and termination is straightforward: the
    1615             : number of iterations is known in advance (it's exactly `floor(log2(log2(n)))`
    1616             : for `n > 1`). The only tricky part of the correctness proof is in establishing
    1617             : that the bound `(a - 1)**2 < (n >> s) < (a + 1)**2` is maintained from one
    1618             : iteration to the next. A sketch of the proof of this is given below.
    1619             : 
    1620             : In addition to the proof sketch, a formal, computer-verified proof
    1621             : of correctness (using Lean) of an equivalent recursive algorithm can be found
    1622             : here:
    1623             : 
    1624             :     https://github.com/mdickinson/snippets/blob/master/proofs/isqrt/src/isqrt.lean
    1625             : 
    1626             : 
    1627             : Here's Python code equivalent to the C implementation below:
    1628             : 
    1629             :     def isqrt(n):
    1630             :         """
    1631             :         Return the integer part of the square root of the input.
    1632             :         """
    1633             :         n = operator.index(n)
    1634             : 
    1635             :         if n < 0:
    1636             :             raise ValueError("isqrt() argument must be nonnegative")
    1637             :         if n == 0:
    1638             :             return 0
    1639             : 
    1640             :         c = (n.bit_length() - 1) // 2
    1641             :         a = 1
    1642             :         d = 0
    1643             :         for s in reversed(range(c.bit_length())):
    1644             :             # Loop invariant: (a-1)**2 < (n >> 2*(c - d)) < (a+1)**2
    1645             :             e = d
    1646             :             d = c >> s
    1647             :             a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
    1648             : 
    1649             :         return a - (a*a > n)
    1650             : 
    1651             : 
    1652             : Sketch of proof of correctness
    1653             : ------------------------------
    1654             : 
    1655             : The delicate part of the correctness proof is showing that the loop invariant
    1656             : is preserved from one iteration to the next. That is, just before the line
    1657             : 
    1658             :     a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
    1659             : 
    1660             : is executed in the above code, we know that
    1661             : 
    1662             :     (1)  (a - 1)**2 < (n >> 2*(c - e)) < (a + 1)**2.
    1663             : 
    1664             : (since `e` is always the value of `d` from the previous iteration). We must
    1665             : prove that after that line is executed, we have
    1666             : 
    1667             :     (a - 1)**2 < (n >> 2*(c - d)) < (a + 1)**2
    1668             : 
    1669             : To facilitate the proof, we make some changes of notation. Write `m` for
    1670             : `n >> 2*(c-d)`, and write `b` for the new value of `a`, so
    1671             : 
    1672             :     b = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
    1673             : 
    1674             : or equivalently:
    1675             : 
    1676             :     (2)  b = (a << d - e - 1) + (m >> d - e + 1) // a
    1677             : 
    1678             : Then we can rewrite (1) as:
    1679             : 
    1680             :     (3)  (a - 1)**2 < (m >> 2*(d - e)) < (a + 1)**2
    1681             : 
    1682             : and we must show that (b - 1)**2 < m < (b + 1)**2.
    1683             : 
    1684             : From this point on, we switch to mathematical notation, so `/` means exact
    1685             : division rather than integer division and `^` is used for exponentiation. We
    1686             : use the `√` symbol for the exact square root. In (3), we can remove the
    1687             : implicit floor operation to give:
    1688             : 
    1689             :     (4)  (a - 1)^2 < m / 4^(d - e) < (a + 1)^2
    1690             : 
    1691             : Taking square roots throughout (4), scaling by `2^(d-e)`, and rearranging gives
    1692             : 
    1693             :     (5)  0 <= | 2^(d-e)a - √m | < 2^(d-e)
    1694             : 
    1695             : Squaring and dividing through by `2^(d-e+1) a` gives
    1696             : 
    1697             :     (6)  0 <= 2^(d-e-1) a + m / (2^(d-e+1) a) - √m < 2^(d-e-1) / a
    1698             : 
    1699             : We'll show below that `2^(d-e-1) <= a`. Given that, we can replace the
    1700             : right-hand side of (6) with `1`, and now replacing the central
    1701             : term `m / (2^(d-e+1) a)` with its floor in (6) gives
    1702             : 
    1703             :     (7) -1 < 2^(d-e-1) a + m // 2^(d-e+1) a - √m < 1
    1704             : 
    1705             : Or equivalently, from (2):
    1706             : 
    1707             :     (7) -1 < b - √m < 1
    1708             : 
    1709             : and rearranging gives that `(b-1)^2 < m < (b+1)^2`, which is what we needed
    1710             : to prove.
    1711             : 
    1712             : We're not quite done: we still have to prove the inequality `2^(d - e - 1) <=
    1713             : a` that was used to get line (7) above. From the definition of `c`, we have
    1714             : `4^c <= n`, which implies
    1715             : 
    1716             :     (8)  4^d <= m
    1717             : 
    1718             : also, since `e == d >> 1`, `d` is at most `2e + 1`, from which it follows
    1719             : that `2d - 2e - 1 <= d` and hence that
    1720             : 
    1721             :     (9)  4^(2d - 2e - 1) <= m
    1722             : 
    1723             : Dividing both sides by `4^(d - e)` gives
    1724             : 
    1725             :     (10)  4^(d - e - 1) <= m / 4^(d - e)
    1726             : 
    1727             : But we know from (4) that `m / 4^(d-e) < (a + 1)^2`, hence
    1728             : 
    1729             :     (11)  4^(d - e - 1) < (a + 1)^2
    1730             : 
    1731             : Now taking square roots of both sides and observing that both `2^(d-e-1)` and
    1732             : `a` are integers gives `2^(d - e - 1) <= a`, which is what we needed. This
    1733             : completes the proof sketch.
    1734             : 
    1735             : */
    1736             : 
    1737             : /*
    1738             :     The _approximate_isqrt_tab table provides approximate square roots for
    1739             :     16-bit integers. For any n in the range 2**14 <= n < 2**16, the value
    1740             : 
    1741             :         a = _approximate_isqrt_tab[(n >> 8) - 64]
    1742             : 
    1743             :     is an approximate square root of n, satisfying (a - 1)**2 < n < (a + 1)**2.
    1744             : 
    1745             :     The table was computed in Python using the expression:
    1746             : 
    1747             :         [min(round(sqrt(256*n + 128)), 255) for n in range(64, 256)]
    1748             : */
    1749             : 
    1750             : static const uint8_t _approximate_isqrt_tab[192] = {
    1751             :     128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
    1752             :     140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, 150,
    1753             :     151, 151, 152, 153, 154, 155, 156, 156, 157, 158, 159, 160,
    1754             :     160, 161, 162, 163, 164, 164, 165, 166, 167, 167, 168, 169,
    1755             :     170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178,
    1756             :     179, 179, 180, 181, 181, 182, 183, 183, 184, 185, 186, 186,
    1757             :     187, 188, 188, 189, 190, 190, 191, 192, 192, 193, 194, 194,
    1758             :     195, 196, 196, 197, 198, 198, 199, 200, 200, 201, 201, 202,
    1759             :     203, 203, 204, 205, 205, 206, 206, 207, 208, 208, 209, 210,
    1760             :     210, 211, 211, 212, 213, 213, 214, 214, 215, 216, 216, 217,
    1761             :     217, 218, 219, 219, 220, 220, 221, 221, 222, 223, 223, 224,
    1762             :     224, 225, 225, 226, 227, 227, 228, 228, 229, 229, 230, 230,
    1763             :     231, 232, 232, 233, 233, 234, 234, 235, 235, 236, 237, 237,
    1764             :     238, 238, 239, 239, 240, 240, 241, 241, 242, 242, 243, 243,
    1765             :     244, 244, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250,
    1766             :     250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 255,
    1767             : };
    1768             : 
    1769             : /* Approximate square root of a large 64-bit integer.
    1770             : 
    1771             :    Given `n` satisfying `2**62 <= n < 2**64`, return `a`
    1772             :    satisfying `(a - 1)**2 < n < (a + 1)**2`. */
    1773             : 
    1774             : static inline uint32_t
    1775       77910 : _approximate_isqrt(uint64_t n)
    1776             : {
    1777       77910 :     uint32_t u = _approximate_isqrt_tab[(n >> 56) - 64];
    1778       77910 :     u = (u << 7) + (uint32_t)(n >> 41) / u;
    1779       77910 :     return (u << 15) + (uint32_t)((n >> 17) / u);
    1780             : }
    1781             : 
    1782             : /*[clinic input]
    1783             : math.isqrt
    1784             : 
    1785             :     n: object
    1786             :     /
    1787             : 
    1788             : Return the integer part of the square root of the input.
    1789             : [clinic start generated code]*/
    1790             : 
    1791             : static PyObject *
    1792      174216 : math_isqrt(PyObject *module, PyObject *n)
    1793             : /*[clinic end generated code: output=35a6f7f980beab26 input=5b6e7ae4fa6c43d6]*/
    1794             : {
    1795             :     int a_too_large, c_bit_length;
    1796             :     size_t c, d;
    1797             :     uint64_t m;
    1798             :     uint32_t u;
    1799      174216 :     PyObject *a = NULL, *b;
    1800             : 
    1801      174216 :     n = _PyNumber_Index(n);
    1802      174216 :     if (n == NULL) {
    1803           6 :         return NULL;
    1804             :     }
    1805             : 
    1806      174210 :     if (_PyLong_Sign(n) < 0) {
    1807           4 :         PyErr_SetString(
    1808             :             PyExc_ValueError,
    1809             :             "isqrt() argument must be nonnegative");
    1810           4 :         goto error;
    1811             :     }
    1812      174206 :     if (_PyLong_Sign(n) == 0) {
    1813       96296 :         Py_DECREF(n);
    1814       96296 :         return PyLong_FromLong(0);
    1815             :     }
    1816             : 
    1817             :     /* c = (n.bit_length() - 1) // 2 */
    1818       77910 :     c = _PyLong_NumBits(n);
    1819       77910 :     if (c == (size_t)(-1)) {
    1820           0 :         goto error;
    1821             :     }
    1822       77910 :     c = (c - 1U) / 2U;
    1823             : 
    1824             :     /* Fast path: if c <= 31 then n < 2**64 and we can compute directly with a
    1825             :        fast, almost branch-free algorithm. */
    1826       77910 :     if (c <= 31U) {
    1827        8311 :         int shift = 31 - (int)c;
    1828        8311 :         m = (uint64_t)PyLong_AsUnsignedLongLong(n);
    1829        8311 :         Py_DECREF(n);
    1830        8311 :         if (m == (uint64_t)(-1) && PyErr_Occurred()) {
    1831           0 :             return NULL;
    1832             :         }
    1833        8311 :         u = _approximate_isqrt(m << 2*shift) >> shift;
    1834        8311 :         u -= (uint64_t)u * u > m;
    1835        8311 :         return PyLong_FromUnsignedLong(u);
    1836             :     }
    1837             : 
    1838             :     /* Slow path: n >= 2**64. We perform the first five iterations in C integer
    1839             :        arithmetic, then switch to using Python long integers. */
    1840             : 
    1841             :     /* From n >= 2**64 it follows that c.bit_length() >= 6. */
    1842       69599 :     c_bit_length = 6;
    1843       75334 :     while ((c >> c_bit_length) > 0U) {
    1844        5735 :         ++c_bit_length;
    1845             :     }
    1846             : 
    1847             :     /* Initialise d and a. */
    1848       69599 :     d = c >> (c_bit_length - 5);
    1849       69599 :     b = _PyLong_Rshift(n, 2U*c - 62U);
    1850       69599 :     if (b == NULL) {
    1851           0 :         goto error;
    1852             :     }
    1853       69599 :     m = (uint64_t)PyLong_AsUnsignedLongLong(b);
    1854       69599 :     Py_DECREF(b);
    1855       69599 :     if (m == (uint64_t)(-1) && PyErr_Occurred()) {
    1856           0 :         goto error;
    1857             :     }
    1858       69599 :     u = _approximate_isqrt(m) >> (31U - d);
    1859       69599 :     a = PyLong_FromUnsignedLong(u);
    1860       69599 :     if (a == NULL) {
    1861           0 :         goto error;
    1862             :     }
    1863             : 
    1864      144933 :     for (int s = c_bit_length - 6; s >= 0; --s) {
    1865             :         PyObject *q;
    1866       75334 :         size_t e = d;
    1867             : 
    1868       75334 :         d = c >> s;
    1869             : 
    1870             :         /* q = (n >> 2*c - e - d + 1) // a */
    1871       75334 :         q = _PyLong_Rshift(n, 2U*c - d - e + 1U);
    1872       75334 :         if (q == NULL) {
    1873           0 :             goto error;
    1874             :         }
    1875       75334 :         Py_SETREF(q, PyNumber_FloorDivide(q, a));
    1876       75334 :         if (q == NULL) {
    1877           0 :             goto error;
    1878             :         }
    1879             : 
    1880             :         /* a = (a << d - 1 - e) + q */
    1881       75334 :         Py_SETREF(a, _PyLong_Lshift(a, d - 1U - e));
    1882       75334 :         if (a == NULL) {
    1883           0 :             Py_DECREF(q);
    1884           0 :             goto error;
    1885             :         }
    1886       75334 :         Py_SETREF(a, PyNumber_Add(a, q));
    1887       75334 :         Py_DECREF(q);
    1888       75334 :         if (a == NULL) {
    1889           0 :             goto error;
    1890             :         }
    1891             :     }
    1892             : 
    1893             :     /* The correct result is either a or a - 1. Figure out which, and
    1894             :        decrement a if necessary. */
    1895             : 
    1896             :     /* a_too_large = n < a * a */
    1897       69599 :     b = PyNumber_Multiply(a, a);
    1898       69599 :     if (b == NULL) {
    1899           0 :         goto error;
    1900             :     }
    1901       69599 :     a_too_large = PyObject_RichCompareBool(n, b, Py_LT);
    1902       69599 :     Py_DECREF(b);
    1903       69599 :     if (a_too_large == -1) {
    1904           0 :         goto error;
    1905             :     }
    1906             : 
    1907       69599 :     if (a_too_large) {
    1908       11548 :         Py_SETREF(a, PyNumber_Subtract(a, _PyLong_GetOne()));
    1909             :     }
    1910       69599 :     Py_DECREF(n);
    1911       69599 :     return a;
    1912             : 
    1913           4 :   error:
    1914           4 :     Py_XDECREF(a);
    1915           4 :     Py_DECREF(n);
    1916           4 :     return NULL;
    1917             : }
    1918             : 
    1919             : /* Divide-and-conquer factorial algorithm
    1920             :  *
    1921             :  * Based on the formula and pseudo-code provided at:
    1922             :  * http://www.luschny.de/math/factorial/binarysplitfact.html
    1923             :  *
    1924             :  * Faster algorithms exist, but they're more complicated and depend on
    1925             :  * a fast prime factorization algorithm.
    1926             :  *
    1927             :  * Notes on the algorithm
    1928             :  * ----------------------
    1929             :  *
    1930             :  * factorial(n) is written in the form 2**k * m, with m odd.  k and m are
    1931             :  * computed separately, and then combined using a left shift.
    1932             :  *
    1933             :  * The function factorial_odd_part computes the odd part m (i.e., the greatest
    1934             :  * odd divisor) of factorial(n), using the formula:
    1935             :  *
    1936             :  *   factorial_odd_part(n) =
    1937             :  *
    1938             :  *        product_{i >= 0} product_{0 < j <= n / 2**i, j odd} j
    1939             :  *
    1940             :  * Example: factorial_odd_part(20) =
    1941             :  *
    1942             :  *        (1) *
    1943             :  *        (1) *
    1944             :  *        (1 * 3 * 5) *
    1945             :  *        (1 * 3 * 5 * 7 * 9) *
    1946             :  *        (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
    1947             :  *
    1948             :  * Here i goes from large to small: the first term corresponds to i=4 (any
    1949             :  * larger i gives an empty product), and the last term corresponds to i=0.
    1950             :  * Each term can be computed from the last by multiplying by the extra odd
    1951             :  * numbers required: e.g., to get from the penultimate term to the last one,
    1952             :  * we multiply by (11 * 13 * 15 * 17 * 19).
    1953             :  *
    1954             :  * To see a hint of why this formula works, here are the same numbers as above
    1955             :  * but with the even parts (i.e., the appropriate powers of 2) included.  For
    1956             :  * each subterm in the product for i, we multiply that subterm by 2**i:
    1957             :  *
    1958             :  *   factorial(20) =
    1959             :  *
    1960             :  *        (16) *
    1961             :  *        (8) *
    1962             :  *        (4 * 12 * 20) *
    1963             :  *        (2 * 6 * 10 * 14 * 18) *
    1964             :  *        (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
    1965             :  *
    1966             :  * The factorial_partial_product function computes the product of all odd j in
    1967             :  * range(start, stop) for given start and stop.  It's used to compute the
    1968             :  * partial products like (11 * 13 * 15 * 17 * 19) in the example above.  It
    1969             :  * operates recursively, repeatedly splitting the range into two roughly equal
    1970             :  * pieces until the subranges are small enough to be computed using only C
    1971             :  * integer arithmetic.
    1972             :  *
    1973             :  * The two-valuation k (i.e., the exponent of the largest power of 2 dividing
    1974             :  * the factorial) is computed independently in the main math_factorial
    1975             :  * function.  By standard results, its value is:
    1976             :  *
    1977             :  *    two_valuation = n//2 + n//4 + n//8 + ....
    1978             :  *
    1979             :  * It can be shown (e.g., by complete induction on n) that two_valuation is
    1980             :  * equal to n - count_set_bits(n), where count_set_bits(n) gives the number of
    1981             :  * '1'-bits in the binary expansion of n.
    1982             :  */
    1983             : 
    1984             : /* factorial_partial_product: Compute product(range(start, stop, 2)) using
    1985             :  * divide and conquer.  Assumes start and stop are odd and stop > start.
    1986             :  * max_bits must be >= bit_length(stop - 2). */
    1987             : 
    1988             : static PyObject *
    1989     1179940 : factorial_partial_product(unsigned long start, unsigned long stop,
    1990             :                           unsigned long max_bits)
    1991             : {
    1992             :     unsigned long midpoint, num_operands;
    1993     1179940 :     PyObject *left = NULL, *right = NULL, *result = NULL;
    1994             : 
    1995             :     /* If the return value will fit an unsigned long, then we can
    1996             :      * multiply in a tight, fast loop where each multiply is O(1).
    1997             :      * Compute an upper bound on the number of bits required to store
    1998             :      * the answer.
    1999             :      *
    2000             :      * Storing some integer z requires floor(lg(z))+1 bits, which is
    2001             :      * conveniently the value returned by bit_length(z).  The
    2002             :      * product x*y will require at most
    2003             :      * bit_length(x) + bit_length(y) bits to store, based
    2004             :      * on the idea that lg product = lg x + lg y.
    2005             :      *
    2006             :      * We know that stop - 2 is the largest number to be multiplied.  From
    2007             :      * there, we have: bit_length(answer) <= num_operands *
    2008             :      * bit_length(stop - 2)
    2009             :      */
    2010             : 
    2011     1179940 :     num_operands = (stop - start) / 2;
    2012             :     /* The "num_operands <= 8 * SIZEOF_LONG" check guards against the
    2013             :      * unlikely case of an overflow in num_operands * max_bits. */
    2014     1179940 :     if (num_operands <= 8 * SIZEOF_LONG &&
    2015     1165970 :         num_operands * max_bits <= 8 * SIZEOF_LONG) {
    2016             :         unsigned long j, total;
    2017     3968460 :         for (total = start, j = start + 2; j < stop; j += 2)
    2018     3244830 :             total *= j;
    2019      723623 :         return PyLong_FromUnsignedLong(total);
    2020             :     }
    2021             : 
    2022             :     /* find midpoint of range(start, stop), rounded up to next odd number. */
    2023      456322 :     midpoint = (start + num_operands) | 1;
    2024      456322 :     left = factorial_partial_product(start, midpoint,
    2025      456322 :                                      _Py_bit_length(midpoint - 2));
    2026      456322 :     if (left == NULL)
    2027           0 :         goto error;
    2028      456322 :     right = factorial_partial_product(midpoint, stop, max_bits);
    2029      456322 :     if (right == NULL)
    2030           0 :         goto error;
    2031      456322 :     result = PyNumber_Multiply(left, right);
    2032             : 
    2033      456322 :   error:
    2034      456322 :     Py_XDECREF(left);
    2035      456322 :     Py_XDECREF(right);
    2036      456322 :     return result;
    2037             : }
    2038             : 
    2039             : /* factorial_odd_part:  compute the odd part of factorial(n). */
    2040             : 
    2041             : static PyObject *
    2042       46055 : factorial_odd_part(unsigned long n)
    2043             : {
    2044             :     long i;
    2045             :     unsigned long v, lower, upper;
    2046             :     PyObject *partial, *tmp, *inner, *outer;
    2047             : 
    2048       46055 :     inner = PyLong_FromLong(1);
    2049       46055 :     if (inner == NULL)
    2050           0 :         return NULL;
    2051       46055 :     outer = inner;
    2052       46055 :     Py_INCREF(outer);
    2053             : 
    2054       46055 :     upper = 3;
    2055      339855 :     for (i = _Py_bit_length(n) - 2; i >= 0; i--) {
    2056      293800 :         v = n >> i;
    2057      293800 :         if (v <= 2)
    2058       26499 :             continue;
    2059      267301 :         lower = upper;
    2060             :         /* (v + 1) | 1 = least odd integer strictly larger than n / 2**i */
    2061      267301 :         upper = (v + 1) | 1;
    2062             :         /* Here inner is the product of all odd integers j in the range (0,
    2063             :            n/2**(i+1)].  The factorial_partial_product call below gives the
    2064             :            product of all odd integers j in the range (n/2**(i+1), n/2**i]. */
    2065      267301 :         partial = factorial_partial_product(lower, upper, _Py_bit_length(upper-2));
    2066             :         /* inner *= partial */
    2067      267301 :         if (partial == NULL)
    2068           0 :             goto error;
    2069      267301 :         tmp = PyNumber_Multiply(inner, partial);
    2070      267301 :         Py_DECREF(partial);
    2071      267301 :         if (tmp == NULL)
    2072           0 :             goto error;
    2073      267301 :         Py_DECREF(inner);
    2074      267301 :         inner = tmp;
    2075             :         /* Now inner is the product of all odd integers j in the range (0,
    2076             :            n/2**i], giving the inner product in the formula above. */
    2077             : 
    2078             :         /* outer *= inner; */
    2079      267301 :         tmp = PyNumber_Multiply(outer, inner);
    2080      267301 :         if (tmp == NULL)
    2081           0 :             goto error;
    2082      267301 :         Py_DECREF(outer);
    2083      267301 :         outer = tmp;
    2084             :     }
    2085       46055 :     Py_DECREF(inner);
    2086       46055 :     return outer;
    2087             : 
    2088           0 :   error:
    2089           0 :     Py_DECREF(outer);
    2090           0 :     Py_DECREF(inner);
    2091           0 :     return NULL;
    2092             : }
    2093             : 
    2094             : 
    2095             : /* Lookup table for small factorial values */
    2096             : 
    2097             : static const unsigned long SmallFactorials[] = {
    2098             :     1, 1, 2, 6, 24, 120, 720, 5040, 40320,
    2099             :     362880, 3628800, 39916800, 479001600,
    2100             : #if SIZEOF_LONG >= 8
    2101             :     6227020800, 87178291200, 1307674368000,
    2102             :     20922789888000, 355687428096000, 6402373705728000,
    2103             :     121645100408832000, 2432902008176640000
    2104             : #endif
    2105             : };
    2106             : 
    2107             : /*[clinic input]
    2108             : math.factorial
    2109             : 
    2110             :     n as arg: object
    2111             :     /
    2112             : 
    2113             : Find n!.
    2114             : 
    2115             : Raise a ValueError if x is negative or non-integral.
    2116             : [clinic start generated code]*/
    2117             : 
    2118             : static PyObject *
    2119       57459 : math_factorial(PyObject *module, PyObject *arg)
    2120             : /*[clinic end generated code: output=6686f26fae00e9ca input=713fb771677e8c31]*/
    2121             : {
    2122             :     long x, two_valuation;
    2123             :     int overflow;
    2124             :     PyObject *result, *odd_part;
    2125             : 
    2126       57459 :     x = PyLong_AsLongAndOverflow(arg, &overflow);
    2127       57459 :     if (x == -1 && PyErr_Occurred()) {
    2128           8 :         return NULL;
    2129             :     }
    2130       57451 :     else if (overflow == 1) {
    2131           1 :         PyErr_Format(PyExc_OverflowError,
    2132             :                      "factorial() argument should not exceed %ld",
    2133             :                      LONG_MAX);
    2134           1 :         return NULL;
    2135             :     }
    2136       57450 :     else if (overflow == -1 || x < 0) {
    2137           2 :         PyErr_SetString(PyExc_ValueError,
    2138             :                         "factorial() not defined for negative values");
    2139           2 :         return NULL;
    2140             :     }
    2141             : 
    2142             :     /* use lookup table if x is small */
    2143       57448 :     if (x < (long)Py_ARRAY_LENGTH(SmallFactorials))
    2144       11393 :         return PyLong_FromUnsignedLong(SmallFactorials[x]);
    2145             : 
    2146             :     /* else express in the form odd_part * 2**two_valuation, and compute as
    2147             :        odd_part << two_valuation. */
    2148       46055 :     odd_part = factorial_odd_part(x);
    2149       46055 :     if (odd_part == NULL)
    2150           0 :         return NULL;
    2151       46055 :     two_valuation = x - count_set_bits(x);
    2152       46055 :     result = _PyLong_Lshift(odd_part, two_valuation);
    2153       46055 :     Py_DECREF(odd_part);
    2154       46055 :     return result;
    2155             : }
    2156             : 
    2157             : 
    2158             : /*[clinic input]
    2159             : math.trunc
    2160             : 
    2161             :     x: object
    2162             :     /
    2163             : 
    2164             : Truncates the Real x to the nearest Integral toward 0.
    2165             : 
    2166             : Uses the __trunc__ magic method.
    2167             : [clinic start generated code]*/
    2168             : 
    2169             : static PyObject *
    2170        1027 : math_trunc(PyObject *module, PyObject *x)
    2171             : /*[clinic end generated code: output=34b9697b707e1031 input=2168b34e0a09134d]*/
    2172             : {
    2173             :     PyObject *trunc, *result;
    2174             : 
    2175        1027 :     if (PyFloat_CheckExact(x)) {
    2176          12 :         return PyFloat_Type.tp_as_number->nb_int(x);
    2177             :     }
    2178             : 
    2179        1015 :     if (Py_TYPE(x)->tp_dict == NULL) {
    2180           0 :         if (PyType_Ready(Py_TYPE(x)) < 0)
    2181           0 :             return NULL;
    2182             :     }
    2183             : 
    2184        1015 :     math_module_state *state = get_math_module_state(module);
    2185        1015 :     trunc = _PyObject_LookupSpecial(x, state->str___trunc__);
    2186        1015 :     if (trunc == NULL) {
    2187           4 :         if (!PyErr_Occurred())
    2188           3 :             PyErr_Format(PyExc_TypeError,
    2189             :                          "type %.100s doesn't define __trunc__ method",
    2190           3 :                          Py_TYPE(x)->tp_name);
    2191           4 :         return NULL;
    2192             :     }
    2193        1011 :     result = _PyObject_CallNoArgs(trunc);
    2194        1011 :     Py_DECREF(trunc);
    2195        1011 :     return result;
    2196             : }
    2197             : 
    2198             : 
    2199             : /*[clinic input]
    2200             : math.frexp
    2201             : 
    2202             :     x: double
    2203             :     /
    2204             : 
    2205             : Return the mantissa and exponent of x, as pair (m, e).
    2206             : 
    2207             : m is a float and e is an int, such that x = m * 2.**e.
    2208             : If x is 0, m and e are both 0.  Else 0.5 <= abs(m) < 1.0.
    2209             : [clinic start generated code]*/
    2210             : 
    2211             : static PyObject *
    2212      523934 : math_frexp_impl(PyObject *module, double x)
    2213             : /*[clinic end generated code: output=03e30d252a15ad4a input=96251c9e208bc6e9]*/
    2214             : {
    2215             :     int i;
    2216             :     /* deal with special cases directly, to sidestep platform
    2217             :        differences */
    2218      523934 :     if (Py_IS_NAN(x) || Py_IS_INFINITY(x) || !x) {
    2219           6 :         i = 0;
    2220             :     }
    2221             :     else {
    2222      523928 :         x = frexp(x, &i);
    2223             :     }
    2224      523934 :     return Py_BuildValue("(di)", x, i);
    2225             : }
    2226             : 
    2227             : 
    2228             : /*[clinic input]
    2229             : math.ldexp
    2230             : 
    2231             :     x: double
    2232             :     i: object
    2233             :     /
    2234             : 
    2235             : Return x * (2**i).
    2236             : 
    2237             : This is essentially the inverse of frexp().
    2238             : [clinic start generated code]*/
    2239             : 
    2240             : static PyObject *
    2241      597508 : math_ldexp_impl(PyObject *module, double x, PyObject *i)
    2242             : /*[clinic end generated code: output=b6892f3c2df9cc6a input=17d5970c1a40a8c1]*/
    2243             : {
    2244             :     double r;
    2245             :     long exp;
    2246             :     int overflow;
    2247             : 
    2248      597508 :     if (PyLong_Check(i)) {
    2249             :         /* on overflow, replace exponent with either LONG_MAX
    2250             :            or LONG_MIN, depending on the sign. */
    2251      597508 :         exp = PyLong_AsLongAndOverflow(i, &overflow);
    2252      597508 :         if (exp == -1 && PyErr_Occurred())
    2253           0 :             return NULL;
    2254      597508 :         if (overflow)
    2255          28 :             exp = overflow < 0 ? LONG_MIN : LONG_MAX;
    2256             :     }
    2257             :     else {
    2258           0 :         PyErr_SetString(PyExc_TypeError,
    2259             :                         "Expected an int as second argument to ldexp.");
    2260           0 :         return NULL;
    2261             :     }
    2262             : 
    2263      597508 :     if (x == 0. || !Py_IS_FINITE(x)) {
    2264             :         /* NaNs, zeros and infinities are returned unchanged */
    2265         239 :         r = x;
    2266         239 :         errno = 0;
    2267      597269 :     } else if (exp > INT_MAX) {
    2268             :         /* overflow */
    2269           6 :         r = copysign(Py_HUGE_VAL, x);
    2270           6 :         errno = ERANGE;
    2271      597263 :     } else if (exp < INT_MIN) {
    2272             :         /* underflow to +-0 */
    2273           6 :         r = copysign(0., x);
    2274           6 :         errno = 0;
    2275             :     } else {
    2276      597257 :         errno = 0;
    2277      597257 :         r = ldexp(x, (int)exp);
    2278      597257 :         if (Py_IS_INFINITY(r))
    2279         697 :             errno = ERANGE;
    2280             :     }
    2281             : 
    2282      597508 :     if (errno && is_error(r))
    2283         703 :         return NULL;
    2284      596805 :     return PyFloat_FromDouble(r);
    2285             : }
    2286             : 
    2287             : 
    2288             : /*[clinic input]
    2289             : math.modf
    2290             : 
    2291             :     x: double
    2292             :     /
    2293             : 
    2294             : Return the fractional and integer parts of x.
    2295             : 
    2296             : Both results carry the sign of x and are floats.
    2297             : [clinic start generated code]*/
    2298             : 
    2299             : static PyObject *
    2300        3146 : math_modf_impl(PyObject *module, double x)
    2301             : /*[clinic end generated code: output=90cee0260014c3c0 input=b4cfb6786afd9035]*/
    2302             : {
    2303             :     double y;
    2304             :     /* some platforms don't do the right thing for NaNs and
    2305             :        infinities, so we take care of special cases directly. */
    2306        3146 :     if (!Py_IS_FINITE(x)) {
    2307           3 :         if (Py_IS_INFINITY(x))
    2308           2 :             return Py_BuildValue("(dd)", copysign(0., x), x);
    2309           1 :         else if (Py_IS_NAN(x))
    2310           1 :             return Py_BuildValue("(dd)", x, x);
    2311             :     }
    2312             : 
    2313        3143 :     errno = 0;
    2314        3143 :     x = modf(x, &y);
    2315        3143 :     return Py_BuildValue("(dd)", x, y);
    2316             : }
    2317             : 
    2318             : 
    2319             : /* A decent logarithm is easy to compute even for huge ints, but libm can't
    2320             :    do that by itself -- loghelper can.  func is log or log10, and name is
    2321             :    "log" or "log10".  Note that overflow of the result isn't possible: an int
    2322             :    can contain no more than INT_MAX * SHIFT bits, so has value certainly less
    2323             :    than 2**(2**64 * 2**16) == 2**2**80, and log2 of that is 2**80, which is
    2324             :    small enough to fit in an IEEE single.  log and log10 are even smaller.
    2325             :    However, intermediate overflow is possible for an int if the number of bits
    2326             :    in that int is larger than PY_SSIZE_T_MAX. */
    2327             : 
    2328             : static PyObject*
    2329      607168 : loghelper(PyObject* arg, double (*func)(double))
    2330             : {
    2331             :     /* If it is int, do it ourselves. */
    2332      607168 :     if (PyLong_Check(arg)) {
    2333             :         double x, result;
    2334             :         Py_ssize_t e;
    2335             : 
    2336             :         /* Negative or zero inputs give a ValueError. */
    2337      291239 :         if (Py_SIZE(arg) <= 0) {
    2338           8 :             PyErr_SetString(PyExc_ValueError,
    2339             :                             "math domain error");
    2340           8 :             return NULL;
    2341             :         }
    2342             : 
    2343      291231 :         x = PyLong_AsDouble(arg);
    2344      291231 :         if (x == -1.0 && PyErr_Occurred()) {
    2345           8 :             if (!PyErr_ExceptionMatches(PyExc_OverflowError))
    2346           0 :                 return NULL;
    2347             :             /* Here the conversion to double overflowed, but it's possible
    2348             :                to compute the log anyway.  Clear the exception and continue. */
    2349           8 :             PyErr_Clear();
    2350           8 :             x = _PyLong_Frexp((PyLongObject *)arg, &e);
    2351           8 :             if (x == -1.0 && PyErr_Occurred())
    2352           0 :                 return NULL;
    2353             :             /* Value is ~= x * 2**e, so the log ~= log(x) + log(2) * e. */
    2354           8 :             result = func(x) + func(2.0) * e;
    2355             :         }
    2356             :         else
    2357             :             /* Successfully converted x to a double. */
    2358      291223 :             result = func(x);
    2359      291231 :         return PyFloat_FromDouble(result);
    2360             :     }
    2361             : 
    2362             :     /* Else let libm handle it by itself. */
    2363      315929 :     return math_1(arg, func, 0);
    2364             : }
    2365             : 
    2366             : 
    2367             : /*[clinic input]
    2368             : math.log
    2369             : 
    2370             :     x:    object
    2371             :     [
    2372             :     base: object(c_default="NULL") = math.e
    2373             :     ]
    2374             :     /
    2375             : 
    2376             : Return the logarithm of x to the given base.
    2377             : 
    2378             : If the base not specified, returns the natural logarithm (base e) of x.
    2379             : [clinic start generated code]*/
    2380             : 
    2381             : static PyObject *
    2382      597993 : math_log_impl(PyObject *module, PyObject *x, int group_right_1,
    2383             :               PyObject *base)
    2384             : /*[clinic end generated code: output=7b5a39e526b73fc9 input=0f62d5726cbfebbd]*/
    2385             : {
    2386             :     PyObject *num, *den;
    2387             :     PyObject *ans;
    2388             : 
    2389      597993 :     num = loghelper(x, m_log);
    2390      597993 :     if (num == NULL || base == NULL)
    2391      591092 :         return num;
    2392             : 
    2393        6901 :     den = loghelper(base, m_log);
    2394        6901 :     if (den == NULL) {
    2395           0 :         Py_DECREF(num);
    2396           0 :         return NULL;
    2397             :     }
    2398             : 
    2399        6901 :     ans = PyNumber_TrueDivide(num, den);
    2400        6901 :     Py_DECREF(num);
    2401        6901 :     Py_DECREF(den);
    2402        6901 :     return ans;
    2403             : }
    2404             : 
    2405             : 
    2406             : /*[clinic input]
    2407             : math.log2
    2408             : 
    2409             :     x: object
    2410             :     /
    2411             : 
    2412             : Return the base 2 logarithm of x.
    2413             : [clinic start generated code]*/
    2414             : 
    2415             : static PyObject *
    2416        2198 : math_log2(PyObject *module, PyObject *x)
    2417             : /*[clinic end generated code: output=5425899a4d5d6acb input=08321262bae4f39b]*/
    2418             : {
    2419        2198 :     return loghelper(x, m_log2);
    2420             : }
    2421             : 
    2422             : 
    2423             : /*[clinic input]
    2424             : math.log10
    2425             : 
    2426             :     x: object
    2427             :     /
    2428             : 
    2429             : Return the base 10 logarithm of x.
    2430             : [clinic start generated code]*/
    2431             : 
    2432             : static PyObject *
    2433          76 : math_log10(PyObject *module, PyObject *x)
    2434             : /*[clinic end generated code: output=be72a64617df9c6f input=b2469d02c6469e53]*/
    2435             : {
    2436          76 :     return loghelper(x, m_log10);
    2437             : }
    2438             : 
    2439             : 
    2440             : /*[clinic input]
    2441             : math.fmod
    2442             : 
    2443             :     x: double
    2444             :     y: double
    2445             :     /
    2446             : 
    2447             : Return fmod(x, y), according to platform C.
    2448             : 
    2449             : x % y may differ.
    2450             : [clinic start generated code]*/
    2451             : 
    2452             : static PyObject *
    2453          19 : math_fmod_impl(PyObject *module, double x, double y)
    2454             : /*[clinic end generated code: output=7559d794343a27b5 input=4f84caa8cfc26a03]*/
    2455             : {
    2456             :     double r;
    2457             :     /* fmod(x, +/-Inf) returns x for finite x. */
    2458          19 :     if (Py_IS_INFINITY(y) && Py_IS_FINITE(x))
    2459           5 :         return PyFloat_FromDouble(x);
    2460          14 :     errno = 0;
    2461          14 :     r = fmod(x, y);
    2462          14 :     if (Py_IS_NAN(r)) {
    2463           7 :         if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
    2464           4 :             errno = EDOM;
    2465             :         else
    2466           3 :             errno = 0;
    2467             :     }
    2468          14 :     if (errno && is_error(r))
    2469           4 :         return NULL;
    2470             :     else
    2471          10 :         return PyFloat_FromDouble(r);
    2472             : }
    2473             : 
    2474             : /*
    2475             : Given a *vec* of values, compute the vector norm:
    2476             : 
    2477             :     sqrt(sum(x ** 2 for x in vec))
    2478             : 
    2479             : The *max* variable should be equal to the largest fabs(x).
    2480             : The *n* variable is the length of *vec*.
    2481             : If n==0, then *max* should be 0.0.
    2482             : If an infinity is present in the vec, *max* should be INF.
    2483             : The *found_nan* variable indicates whether some member of
    2484             : the *vec* is a NaN.
    2485             : 
    2486             : To avoid overflow/underflow and to achieve high accuracy giving results
    2487             : that are almost always correctly rounded, four techniques are used:
    2488             : 
    2489             : * lossless scaling using a power-of-two scaling factor
    2490             : * accurate squaring using Veltkamp-Dekker splitting [1]
    2491             : * compensated summation using a variant of the Neumaier algorithm [2]
    2492             : * differential correction of the square root [3]
    2493             : 
    2494             : The usual presentation of the Neumaier summation algorithm has an
    2495             : expensive branch depending on which operand has the larger
    2496             : magnitude.  We avoid this cost by arranging the calculation so that
    2497             : fabs(csum) is always as large as fabs(x).
    2498             : 
    2499             : To establish the invariant, *csum* is initialized to 1.0 which is
    2500             : always larger than x**2 after scaling or after division by *max*.
    2501             : After the loop is finished, the initial 1.0 is subtracted out for a
    2502             : net zero effect on the final sum.  Since *csum* will be greater than
    2503             : 1.0, the subtraction of 1.0 will not cause fractional digits to be
    2504             : dropped from *csum*.
    2505             : 
    2506             : To get the full benefit from compensated summation, the largest
    2507             : addend should be in the range: 0.5 <= |x| <= 1.0.  Accordingly,
    2508             : scaling or division by *max* should not be skipped even if not
    2509             : otherwise needed to prevent overflow or loss of precision.
    2510             : 
    2511             : The assertion that hi*hi <= 1.0 is a bit subtle.  Each vector element
    2512             : gets scaled to a magnitude below 1.0.  The Veltkamp-Dekker splitting
    2513             : algorithm gives a *hi* value that is correctly rounded to half
    2514             : precision.  When a value at or below 1.0 is correctly rounded, it
    2515             : never goes above 1.0.  And when values at or below 1.0 are squared,
    2516             : they remain at or below 1.0, thus preserving the summation invariant.
    2517             : 
    2518             : Another interesting assertion is that csum+lo*lo == csum. In the loop,
    2519             : each scaled vector element has a magnitude less than 1.0.  After the
    2520             : Veltkamp split, *lo* has a maximum value of 2**-27.  So the maximum
    2521             : value of *lo* squared is 2**-54.  The value of ulp(1.0)/2.0 is 2**-53.
    2522             : Given that csum >= 1.0, we have:
    2523             :     lo**2 <= 2**-54 < 2**-53 == 1/2*ulp(1.0) <= ulp(csum)/2
    2524             : Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum.
    2525             : 
    2526             : To minimize loss of information during the accumulation of fractional
    2527             : values, each term has a separate accumulator.  This also breaks up
    2528             : sequential dependencies in the inner loop so the CPU can maximize
    2529             : floating point throughput. [4]  On a 2.6 GHz Haswell, adding one
    2530             : dimension has an incremental cost of only 5ns -- for example when
    2531             : moving from hypot(x,y) to hypot(x,y,z).
    2532             : 
    2533             : The square root differential correction is needed because a
    2534             : correctly rounded square root of a correctly rounded sum of
    2535             : squares can still be off by as much as one ulp.
    2536             : 
    2537             : The differential correction starts with a value *x* that is
    2538             : the difference between the square of *h*, the possibly inaccurately
    2539             : rounded square root, and the accurately computed sum of squares.
    2540             : The correction is the first order term of the Maclaurin series
    2541             : expansion of sqrt(h**2 + x) == h + x/(2*h) + O(x**2). [5]
    2542             : 
    2543             : Essentially, this differential correction is equivalent to one
    2544             : refinement step in Newton's divide-and-average square root
    2545             : algorithm, effectively doubling the number of accurate bits.
    2546             : This technique is used in Dekker's SQRT2 algorithm and again in
    2547             : Borges' ALGORITHM 4 and 5.
    2548             : 
    2549             : Without proof for all cases, hypot() cannot claim to be always
    2550             : correctly rounded.  However for n <= 1000, prior to the final addition
    2551             : that rounds the overall result, the internal accuracy of "h" together
    2552             : with its correction of "x / (2.0 * h)" is at least 100 bits. [6]
    2553             : Also, hypot() was tested against a Decimal implementation with
    2554             : prec=300.  After 100 million trials, no incorrectly rounded examples
    2555             : were found.  In addition, perfect commutativity (all permutations are
    2556             : exactly equal) was verified for 1 billion random inputs with n=5. [7]
    2557             : 
    2558             : References:
    2559             : 
    2560             : 1. Veltkamp-Dekker splitting: http://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
    2561             : 2. Compensated summation:  http://www.ti3.tu-harburg.de/paper/rump/Ru08b.pdf
    2562             : 3. Square root differential correction:  https://arxiv.org/pdf/1904.09481.pdf
    2563             : 4. Data dependency graph:  https://bugs.python.org/file49439/hypot.png
    2564             : 5. https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
    2565             : 6. Analysis of internal accuracy:  https://bugs.python.org/file49484/best_frac.py
    2566             : 7. Commutativity test:  https://bugs.python.org/file49448/test_hypot_commutativity.py
    2567             : 
    2568             : */
    2569             : 
    2570             : static inline double
    2571      113901 : vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
    2572             : {
    2573      113901 :     const double T27 = 134217729.0;     /* ldexp(1.0, 27) + 1.0) */
    2574      113901 :     double x, scale, oldcsum, csum = 1.0, frac1 = 0.0, frac2 = 0.0, frac3 = 0.0;
    2575             :     double t, hi, lo, h;
    2576             :     int max_e;
    2577             :     Py_ssize_t i;
    2578             : 
    2579      113901 :     if (Py_IS_INFINITY(max)) {
    2580       87869 :         return max;
    2581             :     }
    2582       26032 :     if (found_nan) {
    2583       25701 :         return Py_NAN;
    2584             :     }
    2585         331 :     if (max == 0.0 || n <= 1) {
    2586          47 :         return max;
    2587             :     }
    2588         284 :     frexp(max, &max_e);
    2589         284 :     if (max_e >= -1023) {
    2590         203 :         scale = ldexp(1.0, -max_e);
    2591         203 :         assert(max * scale >= 0.5);
    2592         203 :         assert(max * scale < 1.0);
    2593        2039 :         for (i=0 ; i < n ; i++) {
    2594        1836 :             x = vec[i];
    2595        1836 :             assert(Py_IS_FINITE(x) && fabs(x) <= max);
    2596             : 
    2597        1836 :             x *= scale;
    2598        1836 :             assert(fabs(x) < 1.0);
    2599             : 
    2600        1836 :             t = x * T27;
    2601        1836 :             hi = t - (t - x);
    2602        1836 :             lo = x - hi;
    2603        1836 :             assert(hi + lo == x);
    2604             : 
    2605        1836 :             x = hi * hi;
    2606        1836 :             assert(x <= 1.0);
    2607        1836 :             assert(fabs(csum) >= fabs(x));
    2608        1836 :             oldcsum = csum;
    2609        1836 :             csum += x;
    2610        1836 :             frac1 += (oldcsum - csum) + x;
    2611             : 
    2612        1836 :             x = 2.0 * hi * lo;
    2613        1836 :             assert(fabs(csum) >= fabs(x));
    2614        1836 :             oldcsum = csum;
    2615        1836 :             csum += x;
    2616        1836 :             frac2 += (oldcsum - csum) + x;
    2617             : 
    2618        1836 :             assert(csum + lo * lo == csum);
    2619        1836 :             frac3 += lo * lo;
    2620             :         }
    2621         203 :         h = sqrt(csum - 1.0 + (frac1 + frac2 + frac3));
    2622             : 
    2623         203 :         x = h;
    2624         203 :         t = x * T27;
    2625         203 :         hi = t - (t - x);
    2626         203 :         lo = x - hi;
    2627         203 :         assert (hi + lo == x);
    2628             : 
    2629         203 :         x = -hi * hi;
    2630         203 :         assert(fabs(csum) >= fabs(x));
    2631         203 :         oldcsum = csum;
    2632         203 :         csum += x;
    2633         203 :         frac1 += (oldcsum - csum) + x;
    2634             : 
    2635         203 :         x = -2.0 * hi * lo;
    2636         203 :         assert(fabs(csum) >= fabs(x));
    2637         203 :         oldcsum = csum;
    2638         203 :         csum += x;
    2639         203 :         frac2 += (oldcsum - csum) + x;
    2640             : 
    2641         203 :         x = -lo * lo;
    2642         203 :         assert(fabs(csum) >= fabs(x));
    2643         203 :         oldcsum = csum;
    2644         203 :         csum += x;
    2645         203 :         frac3 += (oldcsum - csum) + x;
    2646             : 
    2647         203 :         x = csum - 1.0 + (frac1 + frac2 + frac3);
    2648         203 :         return (h + x / (2.0 * h)) / scale;
    2649             :     }
    2650             :     /* When max_e < -1023, ldexp(1.0, -max_e) overflows.
    2651             :        So instead of multiplying by a scale, we just divide by *max*.
    2652             :     */
    2653         243 :     for (i=0 ; i < n ; i++) {
    2654         162 :         x = vec[i];
    2655         162 :         assert(Py_IS_FINITE(x) && fabs(x) <= max);
    2656         162 :         x /= max;
    2657         162 :         x = x*x;
    2658         162 :         assert(x <= 1.0);
    2659         162 :         assert(fabs(csum) >= fabs(x));
    2660         162 :         oldcsum = csum;
    2661         162 :         csum += x;
    2662         162 :         frac1 += (oldcsum - csum) + x;
    2663             :     }
    2664          81 :     return max * sqrt(csum - 1.0 + frac1);
    2665             : }
    2666             : 
    2667             : #define NUM_STACK_ELEMS 16
    2668             : 
    2669             : /*[clinic input]
    2670             : math.dist
    2671             : 
    2672             :     p: object
    2673             :     q: object
    2674             :     /
    2675             : 
    2676             : Return the Euclidean distance between two points p and q.
    2677             : 
    2678             : The points should be specified as sequences (or iterables) of
    2679             : coordinates.  Both inputs must have the same dimension.
    2680             : 
    2681             : Roughly equivalent to:
    2682             :     sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q)))
    2683             : [clinic start generated code]*/
    2684             : 
    2685             : static PyObject *
    2686      113769 : math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
    2687             : /*[clinic end generated code: output=56bd9538d06bbcfe input=74e85e1b6092e68e]*/
    2688             : {
    2689             :     PyObject *item;
    2690      113769 :     double max = 0.0;
    2691             :     double x, px, qx, result;
    2692             :     Py_ssize_t i, m, n;
    2693      113769 :     int found_nan = 0, p_allocated = 0, q_allocated = 0;
    2694             :     double diffs_on_stack[NUM_STACK_ELEMS];
    2695      113769 :     double *diffs = diffs_on_stack;
    2696             : 
    2697      113769 :     if (!PyTuple_Check(p)) {
    2698           4 :         p = PySequence_Tuple(p);
    2699           4 :         if (p == NULL) {
    2700           1 :             return NULL;
    2701             :         }
    2702           3 :         p_allocated = 1;
    2703             :     }
    2704      113768 :     if (!PyTuple_Check(q)) {
    2705           3 :         q = PySequence_Tuple(q);
    2706           3 :         if (q == NULL) {
    2707           0 :             if (p_allocated) {
    2708           0 :                 Py_DECREF(p);
    2709             :             }
    2710           0 :             return NULL;
    2711             :         }
    2712           3 :         q_allocated = 1;
    2713             :     }
    2714             : 
    2715      113768 :     m = PyTuple_GET_SIZE(p);
    2716      113768 :     n = PyTuple_GET_SIZE(q);
    2717      113768 :     if (m != n) {
    2718           2 :         PyErr_SetString(PyExc_ValueError,
    2719             :                         "both points must have the same number of dimensions");
    2720           2 :         return NULL;
    2721             : 
    2722             :     }
    2723      113766 :     if (n > NUM_STACK_ELEMS) {
    2724          30 :         diffs = (double *) PyObject_Malloc(n * sizeof(double));
    2725          30 :         if (diffs == NULL) {
    2726           0 :             return PyErr_NoMemory();
    2727             :         }
    2728             :     }
    2729      455787 :     for (i=0 ; i<n ; i++) {
    2730      342025 :         item = PyTuple_GET_ITEM(p, i);
    2731      342025 :         ASSIGN_DOUBLE(px, item, error_exit);
    2732      342022 :         item = PyTuple_GET_ITEM(q, i);
    2733      342022 :         ASSIGN_DOUBLE(qx, item, error_exit);
    2734      342021 :         x = fabs(px - qx);
    2735      342021 :         diffs[i] = x;
    2736      342021 :         found_nan |= Py_IS_NAN(x);
    2737      342021 :         if (x > max) {
    2738      123948 :             max = x;
    2739             :         }
    2740             :     }
    2741      113762 :     result = vector_norm(n, diffs, max, found_nan);
    2742      113762 :     if (diffs != diffs_on_stack) {
    2743          30 :         PyObject_Free(diffs);
    2744             :     }
    2745      113762 :     if (p_allocated) {
    2746           2 :         Py_DECREF(p);
    2747             :     }
    2748      113762 :     if (q_allocated) {
    2749           2 :         Py_DECREF(q);
    2750             :     }
    2751      113762 :     return PyFloat_FromDouble(result);
    2752             : 
    2753           4 :   error_exit:
    2754           4 :     if (diffs != diffs_on_stack) {
    2755           0 :         PyObject_Free(diffs);
    2756             :     }
    2757           4 :     if (p_allocated) {
    2758           1 :         Py_DECREF(p);
    2759             :     }
    2760           4 :     if (q_allocated) {
    2761           1 :         Py_DECREF(q);
    2762             :     }
    2763           4 :     return NULL;
    2764             : }
    2765             : 
    2766             : /* AC: cannot convert yet, waiting for *args support */
    2767             : static PyObject *
    2768         141 : math_hypot(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
    2769             : {
    2770             :     Py_ssize_t i;
    2771             :     PyObject *item;
    2772         141 :     double max = 0.0;
    2773             :     double x, result;
    2774         141 :     int found_nan = 0;
    2775             :     double coord_on_stack[NUM_STACK_ELEMS];
    2776         141 :     double *coordinates = coord_on_stack;
    2777             : 
    2778         141 :     if (nargs > NUM_STACK_ELEMS) {
    2779          15 :         coordinates = (double *) PyObject_Malloc(nargs * sizeof(double));
    2780          15 :         if (coordinates == NULL) {
    2781           0 :             return PyErr_NoMemory();
    2782             :         }
    2783             :     }
    2784         853 :     for (i = 0; i < nargs; i++) {
    2785         714 :         item = args[i];
    2786         714 :         ASSIGN_DOUBLE(x, item, error_exit);
    2787         712 :         x = fabs(x);
    2788         712 :         coordinates[i] = x;
    2789         712 :         found_nan |= Py_IS_NAN(x);
    2790         712 :         if (x > max) {
    2791         159 :             max = x;
    2792             :         }
    2793             :     }
    2794         139 :     result = vector_norm(nargs, coordinates, max, found_nan);
    2795         139 :     if (coordinates != coord_on_stack) {
    2796          15 :         PyObject_Free(coordinates);
    2797             :     }
    2798         139 :     return PyFloat_FromDouble(result);
    2799             : 
    2800           2 :   error_exit:
    2801           2 :     if (coordinates != coord_on_stack) {
    2802           0 :         PyObject_Free(coordinates);
    2803             :     }
    2804           2 :     return NULL;
    2805             : }
    2806             : 
    2807             : #undef NUM_STACK_ELEMS
    2808             : 
    2809             : PyDoc_STRVAR(math_hypot_doc,
    2810             :              "hypot(*coordinates) -> value\n\n\
    2811             : Multidimensional Euclidean distance from the origin to a point.\n\
    2812             : \n\
    2813             : Roughly equivalent to:\n\
    2814             :     sqrt(sum(x**2 for x in coordinates))\n\
    2815             : \n\
    2816             : For a two dimensional point (x, y), gives the hypotenuse\n\
    2817             : using the Pythagorean theorem:  sqrt(x*x + y*y).\n\
    2818             : \n\
    2819             : For example, the hypotenuse of a 3/4/5 right triangle is:\n\
    2820             : \n\
    2821             :     >>> hypot(3.0, 4.0)\n\
    2822             :     5.0\n\
    2823             : ");
    2824             : 
    2825             : /* pow can't use math_2, but needs its own wrapper: the problem is
    2826             :    that an infinite result can arise either as a result of overflow
    2827             :    (in which case OverflowError should be raised) or as a result of
    2828             :    e.g. 0.**-5. (for which ValueError needs to be raised.)
    2829             : */
    2830             : 
    2831             : /*[clinic input]
    2832             : math.pow
    2833             : 
    2834             :     x: double
    2835             :     y: double
    2836             :     /
    2837             : 
    2838             : Return x**y (x to the power of y).
    2839             : [clinic start generated code]*/
    2840             : 
    2841             : static PyObject *
    2842         120 : math_pow_impl(PyObject *module, double x, double y)
    2843             : /*[clinic end generated code: output=fff93e65abccd6b0 input=c26f1f6075088bfd]*/
    2844             : {
    2845             :     double r;
    2846             :     int odd_y;
    2847             : 
    2848             :     /* deal directly with IEEE specials, to cope with problems on various
    2849             :        platforms whose semantics don't exactly match C99 */
    2850         120 :     r = 0.; /* silence compiler warning */
    2851         120 :     if (!Py_IS_FINITE(x) || !Py_IS_FINITE(y)) {
    2852          66 :         errno = 0;
    2853          66 :         if (Py_IS_NAN(x))
    2854           3 :             r = y == 0. ? 1. : x; /* NaN**0 = 1 */
    2855          63 :         else if (Py_IS_NAN(y))
    2856          11 :             r = x == 1. ? 1. : y; /* 1**NaN = 1 */
    2857          52 :         else if (Py_IS_INFINITY(x)) {
    2858          22 :             odd_y = Py_IS_FINITE(y) && fmod(fabs(y), 2.0) == 1.0;
    2859          22 :             if (y > 0.)
    2860          10 :                 r = odd_y ? x : fabs(x);
    2861          12 :             else if (y == 0.)
    2862           4 :                 r = 1.;
    2863             :             else /* y < 0. */
    2864           8 :                 r = odd_y ? copysign(0., x) : 0.;
    2865             :         }
    2866          30 :         else if (Py_IS_INFINITY(y)) {
    2867          30 :             if (fabs(x) == 1.0)
    2868           8 :                 r = 1.;
    2869          22 :             else if (y > 0. && fabs(x) > 1.0)
    2870           4 :                 r = y;
    2871          18 :             else if (y < 0. && fabs(x) < 1.0) {
    2872           7 :                 r = -y; /* result is +inf */
    2873             :             }
    2874             :             else
    2875          11 :                 r = 0.;
    2876             :         }
    2877             :     }
    2878             :     else {
    2879             :         /* let libm handle finite**finite */
    2880          54 :         errno = 0;
    2881          54 :         r = pow(x, y);
    2882             :         /* a NaN result should arise only from (-ve)**(finite
    2883             :            non-integer); in this case we want to raise ValueError. */
    2884          54 :         if (!Py_IS_FINITE(r)) {
    2885          12 :             if (Py_IS_NAN(r)) {
    2886           6 :                 errno = EDOM;
    2887             :             }
    2888             :             /*
    2889             :                an infinite result here arises either from:
    2890             :                (A) (+/-0.)**negative (-> divide-by-zero)
    2891             :                (B) overflow of x**y with x and y finite
    2892             :             */
    2893           6 :             else if (Py_IS_INFINITY(r)) {
    2894           6 :                 if (x == 0.)
    2895           6 :                     errno = EDOM;
    2896             :                 else
    2897           0 :                     errno = ERANGE;
    2898             :             }
    2899             :         }
    2900             :     }
    2901             : 
    2902         120 :     if (errno && is_error(r))
    2903          12 :         return NULL;
    2904             :     else
    2905         108 :         return PyFloat_FromDouble(r);
    2906             : }
    2907             : 
    2908             : 
    2909             : static const double degToRad = Py_MATH_PI / 180.0;
    2910             : static const double radToDeg = 180.0 / Py_MATH_PI;
    2911             : 
    2912             : /*[clinic input]
    2913             : math.degrees
    2914             : 
    2915             :     x: double
    2916             :     /
    2917             : 
    2918             : Convert angle x from radians to degrees.
    2919             : [clinic start generated code]*/
    2920             : 
    2921             : static PyObject *
    2922          56 : math_degrees_impl(PyObject *module, double x)
    2923             : /*[clinic end generated code: output=7fea78b294acd12f input=81e016555d6e3660]*/
    2924             : {
    2925          56 :     return PyFloat_FromDouble(x * radToDeg);
    2926             : }
    2927             : 
    2928             : 
    2929             : /*[clinic input]
    2930             : math.radians
    2931             : 
    2932             :     x: double
    2933             :     /
    2934             : 
    2935             : Convert angle x from degrees to radians.
    2936             : [clinic start generated code]*/
    2937             : 
    2938             : static PyObject *
    2939          43 : math_radians_impl(PyObject *module, double x)
    2940             : /*[clinic end generated code: output=34daa47caf9b1590 input=91626fc489fe3d63]*/
    2941             : {
    2942          43 :     return PyFloat_FromDouble(x * degToRad);
    2943             : }
    2944             : 
    2945             : 
    2946             : /*[clinic input]
    2947             : math.isfinite
    2948             : 
    2949             :     x: double
    2950             :     /
    2951             : 
    2952             : Return True if x is neither an infinity nor a NaN, and False otherwise.
    2953             : [clinic start generated code]*/
    2954             : 
    2955             : static PyObject *
    2956         163 : math_isfinite_impl(PyObject *module, double x)
    2957             : /*[clinic end generated code: output=8ba1f396440c9901 input=46967d254812e54a]*/
    2958             : {
    2959         163 :     return PyBool_FromLong((long)Py_IS_FINITE(x));
    2960             : }
    2961             : 
    2962             : 
    2963             : /*[clinic input]
    2964             : math.isnan
    2965             : 
    2966             :     x: double
    2967             :     /
    2968             : 
    2969             : Return True if x is a NaN (not a number), and False otherwise.
    2970             : [clinic start generated code]*/
    2971             : 
    2972             : static PyObject *
    2973      111353 : math_isnan_impl(PyObject *module, double x)
    2974             : /*[clinic end generated code: output=f537b4d6df878c3e input=935891e66083f46a]*/
    2975             : {
    2976      111353 :     return PyBool_FromLong((long)Py_IS_NAN(x));
    2977             : }
    2978             : 
    2979             : 
    2980             : /*[clinic input]
    2981             : math.isinf
    2982             : 
    2983             :     x: double
    2984             :     /
    2985             : 
    2986             : Return True if x is a positive or negative infinity, and False otherwise.
    2987             : [clinic start generated code]*/
    2988             : 
    2989             : static PyObject *
    2990      247236 : math_isinf_impl(PyObject *module, double x)
    2991             : /*[clinic end generated code: output=9f00cbec4de7b06b input=32630e4212cf961f]*/
    2992             : {
    2993      247236 :     return PyBool_FromLong((long)Py_IS_INFINITY(x));
    2994             : }
    2995             : 
    2996             : 
    2997             : /*[clinic input]
    2998             : math.isclose -> bool
    2999             : 
    3000             :     a: double
    3001             :     b: double
    3002             :     *
    3003             :     rel_tol: double = 1e-09
    3004             :         maximum difference for being considered "close", relative to the
    3005             :         magnitude of the input values
    3006             :     abs_tol: double = 0.0
    3007             :         maximum difference for being considered "close", regardless of the
    3008             :         magnitude of the input values
    3009             : 
    3010             : Determine whether two floating point numbers are close in value.
    3011             : 
    3012             : Return True if a is close in value to b, and False otherwise.
    3013             : 
    3014             : For the values to be considered close, the difference between them
    3015             : must be smaller than at least one of the tolerances.
    3016             : 
    3017             : -inf, inf and NaN behave similarly to the IEEE 754 Standard.  That
    3018             : is, NaN is not close to anything, even itself.  inf and -inf are
    3019             : only close to themselves.
    3020             : [clinic start generated code]*/
    3021             : 
    3022             : static int
    3023         313 : math_isclose_impl(PyObject *module, double a, double b, double rel_tol,
    3024             :                   double abs_tol)
    3025             : /*[clinic end generated code: output=b73070207511952d input=f28671871ea5bfba]*/
    3026             : {
    3027         313 :     double diff = 0.0;
    3028             : 
    3029             :     /* sanity check on the inputs */
    3030         313 :     if (rel_tol < 0.0 || abs_tol < 0.0 ) {
    3031           2 :         PyErr_SetString(PyExc_ValueError,
    3032             :                         "tolerances must be non-negative");
    3033           2 :         return -1;
    3034             :     }
    3035             : 
    3036         311 :     if ( a == b ) {
    3037             :         /* short circuit exact equality -- needed to catch two infinities of
    3038             :            the same sign. And perhaps speeds things up a bit sometimes.
    3039             :         */
    3040         211 :         return 1;
    3041             :     }
    3042             : 
    3043             :     /* This catches the case of two infinities of opposite sign, or
    3044             :        one infinity and one finite number. Two infinities of opposite
    3045             :        sign would otherwise have an infinite relative tolerance.
    3046             :        Two infinities of the same sign are caught by the equality check
    3047             :        above.
    3048             :     */
    3049             : 
    3050         100 :     if (Py_IS_INFINITY(a) || Py_IS_INFINITY(b)) {
    3051           7 :         return 0;
    3052             :     }
    3053             : 
    3054             :     /* now do the regular computation
    3055             :        this is essentially the "weak" test from the Boost library
    3056             :     */
    3057             : 
    3058          93 :     diff = fabs(b - a);
    3059             : 
    3060         128 :     return (((diff <= fabs(rel_tol * b)) ||
    3061          93 :              (diff <= fabs(rel_tol * a))) ||
    3062             :             (diff <= abs_tol));
    3063             : }
    3064             : 
    3065             : static inline int
    3066         142 : _check_long_mult_overflow(long a, long b) {
    3067             : 
    3068             :     /* From Python2's int_mul code:
    3069             : 
    3070             :     Integer overflow checking for * is painful:  Python tried a couple ways, but
    3071             :     they didn't work on all platforms, or failed in endcases (a product of
    3072             :     -sys.maxint-1 has been a particular pain).
    3073             : 
    3074             :     Here's another way:
    3075             : 
    3076             :     The native long product x*y is either exactly right or *way* off, being
    3077             :     just the last n bits of the true product, where n is the number of bits
    3078             :     in a long (the delivered product is the true product plus i*2**n for
    3079             :     some integer i).
    3080             : 
    3081             :     The native double product (double)x * (double)y is subject to three
    3082             :     rounding errors:  on a sizeof(long)==8 box, each cast to double can lose
    3083             :     info, and even on a sizeof(long)==4 box, the multiplication can lose info.
    3084             :     But, unlike the native long product, it's not in *range* trouble:  even
    3085             :     if sizeof(long)==32 (256-bit longs), the product easily fits in the
    3086             :     dynamic range of a double.  So the leading 50 (or so) bits of the double
    3087             :     product are correct.
    3088             : 
    3089             :     We check these two ways against each other, and declare victory if they're
    3090             :     approximately the same.  Else, because the native long product is the only
    3091             :     one that can lose catastrophic amounts of information, it's the native long
    3092             :     product that must have overflowed.
    3093             : 
    3094             :     */
    3095             : 
    3096         142 :     long longprod = (long)((unsigned long)a * b);
    3097         142 :     double doubleprod = (double)a * (double)b;
    3098         142 :     double doubled_longprod = (double)longprod;
    3099             : 
    3100         142 :     if (doubled_longprod == doubleprod) {
    3101         138 :         return 0;
    3102             :     }
    3103             : 
    3104           4 :     const double diff = doubled_longprod - doubleprod;
    3105           4 :     const double absdiff = diff >= 0.0 ? diff : -diff;
    3106           4 :     const double absprod = doubleprod >= 0.0 ? doubleprod : -doubleprod;
    3107             : 
    3108           4 :     if (32.0 * absdiff <= absprod) {
    3109           0 :         return 0;
    3110             :     }
    3111             : 
    3112           4 :     return 1;
    3113             : }
    3114             : 
    3115             : /*[clinic input]
    3116             : math.prod
    3117             : 
    3118             :     iterable: object
    3119             :     /
    3120             :     *
    3121             :     start: object(c_default="NULL") = 1
    3122             : 
    3123             : Calculate the product of all the elements in the input iterable.
    3124             : 
    3125             : The default start value for the product is 1.
    3126             : 
    3127             : When the iterable is empty, return the start value.  This function is
    3128             : intended specifically for use with numeric values and may reject
    3129             : non-numeric types.
    3130             : [clinic start generated code]*/
    3131             : 
    3132             : static PyObject *
    3133          60 : math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
    3134             : /*[clinic end generated code: output=36153bedac74a198 input=4c5ab0682782ed54]*/
    3135             : {
    3136          60 :     PyObject *result = start;
    3137             :     PyObject *temp, *item, *iter;
    3138             : 
    3139          60 :     iter = PyObject_GetIter(iterable);
    3140          60 :     if (iter == NULL) {
    3141           1 :         return NULL;
    3142             :     }
    3143             : 
    3144          59 :     if (result == NULL) {
    3145          48 :         result = _PyLong_GetOne();
    3146             :     }
    3147          59 :     Py_INCREF(result);
    3148             : #ifndef SLOW_PROD
    3149             :     /* Fast paths for integers keeping temporary products in C.
    3150             :      * Assumes all inputs are the same type.
    3151             :      * If the assumption fails, default to use PyObjects instead.
    3152             :     */
    3153          59 :     if (PyLong_CheckExact(result)) {
    3154             :         int overflow;
    3155          50 :         long i_result = PyLong_AsLongAndOverflow(result, &overflow);
    3156             :         /* If this already overflowed, don't even enter the loop. */
    3157          50 :         if (overflow == 0) {
    3158          50 :             Py_DECREF(result);
    3159          50 :             result = NULL;
    3160             :         }
    3161             :         /* Loop over all the items in the iterable until we finish, we overflow
    3162             :          * or we found a non integer element */
    3163         225 :         while (result == NULL) {
    3164         188 :             item = PyIter_Next(iter);
    3165         188 :             if (item == NULL) {
    3166          12 :                 Py_DECREF(iter);
    3167          12 :                 if (PyErr_Occurred()) {
    3168          13 :                     return NULL;
    3169             :                 }
    3170          12 :                 return PyLong_FromLong(i_result);
    3171             :             }
    3172         176 :             if (PyLong_CheckExact(item)) {
    3173         142 :                 long b = PyLong_AsLongAndOverflow(item, &overflow);
    3174         142 :                 if (overflow == 0 && !_check_long_mult_overflow(i_result, b)) {
    3175         138 :                     long x = i_result * b;
    3176         138 :                     i_result = x;
    3177         138 :                     Py_DECREF(item);
    3178         138 :                     continue;
    3179             :                 }
    3180             :             }
    3181             :             /* Either overflowed or is not an int.
    3182             :              * Restore real objects and process normally */
    3183          38 :             result = PyLong_FromLong(i_result);
    3184          38 :             if (result == NULL) {
    3185           0 :                 Py_DECREF(item);
    3186           0 :                 Py_DECREF(iter);
    3187           0 :                 return NULL;
    3188             :             }
    3189          38 :             temp = PyNumber_Multiply(result, item);
    3190          38 :             Py_DECREF(result);
    3191          38 :             Py_DECREF(item);
    3192          38 :             result = temp;
    3193          38 :             if (result == NULL) {
    3194           1 :                 Py_DECREF(iter);
    3195           1 :                 return NULL;
    3196             :             }
    3197             :         }
    3198             :     }
    3199             : 
    3200             :     /* Fast paths for floats keeping temporary products in C.
    3201             :      * Assumes all inputs are the same type.
    3202             :      * If the assumption fails, default to use PyObjects instead.
    3203             :     */
    3204          46 :     if (PyFloat_CheckExact(result)) {
    3205          22 :         double f_result = PyFloat_AS_DOUBLE(result);
    3206          22 :         Py_DECREF(result);
    3207          22 :         result = NULL;
    3208       14061 :         while(result == NULL) {
    3209       14061 :             item = PyIter_Next(iter);
    3210       14061 :             if (item == NULL) {
    3211          22 :                 Py_DECREF(iter);
    3212          22 :                 if (PyErr_Occurred()) {
    3213           0 :                     return NULL;
    3214             :                 }
    3215          22 :                 return PyFloat_FromDouble(f_result);
    3216             :             }
    3217       14039 :             if (PyFloat_CheckExact(item)) {
    3218        4007 :                 f_result *= PyFloat_AS_DOUBLE(item);
    3219        4007 :                 Py_DECREF(item);
    3220        4007 :                 continue;
    3221             :             }
    3222       10032 :             if (PyLong_CheckExact(item)) {
    3223             :                 long value;
    3224             :                 int overflow;
    3225       10032 :                 value = PyLong_AsLongAndOverflow(item, &overflow);
    3226       10032 :                 if (!overflow) {
    3227       10032 :                     f_result *= (double)value;
    3228       10032 :                     Py_DECREF(item);
    3229       10032 :                     continue;
    3230             :                 }
    3231             :             }
    3232           0 :             result = PyFloat_FromDouble(f_result);
    3233           0 :             if (result == NULL) {
    3234           0 :                 Py_DECREF(item);
    3235           0 :                 Py_DECREF(iter);
    3236           0 :                 return NULL;
    3237             :             }
    3238           0 :             temp = PyNumber_Multiply(result, item);
    3239           0 :             Py_DECREF(result);
    3240           0 :             Py_DECREF(item);
    3241           0 :             result = temp;
    3242           0 :             if (result == NULL) {
    3243           0 :                 Py_DECREF(iter);
    3244           0 :                 return NULL;
    3245             :             }
    3246             :         }
    3247             :     }
    3248             : #endif
    3249             :     /* Consume rest of the iterable (if any) that could not be handled
    3250             :      * by specialized functions above.*/
    3251             :     for(;;) {
    3252       55403 :         item = PyIter_Next(iter);
    3253       55403 :         if (item == NULL) {
    3254             :             /* error, or end-of-sequence */
    3255          17 :             if (PyErr_Occurred()) {
    3256           0 :                 Py_DECREF(result);
    3257           0 :                 result = NULL;
    3258             :             }
    3259          17 :             break;
    3260             :         }
    3261       55386 :         temp = PyNumber_Multiply(result, item);
    3262       55386 :         Py_DECREF(result);
    3263       55386 :         Py_DECREF(item);
    3264       55386 :         result = temp;
    3265       55386 :         if (result == NULL)
    3266           7 :             break;
    3267             :     }
    3268          24 :     Py_DECREF(iter);
    3269          24 :     return result;
    3270             : }
    3271             : 
    3272             : 
    3273             : /* least significant 64 bits of the odd part of factorial(n), for n in range(128).
    3274             : 
    3275             : Python code to generate the values:
    3276             : 
    3277             :     import math
    3278             : 
    3279             :     for n in range(128):
    3280             :         fac = math.factorial(n)
    3281             :         fac_odd_part = fac // (fac & -fac)
    3282             :         reduced_fac_odd_part = fac_odd_part % (2**64)
    3283             :         print(f"{reduced_fac_odd_part:#018x}u")
    3284             : */
    3285             : static const uint64_t reduced_factorial_odd_part[] = {
    3286             :     0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000003u,
    3287             :     0x0000000000000003u, 0x000000000000000fu, 0x000000000000002du, 0x000000000000013bu,
    3288             :     0x000000000000013bu, 0x0000000000000b13u, 0x000000000000375fu, 0x0000000000026115u,
    3289             :     0x000000000007233fu, 0x00000000005cca33u, 0x0000000002898765u, 0x00000000260eeeebu,
    3290             :     0x00000000260eeeebu, 0x0000000286fddd9bu, 0x00000016beecca73u, 0x000001b02b930689u,
    3291             :     0x00000870d9df20adu, 0x0000b141df4dae31u, 0x00079dd498567c1bu, 0x00af2e19afc5266du,
    3292             :     0x020d8a4d0f4f7347u, 0x335281867ec241efu, 0x9b3093d46fdd5923u, 0x5e1f9767cc5866b1u,
    3293             :     0x92dd23d6966aced7u, 0xa30d0f4f0a196e5bu, 0x8dc3e5a1977d7755u, 0x2ab8ce915831734bu,
    3294             :     0x2ab8ce915831734bu, 0x81d2a0bc5e5fdcabu, 0x9efcac82445da75bu, 0xbc8b95cf58cde171u,
    3295             :     0xa0e8444a1f3cecf9u, 0x4191deb683ce3ffdu, 0xddd3878bc84ebfc7u, 0xcb39a64b83ff3751u,
    3296             :     0xf8203f7993fc1495u, 0xbd2a2a78b35f4bddu, 0x84757be6b6d13921u, 0x3fbbcfc0b524988bu,
    3297             :     0xbd11ed47c8928df9u, 0x3c26b59e41c2f4c5u, 0x677a5137e883fdb3u, 0xff74e943b03b93ddu,
    3298             :     0xfe5ebbcb10b2bb97u, 0xb021f1de3235e7e7u, 0x33509eb2e743a58fu, 0x390f9da41279fb7du,
    3299             :     0xe5cb0154f031c559u, 0x93074695ba4ddb6du, 0x81c471caa636247fu, 0xe1347289b5a1d749u,
    3300             :     0x286f21c3f76ce2ffu, 0x00be84a2173e8ac7u, 0x1595065ca215b88bu, 0xf95877595b018809u,
    3301             :     0x9c2efe3c5516f887u, 0x373294604679382bu, 0xaf1ff7a888adcd35u, 0x18ddf279a2c5800bu,
    3302             :     0x18ddf279a2c5800bu, 0x505a90e2542582cbu, 0x5bacad2cd8d5dc2bu, 0xfe3152bcbff89f41u,
    3303             :     0xe1467e88bf829351u, 0xb8001adb9e31b4d5u, 0x2803ac06a0cbb91fu, 0x1904b5d698805799u,
    3304             :     0xe12a648b5c831461u, 0x3516abbd6160cfa9u, 0xac46d25f12fe036du, 0x78bfa1da906b00efu,
    3305             :     0xf6390338b7f111bdu, 0x0f25f80f538255d9u, 0x4ec8ca55b8db140fu, 0x4ff670740b9b30a1u,
    3306             :     0x8fd032443a07f325u, 0x80dfe7965c83eeb5u, 0xa3dc1714d1213afdu, 0x205b7bbfcdc62007u,
    3307             :     0xa78126bbe140a093u, 0x9de1dc61ca7550cfu, 0x84f0046d01b492c5u, 0x2d91810b945de0f3u,
    3308             :     0xf5408b7f6008aa71u, 0x43707f4863034149u, 0xdac65fb9679279d5u, 0xc48406e7d1114eb7u,
    3309             :     0xa7dc9ed3c88e1271u, 0xfb25b2efdb9cb30du, 0x1bebda0951c4df63u, 0x5c85e975580ee5bdu,
    3310             :     0x1591bc60082cb137u, 0x2c38606318ef25d7u, 0x76ca72f7c5c63e27u, 0xf04a75d17baa0915u,
    3311             :     0x77458175139ae30du, 0x0e6c1330bc1b9421u, 0xdf87d2b5797e8293u, 0xefa5c703e1e68925u,
    3312             :     0x2b6b1b3278b4f6e1u, 0xceee27b382394249u, 0xd74e3829f5dab91du, 0xfdb17989c26b5f1fu,
    3313             :     0xc1b7d18781530845u, 0x7b4436b2105a8561u, 0x7ba7c0418372a7d7u, 0x9dbc5c67feb6c639u,
    3314             :     0x502686d7f6ff6b8fu, 0x6101855406be7a1fu, 0x9956afb5806930e7u, 0xe1f0ee88af40f7c5u,
    3315             :     0x984b057bda5c1151u, 0x9a49819acc13ea05u, 0x8ef0dead0896ef27u, 0x71f7826efe292b21u,
    3316             :     0xad80a480e46986efu, 0x01cdc0ebf5e0c6f7u, 0x6e06f839968f68dbu, 0xdd5943ab56e76139u,
    3317             :     0xcdcf31bf8604c5e7u, 0x7e2b4a847054a1cbu, 0x0ca75697a4d3d0f5u, 0x4703f53ac514a98bu,
    3318             : };
    3319             : 
    3320             : /* inverses of reduced_factorial_odd_part values modulo 2**64.
    3321             : 
    3322             : Python code to generate the values:
    3323             : 
    3324             :     import math
    3325             : 
    3326             :     for n in range(128):
    3327             :         fac = math.factorial(n)
    3328             :         fac_odd_part = fac // (fac & -fac)
    3329             :         inverted_fac_odd_part = pow(fac_odd_part, -1, 2**64)
    3330             :         print(f"{inverted_fac_odd_part:#018x}u")
    3331             : */
    3332             : static const uint64_t inverted_factorial_odd_part[] = {
    3333             :     0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0xaaaaaaaaaaaaaaabu,
    3334             :     0xaaaaaaaaaaaaaaabu, 0xeeeeeeeeeeeeeeefu, 0x4fa4fa4fa4fa4fa5u, 0x2ff2ff2ff2ff2ff3u,
    3335             :     0x2ff2ff2ff2ff2ff3u, 0x938cc70553e3771bu, 0xb71c27cddd93e49fu, 0xb38e3229fcdee63du,
    3336             :     0xe684bb63544a4cbfu, 0xc2f684917ca340fbu, 0xf747c9cba417526du, 0xbb26eb51d7bd49c3u,
    3337             :     0xbb26eb51d7bd49c3u, 0xb0a7efb985294093u, 0xbe4b8c69f259eabbu, 0x6854d17ed6dc4fb9u,
    3338             :     0xe1aa904c915f4325u, 0x3b8206df131cead1u, 0x79c6009fea76fe13u, 0xd8c5d381633cd365u,
    3339             :     0x4841f12b21144677u, 0x4a91ff68200b0d0fu, 0x8f9513a58c4f9e8bu, 0x2b3e690621a42251u,
    3340             :     0x4f520f00e03c04e7u, 0x2edf84ee600211d3u, 0xadcaa2764aaacdfdu, 0x161f4f9033f4fe63u,
    3341             :     0x161f4f9033f4fe63u, 0xbada2932ea4d3e03u, 0xcec189f3efaa30d3u, 0xf7475bb68330bf91u,
    3342             :     0x37eb7bf7d5b01549u, 0x46b35660a4e91555u, 0xa567c12d81f151f7u, 0x4c724007bb2071b1u,
    3343             :     0x0f4a0cce58a016bdu, 0xfa21068e66106475u, 0x244ab72b5a318ae1u, 0x366ce67e080d0f23u,
    3344             :     0xd666fdae5dd2a449u, 0xd740ddd0acc06a0du, 0xb050bbbb28e6f97bu, 0x70b003fe890a5c75u,
    3345             :     0xd03aabff83037427u, 0x13ec4ca72c783bd7u, 0x90282c06afdbd96fu, 0x4414ddb9db4a95d5u,
    3346             :     0xa2c68735ae6832e9u, 0xbf72d71455676665u, 0xa8469fab6b759b7fu, 0xc1e55b56e606caf9u,
    3347             :     0x40455630fc4a1cffu, 0x0120a7b0046d16f7u, 0xa7c3553b08faef23u, 0x9f0bfd1b08d48639u,
    3348             :     0xa433ffce9a304d37u, 0xa22ad1d53915c683u, 0xcb6cbc723ba5dd1du, 0x547fb1b8ab9d0ba3u,
    3349             :     0x547fb1b8ab9d0ba3u, 0x8f15a826498852e3u, 0x32e1a03f38880283u, 0x3de4cce63283f0c1u,
    3350             :     0x5dfe6667e4da95b1u, 0xfda6eeeef479e47du, 0xf14de991cc7882dfu, 0xe68db79247630ca9u,
    3351             :     0xa7d6db8207ee8fa1u, 0x255e1f0fcf034499u, 0xc9a8990e43dd7e65u, 0x3279b6f289702e0fu,
    3352             :     0xe7b5905d9b71b195u, 0x03025ba41ff0da69u, 0xb7df3d6d3be55aefu, 0xf89b212ebff2b361u,
    3353             :     0xfe856d095996f0adu, 0xd6e533e9fdf20f9du, 0xf8c0e84a63da3255u, 0xa677876cd91b4db7u,
    3354             :     0x07ed4f97780d7d9bu, 0x90a8705f258db62fu, 0xa41bbb2be31b1c0du, 0x6ec28690b038383bu,
    3355             :     0xdb860c3bb2edd691u, 0x0838286838a980f9u, 0x558417a74b36f77du, 0x71779afc3646ef07u,
    3356             :     0x743cda377ccb6e91u, 0x7fdf9f3fe89153c5u, 0xdc97d25df49b9a4bu, 0x76321a778eb37d95u,
    3357             :     0x7cbb5e27da3bd487u, 0x9cff4ade1a009de7u, 0x70eb166d05c15197u, 0xdcf0460b71d5fe3du,
    3358             :     0x5ac1ee5260b6a3c5u, 0xc922dedfdd78efe1u, 0xe5d381dc3b8eeb9bu, 0xd57e5347bafc6aadu,
    3359             :     0x86939040983acd21u, 0x395b9d69740a4ff9u, 0x1467299c8e43d135u, 0x5fe440fcad975cdfu,
    3360             :     0xcaa9a39794a6ca8du, 0xf61dbd640868dea1u, 0xac09d98d74843be7u, 0x2b103b9e1a6b4809u,
    3361             :     0x2ab92d16960f536fu, 0x6653323d5e3681dfu, 0xefd48c1c0624e2d7u, 0xa496fefe04816f0du,
    3362             :     0x1754a7b07bbdd7b1u, 0x23353c829a3852cdu, 0xbf831261abd59097u, 0x57a8e656df0618e1u,
    3363             :     0x16e9206c3100680fu, 0xadad4c6ee921dac7u, 0x635f2b3860265353u, 0xdd6d0059f44b3d09u,
    3364             :     0xac4dd6b894447dd7u, 0x42ea183eeaa87be3u, 0x15612d1550ee5b5du, 0x226fa19d656cb623u,
    3365             : };
    3366             : 
    3367             : /* exponent of the largest power of 2 dividing factorial(n), for n in range(68)
    3368             : 
    3369             : Python code to generate the values:
    3370             : 
    3371             : import math
    3372             : 
    3373             : for n in range(128):
    3374             :     fac = math.factorial(n)
    3375             :     fac_trailing_zeros = (fac & -fac).bit_length() - 1
    3376             :     print(fac_trailing_zeros)
    3377             : */
    3378             : 
    3379             : static const uint8_t factorial_trailing_zeros[] = {
    3380             :      0,  0,  1,  1,  3,  3,  4,  4,  7,  7,  8,  8, 10, 10, 11, 11,  //  0-15
    3381             :     15, 15, 16, 16, 18, 18, 19, 19, 22, 22, 23, 23, 25, 25, 26, 26,  // 16-31
    3382             :     31, 31, 32, 32, 34, 34, 35, 35, 38, 38, 39, 39, 41, 41, 42, 42,  // 32-47
    3383             :     46, 46, 47, 47, 49, 49, 50, 50, 53, 53, 54, 54, 56, 56, 57, 57,  // 48-63
    3384             :     63, 63, 64, 64, 66, 66, 67, 67, 70, 70, 71, 71, 73, 73, 74, 74,  // 64-79
    3385             :     78, 78, 79, 79, 81, 81, 82, 82, 85, 85, 86, 86, 88, 88, 89, 89,  // 80-95
    3386             :     94, 94, 95, 95, 97, 97, 98, 98, 101, 101, 102, 102, 104, 104, 105, 105,  // 96-111
    3387             :     109, 109, 110, 110, 112, 112, 113, 113, 116, 116, 117, 117, 119, 119, 120, 120,  // 112-127
    3388             : };
    3389             : 
    3390             : /* Number of permutations and combinations.
    3391             :  * P(n, k) = n! / (n-k)!
    3392             :  * C(n, k) = P(n, k) / k!
    3393             :  */
    3394             : 
    3395             : /* Calculate C(n, k) for n in the 63-bit range. */
    3396             : static PyObject *
    3397      214430 : perm_comb_small(unsigned long long n, unsigned long long k, int iscomb)
    3398             : {
    3399      214430 :     if (k == 0) {
    3400           0 :         return PyLong_FromLong(1);
    3401             :     }
    3402             : 
    3403             :     /* For small enough n and k the result fits in the 64-bit range and can
    3404             :      * be calculated without allocating intermediate PyLong objects. */
    3405      214430 :     if (iscomb) {
    3406             :         /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)
    3407             :          * fits into a uint64_t.  Exclude k = 1, because the second fast
    3408             :          * path is faster for this case.*/
    3409             :         static const unsigned char fast_comb_limits1[] = {
    3410             :             0, 0, 127, 127, 127, 127, 127, 127,  // 0-7
    3411             :             127, 127, 127, 127, 127, 127, 127, 127,  // 8-15
    3412             :             116, 105, 97, 91, 86, 82, 78, 76,  // 16-23
    3413             :             74, 72, 71, 70, 69, 68, 68, 67,  // 24-31
    3414             :             67, 67, 67,  // 32-34
    3415             :         };
    3416       56869 :         if (k < Py_ARRAY_LENGTH(fast_comb_limits1) && n <= fast_comb_limits1[k]) {
    3417             :             /*
    3418             :                 comb(n, k) fits into a uint64_t. We compute it as
    3419             : 
    3420             :                     comb_odd_part << shift
    3421             : 
    3422             :                 where 2**shift is the largest power of two dividing comb(n, k)
    3423             :                 and comb_odd_part is comb(n, k) >> shift. comb_odd_part can be
    3424             :                 calculated efficiently via arithmetic modulo 2**64, using three
    3425             :                 lookups and two uint64_t multiplications.
    3426             :             */
    3427       41247 :             uint64_t comb_odd_part = reduced_factorial_odd_part[n]
    3428       41247 :                                    * inverted_factorial_odd_part[k]
    3429       41247 :                                    * inverted_factorial_odd_part[n - k];
    3430       41247 :             int shift = factorial_trailing_zeros[n]
    3431       41247 :                       - factorial_trailing_zeros[k]
    3432       41247 :                       - factorial_trailing_zeros[n - k];
    3433       41247 :             return PyLong_FromUnsignedLongLong(comb_odd_part << shift);
    3434             :         }
    3435             : 
    3436             :         /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)*k
    3437             :          * fits into a long long (which is at least 64 bit).  Only contains
    3438             :          * items larger than in fast_comb_limits1. */
    3439             :         static const unsigned long long fast_comb_limits2[] = {
    3440             :             0, ULLONG_MAX, 4294967296ULL, 3329022, 102570, 13467, 3612, 1449,  // 0-7
    3441             :             746, 453, 308, 227, 178, 147,  // 8-13
    3442             :         };
    3443       15622 :         if (k < Py_ARRAY_LENGTH(fast_comb_limits2) && n <= fast_comb_limits2[k]) {
    3444             :             /* C(n, k) = C(n, k-1) * (n-k+1) / k */
    3445        6049 :             unsigned long long result = n;
    3446       42266 :             for (unsigned long long i = 1; i < k;) {
    3447       36217 :                 result *= --n;
    3448       36217 :                 result /= ++i;
    3449             :             }
    3450        6049 :             return PyLong_FromUnsignedLongLong(result);
    3451             :         }
    3452             :     }
    3453             :     else {
    3454             :         /* Maps k to the maximal n so that k <= n and P(n, k)
    3455             :          * fits into a long long (which is at least 64 bit). */
    3456             :         static const unsigned long long fast_perm_limits[] = {
    3457             :             0, ULLONG_MAX, 4294967296ULL, 2642246, 65537, 7133, 1627, 568,  // 0-7
    3458             :             259, 142, 88, 61, 45, 36, 30, 26,  // 8-15
    3459             :             24, 22, 21, 20, 20,  // 16-20
    3460             :         };
    3461      157561 :         if (k < Py_ARRAY_LENGTH(fast_perm_limits) && n <= fast_perm_limits[k]) {
    3462       90938 :             if (n <= 127) {
    3463             :                 /* P(n, k) fits into a uint64_t. */
    3464       83204 :                 uint64_t perm_odd_part = reduced_factorial_odd_part[n]
    3465       83204 :                                        * inverted_factorial_odd_part[n - k];
    3466       83204 :                 int shift = factorial_trailing_zeros[n]
    3467       83204 :                           - factorial_trailing_zeros[n - k];
    3468       83204 :                 return PyLong_FromUnsignedLongLong(perm_odd_part << shift);
    3469             :             }
    3470             : 
    3471             :             /* P(n, k) = P(n, k-1) * (n-k+1) */
    3472        7734 :             unsigned long long result = n;
    3473       41599 :             for (unsigned long long i = 1; i < k;) {
    3474       33865 :                 result *= --n;
    3475       33865 :                 ++i;
    3476             :             }
    3477        7734 :             return PyLong_FromUnsignedLongLong(result);
    3478             :         }
    3479             :     }
    3480             : 
    3481             :     /* For larger n use recursive formulas:
    3482             :      *
    3483             :      *   P(n, k) = P(n, j) * P(n-j, k-j)
    3484             :      *   C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j)
    3485             :      */
    3486       76196 :     unsigned long long j = k / 2;
    3487             :     PyObject *a, *b;
    3488       76196 :     a = perm_comb_small(n, j, iscomb);
    3489       76196 :     if (a == NULL) {
    3490           0 :         return NULL;
    3491             :     }
    3492       76196 :     b = perm_comb_small(n - j, k - j, iscomb);
    3493       76196 :     if (b == NULL) {
    3494           0 :         goto error;
    3495             :     }
    3496       76196 :     Py_SETREF(a, PyNumber_Multiply(a, b));
    3497       76196 :     Py_DECREF(b);
    3498       76196 :     if (iscomb && a != NULL) {
    3499        9573 :         b = perm_comb_small(k, j, 1);
    3500        9573 :         if (b == NULL) {
    3501           0 :             goto error;
    3502             :         }
    3503        9573 :         Py_SETREF(a, PyNumber_FloorDivide(a, b));
    3504        9573 :         Py_DECREF(b);
    3505             :     }
    3506       76196 :     return a;
    3507             : 
    3508           0 : error:
    3509           0 :     Py_DECREF(a);
    3510           0 :     return NULL;
    3511             : }
    3512             : 
    3513             : /* Calculate P(n, k) or C(n, k) using recursive formulas.
    3514             :  * It is more efficient than sequential multiplication thanks to
    3515             :  * Karatsuba multiplication.
    3516             :  */
    3517             : static PyObject *
    3518        4381 : perm_comb(PyObject *n, unsigned long long k, int iscomb)
    3519             : {
    3520        4381 :     if (k == 0) {
    3521        1907 :         return PyLong_FromLong(1);
    3522             :     }
    3523        2474 :     if (k == 1) {
    3524        2471 :         Py_INCREF(n);
    3525        2471 :         return n;
    3526             :     }
    3527             : 
    3528             :     /* P(n, k) = P(n, j) * P(n-j, k-j) */
    3529             :     /* C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j) */
    3530           3 :     unsigned long long j = k / 2;
    3531             :     PyObject *a, *b;
    3532           3 :     a = perm_comb(n, j, iscomb);
    3533           3 :     if (a == NULL) {
    3534           0 :         return NULL;
    3535             :     }
    3536           3 :     PyObject *t = PyLong_FromUnsignedLongLong(j);
    3537           3 :     if (t == NULL) {
    3538           0 :         goto error;
    3539             :     }
    3540           3 :     n = PyNumber_Subtract(n, t);
    3541           3 :     Py_DECREF(t);
    3542           3 :     if (n == NULL) {
    3543           0 :         goto error;
    3544             :     }
    3545           3 :     b = perm_comb(n, k - j, iscomb);
    3546           3 :     Py_DECREF(n);
    3547           3 :     if (b == NULL) {
    3548           0 :         goto error;
    3549             :     }
    3550           3 :     Py_SETREF(a, PyNumber_Multiply(a, b));
    3551           3 :     Py_DECREF(b);
    3552           3 :     if (iscomb && a != NULL) {
    3553           2 :         b = perm_comb_small(k, j, 1);
    3554           2 :         if (b == NULL) {
    3555           0 :             goto error;
    3556             :         }
    3557           2 :         Py_SETREF(a, PyNumber_FloorDivide(a, b));
    3558           2 :         Py_DECREF(b);
    3559             :     }
    3560           3 :     return a;
    3561             : 
    3562           0 : error:
    3563           0 :     Py_DECREF(a);
    3564           0 :     return NULL;
    3565             : }
    3566             : 
    3567             : /*[clinic input]
    3568             : math.perm
    3569             : 
    3570             :     n: object
    3571             :     k: object = None
    3572             :     /
    3573             : 
    3574             : Number of ways to choose k items from n items without repetition and with order.
    3575             : 
    3576             : Evaluates to n! / (n - k)! when k <= n and evaluates
    3577             : to zero when k > n.
    3578             : 
    3579             : If k is not specified or is None, then k defaults to n
    3580             : and the function returns n!.
    3581             : 
    3582             : Raises TypeError if either of the arguments are not integers.
    3583             : Raises ValueError if either of the arguments are negative.
    3584             : [clinic start generated code]*/
    3585             : 
    3586             : static PyObject *
    3587       25970 : math_perm_impl(PyObject *module, PyObject *n, PyObject *k)
    3588             : /*[clinic end generated code: output=e021a25469653e23 input=5311c5a00f359b53]*/
    3589             : {
    3590       25970 :     PyObject *result = NULL;
    3591             :     int overflow, cmp;
    3592             :     long long ki, ni;
    3593             : 
    3594       25970 :     if (k == Py_None) {
    3595          40 :         return math_factorial(module, n);
    3596             :     }
    3597       25930 :     n = PyNumber_Index(n);
    3598       25930 :     if (n == NULL) {
    3599           3 :         return NULL;
    3600             :     }
    3601       25927 :     k = PyNumber_Index(k);
    3602       25927 :     if (k == NULL) {
    3603           3 :         Py_DECREF(n);
    3604           3 :         return NULL;
    3605             :     }
    3606       25924 :     assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
    3607             : 
    3608       25924 :     if (Py_SIZE(n) < 0) {
    3609           2 :         PyErr_SetString(PyExc_ValueError,
    3610             :                         "n must be a non-negative integer");
    3611           2 :         goto error;
    3612             :     }
    3613       25922 :     if (Py_SIZE(k) < 0) {
    3614           2 :         PyErr_SetString(PyExc_ValueError,
    3615             :                         "k must be a non-negative integer");
    3616           2 :         goto error;
    3617             :     }
    3618             : 
    3619       25920 :     cmp = PyObject_RichCompareBool(n, k, Py_LT);
    3620       25920 :     if (cmp != 0) {
    3621           2 :         if (cmp > 0) {
    3622           2 :             result = PyLong_FromLong(0);
    3623           2 :             goto done;
    3624             :         }
    3625           0 :         goto error;
    3626             :     }
    3627             : 
    3628       25918 :     ki = PyLong_AsLongLongAndOverflow(k, &overflow);
    3629       25918 :     assert(overflow >= 0 && !PyErr_Occurred());
    3630       25918 :     if (overflow > 0) {
    3631           1 :         PyErr_Format(PyExc_OverflowError,
    3632             :                      "k must not exceed %lld",
    3633             :                      LLONG_MAX);
    3634           1 :         goto error;
    3635             :     }
    3636       25917 :     assert(ki >= 0);
    3637             : 
    3638       25917 :     ni = PyLong_AsLongLongAndOverflow(n, &overflow);
    3639       25917 :     assert(overflow >= 0 && !PyErr_Occurred());
    3640       25917 :     if (!overflow && ki > 1) {
    3641       24315 :         assert(ni >= 0);
    3642       24315 :         result = perm_comb_small((unsigned long long)ni,
    3643             :                                  (unsigned long long)ki, 0);
    3644             :     }
    3645             :     else {
    3646        1602 :         result = perm_comb(n, (unsigned long long)ki, 0);
    3647             :     }
    3648             : 
    3649       25919 : done:
    3650       25919 :     Py_DECREF(n);
    3651       25919 :     Py_DECREF(k);
    3652       25919 :     return result;
    3653             : 
    3654           5 : error:
    3655           5 :     Py_DECREF(n);
    3656           5 :     Py_DECREF(k);
    3657           5 :     return NULL;
    3658             : }
    3659             : 
    3660             : /*[clinic input]
    3661             : math.comb
    3662             : 
    3663             :     n: object
    3664             :     k: object
    3665             :     /
    3666             : 
    3667             : Number of ways to choose k items from n items without repetition and without order.
    3668             : 
    3669             : Evaluates to n! / (k! * (n - k)!) when k <= n and evaluates
    3670             : to zero when k > n.
    3671             : 
    3672             : Also called the binomial coefficient because it is equivalent
    3673             : to the coefficient of k-th term in polynomial expansion of the
    3674             : expression (1 + x)**n.
    3675             : 
    3676             : Raises TypeError if either of the arguments are not integers.
    3677             : Raises ValueError if either of the arguments are negative.
    3678             : 
    3679             : [clinic start generated code]*/
    3680             : 
    3681             : static PyObject *
    3682       30934 : math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
    3683             : /*[clinic end generated code: output=bd2cec8d854f3493 input=9a05315af2518709]*/
    3684             : {
    3685       30934 :     PyObject *result = NULL, *temp;
    3686             :     int overflow, cmp;
    3687             :     long long ki, ni;
    3688             : 
    3689       30934 :     n = PyNumber_Index(n);
    3690       30934 :     if (n == NULL) {
    3691           3 :         return NULL;
    3692             :     }
    3693       30931 :     k = PyNumber_Index(k);
    3694       30931 :     if (k == NULL) {
    3695           3 :         Py_DECREF(n);
    3696           3 :         return NULL;
    3697             :     }
    3698       30928 :     assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
    3699             : 
    3700       30928 :     if (Py_SIZE(n) < 0) {
    3701           2 :         PyErr_SetString(PyExc_ValueError,
    3702             :                         "n must be a non-negative integer");
    3703           2 :         goto error;
    3704             :     }
    3705       30926 :     if (Py_SIZE(k) < 0) {
    3706           2 :         PyErr_SetString(PyExc_ValueError,
    3707             :                         "k must be a non-negative integer");
    3708           2 :         goto error;
    3709             :     }
    3710             : 
    3711       30924 :     ni = PyLong_AsLongLongAndOverflow(n, &overflow);
    3712       30924 :     assert(overflow >= 0 && !PyErr_Occurred());
    3713       30924 :     if (!overflow) {
    3714       30917 :         assert(ni >= 0);
    3715       30917 :         ki = PyLong_AsLongLongAndOverflow(k, &overflow);
    3716       30917 :         assert(overflow >= 0 && !PyErr_Occurred());
    3717       30917 :         if (overflow || ki > ni) {
    3718           2 :             result = PyLong_FromLong(0);
    3719           2 :             goto done;
    3720             :         }
    3721       30915 :         assert(ki >= 0);
    3722             : 
    3723       30915 :         ki = Py_MIN(ki, ni - ki);
    3724       30915 :         if (ki > 1) {
    3725       28148 :             result = perm_comb_small((unsigned long long)ni,
    3726             :                                      (unsigned long long)ki, 1);
    3727       28148 :             goto done;
    3728             :         }
    3729             :         /* For k == 1 just return the original n in perm_comb(). */
    3730             :     }
    3731             :     else {
    3732             :         /* k = min(k, n - k) */
    3733           7 :         temp = PyNumber_Subtract(n, k);
    3734           7 :         if (temp == NULL) {
    3735           0 :             goto error;
    3736             :         }
    3737           7 :         if (Py_SIZE(temp) < 0) {
    3738           0 :             Py_DECREF(temp);
    3739           0 :             result = PyLong_FromLong(0);
    3740           0 :             goto done;
    3741             :         }
    3742           7 :         cmp = PyObject_RichCompareBool(temp, k, Py_LT);
    3743           7 :         if (cmp > 0) {
    3744           3 :             Py_SETREF(k, temp);
    3745             :         }
    3746             :         else {
    3747           4 :             Py_DECREF(temp);
    3748           4 :             if (cmp < 0) {
    3749           0 :                 goto error;
    3750             :             }
    3751             :         }
    3752             : 
    3753           7 :         ki = PyLong_AsLongLongAndOverflow(k, &overflow);
    3754           7 :         assert(overflow >= 0 && !PyErr_Occurred());
    3755           7 :         if (overflow) {
    3756           1 :             PyErr_Format(PyExc_OverflowError,
    3757             :                          "min(n - k, k) must not exceed %lld",
    3758             :                          LLONG_MAX);
    3759           1 :             goto error;
    3760             :         }
    3761           6 :         assert(ki >= 0);
    3762             :     }
    3763             : 
    3764        2773 :     result = perm_comb(n, (unsigned long long)ki, 1);
    3765             : 
    3766       30923 : done:
    3767       30923 :     Py_DECREF(n);
    3768       30923 :     Py_DECREF(k);
    3769       30923 :     return result;
    3770             : 
    3771           5 : error:
    3772           5 :     Py_DECREF(n);
    3773           5 :     Py_DECREF(k);
    3774           5 :     return NULL;
    3775             : }
    3776             : 
    3777             : 
    3778             : /*[clinic input]
    3779             : math.nextafter
    3780             : 
    3781             :     x: double
    3782             :     y: double
    3783             :     /
    3784             : 
    3785             : Return the next floating-point value after x towards y.
    3786             : [clinic start generated code]*/
    3787             : 
    3788             : static PyObject *
    3789      117415 : math_nextafter_impl(PyObject *module, double x, double y)
    3790             : /*[clinic end generated code: output=750c8266c1c540ce input=02b2d50cd1d9f9b6]*/
    3791             : {
    3792             : #if defined(_AIX)
    3793             :     if (x == y) {
    3794             :         /* On AIX 7.1, libm nextafter(-0.0, +0.0) returns -0.0.
    3795             :            Bug fixed in bos.adt.libm 7.2.2.0 by APAR IV95512. */
    3796             :         return PyFloat_FromDouble(y);
    3797             :     }
    3798             :     if (Py_IS_NAN(x)) {
    3799             :         return PyFloat_FromDouble(x);
    3800             :     }
    3801             :     if (Py_IS_NAN(y)) {
    3802             :         return PyFloat_FromDouble(y);
    3803             :     }
    3804             : #endif
    3805      117415 :     return PyFloat_FromDouble(nextafter(x, y));
    3806             : }
    3807             : 
    3808             : 
    3809             : /*[clinic input]
    3810             : math.ulp -> double
    3811             : 
    3812             :     x: double
    3813             :     /
    3814             : 
    3815             : Return the value of the least significant bit of the float x.
    3816             : [clinic start generated code]*/
    3817             : 
    3818             : static double
    3819          21 : math_ulp_impl(PyObject *module, double x)
    3820             : /*[clinic end generated code: output=f5207867a9384dd4 input=31f9bfbbe373fcaa]*/
    3821             : {
    3822          21 :     if (Py_IS_NAN(x)) {
    3823           1 :         return x;
    3824             :     }
    3825          20 :     x = fabs(x);
    3826          20 :     if (Py_IS_INFINITY(x)) {
    3827           3 :         return x;
    3828             :     }
    3829          17 :     double inf = m_inf();
    3830          17 :     double x2 = nextafter(x, inf);
    3831          17 :     if (Py_IS_INFINITY(x2)) {
    3832             :         /* special case: x is the largest positive representable float */
    3833           1 :         x2 = nextafter(x, -inf);
    3834           1 :         return x - x2;
    3835             :     }
    3836          16 :     return x2 - x;
    3837             : }
    3838             : 
    3839             : static int
    3840        1308 : math_exec(PyObject *module)
    3841             : {
    3842             : 
    3843        1308 :     math_module_state *state = get_math_module_state(module);
    3844        1308 :     state->str___ceil__ = PyUnicode_InternFromString("__ceil__");
    3845        1308 :     if (state->str___ceil__ == NULL) {
    3846           0 :         return -1;
    3847             :     }
    3848        1308 :     state->str___floor__ = PyUnicode_InternFromString("__floor__");
    3849        1308 :     if (state->str___floor__ == NULL) {
    3850           0 :         return -1;
    3851             :     }
    3852        1308 :     state->str___trunc__ = PyUnicode_InternFromString("__trunc__");
    3853        1308 :     if (state->str___trunc__ == NULL) {
    3854           0 :         return -1;
    3855             :     }
    3856        1308 :     if (PyModule_AddObject(module, "pi", PyFloat_FromDouble(Py_MATH_PI)) < 0) {
    3857           0 :         return -1;
    3858             :     }
    3859        1308 :     if (PyModule_AddObject(module, "e", PyFloat_FromDouble(Py_MATH_E)) < 0) {
    3860           0 :         return -1;
    3861             :     }
    3862             :     // 2pi
    3863        1308 :     if (PyModule_AddObject(module, "tau", PyFloat_FromDouble(Py_MATH_TAU)) < 0) {
    3864           0 :         return -1;
    3865             :     }
    3866        1308 :     if (PyModule_AddObject(module, "inf", PyFloat_FromDouble(m_inf())) < 0) {
    3867           0 :         return -1;
    3868             :     }
    3869             : #if _PY_SHORT_FLOAT_REPR == 1
    3870        1308 :     if (PyModule_AddObject(module, "nan", PyFloat_FromDouble(m_nan())) < 0) {
    3871           0 :         return -1;
    3872             :     }
    3873             : #endif
    3874        1308 :     return 0;
    3875             : }
    3876             : 
    3877             : static int
    3878        1510 : math_clear(PyObject *module)
    3879             : {
    3880        1510 :     math_module_state *state = get_math_module_state(module);
    3881        1510 :     Py_CLEAR(state->str___ceil__);
    3882        1510 :     Py_CLEAR(state->str___floor__);
    3883        1510 :     Py_CLEAR(state->str___trunc__);
    3884        1510 :     return 0;
    3885             : }
    3886             : 
    3887             : static void
    3888        1308 : math_free(void *module)
    3889             : {
    3890        1308 :     math_clear((PyObject *)module);
    3891        1308 : }
    3892             : 
    3893             : static PyMethodDef math_methods[] = {
    3894             :     {"acos",            math_acos,      METH_O,         math_acos_doc},
    3895             :     {"acosh",           math_acosh,     METH_O,         math_acosh_doc},
    3896             :     {"asin",            math_asin,      METH_O,         math_asin_doc},
    3897             :     {"asinh",           math_asinh,     METH_O,         math_asinh_doc},
    3898             :     {"atan",            math_atan,      METH_O,         math_atan_doc},
    3899             :     {"atan2",           _PyCFunction_CAST(math_atan2),     METH_FASTCALL,  math_atan2_doc},
    3900             :     {"atanh",           math_atanh,     METH_O,         math_atanh_doc},
    3901             :     {"cbrt",            math_cbrt,      METH_O,         math_cbrt_doc},
    3902             :     MATH_CEIL_METHODDEF
    3903             :     {"copysign",        _PyCFunction_CAST(math_copysign),  METH_FASTCALL,  math_copysign_doc},
    3904             :     {"cos",             math_cos,       METH_O,         math_cos_doc},
    3905             :     {"cosh",            math_cosh,      METH_O,         math_cosh_doc},
    3906             :     MATH_DEGREES_METHODDEF
    3907             :     MATH_DIST_METHODDEF
    3908             :     {"erf",             math_erf,       METH_O,         math_erf_doc},
    3909             :     {"erfc",            math_erfc,      METH_O,         math_erfc_doc},
    3910             :     {"exp",             math_exp,       METH_O,         math_exp_doc},
    3911             :     {"exp2",            math_exp2,      METH_O,         math_exp2_doc},
    3912             :     {"expm1",           math_expm1,     METH_O,         math_expm1_doc},
    3913             :     {"fabs",            math_fabs,      METH_O,         math_fabs_doc},
    3914             :     MATH_FACTORIAL_METHODDEF
    3915             :     MATH_FLOOR_METHODDEF
    3916             :     MATH_FMOD_METHODDEF
    3917             :     MATH_FREXP_METHODDEF
    3918             :     MATH_FSUM_METHODDEF
    3919             :     {"gamma",           math_gamma,     METH_O,         math_gamma_doc},
    3920             :     {"gcd",             _PyCFunction_CAST(math_gcd),       METH_FASTCALL,  math_gcd_doc},
    3921             :     {"hypot",           _PyCFunction_CAST(math_hypot),     METH_FASTCALL,  math_hypot_doc},
    3922             :     MATH_ISCLOSE_METHODDEF
    3923             :     MATH_ISFINITE_METHODDEF
    3924             :     MATH_ISINF_METHODDEF
    3925             :     MATH_ISNAN_METHODDEF
    3926             :     MATH_ISQRT_METHODDEF
    3927             :     {"lcm",             _PyCFunction_CAST(math_lcm),       METH_FASTCALL,  math_lcm_doc},
    3928             :     MATH_LDEXP_METHODDEF
    3929             :     {"lgamma",          math_lgamma,    METH_O,         math_lgamma_doc},
    3930             :     MATH_LOG_METHODDEF
    3931             :     {"log1p",           math_log1p,     METH_O,         math_log1p_doc},
    3932             :     MATH_LOG10_METHODDEF
    3933             :     MATH_LOG2_METHODDEF
    3934             :     MATH_MODF_METHODDEF
    3935             :     MATH_POW_METHODDEF
    3936             :     MATH_RADIANS_METHODDEF
    3937             :     {"remainder",       _PyCFunction_CAST(math_remainder), METH_FASTCALL,  math_remainder_doc},
    3938             :     {"sin",             math_sin,       METH_O,         math_sin_doc},
    3939             :     {"sinh",            math_sinh,      METH_O,         math_sinh_doc},
    3940             :     {"sqrt",            math_sqrt,      METH_O,         math_sqrt_doc},
    3941             :     {"tan",             math_tan,       METH_O,         math_tan_doc},
    3942             :     {"tanh",            math_tanh,      METH_O,         math_tanh_doc},
    3943             :     MATH_TRUNC_METHODDEF
    3944             :     MATH_PROD_METHODDEF
    3945             :     MATH_PERM_METHODDEF
    3946             :     MATH_COMB_METHODDEF
    3947             :     MATH_NEXTAFTER_METHODDEF
    3948             :     MATH_ULP_METHODDEF
    3949             :     {NULL,              NULL}           /* sentinel */
    3950             : };
    3951             : 
    3952             : static PyModuleDef_Slot math_slots[] = {
    3953             :     {Py_mod_exec, math_exec},
    3954             :     {0, NULL}
    3955             : };
    3956             : 
    3957             : PyDoc_STRVAR(module_doc,
    3958             : "This module provides access to the mathematical functions\n"
    3959             : "defined by the C standard.");
    3960             : 
    3961             : static struct PyModuleDef mathmodule = {
    3962             :     PyModuleDef_HEAD_INIT,
    3963             :     .m_name = "math",
    3964             :     .m_doc = module_doc,
    3965             :     .m_size = sizeof(math_module_state),
    3966             :     .m_methods = math_methods,
    3967             :     .m_slots = math_slots,
    3968             :     .m_clear = math_clear,
    3969             :     .m_free = math_free,
    3970             : };
    3971             : 
    3972             : PyMODINIT_FUNC
    3973        1308 : PyInit_math(void)
    3974             : {
    3975        1308 :     return PyModuleDef_Init(&mathmodule);
    3976             : }

Generated by: LCOV version 1.14