Coverage Report

Created: 2022-07-08 09:39

/home/mdboom/Work/builds/cpython/Include/internal/pycore_atomic.h
Source (execution counts, where the report recorded them, appear in square brackets at the end of the line)
#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

#include "dynamic_annotations.h"   /* _Py_ANNOTATE_MEMORY_ORDER */
#include "pyconfig.h"

#ifdef HAVE_STD_ATOMIC
#  include <stdatomic.h>
#endif


#if defined(_MSC_VER)
#include <intrin.h>
#if defined(_M_IX86) || defined(_M_X64)
#  include <immintrin.h>
#endif
#endif

/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
 * Operations and types are named the same except with a _Py_ prefix
 * and have the same semantics.
 *
 * Beware, the implementations here are deep magic.
 */

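/* Editor's illustrative sketch -- not part of the original header.  Assuming
 * the _Py_atomic_int type and the store/load macros defined below, usage
 * mirrors C11 <stdatomic.h>:
 *
 *     static _Py_atomic_int initialized;   // hypothetical flag
 *
 *     _Py_atomic_store_explicit(&initialized, 1, _Py_memory_order_release);
 *     if (_Py_atomic_load_explicit(&initialized, _Py_memory_order_acquire)) {
 *         ...
 *     }
 */
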
#if defined(HAVE_STD_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = memory_order_relaxed,
    _Py_memory_order_acquire = memory_order_acquire,
    _Py_memory_order_release = memory_order_release,
    _Py_memory_order_acq_rel = memory_order_acq_rel,
    _Py_memory_order_seq_cst = memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    atomic_uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    atomic_int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    atomic_store_explicit(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER)    [counts: 60.2k, 15.7M]

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    atomic_load_explicit(&((ATOMIC_VAL)->_value), ORDER)    [count: 814M]

// Use builtin atomic operations in GCC >= 4.7 and clang
#elif defined(HAVE_BUILTIN_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = __ATOMIC_RELAXED,
    _Py_memory_order_acquire = __ATOMIC_ACQUIRE,
    _Py_memory_order_release = __ATOMIC_RELEASE,
    _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL,
    _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    __atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    __atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_RELEASE),                  \
     __atomic_store_n(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER))

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)           \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_ACQUIRE                    \
            || (ORDER) == __ATOMIC_CONSUME),                  \
     __atomic_load_n(&((ATOMIC_VAL)->_value), ORDER))

/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics) and MSVC x86/x64/ARM */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;


static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("":::"memory");
}

static __inline__ void
_Py_atomic_thread_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("mfence":::"memory");
}

/* Tell the race checker about this operation's effects. */
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
    (void)address;              /* shut up -Wunused-parameter */
    switch(order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_BEFORE(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_acquire:
        break;
    }
    switch(order) {
    case _Py_memory_order_acquire:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_AFTER(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_release:
        break;
    }
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) new_val = NEW_VAL;\
        volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
            _Py_atomic_signal_fence(_Py_memory_order_release); \
            /* fallthrough */ \
        case _Py_memory_order_relaxed: \
            *volatile_data = new_val; \
            break; \
        \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            __asm__ volatile("xchg %0, %1" \
                         : "+r"(new_val) \
                         : "m"(atomic_val->_value) \
                         : "memory"); \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_WRITES_END(); \
    })

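/* Editor's note (added commentary, not in the original header): the relaxed
 * and release cases above can use a plain volatile store because ordinary
 * stores already have release semantics on x86; the compiler-only signal
 * fence merely prevents compiler reordering.  The acquire/acq_rel/seq_cst
 * cases use XCHG instead, whose implicit LOCK serves as a full memory
 * barrier. */
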
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    __extension__ ({  \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) result; \
        volatile __typeof__(result) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_READS_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are not releases by default, so need a */ \
            /* thread fence. */ \
            _Py_atomic_thread_fence(_Py_memory_order_release); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        result = *volatile_data; \
        switch(order) { \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are automatically acquire operations so */ \
            /* can get by with just a compiler fence. */ \
            _Py_atomic_signal_fence(_Py_memory_order_acquire); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_READS_END(); \
        result; \
    })

#elif defined(_MSC_VER)
/*  _Interlocked* functions provide a full memory barrier and are therefore
    enough for acq_rel and seq_cst. If the HLE variants aren't available
    in hardware they will fall back to a full memory barrier as well.

    This might affect performance, but likely only in some very specific and
    hard to measure scenarios.
*/
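/* Editor's note (added commentary, not in the original header): in this
 * branch even a relaxed or release store such as
 *
 *     _Py_atomic_store_explicit(&var, 1, _Py_memory_order_release);
 *
 * expands to an _InterlockedExchange* call, i.e. a full-barrier atomic
 * exchange; the HLE variants merely add hardware lock elision hints on
 * CPUs that support them. */
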
#if defined(_M_IX86) || defined(_M_X64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_HLEAcquire((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_HLERelease((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_HLEAcquire((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_HLERelease((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  }

#if defined(_M_X64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value; if this returns
    a uintptr_t it will do an unsigned compare and crash
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    __int64 old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    long old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), ORDER) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), ORDER) \
  )
#elif defined(_M_ARM) || defined(_M_ARM64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_acq((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_rel((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_acq((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_rel((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  }

#if defined(_M_ARM64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value; if this returns
    a uintptr_t it will do an unsigned compare and crash
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), (ORDER)) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), (ORDER)) \
  )
#endif
#else  /* !gcc x86  !_msc_ver */
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;
/* Fall back to other compilers and processors by assuming that simple
   volatile accesses are atomic.  This is false, so people should port
   this. */
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)
#endif

/* Standardized shortcuts. */
#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_seq_cst)
#define _Py_atomic_load(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_seq_cst)

/* Python-local extensions */

#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_relaxed)

#ifdef __cplusplus
}
#endif
#endif  /* Py_ATOMIC_H */
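
Illustrative usage sketch (editor's addition, not part of the coverage report):
a minimal example of how the standardized shortcuts and the relaxed variants
above would typically be used, assuming it is compiled inside CPython with
Py_BUILD_CORE defined; the pending_flag name and helper functions are
hypothetical.

    #define Py_BUILD_CORE 1
    #include "Python.h"
    #include "pycore_atomic.h"

    static _Py_atomic_int pending_flag;    /* hypothetical flag */

    static void
    request_work(void)
    {
        /* seq_cst store: publish the request to every thread */
        _Py_atomic_store(&pending_flag, 1);
    }

    static int
    work_requested(void)
    {
        /* relaxed load: cheap polling read where ordering does not matter */
        return _Py_atomic_load_relaxed(&pending_flag);
    }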