@@ -489,25 +489,56 @@ gc_maybe_untrack(PyObject *op)
// NOTE(review): presumably high/low thresholds for a buffer used alongside
// the prefetch macros defined further down; their use is not visible in this
// chunk -- confirm.
489489#define BUFFER_HI 16
490490#define BUFFER_LO 8
491491
492+ // Prefetch instructions will fetch the line of data from memory that
493+ // contains the byte specified with the source operand to a location in
494+ // the cache hierarchy specified by a locality hint. The instruction
495+ // is only a hint and the CPU is free to ignore it. Instructions and
496+ // behaviour are CPU specific but the definitions of locality hints
497+ // below are mostly consistent.
498+ //
499+ // * T0 (temporal data) prefetch data into all levels of the cache hierarchy.
500+ //
501+ // * T1 (temporal data with respect to first level cache) prefetch data into
502+ // level 2 cache and higher.
503+ //
504+ // * T2 (temporal data with respect to second level cache) prefetch data into
505+ // level 3 cache and higher, or an implementation-specific choice.
506+ //
507+ // * NTA (non-temporal data with respect to all cache levels) prefetch data into
508+ // non-temporal cache structure and into a location close to the processor,
509+ // minimizing cache pollution.
510+
// Compiler/architecture selection for the PREFETCH_T0 / PREFETCH_T1 /
// PREFETCH_T2 / PREFETCH_NTA macros. The lines marked '-' are the previous
// PREFETCH_L1 / PREFETCH_L2 definitions that these macros replace.
//
//   * GCC / Clang: __builtin_prefetch(ptr, 0, locality), with locality 3, 2,
//     1 and 0 for T0, T1, T2 and NTA respectively.
//   * MSVC on x86/x64 (excluding ARM64EC): _mm_prefetch with the matching
//     _MM_HINT_* constant.
//   * aarch64 (other compilers): "prfm" inline assembly using pldl1keep,
//     pldl2keep, pldl3keep and pldl1strm.
//   * Anything else: the macros only evaluate (void)(ptr), i.e. prefetching
//     is disabled.
492511#if defined(__GNUC__ ) || defined(__clang__ )
493- #define PREFETCH_L1 (ptr ) __builtin_prefetch(ptr, 0, 3)
494- #define PREFETCH_L2 (ptr ) __builtin_prefetch(ptr, 0, 2)
512+ #define PREFETCH_T0 (ptr ) __builtin_prefetch(ptr, 0, 3)
513+ #define PREFETCH_T1 (ptr ) __builtin_prefetch(ptr, 0, 2)
514+ #define PREFETCH_T2 (ptr ) __builtin_prefetch(ptr, 0, 1)
515+ #define PREFETCH_NTA (ptr ) __builtin_prefetch(ptr, 0, 0)
// NOTE(review): `_M_I86` is not among MSVC's documented predefined macros;
// the documented macro for 32-bit x86 targets is `_M_IX86`. If this is a
// typo, 32-bit MSVC builds fall through to the disabled branch -- confirm.
// NOTE(review): `_mm_prefetch` and the `_MM_HINT_*` constants are documented
// under <xmmintrin.h>; verify that <mmintrin.h> alone declares them on every
// supported MSVC version.
495516#elif defined(_MSC_VER ) && (defined(_M_X64 ) || defined(_M_I86 )) && !defined(_M_ARM64EC )
496517 #include <mmintrin.h>
497- #define PREFETCH_L1 (ptr ) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
498- #define PREFETCH_L2 (ptr ) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
499- #elif defined(__aarch64__ )
500- #define PREFETCH_L1 (ptr ) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
501- #define PREFETCH_L2 (ptr ) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
518+ #define PREFETCH_T0 (ptr ) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
519+ #define PREFETCH_T1 (ptr ) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
520+ #define PREFETCH_T2 (ptr ) _mm_prefetch((const char*)(ptr), _MM_HINT_T2)
521+ #define PREFETCH_NTA (ptr ) _mm_prefetch((const char*)(ptr), _MM_HINT_NTA)
// The aarch64 variants pass *(ptr) as a "Q" memory operand to the prfm
// instruction; the do { ... } while (0) wrapper makes each macro usable as a
// single statement.
522+ #elif defined (__aarch64__ )
523+ #define PREFETCH_T0 (ptr ) \
524+ do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
525+ #define PREFETCH_T1 (ptr ) \
526+ do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
527+ #define PREFETCH_T2 (ptr ) \
528+ do { __asm__ __volatile__("prfm pldl3keep, %0" ::"Q"(*(ptr))); } while (0)
529+ #define PREFETCH_NTA (ptr ) \
530+ do { __asm__ __volatile__("prfm pldl1strm, %0" ::"Q"(*(ptr))); } while (0)
// Fallback: no known prefetch primitive, so the macros expand to a statement
// that only evaluates (void)(ptr).
502531#else
503- #define PREFETCH_L1 (ptr ) do { (void)(ptr); } while (0) /* disabled */
504- #define PREFETCH_L2 (ptr ) do { (void)(ptr); } while (0) /* disabled */
532+ #define PREFETCH_T0 (ptr ) do { (void)(ptr); } while (0) /* disabled */
533+ #define PREFETCH_T1 (ptr ) do { (void)(ptr); } while (0) /* disabled */
534+ #define PREFETCH_T2 (ptr ) do { (void)(ptr); } while (0) /* disabled */
535+ #define PREFETCH_NTA (ptr ) do { (void)(ptr); } while (0) /* disabled */
505536#endif
506537
// prefetch(ptr): expands to a prefetch hint only when
// GC_ENABLE_PREFETCH_INSTRUCTIONS is defined; otherwise it expands to nothing,
// so the argument is not evaluated at all. The '+' line switches the hint
// from PREFETCH_L1 to PREFETCH_T1.
507538#ifdef GC_ENABLE_PREFETCH_INSTRUCTIONS
508- #define prefetch (ptr ) PREFETCH_L1 (ptr)
539+ #define prefetch (ptr ) PREFETCH_T1 (ptr)
509540#else
510- #define prefetch (ptr )
541+ #define prefetch (ptr )
511542#endif
512543
513544// a contiguous sequence of PyObject pointers, can contain NULLs
0 commit comments