Commit 51b6d1c

Merge remote-tracking branch 'upstream/main' into gh-144278
2 parents: c3779af + 1dc12b2

38 files changed: +1894 -980 lines

Android/testbed/app/build.gradle.kts

Lines changed: 6 additions & 1 deletion
@@ -92,7 +92,12 @@ android {
             }
             throw GradleException("Failed to find API level in $androidEnvFile")
         }
-        targetSdk = 35
+
+        // This controls the API level of the maxVersion managed emulator, which is used
+        // by CI and cibuildwheel. 34 takes up too much disk space (#142289), 35 has
+        // issues connecting to the internet (#142387), and 36 and later are not
+        // available as aosp_atd images yet.
+        targetSdk = 33
 
         versionCode = 1
         versionName = "1.0"

Doc/c-api/memory.rst

Lines changed: 5 additions & 1 deletion
@@ -677,7 +677,11 @@ The pymalloc allocator
 Python has a *pymalloc* allocator optimized for small objects (smaller or equal
 to 512 bytes) with a short lifetime. It uses memory mappings called "arenas"
 with a fixed size of either 256 KiB on 32-bit platforms or 1 MiB on 64-bit
-platforms. It falls back to :c:func:`PyMem_RawMalloc` and
+platforms. When Python is configured with :option:`--with-pymalloc-hugepages`,
+the arena size on 64-bit platforms is increased to 2 MiB to match the huge page
+size, and arena allocation will attempt to use huge pages (``MAP_HUGETLB`` on
+Linux, ``MEM_LARGE_PAGES`` on Windows) with automatic fallback to regular pages.
+It falls back to :c:func:`PyMem_RawMalloc` and
 :c:func:`PyMem_RawRealloc` for allocations larger than 512 bytes.
 
 *pymalloc* is the :ref:`default allocator <default-memory-allocators>` of the
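
As a rough illustration of the fallback behaviour documented above, here is a small self-contained C sketch. It is not the code touched by this commit: the function name and size constant are hypothetical, and only the Linux (MAP_HUGETLB) side is shown.

/* Illustrative sketch only: try a huge-page-backed mapping first, then fall
 * back to regular pages, mirroring the behaviour described for pymalloc
 * arenas. The function name and constant are hypothetical. */
#define _GNU_SOURCE
#include <stddef.h>
#include <sys/mman.h>

#define SKETCH_ARENA_SIZE (2u * 1024u * 1024u)  /* 2 MiB, the x86-64 huge page size */

static void *
sketch_arena_alloc(size_t size)
{
#ifdef MAP_HUGETLB
    /* First attempt: ask the kernel for huge pages explicitly. */
    void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
    if (p != MAP_FAILED) {
        return p;
    }
#endif
    /* Automatic fallback: a regular anonymous mapping. */
    void *q = mmap(NULL, size, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    return (q == MAP_FAILED) ? NULL : q;
}

On Windows the analogous attempt would go through VirtualAlloc with MEM_LARGE_PAGES, again falling back to a plain allocation when large pages are unavailable.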

Doc/using/configure.rst

Lines changed: 15 additions & 0 deletions
@@ -783,6 +783,21 @@ also be used to improve performance.
 
    See also :envvar:`PYTHONMALLOC` environment variable.
 
+.. option:: --with-pymalloc-hugepages
+
+   Enable huge page support for :ref:`pymalloc <pymalloc>` arenas (disabled by
+   default). When enabled, the arena size on 64-bit platforms is increased to
+   2 MiB and arena allocation uses ``MAP_HUGETLB`` (Linux) or
+   ``MEM_LARGE_PAGES`` (Windows) with automatic fallback to regular pages.
+
+   The configure script checks that the platform supports ``MAP_HUGETLB``
+   and emits a warning if it is not available.
+
+   On Windows, use the ``--pymalloc-hugepages`` flag with ``build.bat`` or
+   set the ``UsePymallocHugepages`` MSBuild property.
+
+   .. versionadded:: 3.15
+
 .. option:: --without-doc-strings
 
    Disable static documentation strings to reduce the memory footprint (enabled
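
The 2 MiB figure in the new option text is simply 2^21 bytes. A tiny self-contained sketch of that arithmetic follows; the PYMALLOC_USE_HUGEPAGES macro here merely pretends the configure check succeeded and the program itself is not CPython code.

/* Sketch of the size arithmetic only: the arena size is 1 << ARENA_BITS,
 * so raising ARENA_BITS from 20 to 21 doubles the arena from 1 MiB to 2 MiB. */
#include <stdio.h>

#define PYMALLOC_USE_HUGEPAGES 1        /* pretend --with-pymalloc-hugepages was given */

#if PYMALLOC_USE_HUGEPAGES
#  define ARENA_BITS 21                 /* 2 MiB */
#else
#  define ARENA_BITS 20                 /* 1 MiB */
#endif
#define ARENA_SIZE (1u << ARENA_BITS)

int main(void)
{
    printf("arena size: %u bytes (%u KiB)\n", ARENA_SIZE, ARENA_SIZE / 1024u);
    /* with ARENA_BITS == 21 this prints: arena size: 2097152 bytes (2048 KiB) */
    return 0;
}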

Doc/whatsnew/3.15.rst

Lines changed: 6 additions & 0 deletions
@@ -1477,6 +1477,12 @@ Build changes
   modules that are missing or packaged separately.
   (Contributed by Stan Ulbrych and Petr Viktorin in :gh:`139707`.)
 
+* The new configure option :option:`--with-pymalloc-hugepages` enables huge
+  page support for :ref:`pymalloc <pymalloc>` arenas. When enabled, arena size
+  increases to 2 MiB and allocation uses ``MAP_HUGETLB`` (Linux) or
+  ``MEM_LARGE_PAGES`` (Windows) with automatic fallback to regular pages.
+  On Windows, use ``build.bat --pymalloc-hugepages``.
+
 * Annotating anonymous mmap usage is now supported if Linux kernel supports
   :manpage:`PR_SET_VMA_ANON_NAME <PR_SET_VMA(2const)>` (Linux 5.17 or newer).
   Annotations are visible in ``/proc/<pid>/maps`` if the kernel supports the feature

Include/internal/pycore_obmalloc.h

Lines changed: 19 additions & 3 deletions
@@ -208,7 +208,11 @@ typedef unsigned int pymem_uint; /* assuming >= 16 bits */
  * mappings to reduce heap fragmentation.
  */
 #ifdef USE_LARGE_ARENAS
-#define ARENA_BITS 20 /* 1 MiB */
+# ifdef PYMALLOC_USE_HUGEPAGES
+#  define ARENA_BITS 21 /* 2 MiB */
+# else
+#  define ARENA_BITS 20 /* 1 MiB */
+# endif
 #else
 #define ARENA_BITS 18 /* 256 KiB */
 #endif

@@ -469,7 +473,7 @@ nfp free pools in usable_arenas.
  */
 
 /* How many arena_objects do we initially allocate?
- * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
+ * 16 = can allocate 16 arenas = 16 * ARENA_SIZE before growing the
  * `arenas` vector.
  */
 #define INITIAL_ARENA_OBJECTS 16

@@ -512,14 +516,26 @@ struct _obmalloc_mgmt {
 
    memory address bit allocation for keys
 
-   64-bit pointers, IGNORE_BITS=0 and 2^20 arena size:
+   ARENA_BITS is configurable: 20 (1 MiB) by default on 64-bit, or
+   21 (2 MiB) when PYMALLOC_USE_HUGEPAGES is enabled. All bit widths
+   below are derived from ARENA_BITS automatically.
+
+   64-bit pointers, IGNORE_BITS=0 and 2^20 arena size (default):
      15 -> MAP_TOP_BITS
      15 -> MAP_MID_BITS
      14 -> MAP_BOT_BITS
      20 -> ideal aligned arena
    ----
    64
 
+   64-bit pointers, IGNORE_BITS=0 and 2^21 arena size (hugepages):
+     15 -> MAP_TOP_BITS
+     15 -> MAP_MID_BITS
+     13 -> MAP_BOT_BITS
+     21 -> ideal aligned arena
+   ----
+   64
+
    64-bit pointers, IGNORE_BITS=16, and 2^20 arena size:
      16 -> IGNORE_BITS
      10 -> MAP_TOP_BITS
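
The two 64-bit tables added above are just partitions of the pointer bits. A minimal self-contained sketch checking that arithmetic follows; deriving MAP_BOT_BITS by subtraction here only restates the comment and is not a copy of the header's actual macros.

/* Sketch only: verify that TOP + MID + BOT + ARENA_BITS (+ IGNORE_BITS)
 * covers a full 64-bit pointer for both arena sizes documented above. */
#include <assert.h>

enum {
    POINTER_BITS = 64,
    IGNORE_BITS  = 0,
    MAP_TOP_BITS = 15,
    MAP_MID_BITS = 15,
};

/* Default build: 2^20 (1 MiB) arenas -> 14 bottom bits. */
enum { BOT_BITS_1M = POINTER_BITS - IGNORE_BITS - MAP_TOP_BITS - MAP_MID_BITS - 20 };
static_assert(BOT_BITS_1M == 14, "matches the 2^20 table");

/* --with-pymalloc-hugepages: 2^21 (2 MiB) arenas -> 13 bottom bits. */
enum { BOT_BITS_2M = POINTER_BITS - IGNORE_BITS - MAP_TOP_BITS - MAP_MID_BITS - 21 };
static_assert(BOT_BITS_2M == 13, "matches the 2^21 table");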

Include/internal/pycore_optimizer.h

Lines changed: 8 additions & 0 deletions
@@ -37,10 +37,15 @@ typedef struct _JitOptContext {
     // Arena for the symbolic types.
     ty_arena t_arena;
 
+    // Arena for the descriptor mappings.
+    descr_arena d_arena;
+
     JitOptRef *n_consumed;
     JitOptRef *limit;
     JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
     _PyJitUopBuffer out_buffer;
+    // Index of the last escaped uop in out_buffer.
+    int last_escape_index;
 } JitOptContext;
 
 

@@ -295,6 +300,9 @@ extern JitOptRef _Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptRef value,
 extern bool _Py_uop_sym_is_compact_int(JitOptRef sym);
 extern JitOptRef _Py_uop_sym_new_compact_int(JitOptContext *ctx);
 extern void _Py_uop_sym_set_compact_int(JitOptContext *ctx, JitOptRef sym);
+extern JitOptRef _Py_uop_sym_new_descr_object(JitOptContext *ctx, unsigned int type_version);
+extern JitOptRef _Py_uop_sym_get_attr(JitOptContext *ctx, JitOptRef ref, uint16_t slot_index);
+extern JitOptRef _Py_uop_sym_set_attr(JitOptContext *ctx, JitOptRef ref, uint16_t slot_index, JitOptRef value);
 extern JitOptRef _Py_uop_sym_new_predicate(JitOptContext *ctx, JitOptRef lhs_ref, JitOptRef rhs_ref, JitOptPredicateKind kind);
 extern void _Py_uop_sym_apply_predicate_narrowing(JitOptContext *ctx, JitOptRef sym, bool branch_is_true);
Include/internal/pycore_optimizer_types.h

Lines changed: 36 additions & 0 deletions
@@ -16,6 +16,10 @@ extern "C" {
 
 #define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)
 
+// Maximum descriptor mappings per object tracked symbolically
+#define MAX_SYMBOLIC_DESCR_SIZE 16
+#define DESCR_ARENA_SIZE (MAX_SYMBOLIC_DESCR_SIZE * 100)
+
 // Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
 #define MAX_ABSTRACT_FRAME_DEPTH (16)
 

@@ -41,6 +45,7 @@ typedef enum _JitSymType {
     JIT_SYM_TRUTHINESS_TAG = 9,
     JIT_SYM_COMPACT_INT = 10,
     JIT_SYM_PREDICATE_TAG = 11,
+    JIT_SYM_DESCR_TAG = 12,
 } JitSymType;
 
 typedef struct _jit_opt_known_class {

@@ -91,6 +96,31 @@ typedef struct {
     uint8_t tag;
 } JitOptCompactInt;
 
+/*
+Mapping from slot index or attribute offset to its symbolic value.
+SAFETY:
+    This structure is used for both STORE_ATTR_SLOT and STORE_ATTR_INSTANCE_VALUE.
+    These two never appear on the same object type because:
+        __slots__ classes don't have Py_TPFLAGS_INLINE_VALUES
+    Therefore, there is no index collision between slot offsets and inline value offsets.
+Note:
+    STORE_ATTR_WITH_HINT is NOT currently tracked.
+    If we want to track it in the future, we need to be careful about
+    potential index collisions with STORE_ATTR_INSTANCE_VALUE.
+*/
+typedef struct {
+    uint16_t slot_index;
+    uint16_t symbol;
+} JitOptDescrMapping;
+
+typedef struct _jit_opt_descr {
+    uint8_t tag;
+    uint8_t num_descrs;
+    uint16_t last_modified_index;  // Index in out_buffer when this object was last modified
+    uint32_t type_version;
+    JitOptDescrMapping *descrs;
+} JitOptDescrObject;
+
 typedef union _jit_opt_symbol {
     uint8_t tag;
     JitOptKnownClass cls;

@@ -99,6 +129,7 @@ typedef union _jit_opt_symbol {
     JitOptTuple tuple;
     JitOptTruthiness truthiness;
     JitOptCompactInt compact;
+    JitOptDescrObject descr;
     JitOptPredicate predicate;
 } JitOptSymbol;
 

@@ -128,6 +159,11 @@ typedef struct ty_arena {
     JitOptSymbol arena[TY_ARENA_SIZE];
 } ty_arena;
 
+typedef struct descr_arena {
+    int descr_curr_number;
+    int descr_max_number;
+    JitOptDescrMapping arena[DESCR_ARENA_SIZE];
+} descr_arena;
 
 #ifdef __cplusplus
 }
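
To make the intent of JitOptDescrMapping concrete, here is a self-contained sketch of the bounded slot-to-symbol table it implies. The helper and type names below are hypothetical and are not the optimizer's real API; the actual code stores the entries in the descr_arena and reaches them through JitOptDescrObject.descrs.

/* Hypothetical sketch: a bounded linear-scan table mapping a slot index to a
 * symbolic value, as implied by JitOptDescrMapping and MAX_SYMBOLIC_DESCR_SIZE. */
#include <stdbool.h>
#include <stdint.h>

#define SKETCH_MAX_DESCRS 16    /* mirrors MAX_SYMBOLIC_DESCR_SIZE */

typedef struct {
    uint16_t slot_index;
    uint16_t symbol;            /* stands in for the symbolic value, as in JitOptDescrMapping */
} SketchDescrMapping;

typedef struct {
    uint8_t num_descrs;
    SketchDescrMapping descrs[SKETCH_MAX_DESCRS];
} SketchDescrTable;

/* Record the symbol stored at slot_index, overwriting any earlier entry for
 * the same slot. Returns false when the table is full, i.e. the caller would
 * have to give up precise tracking for this object. */
static bool
sketch_descr_set(SketchDescrTable *t, uint16_t slot_index, uint16_t symbol)
{
    for (uint8_t i = 0; i < t->num_descrs; i++) {
        if (t->descrs[i].slot_index == slot_index) {
            t->descrs[i].symbol = symbol;   /* a later store shadows the earlier one */
            return true;
        }
    }
    if (t->num_descrs >= SKETCH_MAX_DESCRS) {
        return false;
    }
    t->descrs[t->num_descrs].slot_index = slot_index;
    t->descrs[t->num_descrs].symbol = symbol;
    t->num_descrs++;
    return true;
}

/* Look up the symbol recorded for slot_index; returns false if none. */
static bool
sketch_descr_get(const SketchDescrTable *t, uint16_t slot_index, uint16_t *out)
{
    for (uint8_t i = 0; i < t->num_descrs; i++) {
        if (t->descrs[i].slot_index == slot_index) {
            *out = t->descrs[i].symbol;
            return true;
        }
    }
    return false;
}

Because slot offsets from __slots__ classes and inline-value offsets never coexist on one type (per the SAFETY note in the diff above), a single index space like this is collision-free.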
