Skip to content

Commit aeb3403

Browse files
gh-143421: Move JitOptContext from stack allocation to per-thread heap allocation (GH-143536)
* move JitOptContext to _PyThreadStateImpl
* make _PyUOpInstruction buffer a part of _PyThreadStateImpl

Co-authored-by: Kumar Aditya <kumaraditya@python.org>
1 parent cea2d24 commit aeb3403

File tree

6 files changed

+150
-147
lines changed

6 files changed

+150
-147
lines changed

Include/internal/pycore_optimizer.h

Lines changed: 2 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ extern "C" {
1212
#include "pycore_uop.h" // _PyUOpInstruction
1313
#include "pycore_uop_ids.h"
1414
#include "pycore_stackref.h" // _PyStackRef
15+
#include "pycore_optimizer_types.h"
1516
#include <stdbool.h>
1617

1718

@@ -84,7 +85,7 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp);
8485
#define JIT_CLEANUP_THRESHOLD 1000
8586

8687
int _Py_uop_analyze_and_optimize(
87-
PyFunctionObject *func,
88+
_PyThreadStateImpl *tstate,
8889
_PyUOpInstruction *trace, int trace_len, int curr_stackentries,
8990
_PyBloomFilter *dependencies);
9091

@@ -112,86 +113,6 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst)
112113
return inst->error_target;
113114
}
114115

115-
// Holds locals, stack, locals, stack ... co_consts (in that order)
116-
#define MAX_ABSTRACT_INTERP_SIZE 4096
117-
118-
#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)
119-
120-
// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
121-
#define MAX_ABSTRACT_FRAME_DEPTH (16)
122-
123-
// The maximum number of side exits that we can take before requiring forward
124-
// progress (and inserting a new ENTER_EXECUTOR instruction). In practice, this
125-
// is the "maximum amount of polymorphism" that an isolated trace tree can
126-
// handle before rejoining the rest of the program.
127-
#define MAX_CHAIN_DEPTH 4
128-
129-
/* Symbols */
130-
/* See explanation in optimizer_symbols.c */
131-
132-
133-
typedef enum _JitSymType {
134-
JIT_SYM_UNKNOWN_TAG = 1,
135-
JIT_SYM_NULL_TAG = 2,
136-
JIT_SYM_NON_NULL_TAG = 3,
137-
JIT_SYM_BOTTOM_TAG = 4,
138-
JIT_SYM_TYPE_VERSION_TAG = 5,
139-
JIT_SYM_KNOWN_CLASS_TAG = 6,
140-
JIT_SYM_KNOWN_VALUE_TAG = 7,
141-
JIT_SYM_TUPLE_TAG = 8,
142-
JIT_SYM_TRUTHINESS_TAG = 9,
143-
JIT_SYM_COMPACT_INT = 10,
144-
} JitSymType;
145-
146-
typedef struct _jit_opt_known_class {
147-
uint8_t tag;
148-
uint32_t version;
149-
PyTypeObject *type;
150-
} JitOptKnownClass;
151-
152-
typedef struct _jit_opt_known_version {
153-
uint8_t tag;
154-
uint32_t version;
155-
} JitOptKnownVersion;
156-
157-
typedef struct _jit_opt_known_value {
158-
uint8_t tag;
159-
PyObject *value;
160-
} JitOptKnownValue;
161-
162-
#define MAX_SYMBOLIC_TUPLE_SIZE 7
163-
164-
typedef struct _jit_opt_tuple {
165-
uint8_t tag;
166-
uint8_t length;
167-
uint16_t items[MAX_SYMBOLIC_TUPLE_SIZE];
168-
} JitOptTuple;
169-
170-
typedef struct {
171-
uint8_t tag;
172-
bool invert;
173-
uint16_t value;
174-
} JitOptTruthiness;
175-
176-
typedef struct {
177-
uint8_t tag;
178-
} JitOptCompactInt;
179-
180-
typedef union _jit_opt_symbol {
181-
uint8_t tag;
182-
JitOptKnownClass cls;
183-
JitOptKnownValue value;
184-
JitOptKnownVersion version;
185-
JitOptTuple tuple;
186-
JitOptTruthiness truthiness;
187-
JitOptCompactInt compact;
188-
} JitOptSymbol;
189-
190-
191-
// This mimics the _PyStackRef API
192-
typedef union {
193-
uintptr_t bits;
194-
} JitOptRef;
195116

196117
#define REF_IS_BORROWED 1
197118

@@ -238,48 +159,6 @@ PyJitRef_IsBorrowed(JitOptRef ref)
238159
return (ref.bits & REF_IS_BORROWED) == REF_IS_BORROWED;
239160
}
240161

241-
struct _Py_UOpsAbstractFrame {
242-
bool globals_watched;
243-
// The version number of the globals dicts, once checked. 0 if unchecked.
244-
uint32_t globals_checked_version;
245-
// Max stacklen
246-
int stack_len;
247-
int locals_len;
248-
PyFunctionObject *func;
249-
PyCodeObject *code;
250-
251-
JitOptRef *stack_pointer;
252-
JitOptRef *stack;
253-
JitOptRef *locals;
254-
};
255-
256-
typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
257-
258-
typedef struct ty_arena {
259-
int ty_curr_number;
260-
int ty_max_number;
261-
JitOptSymbol arena[TY_ARENA_SIZE];
262-
} ty_arena;
263-
264-
typedef struct _JitOptContext {
265-
char done;
266-
char out_of_space;
267-
bool contradiction;
268-
// Has the builtins dict been watched?
269-
bool builtins_watched;
270-
// The current "executing" frame.
271-
_Py_UOpsAbstractFrame *frame;
272-
_Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
273-
int curr_frame_depth;
274-
275-
// Arena for the symbolic types.
276-
ty_arena t_arena;
277-
278-
JitOptRef *n_consumed;
279-
JitOptRef *limit;
280-
JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
281-
} JitOptContext;
282-
283162
extern bool _Py_uop_sym_is_null(JitOptRef sym);
284163
extern bool _Py_uop_sym_is_not_null(JitOptRef sym);
285164
extern bool _Py_uop_sym_is_const(JitOptContext *ctx, JitOptRef sym);

Include/internal/pycore_optimizer_types.h

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
#ifndef Py_INTERNAL_OPTIMIZER_TYPES_H
2+
#define Py_INTERNAL_OPTIMIZER_TYPES_H
3+
#ifdef __cplusplus
4+
extern "C" {
5+
#endif
6+
7+
#ifndef Py_BUILD_CORE
8+
# error "this header requires Py_BUILD_CORE define"
9+
#endif
10+
11+
#include "pycore_uop.h" // UOP_MAX_TRACE_LENGTH
12+
13+
// Holds locals, stack, locals, stack ... co_consts (in that order)
14+
#define MAX_ABSTRACT_INTERP_SIZE 4096
15+
16+
#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)
17+
18+
// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
19+
#define MAX_ABSTRACT_FRAME_DEPTH (16)
20+
21+
// The maximum number of side exits that we can take before requiring forward
22+
// progress (and inserting a new ENTER_EXECUTOR instruction). In practice, this
23+
// is the "maximum amount of polymorphism" that an isolated trace tree can
24+
// handle before rejoining the rest of the program.
25+
#define MAX_CHAIN_DEPTH 4
26+
27+
/* Symbols */
28+
/* See explanation in optimizer_symbols.c */
29+
30+
31+
typedef enum _JitSymType {
32+
JIT_SYM_UNKNOWN_TAG = 1,
33+
JIT_SYM_NULL_TAG = 2,
34+
JIT_SYM_NON_NULL_TAG = 3,
35+
JIT_SYM_BOTTOM_TAG = 4,
36+
JIT_SYM_TYPE_VERSION_TAG = 5,
37+
JIT_SYM_KNOWN_CLASS_TAG = 6,
38+
JIT_SYM_KNOWN_VALUE_TAG = 7,
39+
JIT_SYM_TUPLE_TAG = 8,
40+
JIT_SYM_TRUTHINESS_TAG = 9,
41+
JIT_SYM_COMPACT_INT = 10,
42+
} JitSymType;
43+
44+
typedef struct _jit_opt_known_class {
45+
uint8_t tag;
46+
uint32_t version;
47+
PyTypeObject *type;
48+
} JitOptKnownClass;
49+
50+
typedef struct _jit_opt_known_version {
51+
uint8_t tag;
52+
uint32_t version;
53+
} JitOptKnownVersion;
54+
55+
typedef struct _jit_opt_known_value {
56+
uint8_t tag;
57+
PyObject *value;
58+
} JitOptKnownValue;
59+
60+
#define MAX_SYMBOLIC_TUPLE_SIZE 7
61+
62+
typedef struct _jit_opt_tuple {
63+
uint8_t tag;
64+
uint8_t length;
65+
uint16_t items[MAX_SYMBOLIC_TUPLE_SIZE];
66+
} JitOptTuple;
67+
68+
typedef struct {
69+
uint8_t tag;
70+
bool invert;
71+
uint16_t value;
72+
} JitOptTruthiness;
73+
74+
typedef struct {
75+
uint8_t tag;
76+
} JitOptCompactInt;
77+
78+
typedef union _jit_opt_symbol {
79+
uint8_t tag;
80+
JitOptKnownClass cls;
81+
JitOptKnownValue value;
82+
JitOptKnownVersion version;
83+
JitOptTuple tuple;
84+
JitOptTruthiness truthiness;
85+
JitOptCompactInt compact;
86+
} JitOptSymbol;
87+
88+
// This mimics the _PyStackRef API
89+
typedef union {
90+
uintptr_t bits;
91+
} JitOptRef;
92+
93+
typedef struct _Py_UOpsAbstractFrame {
94+
bool globals_watched;
95+
// The version number of the globals dicts, once checked. 0 if unchecked.
96+
uint32_t globals_checked_version;
97+
// Max stacklen
98+
int stack_len;
99+
int locals_len;
100+
PyFunctionObject *func;
101+
PyCodeObject *code;
102+
103+
JitOptRef *stack_pointer;
104+
JitOptRef *stack;
105+
JitOptRef *locals;
106+
} _Py_UOpsAbstractFrame;
107+
108+
typedef struct ty_arena {
109+
int ty_curr_number;
110+
int ty_max_number;
111+
JitOptSymbol arena[TY_ARENA_SIZE];
112+
} ty_arena;
113+
114+
typedef struct _JitOptContext {
115+
char done;
116+
char out_of_space;
117+
bool contradiction;
118+
// Has the builtins dict been watched?
119+
bool builtins_watched;
120+
// The current "executing" frame.
121+
_Py_UOpsAbstractFrame *frame;
122+
_Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
123+
int curr_frame_depth;
124+
125+
// Arena for the symbolic types.
126+
ty_arena t_arena;
127+
128+
JitOptRef *n_consumed;
129+
JitOptRef *limit;
130+
JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
131+
} JitOptContext;
132+
133+
134+
#ifdef __cplusplus
135+
}
136+
#endif
137+
#endif /* !Py_INTERNAL_OPTIMIZER_TYPES_H */

Include/internal/pycore_tstate.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ extern "C" {
1212
#include "pycore_freelist_state.h" // struct _Py_freelists
1313
#include "pycore_interpframe_structs.h" // _PyInterpreterFrame
1414
#include "pycore_mimalloc.h" // struct _mimalloc_thread_state
15+
#include "pycore_optimizer_types.h" // JitOptContext
1516
#include "pycore_qsbr.h" // struct qsbr
1617
#include "pycore_uop.h" // struct _PyUOpInstruction
1718
#include "pycore_structs.h"
@@ -52,10 +53,11 @@ typedef struct _PyJitTracerTranslatorState {
5253
} _PyJitTracerTranslatorState;
5354

5455
typedef struct _PyJitTracerState {
55-
_PyUOpInstruction *code_buffer;
5656
_PyJitTracerInitialState initial_state;
5757
_PyJitTracerPreviousState prev_state;
5858
_PyJitTracerTranslatorState translator_state;
59+
JitOptContext opt_context;
60+
_PyUOpInstruction code_buffer[UOP_MAX_TRACE_LENGTH];
5961
} _PyJitTracerState;
6062

6163
#endif

Python/optimizer.c

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,13 +1025,6 @@ _PyJit_TryInitializeTracing(
10251025
if (oparg > 0xFFFF) {
10261026
return 0;
10271027
}
1028-
if (_tstate->jit_tracer_state.code_buffer == NULL) {
1029-
_tstate->jit_tracer_state.code_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
1030-
if (_tstate->jit_tracer_state.code_buffer == NULL) {
1031-
// Don't error, just go to next instruction.
1032-
return 0;
1033-
}
1034-
}
10351028
PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj);
10361029
if (func == NULL) {
10371030
return 0;
@@ -1484,8 +1477,8 @@ uop_optimize(
14841477
OPT_STAT_INC(traces_created);
14851478
if (!is_noopt) {
14861479
length = _Py_uop_analyze_and_optimize(
1487-
_tstate->jit_tracer_state.initial_state.func,
1488-
buffer,length,
1480+
_tstate,
1481+
buffer, length,
14891482
curr_stackentries, dependencies);
14901483
if (length <= 0) {
14911484
return length;

Python/optimizer_analysis.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "pycore_opcode_metadata.h"
1919
#include "pycore_opcode_utils.h"
2020
#include "pycore_pystate.h" // _PyInterpreterState_GET()
21+
#include "pycore_tstate.h" // _PyThreadStateImpl
2122
#include "pycore_uop_metadata.h"
2223
#include "pycore_long.h"
2324
#include "pycore_interpframe.h" // _PyFrame_GetCode
@@ -334,17 +335,17 @@ _Py_opt_assert_within_stack_bounds(
334335
/* >0 (length) for success, 0 for not ready, clears all possible errors. */
335336
static int
336337
optimize_uops(
337-
PyFunctionObject *func,
338+
_PyThreadStateImpl *tstate,
338339
_PyUOpInstruction *trace,
339340
int trace_len,
340341
int curr_stacklen,
341342
_PyBloomFilter *dependencies
342343
)
343344
{
344345
assert(!PyErr_Occurred());
346+
PyFunctionObject *func = tstate->jit_tracer_state.initial_state.func;
345347

346-
JitOptContext context;
347-
JitOptContext *ctx = &context;
348+
JitOptContext *ctx = &tstate->jit_tracer_state.opt_context;
348349
uint32_t opcode = UINT16_MAX;
349350

350351
// Make sure that watchers are set up
@@ -574,7 +575,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
574575
// > 0 - length of optimized trace
575576
int
576577
_Py_uop_analyze_and_optimize(
577-
PyFunctionObject *func,
578+
_PyThreadStateImpl *tstate,
578579
_PyUOpInstruction *buffer,
579580
int length,
580581
int curr_stacklen,
@@ -584,7 +585,7 @@ _Py_uop_analyze_and_optimize(
584585
OPT_STAT_INC(optimizer_attempts);
585586

586587
length = optimize_uops(
587-
func, buffer,
588+
tstate, buffer,
588589
length, curr_stacklen, dependencies);
589590

590591
if (length == 0) {

0 commit comments

Comments
 (0)