diff --git a/Include/internal/pycore_optimizer_types.h b/Include/internal/pycore_optimizer_types.h index 1996ce10735736..409cae1d1dfa28 100644 --- a/Include/internal/pycore_optimizer_types.h +++ b/Include/internal/pycore_optimizer_types.h @@ -116,7 +116,8 @@ typedef struct { typedef struct _jit_opt_descr { uint8_t tag; uint8_t num_descrs; - uint16_t last_modified_index; // Index in out_buffer when this object was last modified + // Index in out_buffer when this object was last escaped + uint16_t last_escape_index; uint32_t type_version; JitOptDescrMapping *descrs; } JitOptDescrObject; diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index d19f219c165324..b3a4d8b43c74fa 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -201,7 +201,7 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_ATTR_PROPERTY_FRAME] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_GUARD_DORV_NO_DICT] = HAS_EXIT_FLAG, [_STORE_ATTR_INSTANCE_VALUE] = HAS_ESCAPES_FLAG, - [_STORE_ATTR_INSTANCE_VALUE_NULL] = HAS_DEOPT_FLAG, + [_STORE_ATTR_INSTANCE_VALUE_NULL] = 0, [_STORE_ATTR_WITH_HINT] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_STORE_ATTR_SLOT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_STORE_ATTR_SLOT_NULL] = HAS_DEOPT_FLAG, @@ -280,7 +280,7 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_CALLABLE_TUPLE_1] = HAS_DEOPT_FLAG, [_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_CHECK_AND_ALLOCATE_OBJECT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, - [_CREATE_INIT_FRAME] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_SYNC_SP_FLAG, + [_CREATE_INIT_FRAME] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_SYNC_SP_FLAG, [_EXIT_INIT_CHECK] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_CALL_BUILTIN_CLASS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_BUILTIN_O] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h index f4373a753ed566..be071587457dab 100644 --- a/Modules/_testinternalcapi/test_cases.c.h +++ b/Modules/_testinternalcapi/test_cases.c.h @@ -1919,10 +1919,8 @@ args = &stack_pointer[-oparg]; self = self_or_null; init = callable; - _PyFrame_SetStackPointer(frame, stack_pointer); _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); - stack_pointer = _PyFrame_GetStackPointer(frame); assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE); shim->localsplus[0] = PyStackRef_DUP(self); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8bc58240e908a7..ed9b9d2fd6a479 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2677,8 +2677,7 @@ dummy_func( STAT_INC(STORE_ATTR, hit); assert(_PyObject_GetManagedDict(owner_o) == NULL); PyObject **value_ptr = (PyObject**)(((char *)owner_o) + offset); - PyObject *old_value = *value_ptr; - DEOPT_IF(old_value != NULL); + assert(*value_ptr == NULL); FT_ATOMIC_STORE_PTR_RELEASE(*value_ptr, PyStackRef_AsPyObjectSteal(value)); PyDictValues *values = _PyObject_InlineValues(owner_o); Py_ssize_t index = value_ptr - values->values; @@ -2756,8 +2755,7 @@ dummy_func( DEOPT_IF(!LOCK_OBJECT(owner_o)); char *addr = (char *)owner_o + index; STAT_INC(STORE_ATTR, hit); - PyObject *old_value = *(PyObject **)addr; - DEOPT_IF(old_value != NULL); + assert(*(PyObject **)addr == NULL); FT_ATOMIC_STORE_PTR_RELEASE(*(PyObject **)addr, PyStackRef_AsPyObjectSteal(value)); UNLOCK_OBJECT(owner_o); INPUTS_DEAD(); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 1a705098472425..db64be12f3b06b 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -9795,12 +9795,7 @@ STAT_INC(STORE_ATTR, hit); assert(_PyObject_GetManagedDict(owner_o) == NULL); PyObject **value_ptr = (PyObject**)(((char *)owner_o) + offset); - PyObject *old_value = *value_ptr; - if (old_value != NULL) { - UOP_STAT_INC(uopcode, miss); - SET_CURRENT_CACHED_VALUES(0); - JUMP_TO_JUMP_TARGET(); - } + assert(*value_ptr == NULL); FT_ATOMIC_STORE_PTR_RELEASE(*value_ptr, PyStackRef_AsPyObjectSteal(value)); PyDictValues *values = _PyObject_InlineValues(owner_o); Py_ssize_t index = value_ptr - values->values; @@ -9829,13 +9824,7 @@ STAT_INC(STORE_ATTR, hit); assert(_PyObject_GetManagedDict(owner_o) == NULL); PyObject **value_ptr = (PyObject**)(((char *)owner_o) + offset); - PyObject *old_value = *value_ptr; - if (old_value != NULL) { - UOP_STAT_INC(uopcode, miss); - _tos_cache0 = owner; - SET_CURRENT_CACHED_VALUES(1); - JUMP_TO_JUMP_TARGET(); - } + assert(*value_ptr == NULL); FT_ATOMIC_STORE_PTR_RELEASE(*value_ptr, PyStackRef_AsPyObjectSteal(value)); PyDictValues *values = _PyObject_InlineValues(owner_o); Py_ssize_t index = value_ptr - values->values; @@ -9865,14 +9854,7 @@ STAT_INC(STORE_ATTR, hit); assert(_PyObject_GetManagedDict(owner_o) == NULL); PyObject **value_ptr = (PyObject**)(((char *)owner_o) + offset); - PyObject *old_value = *value_ptr; - if (old_value != NULL) { - UOP_STAT_INC(uopcode, miss); - _tos_cache1 = owner; - _tos_cache0 = value; - SET_CURRENT_CACHED_VALUES(2); - JUMP_TO_JUMP_TARGET(); - } + assert(*value_ptr == NULL); FT_ATOMIC_STORE_PTR_RELEASE(*value_ptr, PyStackRef_AsPyObjectSteal(value)); PyDictValues *values = _PyObject_InlineValues(owner_o); Py_ssize_t index = value_ptr - values->values; @@ -9901,15 +9883,7 @@ STAT_INC(STORE_ATTR, hit); assert(_PyObject_GetManagedDict(owner_o) == NULL); PyObject **value_ptr = (PyObject**)(((char *)owner_o) + offset); - PyObject *old_value = *value_ptr; - if (old_value != NULL) { - UOP_STAT_INC(uopcode, miss); - _tos_cache2 = owner; - _tos_cache1 = value; - _tos_cache0 = _stack_item_0; - SET_CURRENT_CACHED_VALUES(3); - JUMP_TO_JUMP_TARGET(); - } + assert(*value_ptr == NULL); FT_ATOMIC_STORE_PTR_RELEASE(*value_ptr, PyStackRef_AsPyObjectSteal(value)); PyDictValues *values = _PyObject_InlineValues(owner_o); Py_ssize_t index = value_ptr - values->values; @@ -10072,12 +10046,7 @@ } char *addr = (char *)owner_o + index; STAT_INC(STORE_ATTR, hit); - PyObject *old_value = *(PyObject **)addr; - if (old_value != NULL) { - UOP_STAT_INC(uopcode, miss); - SET_CURRENT_CACHED_VALUES(0); - JUMP_TO_JUMP_TARGET(); - } + assert(*(PyObject **)addr == NULL); FT_ATOMIC_STORE_PTR_RELEASE(*(PyObject **)addr, PyStackRef_AsPyObjectSteal(value)); UNLOCK_OBJECT(owner_o); o = owner; @@ -10108,13 +10077,7 @@ } char *addr = (char *)owner_o + index; STAT_INC(STORE_ATTR, hit); - PyObject *old_value = *(PyObject **)addr; - if (old_value != NULL) { - UOP_STAT_INC(uopcode, miss); - _tos_cache0 = owner; - SET_CURRENT_CACHED_VALUES(1); - JUMP_TO_JUMP_TARGET(); - } + assert(*(PyObject **)addr == NULL); FT_ATOMIC_STORE_PTR_RELEASE(*(PyObject **)addr, PyStackRef_AsPyObjectSteal(value)); UNLOCK_OBJECT(owner_o); o = owner; @@ -10147,14 +10110,7 @@ } char *addr = (char *)owner_o + index; STAT_INC(STORE_ATTR, hit); - PyObject *old_value = *(PyObject **)addr; - if (old_value != NULL) { - UOP_STAT_INC(uopcode, miss); - _tos_cache1 = owner; - _tos_cache0 = value; - SET_CURRENT_CACHED_VALUES(2); - JUMP_TO_JUMP_TARGET(); - } + assert(*(PyObject **)addr == NULL); FT_ATOMIC_STORE_PTR_RELEASE(*(PyObject **)addr, PyStackRef_AsPyObjectSteal(value)); UNLOCK_OBJECT(owner_o); o = owner; @@ -10187,15 +10143,7 @@ } char *addr = (char *)owner_o + index; STAT_INC(STORE_ATTR, hit); - PyObject *old_value = *(PyObject **)addr; - if (old_value != NULL) { - UOP_STAT_INC(uopcode, miss); - _tos_cache2 = owner; - _tos_cache1 = value; - _tos_cache0 = _stack_item_0; - SET_CURRENT_CACHED_VALUES(3); - JUMP_TO_JUMP_TARGET(); - } + assert(*(PyObject **)addr == NULL); FT_ATOMIC_STORE_PTR_RELEASE(*(PyObject **)addr, PyStackRef_AsPyObjectSteal(value)); UNLOCK_OBJECT(owner_o); o = owner; @@ -14583,10 +14531,8 @@ args = &stack_pointer[-oparg]; self = stack_pointer[-1 - oparg]; init = stack_pointer[-2 - oparg]; - _PyFrame_SetStackPointer(frame, stack_pointer); _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); - stack_pointer = _PyFrame_GetStackPointer(frame); assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE); shim->localsplus[0] = PyStackRef_DUP(self); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 77d9b182adae96..f1cc273b64215b 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1919,10 +1919,8 @@ args = &stack_pointer[-oparg]; self = self_or_null; init = callable; - _PyFrame_SetStackPointer(frame, stack_pointer); _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); - stack_pointer = _PyFrame_GetStackPointer(frame); assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE); shim->localsplus[0] = PyStackRef_DUP(self); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 0a1f2672f4b6a0..fe4d778ea328a7 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -526,9 +526,9 @@ optimize_uops( if (ctx->out_buffer.next == out_ptr) { *(ctx->out_buffer.next++) = *this_instr; } - // Track escapes - but skip when from init shim frame, since self hasn't escaped yet bool is_init_shim = CURRENT_FRAME_IS_INIT_SHIM(); - if ((_PyUop_Flags[out_ptr->opcode] & HAS_ESCAPES_FLAG) && !is_init_shim) + // Track escapes + if (_PyUop_Flags[out_ptr->opcode] & HAS_ESCAPES_FLAG) { ctx->last_escape_index = uop_buffer_length(&ctx->out_buffer) - 1; } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index d79c273a099e8f..ffaae1973eacfc 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -25,24 +25,24 @@ state represents no information, and the BOTTOM state represents contradictory information. Though symbols logically progress through all intermediate nodes, we often skip in-between states for convenience: - UNKNOWN-------------------+ - | | | -NULL | | -| | | <- Anything below this level is an object. -| NON_NULL-+ | -| | | | <- Anything below this level has a known type version. -| TYPE_VERSION | | -| | | | <- Anything below this level has a known type. -| KNOWN_CLASS | | -| | | | | | PREDICATE -| | | INT* | | | -| | | | | | | <- Anything below this level has a known truthiness. -| | | | | TRUTHINESS | -| | | | | | | -| TUPLE | | | | | -| | | | | | | <- Anything below this level is a known constant. -| KNOWN_VALUE--+----------+ -| | <- Anything below this level is unreachable. + UNKNOWN-------------------+------------+ + | | | | +NULL | | | +| | | | <- Anything below this level is an object. +| NON_NULL-+ | | +| | | | | <- Anything below this level has a known type version. +| TYPE_VERSION | | | +| | | | | <- Anything below this level has a known type. +| KNOWN_CLASS | | | +| | | | | | PREDICATE DESCR +| | | INT* | | | | +| | | | | | | | <- Anything below this level has a known truthiness. +| | | | | TRUTHINESS | | +| | | | | | | | +| TUPLE | | | | | | +| | | | | | | | <- Anything below this level is a known constant. +| KNOWN_VALUE--+----------+------------+ +| | <- Anything below this level is unreachable. BOTTOM For example, after guarding that the type of an UNKNOWN local is int, we can @@ -980,7 +980,7 @@ _Py_uop_sym_new_descr_object(JitOptContext *ctx, unsigned int type_version) res->descr.num_descrs = 0; res->descr.descrs = NULL; res->descr.type_version = type_version; - res->descr.last_modified_index = uop_buffer_length(&ctx->out_buffer); + res->descr.last_escape_index = uop_buffer_length(&ctx->out_buffer); return PyJitRef_Wrap(res); } @@ -995,7 +995,7 @@ _Py_uop_sym_get_attr(JitOptContext *ctx, JitOptRef ref, uint16_t slot_index) // 1. Symbol has mappings allocated // 2. No escape has occurred since last modification (state is fresh) if (sym->descr.descrs != NULL && - sym->descr.last_modified_index >= ctx->last_escape_index) + sym->descr.last_escape_index >= ctx->last_escape_index) { for (int i = 0; i < sym->descr.num_descrs; i++) { if (sym->descr.descrs[i].slot_index == slot_index) { @@ -1026,57 +1026,56 @@ JitOptRef _Py_uop_sym_set_attr(JitOptContext *ctx, JitOptRef ref, uint16_t slot_index, JitOptRef value) { JitOptSymbol *sym = PyJitRef_Unwrap(ref); - int curr_index = uop_buffer_length(&ctx->out_buffer); - switch (sym->tag) { - case JIT_SYM_DESCR_TAG: - break; - default: - return _Py_uop_sym_new_not_null(ctx); - } - - // Check escape - if (sym->descr.last_modified_index < ctx->last_escape_index) { - sym->descr.num_descrs = 0; - } + case JIT_SYM_DESCR_TAG: { + // Check escape + if (sym->descr.last_escape_index < ctx->last_escape_index) { + sym->descr.num_descrs = 0; + sym->descr.last_escape_index = uop_buffer_length(&ctx->out_buffer); + return _Py_uop_sym_new_unknown(ctx); + } - // Get old value before updating - JitOptRef old_value = PyJitRef_NULL; - if (sym->descr.descrs != NULL) { - for (int i = 0; i < sym->descr.num_descrs; i++) { - if (sym->descr.descrs[i].slot_index == slot_index) { - old_value = PyJitRef_Wrap(allocation_base(ctx) + sym->descr.descrs[i].symbol); - break; + // Get old value before updating + JitOptRef old_value = _Py_uop_sym_new_unknown(ctx); + if (sym->descr.descrs != NULL) { + for (int i = 0; i < sym->descr.num_descrs; i++) { + if (sym->descr.descrs[i].slot_index == slot_index) { + old_value = PyJitRef_Wrap(allocation_base(ctx) + sym->descr.descrs[i].symbol); + break; + } + } } - } - } - // Update the last modified timestamp - sym->descr.last_modified_index = curr_index; + // Check if have arena space allocated + if (sym->descr.descrs == NULL) { + sym->descr.descrs = descr_arena_alloc(ctx); + if (sym->descr.descrs == NULL) { + ctx->done = true; + ctx->out_of_space = true; + return _Py_uop_sym_new_null(ctx); + } + } + // Check if the slot already exists + for (int i = 0; i < sym->descr.num_descrs; i++) { + if (sym->descr.descrs[i].slot_index == slot_index) { + sym->descr.descrs[i].symbol = (uint16_t)(PyJitRef_Unwrap(value) - allocation_base(ctx)); + assert(!_Py_uop_sym_is_null(old_value)); + return old_value; + } + } + // Add new mapping if there's space + if (sym->descr.num_descrs < MAX_SYMBOLIC_DESCR_SIZE) { + int idx = sym->descr.num_descrs++; + sym->descr.descrs[idx].slot_index = slot_index; + sym->descr.descrs[idx].symbol = (uint16_t)(PyJitRef_Unwrap(value) - allocation_base(ctx)); + } - // Check if have arena space allocated - if (sym->descr.descrs == NULL) { - sym->descr.descrs = descr_arena_alloc(ctx); - if (sym->descr.descrs == NULL) { - return PyJitRef_IsNull(old_value) ? _Py_uop_sym_new_not_null(ctx) : old_value; + // No value there. Objects are initialized to NULL, so it's safe. + return _Py_uop_sym_new_null(ctx); } + default: + return _Py_uop_sym_new_unknown(ctx); } - // Check if the slot already exists - for (int i = 0; i < sym->descr.num_descrs; i++) { - if (sym->descr.descrs[i].slot_index == slot_index) { - sym->descr.descrs[i].symbol = (uint16_t)(PyJitRef_Unwrap(value) - allocation_base(ctx)); - assert(!PyJitRef_IsNull(old_value)); - return old_value; - } - } - // Add new mapping if there's space - if (sym->descr.num_descrs < MAX_SYMBOLIC_DESCR_SIZE) { - int idx = sym->descr.num_descrs++; - sym->descr.descrs[idx].slot_index = slot_index; - sym->descr.descrs[idx].symbol = (uint16_t)(PyJitRef_Unwrap(value) - allocation_base(ctx)); - } - - return PyJitRef_IsNull(old_value) ? PyJitRef_Borrow(_Py_uop_sym_new_null(ctx)) : old_value; } // 0 on success, -1 on error. @@ -1698,6 +1697,12 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored)) retrieved = _Py_uop_sym_get_attr(ctx, descr_obj, 0); TEST_PREDICATE(_Py_uop_sym_get_const(ctx, retrieved) == val_43, "descr getattr(0) changed unexpectedly"); + // Test setattr with escape + ctx->last_escape_index = INT_MAX; + retrieved = _Py_uop_sym_set_attr(ctx, descr_obj, 1, slot_val3); + TEST_PREDICATE(PyJitRef_Unwrap(retrieved)->tag == JIT_SYM_UNKNOWN_TAG, + "descr setattr should be unknown after escaping"); + ctx->last_escape_index = 0; // Test escape invalidation JitOptRef descr_obj3 = _Py_uop_sym_new_descr_object(ctx, 100); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 2001fb7c37931a..2d36edae9384c9 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -710,8 +710,14 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "_Py_set_eval_breaker_bit", "trigger_backoff_counter", "_PyThreadState_PopCStackRefSteal", + "_PyFrame_PushTrampolineUnchecked", ) +KNOWN_NON_ESCAPING_UOPS = { + # This only calls escaping functions in the error path. + "_CREATE_INIT_FRAME", +} + def check_escaping_calls(instr: parser.CodeDef, escapes: dict[SimpleStmt, EscapingCall]) -> None: error: lexer.Token | None = None @@ -965,7 +971,7 @@ def compute_properties(op: parser.CodeDef) -> Properties: ) error_with_pop = has_error_with_pop(op) error_without_pop = has_error_without_pop(op) - escapes = stmt_escapes(op.block) + escapes = not (isinstance(op, parser.InstDef) and op.name in KNOWN_NON_ESCAPING_UOPS) and stmt_escapes(op.block) pure = False if isinstance(op, parser.LabelDef) else "pure" in op.annotations no_save_ip = False if isinstance(op, parser.LabelDef) else "no_save_ip" in op.annotations unpredictable, branches_seen = stmt_has_jump_on_unpredictable_path(op.block, 0)