Skip to content

Commit e3e5071

Browse files
Merge branch 'main' into doc-fix-84116
2 parents 2e7f2b0 + bb25f72 commit e3e5071

File tree

7 files changed

+66
-118
lines changed

7 files changed

+66
-118
lines changed

Include/internal/pycore_dict.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ extern Py_ssize_t _Py_dict_lookup_threadsafe_stackref(PyDictObject *mp, PyObject
114114

115115
extern int _PyDict_GetMethodStackRef(PyDictObject *dict, PyObject *name, _PyStackRef *method);
116116

117+
extern Py_ssize_t _PyDict_LookupIndexAndValue(PyDictObject *, PyObject *, PyObject **);
117118
extern Py_ssize_t _PyDict_LookupIndex(PyDictObject *, PyObject *);
118119
extern Py_ssize_t _PyDictKeys_StringLookup(PyDictKeysObject* dictkeys, PyObject *key);
119120

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
If we are specializing to ``LOAD_GLOBAL_MODULE`` or ``LOAD_ATTR_MODULE``, try
2+
to enable deferred reference counting for the value, if the object is owned by
3+
a different thread. This applies to the free-threaded build only and should
4+
improve scaling of multi-threaded programs. Note that when deferred reference
5+
counting is enabled, the object will be deallocated by the GC, rather than by
6+
:c:func:`Py_DECREF`.

Modules/_testinternalcapi/interpreter.c

Lines changed: 12 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,18 @@
1111

1212
int Test_EvalFrame_Resumes, Test_EvalFrame_Loads;
1313

14+
#ifdef _Py_TIER2
15+
static int
16+
stop_tracing_and_jit(PyThreadState *tstate, _PyInterpreterFrame *frame)
17+
{
18+
(void)(tstate);
19+
(void)(frame);
20+
return 0;
21+
}
22+
#endif
23+
24+
_PyJitEntryFuncPtr _Py_jit_entry;
25+
1426
#if _Py_TAIL_CALL_INTERP
1527
#include "test_targets.h"
1628
#include "test_cases.c.h"
@@ -78,12 +90,6 @@ Test_EvalFrame(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwflag)
7890
frame->previous = &entry.frame;
7991
tstate->current_frame = frame;
8092
entry.frame.localsplus[0] = PyStackRef_NULL;
81-
#ifdef _Py_TIER2
82-
if (tstate->current_executor != NULL) {
83-
entry.frame.localsplus[0] = PyStackRef_FromPyObjectNew(tstate->current_executor);
84-
tstate->current_executor = NULL;
85-
}
86-
#endif
8793

8894
/* support for generator.throw() */
8995
if (throwflag) {
@@ -119,11 +125,6 @@ Test_EvalFrame(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwflag)
119125
#endif
120126
}
121127

122-
#if defined(_Py_TIER2) && !defined(_Py_JIT)
123-
/* Tier 2 interpreter state */
124-
_PyExecutorObject *current_executor = NULL;
125-
const _PyUOpInstruction *next_uop = NULL;
126-
#endif
127128
#if _Py_TAIL_CALL_INTERP
128129
# if Py_STATS
129130
return _TAIL_CALL_start_frame(frame, NULL, tstate, NULL, instruction_funcptr_handler_table, 0, lastopcode);
@@ -136,108 +137,6 @@ Test_EvalFrame(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwflag)
136137
#endif
137138

138139

139-
#ifdef _Py_TIER2
140-
141-
// Tier 2 is also here!
142-
enter_tier_two:
143-
144-
#ifdef _Py_JIT
145-
assert(0);
146-
#else
147-
148-
#undef LOAD_IP
149-
#define LOAD_IP(UNUSED) (void)0
150-
151-
#ifdef Py_STATS
152-
// Disable these macros that apply to Tier 1 stats when we are in Tier 2
153-
#undef STAT_INC
154-
#define STAT_INC(opname, name) ((void)0)
155-
#undef STAT_DEC
156-
#define STAT_DEC(opname, name) ((void)0)
157-
#endif
158-
159-
#undef ENABLE_SPECIALIZATION
160-
#define ENABLE_SPECIALIZATION 0
161-
#undef ENABLE_SPECIALIZATION_FT
162-
#define ENABLE_SPECIALIZATION_FT 0
163-
164-
; // dummy statement after a label, before a declaration
165-
uint16_t uopcode;
166-
#ifdef Py_STATS
167-
int lastuop = 0;
168-
uint64_t trace_uop_execution_counter = 0;
169-
#endif
170-
171-
assert(next_uop->opcode == _START_EXECUTOR);
172-
tier2_dispatch:
173-
for (;;) {
174-
uopcode = next_uop->opcode;
175-
#ifdef Py_DEBUG
176-
if (frame->lltrace >= 3) {
177-
dump_stack(frame, stack_pointer);
178-
if (next_uop->opcode == _START_EXECUTOR) {
179-
printf("%4d uop: ", 0);
180-
}
181-
else {
182-
printf("%4d uop: ", (int)(next_uop - current_executor->trace));
183-
}
184-
_PyUOpPrint(next_uop);
185-
printf("\n");
186-
}
187-
#endif
188-
next_uop++;
189-
OPT_STAT_INC(uops_executed);
190-
UOP_STAT_INC(uopcode, execution_count);
191-
UOP_PAIR_INC(uopcode, lastuop);
192-
#ifdef Py_STATS
193-
trace_uop_execution_counter++;
194-
((_PyUOpInstruction *)next_uop)[-1].execution_count++;
195-
#endif
196-
197-
switch (uopcode) {
198-
199-
#include "executor_cases.c.h"
200-
201-
default:
202-
#ifdef Py_DEBUG
203-
{
204-
printf("Unknown uop: ");
205-
_PyUOpPrint(&next_uop[-1]);
206-
printf(" @ %d\n", (int)(next_uop - current_executor->trace - 1));
207-
Py_FatalError("Unknown uop");
208-
}
209-
#else
210-
Py_UNREACHABLE();
211-
#endif
212-
213-
}
214-
}
215-
216-
jump_to_error_target:
217-
#ifdef Py_DEBUG
218-
if (frame->lltrace >= 2) {
219-
printf("Error: [UOp ");
220-
_PyUOpPrint(&next_uop[-1]);
221-
printf(" @ %d -> %s]\n",
222-
(int)(next_uop - current_executor->trace - 1),
223-
_PyOpcode_OpName[frame->instr_ptr->op.code]);
224-
}
225-
#endif
226-
assert(next_uop[-1].format == UOP_FORMAT_JUMP);
227-
uint16_t target = uop_get_error_target(&next_uop[-1]);
228-
next_uop = current_executor->trace + target;
229-
goto tier2_dispatch;
230-
231-
jump_to_jump_target:
232-
assert(next_uop[-1].format == UOP_FORMAT_JUMP);
233-
target = uop_get_jump_target(&next_uop[-1]);
234-
next_uop = current_executor->trace + target;
235-
goto tier2_dispatch;
236-
237-
#endif // _Py_JIT
238-
239-
#endif // _Py_TIER2
240-
241140
early_exit:
242141
assert(_PyErr_Occurred(tstate));
243142
_Py_LeaveRecursiveCallPy(tstate);

Objects/dictobject.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2349,10 +2349,9 @@ dict_unhashable_type(PyObject *key)
23492349
}
23502350

23512351
Py_ssize_t
2352-
_PyDict_LookupIndex(PyDictObject *mp, PyObject *key)
2352+
_PyDict_LookupIndexAndValue(PyDictObject *mp, PyObject *key, PyObject **value)
23532353
{
23542354
// TODO: Thread safety
2355-
PyObject *value;
23562355
assert(PyDict_CheckExact((PyObject*)mp));
23572356
assert(PyUnicode_CheckExact(key));
23582357

@@ -2362,7 +2361,14 @@ _PyDict_LookupIndex(PyDictObject *mp, PyObject *key)
23622361
return -1;
23632362
}
23642363

2365-
return _Py_dict_lookup(mp, key, hash, &value);
2364+
return _Py_dict_lookup(mp, key, hash, value);
2365+
}
2366+
2367+
Py_ssize_t
2368+
_PyDict_LookupIndex(PyDictObject *mp, PyObject *key)
2369+
{
2370+
PyObject *value; // discarded
2371+
return _PyDict_LookupIndexAndValue(mp, key, &value);
23662372
}
23672373

23682374
/* Same as PyDict_GetItemWithError() but with hash supplied by caller.

Python/specialize.c

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,21 @@ static int function_kind(PyCodeObject *code);
358358
static bool function_check_args(PyObject *o, int expected_argcount, int opcode);
359359
static uint32_t function_get_version(PyObject *o, int opcode);
360360

361+
#ifdef Py_GIL_DISABLED
362+
static void
363+
maybe_enable_deferred_ref_count(PyObject *op)
364+
{
365+
if (!_Py_IsOwnedByCurrentThread(op)) {
366+
// For module level variables that are heavily used from multiple
367+
// threads, deferred reference counting provides good scaling
368+
// benefits. The downside is that the object will only be deallocated
369+
// by a GC run.
370+
PyUnstable_Object_EnableDeferredRefcount(op);
371+
}
372+
}
373+
#endif
374+
375+
361376
static int
362377
specialize_module_load_attr_lock_held(PyDictObject *dict, _Py_CODEUNIT *instr, PyObject *name)
363378
{
@@ -366,7 +381,8 @@ specialize_module_load_attr_lock_held(PyDictObject *dict, _Py_CODEUNIT *instr, P
366381
SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_NON_STRING);
367382
return -1;
368383
}
369-
Py_ssize_t index = _PyDict_LookupIndex(dict, name);
384+
PyObject *value;
385+
Py_ssize_t index = _PyDict_LookupIndexAndValue(dict, name, &value);
370386
assert(index != DKIX_ERROR);
371387
if (index != (uint16_t)index) {
372388
SPECIALIZATION_FAIL(LOAD_ATTR,
@@ -381,6 +397,9 @@ specialize_module_load_attr_lock_held(PyDictObject *dict, _Py_CODEUNIT *instr, P
381397
SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OUT_OF_VERSIONS);
382398
return -1;
383399
}
400+
#ifdef Py_GIL_DISABLED
401+
maybe_enable_deferred_ref_count(value);
402+
#endif
384403
write_u32(cache->version, keys_version);
385404
cache->index = (uint16_t)index;
386405
specialize(instr, LOAD_ATTR_MODULE);
@@ -1269,7 +1288,6 @@ specialize_attr_loadclassattr(PyObject *owner, _Py_CODEUNIT *instr,
12691288
return 1;
12701289
}
12711290

1272-
12731291
static void
12741292
specialize_load_global_lock_held(
12751293
PyObject *globals, PyObject *builtins,
@@ -1289,7 +1307,12 @@ specialize_load_global_lock_held(
12891307
SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_LOAD_GLOBAL_NON_STRING_OR_SPLIT);
12901308
goto fail;
12911309
}
1310+
#ifdef Py_GIL_DISABLED
1311+
PyObject *value;
1312+
Py_ssize_t index = _PyDict_LookupIndexAndValue((PyDictObject *)globals, name, &value);
1313+
#else
12921314
Py_ssize_t index = _PyDictKeys_StringLookup(globals_keys, name);
1315+
#endif
12931316
if (index == DKIX_ERROR) {
12941317
SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_EXPECTED_ERROR);
12951318
goto fail;
@@ -1310,6 +1333,9 @@ specialize_load_global_lock_held(
13101333
SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE);
13111334
goto fail;
13121335
}
1336+
#ifdef Py_GIL_DISABLED
1337+
maybe_enable_deferred_ref_count(value);
1338+
#endif
13131339
cache->index = (uint16_t)index;
13141340
cache->module_keys_version = (uint16_t)keys_version;
13151341
specialize(instr, LOAD_GLOBAL_MODULE);

Tools/c-analyzer/cpython/_parser.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def format_tsv_lines(lines):
8383
'Python/opcode_targets.h',
8484
'Modules/_testinternalcapi/test_targets.h',
8585
'Modules/_testinternalcapi/test_cases.c.h',
86+
'Modules/_testinternalcapi/interpreter.c',
8687
# XXX: Throws errors if PY_VERSION_HEX is not mocked out
8788
'Modules/clinic/_testclinic_depr.c.h',
8889

Tools/ftscalingbench/ftscalingbench.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
# > echo "0" | sudo tee /sys/devices/system/cpu/cpufreq/boost
2222
#
2323

24+
import copy
2425
import math
2526
import os
2627
import queue
@@ -214,6 +215,14 @@ def instantiate_dataclass():
214215
for _ in range(1000 * WORK_SCALE):
215216
obj = MyDataClass(x=1, y=2, z=3)
216217

218+
219+
@register_benchmark
220+
def deepcopy():
221+
x = {'list': [1, 2], 'tuple': (1, None)}
222+
for i in range(40 * WORK_SCALE):
223+
copy.deepcopy(x)
224+
225+
217226
def bench_one_thread(func):
218227
t0 = time.perf_counter_ns()
219228
func()

0 commit comments

Comments
 (0)