Skip to content

Commit 4586652

Browse files
authored
Merge branch 'main' into warnings-as-error-2
2 parents 009a6a0 + 8ada7a9 commit 4586652

File tree

8 files changed

+84
-38
lines changed

8 files changed

+84
-38
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Optimize the AArch64 code generation for the JIT. Patch by Diego Russo.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix missing NULL check in ``_PyMem_FreeDelayed`` in :term:`free-threaded <free threading>` build.

Modules/_ctypes/_ctypes.c

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2148,18 +2148,7 @@ static PyObject *CreateSwappedType(ctypes_state *st, PyTypeObject *type,
21482148
if (!swapped_args)
21492149
return NULL;
21502150

2151-
if (st->swapped_suffix == NULL) {
2152-
#ifdef WORDS_BIGENDIAN
2153-
st->swapped_suffix = PyUnicode_InternFromString("_le");
2154-
#else
2155-
st->swapped_suffix = PyUnicode_InternFromString("_be");
2156-
#endif
2157-
}
2158-
if (st->swapped_suffix == NULL) {
2159-
Py_DECREF(swapped_args);
2160-
return NULL;
2161-
}
2162-
2151+
assert(st->swapped_suffix != NULL);
21632152
newname = PyUnicode_Concat(name, st->swapped_suffix);
21642153
if (newname == NULL) {
21652154
Py_DECREF(swapped_args);
@@ -5113,12 +5102,7 @@ PyCArrayType_from_ctype(ctypes_state *st, PyObject *itemtype, Py_ssize_t length)
51135102
char name[256];
51145103
PyObject *len;
51155104

5116-
if (st->array_cache == NULL) {
5117-
st->array_cache = PyDict_New();
5118-
if (st->array_cache == NULL) {
5119-
return NULL;
5120-
}
5121-
}
5105+
assert(st->array_cache != NULL);
51225106
len = PyLong_FromSsize_t(length);
51235107
if (len == NULL)
51245108
return NULL;
@@ -6099,6 +6083,20 @@ _ctypes_mod_exec(PyObject *mod)
60996083
return -1;
61006084
}
61016085

6086+
st->array_cache = PyDict_New();
6087+
if (st->array_cache == NULL) {
6088+
return -1;
6089+
}
6090+
6091+
#ifdef WORDS_BIGENDIAN
6092+
st->swapped_suffix = PyUnicode_InternFromString("_le");
6093+
#else
6094+
st->swapped_suffix = PyUnicode_InternFromString("_be");
6095+
#endif
6096+
if (st->swapped_suffix == NULL) {
6097+
return -1;
6098+
}
6099+
61026100
if (_ctypes_add_types(mod) < 0) {
61036101
return -1;
61046102
}

Modules/_ctypes/malloc_closure.c

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,16 @@
2727

2828
/******************************************************************/
2929

30+
31+
#ifdef Py_GIL_DISABLED
32+
static PyMutex malloc_closure_lock;
33+
# define MALLOC_CLOSURE_LOCK() PyMutex_Lock(&malloc_closure_lock)
34+
# define MALLOC_CLOSURE_UNLOCK() PyMutex_Unlock(&malloc_closure_lock)
35+
#else
36+
# define MALLOC_CLOSURE_LOCK() ((void)0)
37+
# define MALLOC_CLOSURE_UNLOCK() ((void)0)
38+
#endif
39+
3040
typedef union _tagITEM {
3141
ffi_closure closure;
3242
union _tagITEM *next;
@@ -110,9 +120,11 @@ void Py_ffi_closure_free(void *p)
110120
}
111121
#endif
112122
#endif
123+
MALLOC_CLOSURE_LOCK();
113124
ITEM *item = (ITEM *)p;
114125
item->next = free_list;
115126
free_list = item;
127+
MALLOC_CLOSURE_UNLOCK();
116128
}
117129

118130
/* return one item from the free list, allocating more if needed */
@@ -131,11 +143,15 @@ void *Py_ffi_closure_alloc(size_t size, void** codeloc)
131143
}
132144
#endif
133145
#endif
146+
MALLOC_CLOSURE_LOCK();
134147
ITEM *item;
135-
if (!free_list)
148+
if (!free_list) {
136149
more_core();
137-
if (!free_list)
150+
}
151+
if (!free_list) {
152+
MALLOC_CLOSURE_UNLOCK();
138153
return NULL;
154+
}
139155
item = free_list;
140156
free_list = item->next;
141157
#ifdef _M_ARM
@@ -144,5 +160,6 @@ void *Py_ffi_closure_alloc(size_t size, void** codeloc)
144160
#else
145161
*codeloc = (void *)item;
146162
#endif
163+
MALLOC_CLOSURE_UNLOCK();
147164
return (void *)item;
148165
}

Objects/obmalloc.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1215,7 +1215,9 @@ void
12151215
_PyMem_FreeDelayed(void *ptr)
12161216
{
12171217
assert(!((uintptr_t)ptr & 0x01));
1218-
free_delayed((uintptr_t)ptr);
1218+
if (ptr != NULL) {
1219+
free_delayed((uintptr_t)ptr);
1220+
}
12191221
}
12201222

12211223
#ifdef Py_GIL_DISABLED

Objects/typeobject.c

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -574,14 +574,16 @@ _PyType_GetMRO(PyTypeObject *self)
574574
static inline void
575575
set_tp_mro(PyTypeObject *self, PyObject *mro, int initial)
576576
{
577-
assert(PyTuple_CheckExact(mro));
578-
if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) {
579-
// XXX tp_mro can probably be statically allocated for each
580-
// static builtin type.
581-
assert(initial);
582-
assert(self->tp_mro == NULL);
583-
/* Other checks are done via set_tp_bases. */
584-
_Py_SetImmortal(mro);
577+
if (mro != NULL) {
578+
assert(PyTuple_CheckExact(mro));
579+
if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) {
580+
// XXX tp_mro can probably be statically allocated for each
581+
// static builtin type.
582+
assert(initial);
583+
assert(self->tp_mro == NULL);
584+
/* Other checks are done via set_tp_bases. */
585+
_Py_SetImmortal(mro);
586+
}
585587
}
586588
self->tp_mro = mro;
587589
}

Tools/jit/_stencils.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,24 @@ def pad(self, alignment: int) -> None:
209209
self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}")
210210
self.body.extend([0] * padding)
211211

212-
def remove_jump(self, *, alignment: int = 1) -> None:
212+
def add_nops(self, nop: bytes, alignment: int) -> None:
    """Add NOPs until there is alignment. Fail if it is not possible.

    ``nop`` is the encoding of a single NOP instruction and ``alignment`` is
    the byte multiple that ``len(self.body)`` must reach.  Nothing is
    appended when the body is already aligned.

    Raises:
        ValueError: if the gap cannot be filled with a whole number of
            NOPs, including the case where ``nop`` is empty.
    """
    offset = len(self.body)
    nop_size = len(nop)

    # Calculate the gap to the next multiple of alignment.
    gap = -offset % alignment
    if not gap:
        return
    # An empty NOP can never fill a non-zero gap.  Test for it before the
    # modulo so the caller gets the documented ValueError instead of a
    # ZeroDivisionError.
    if nop_size == 0 or gap % nop_size != 0:
        raise ValueError(
            f"Cannot add nops of size '{nop_size}' to a body with "
            f"offset '{offset}' to align with '{alignment}'"
        )
    self.body.extend(nop * (gap // nop_size))
228+
229+
def remove_jump(self) -> None:
213230
"""Remove a zero-length continuation jump, if it exists."""
214231
hole = max(self.holes, key=lambda hole: hole.offset)
215232
match hole:
@@ -244,7 +261,7 @@ def remove_jump(self, *, alignment: int = 1) -> None:
244261
jump = b"\x00\x00\x00\x14"
245262
case _:
246263
return
247-
if self.body[offset:] == jump and offset % alignment == 0:
264+
if self.body[offset:] == jump:
248265
self.body = self.body[:offset]
249266
self.holes.remove(hole)
250267

@@ -266,10 +283,7 @@ class StencilGroup:
266283
_trampolines: set[int] = dataclasses.field(default_factory=set, init=False)
267284

268285
def process_relocations(
269-
self,
270-
known_symbols: dict[str, int],
271-
*,
272-
alignment: int = 1,
286+
self, known_symbols: dict[str, int], *, alignment: int = 1, nop: bytes = b""
273287
) -> None:
274288
"""Fix up all GOT and internal relocations for this stencil group."""
275289
for hole in self.code.holes.copy():
@@ -289,8 +303,8 @@ def process_relocations(
289303
self._trampolines.add(ordinal)
290304
hole.addend = ordinal
291305
hole.symbol = None
292-
self.code.remove_jump(alignment=alignment)
293-
self.code.pad(alignment)
306+
self.code.remove_jump()
307+
self.code.add_nops(nop=nop, alignment=alignment)
294308
self.data.pad(8)
295309
for stencil in [self.code, self.data]:
296310
for hole in stencil.holes:

Tools/jit/_targets.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,15 @@ class _Target(typing.Generic[_S, _R]):
4444
verbose: bool = False
4545
known_symbols: dict[str, int] = dataclasses.field(default_factory=dict)
4646

47+
def _get_nop(self) -> bytes:
48+
if re.fullmatch(r"aarch64-.*", self.triple):
49+
nop = b"\x1f\x20\x03\xD5"
50+
elif re.fullmatch(r"x86_64-.*|i686.*", self.triple):
51+
nop = b"\x90"
52+
else:
53+
raise ValueError(f"NOP not defined for {self.triple}")
54+
return nop
55+
4756
def _compute_digest(self, out: pathlib.Path) -> str:
4857
hasher = hashlib.sha256()
4958
hasher.update(self.triple.encode())
@@ -172,7 +181,9 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
172181
stencil_groups = {task.get_name(): task.result() for task in tasks}
173182
for stencil_group in stencil_groups.values():
174183
stencil_group.process_relocations(
175-
known_symbols=self.known_symbols, alignment=self.alignment
184+
known_symbols=self.known_symbols,
185+
alignment=self.alignment,
186+
nop=self._get_nop(),
176187
)
177188
return stencil_groups
178189

0 commit comments

Comments
 (0)