Skip to content

Commit fca7fec

Browse files
authored
gh-116738: Make lzma module thread-safe (#142947)
1 parent 865eb12 commit fca7fec

File tree

3 files changed

+47
-11
lines changed

3 files changed

+47
-11
lines changed

Lib/test/test_free_threading/test_lzma.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,24 @@ def worker():
4545
data = lzd.decompress(compressed, chunk_size)
4646
self.assertEqual(len(data), chunk_size)
4747
output.append(data)
48+
# Read attributes concurrently with other threads decompressing
49+
self.assertEqual(lzd.check, lzma.CHECK_CRC64)
50+
self.assertIsInstance(lzd.eof, bool)
51+
self.assertIsInstance(lzd.needs_input, bool)
52+
self.assertIsInstance(lzd.unused_data, bytes)
4853

4954
run_concurrently(worker_func=worker, nthreads=NTHREADS)
5055
self.assertEqual(len(output), NTHREADS)
5156
# Verify the expected chunks (order doesn't matter due to append race)
5257
self.assertSetEqual(set(output), set(chunks))
58+
self.assertEqual(lzd.check, lzma.CHECK_CRC64)
59+
self.assertTrue(lzd.eof)
60+
self.assertFalse(lzd.needs_input)
61+
# Each thread added full compressed data to the buffer, but only 1 copy
62+
# is consumed to produce the output. The rest remains as unused_data.
63+
self.assertEqual(
64+
len(lzd.unused_data), len(compressed) * (NTHREADS - 1)
65+
)
5366

5467

5568
if __name__ == "__main__":
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Make the attributes in :mod:`lzma` thread-safe on the :term:`free threaded
2+
<free threading>` build.

Modules/_lzmamodule.c

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "pycore_long.h" // _PyLong_UInt32_Converter()
2121
// Blocks output buffer wrappers
2222
#include "pycore_blocks_output_buffer.h"
23+
#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_*_RELAXED
2324

2425
#if OUTPUT_BUFFER_MAX_BLOCK_SIZE > SIZE_MAX
2526
#error "The maximum block size accepted by liblzma is SIZE_MAX."
@@ -948,10 +949,10 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length)
948949
goto error;
949950
}
950951
if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) {
951-
d->check = lzma_get_check(&d->lzs);
952+
FT_ATOMIC_STORE_INT_RELAXED(d->check, lzma_get_check(&d->lzs));
952953
}
953954
if (lzret == LZMA_STREAM_END) {
954-
d->eof = 1;
955+
FT_ATOMIC_STORE_CHAR_RELAXED(d->eof, 1);
955956
break;
956957
} else if (lzs->avail_out == 0) {
957958
/* Need to check lzs->avail_out before lzs->avail_in.
@@ -1038,13 +1039,14 @@ decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
10381039
}
10391040

10401041
if (d->eof) {
1041-
d->needs_input = 0;
1042+
FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 0);
10421043
if (lzs->avail_in > 0) {
1043-
Py_XSETREF(d->unused_data,
1044-
PyBytes_FromStringAndSize((char *)lzs->next_in, lzs->avail_in));
1045-
if (d->unused_data == NULL) {
1044+
PyObject *unused_data = PyBytes_FromStringAndSize(
1045+
(char *)lzs->next_in, lzs->avail_in);
1046+
if (unused_data == NULL) {
10461047
goto error;
10471048
}
1049+
Py_XSETREF(d->unused_data, unused_data);
10481050
}
10491051
}
10501052
else if (lzs->avail_in == 0) {
@@ -1054,17 +1056,17 @@ decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
10541056
/* (avail_in==0 && avail_out==0)
10551057
Maybe lzs's internal state still have a few bytes can
10561058
be output, try to output them next time. */
1057-
d->needs_input = 0;
1059+
FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 0);
10581060

10591061
/* If max_length < 0, lzs->avail_out always > 0 */
10601062
assert(max_length >= 0);
10611063
} else {
10621064
/* Input buffer exhausted, output buffer has space. */
1063-
d->needs_input = 1;
1065+
FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 1);
10641066
}
10651067
}
10661068
else {
1067-
d->needs_input = 0;
1069+
FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 0);
10681070

10691071
/* If we did not use the input buffer, we now have
10701072
to copy the tail from the caller's buffer into the
@@ -1314,15 +1316,33 @@ PyDoc_STRVAR(Decompressor_needs_input_doc,
13141316
PyDoc_STRVAR(Decompressor_unused_data_doc,
13151317
"Data found after the end of the compressed stream.");
13161318

1319+
static PyObject *
1320+
Decompressor_unused_data_get(PyObject *op, void *Py_UNUSED(closure))
1321+
{
1322+
Decompressor *self = Decompressor_CAST(op);
1323+
if (!FT_ATOMIC_LOAD_CHAR_RELAXED(self->eof)) {
1324+
return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
1325+
}
1326+
PyMutex_Lock(&self->mutex);
1327+
assert(self->unused_data != NULL);
1328+
PyObject *result = Py_NewRef(self->unused_data);
1329+
PyMutex_Unlock(&self->mutex);
1330+
return result;
1331+
}
1332+
1333+
static PyGetSetDef Decompressor_getset[] = {
1334+
{"unused_data", Decompressor_unused_data_get, NULL,
1335+
Decompressor_unused_data_doc},
1336+
{NULL},
1337+
};
1338+
13171339
static PyMemberDef Decompressor_members[] = {
13181340
{"check", Py_T_INT, offsetof(Decompressor, check), Py_READONLY,
13191341
Decompressor_check_doc},
13201342
{"eof", Py_T_BOOL, offsetof(Decompressor, eof), Py_READONLY,
13211343
Decompressor_eof_doc},
13221344
{"needs_input", Py_T_BOOL, offsetof(Decompressor, needs_input), Py_READONLY,
13231345
Decompressor_needs_input_doc},
1324-
{"unused_data", Py_T_OBJECT_EX, offsetof(Decompressor, unused_data), Py_READONLY,
1325-
Decompressor_unused_data_doc},
13261346
{NULL}
13271347
};
13281348

@@ -1332,6 +1352,7 @@ static PyType_Slot lzma_decompressor_type_slots[] = {
13321352
{Py_tp_new, _lzma_LZMADecompressor},
13331353
{Py_tp_doc, (char *)_lzma_LZMADecompressor__doc__},
13341354
{Py_tp_members, Decompressor_members},
1355+
{Py_tp_getset, Decompressor_getset},
13351356
{0, 0}
13361357
};
13371358

0 commit comments

Comments
 (0)