Skip to content

Commit ff88078

Browse files
committed
gh-143196: fix heap-buffer-overflow in JSON encoder indent cache
- Validate that _current_indent_level is 0 when indent is set, preventing uninitialized cache access and re-entrant __mul__ attacks - Convert integer indent to string of spaces in encoder_new, matching the Python-level behavior
1 parent 61ee048 commit ff88078

File tree

2 files changed

+98
-2
lines changed

2 files changed

+98
-2
lines changed

Lib/test/test_json/test_speedups.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,60 @@ def test(name):
8080
def test_unsortable_keys(self):
8181
with self.assertRaises(TypeError):
8282
self.json.encoder.JSONEncoder(sort_keys=True).encode({'a': 1, 1: 'a'})
83+
84+
def test_indent_argument_to_encoder(self):
85+
# gh-143196: indent must be str, int, or None
86+
# int is converted to spaces
87+
enc = self.json.encoder.c_make_encoder(
88+
None, lambda obj: obj, lambda obj: obj,
89+
4, ':', ', ', False, False, False,
90+
)
91+
result = enc({'a': 1}, 0)
92+
self.assertIn(' ', result[0]) # 4 spaces
93+
94+
# Negative int should raise ValueError
95+
with self.assertRaisesRegex(
96+
ValueError,
97+
r'make_encoder\(\) argument 4 must be a non-negative int',
98+
):
99+
self.json.encoder.c_make_encoder(None, None, None, -1, ': ', ', ',
100+
False, False, False)
101+
102+
# Other types should raise TypeError
103+
with self.assertRaisesRegex(
104+
TypeError,
105+
r'make_encoder\(\) argument 4 must be str, int, or None, not list',
106+
):
107+
self.json.encoder.c_make_encoder(None, None, None, [' '],
108+
': ', ', ', False, False, False)
109+
110+
def test_nonzero_indent_level_with_indent(self):
111+
# gh-143196: _current_indent_level must be 0 when indent is set
112+
# This prevents heap-buffer-overflow from uninitialized cache access
113+
# and also prevents re-entrant __mul__ attacks since PySequence_Repeat
114+
# is only called when indent_level != 0
115+
enc = self.json.encoder.c_make_encoder(
116+
None, lambda obj: obj, lambda obj: obj,
117+
' ', ':', ', ', False, False, False,
118+
)
119+
# indent_level=0 should work
120+
enc([None], 0)
121+
# indent_level!=0 should raise ValueError
122+
with self.assertRaisesRegex(
123+
ValueError,
124+
r'_current_indent_level must be 0 when indent is set',
125+
):
126+
enc([None], 1)
127+
128+
# Verify that str subclasses with custom __mul__ are safe because
129+
# __mul__ is never called when indent_level=0
130+
class CustomIndent(str):
131+
def __mul__(self, count):
132+
raise RuntimeError("__mul__ should not be called")
133+
134+
enc2 = self.json.encoder.c_make_encoder(
135+
None, lambda obj: obj, lambda obj: obj,
136+
CustomIndent(' '), ':', ', ', False, False, False,
137+
)
138+
# This should work - __mul__ is not called when indent_level=0
139+
enc2({'a': 1}, 0)

Modules/_json.c

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1319,14 +1319,47 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
13191319
return NULL;
13201320
}
13211321

1322+
/* Convert indent to str if it's not None or already a string */
1323+
if (indent != Py_None && !PyUnicode_Check(indent)) {
1324+
Py_ssize_t indent_level;
1325+
if (!PyIndex_Check(indent)) {
1326+
PyErr_Format(PyExc_TypeError,
1327+
"make_encoder() argument 4 must be str, int, or None, "
1328+
"not %.200s", Py_TYPE(indent)->tp_name);
1329+
return NULL;
1330+
}
1331+
indent_level = PyNumber_AsSsize_t(indent, PyExc_ValueError);
1332+
if (indent_level == -1 && PyErr_Occurred()) {
1333+
return NULL;
1334+
}
1335+
if (indent_level < 0) {
1336+
PyErr_SetString(PyExc_ValueError,
1337+
"make_encoder() argument 4 must be a non-negative int");
1338+
return NULL;
1339+
}
1340+
/* Create a string of spaces: ' ' * indent_level */
1341+
indent = PyUnicode_New(indent_level, ' ');
1342+
if (indent == NULL) {
1343+
return NULL;
1344+
}
1345+
if (indent_level > 0) {
1346+
memset(PyUnicode_1BYTE_DATA(indent), ' ', indent_level);
1347+
}
1348+
}
1349+
else {
1350+
Py_INCREF(indent);
1351+
}
1352+
13221353
s = (PyEncoderObject *)type->tp_alloc(type, 0);
1323-
if (s == NULL)
1354+
if (s == NULL) {
1355+
Py_DECREF(indent);
13241356
return NULL;
1357+
}
13251358

13261359
s->markers = Py_NewRef(markers);
13271360
s->defaultfn = Py_NewRef(defaultfn);
13281361
s->encoder = Py_NewRef(encoder);
1329-
s->indent = Py_NewRef(indent);
1362+
s->indent = indent; /* Already incref'd or newly created */
13301363
s->key_separator = Py_NewRef(key_separator);
13311364
s->item_separator = Py_NewRef(item_separator);
13321365
s->sort_keys = sort_keys;
@@ -1453,6 +1486,12 @@ encoder_call(PyObject *op, PyObject *args, PyObject *kwds)
14531486

14541487
PyObject *indent_cache = NULL;
14551488
if (self->indent != Py_None) {
1489+
if (indent_level != 0) {
1490+
PyErr_SetString(PyExc_ValueError,
1491+
"_current_indent_level must be 0 when indent is set");
1492+
PyUnicodeWriter_Discard(writer);
1493+
return NULL;
1494+
}
14561495
indent_cache = create_indent_cache(self, indent_level);
14571496
if (indent_cache == NULL) {
14581497
PyUnicodeWriter_Discard(writer);

0 commit comments

Comments
 (0)