From 2b5be5f8920929b5259f7950e33adcdeb5768960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 26 Apr 2025 15:48:25 +0200 Subject: [PATCH 1/7] fix UAF in `xml.etree.ElementTree.Element.__deepcopy__` --- Lib/test/test_xml_etree.py | 18 ++++++++++++++ ...-04-26-15-50-12.gh-issue-133009.etBuz5.rst | 3 +++ Modules/_elementtree.c | 24 +++++++++++++++---- 3 files changed, 41 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-04-26-15-50-12.gh-issue-133009.etBuz5.rst diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 5fe9d6884106ad..07c97e5cb566d4 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2960,6 +2960,24 @@ def element_factory(x, y): del b gc_collect() + def test_deepcopy(self): + # Prevent crashes when __deepcopy__() mutates the element being copied. + # See https://github.com/python/cpython/issues/133009. + class X(ET.Element): + def __deepcopy__(self, memo): + root.clear() + return self + + root = ET.Element('a') + evil = X('x') + root.extend([evil, ET.Element('y')]) + if is_python_implementation(): + self.assertRaises(RuntimeError, copy.deepcopy, root) + else: + c = copy.deepcopy(root) + # In the C implementation, we can still copy the evil element. + self.assertListEqual(list(c), [evil]) + class MutationDeleteElementPath(str): def __new__(cls, elem, *args): diff --git a/Misc/NEWS.d/next/Library/2025-04-26-15-50-12.gh-issue-133009.etBuz5.rst b/Misc/NEWS.d/next/Library/2025-04-26-15-50-12.gh-issue-133009.etBuz5.rst new file mode 100644 index 00000000000000..1f7155c6a40d00 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-26-15-50-12.gh-issue-133009.etBuz5.rst @@ -0,0 +1,3 @@ +:mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.__deepcopy__ +` when the element is concurrently mutated. +Patch by Bénédikt Tran. diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 24b567b6caa260..4149f8494ebbc0 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -813,11 +813,16 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) PyTypeObject *tp = Py_TYPE(self); elementtreestate *st = get_elementtree_state_by_type(tp); + // Since the 'tag' attribute is undeletable, deepcopy() should be safe. tag = deepcopy(st, self->tag, memo); + assert(self->tag != NULL); if (!tag) return NULL; if (self->extra && self->extra->attrib) { + // While deepcopy(self->extra->attrib) may cause 'self->extra' to be + // NULL, we do not use it afterawrds without checking this, so no need + // to temporarily incref 'self->extra->attrib'. attrib = deepcopy(st, self->extra->attrib, memo); if (!attrib) { Py_DECREF(tag); @@ -835,12 +840,16 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) if (!element) return NULL; + // Since the 'text' attribute is undeletable, deepcopy() should be safe. text = deepcopy(st, JOIN_OBJ(self->text), memo); + assert(JOIN_OBJ(self->text) != NULL); if (!text) goto error; _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text))); + // Since the 'tail' attribute is undeletable, deepcopy() should be safe. tail = deepcopy(st, JOIN_OBJ(self->tail), memo); + assert(JOIN_OBJ(self->tail) != NULL); if (!tail) goto error; _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail))); @@ -850,9 +859,11 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) if (element_resize(element, self->extra->length) < 0) goto error; - // TODO(picnixz): check for an evil child's __deepcopy__ on 'self' - for (i = 0; i < self->extra->length; i++) { - PyObject* child = deepcopy(st, self->extra->children[i], memo); + // TODO(picnixz): should we protect against mutations from 'memo'? + for (i = 0; self->extra && i < self->extra->length; i++) { + PyObject* itemi = Py_NewRef(self->extra->children[i]); + PyObject* child = deepcopy(st, itemi, memo); + Py_DECREF(itemi); if (!child || !Element_Check(st, child)) { if (child) { raise_type_error(child); @@ -865,7 +876,12 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) } assert(!element->extra->length); - element->extra->length = self->extra->length; + /* + * The original 'self->extra' may be gone at this point if deepcopy() + * had side-effects. However, 'i' is the number of copied items that + * we were able to successfully copy. + */ + element->extra->length = i; } /* add object to memo dictionary (so deepcopy won't visit it again) */ From 721481d75c2700fe548df8a52a20d1cef75173a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 27 Apr 2025 11:48:26 +0200 Subject: [PATCH 2/7] always protect against possible mutations in `__deepcopy__` --- Modules/_elementtree.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 4149f8494ebbc0..63945fa26adfa0 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -805,6 +805,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) { Py_ssize_t i; ElementObject* element; + PyObject* tmp; PyObject* tag; PyObject* attrib; PyObject* text; @@ -813,17 +814,17 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) PyTypeObject *tp = Py_TYPE(self); elementtreestate *st = get_elementtree_state_by_type(tp); - // Since the 'tag' attribute is undeletable, deepcopy() should be safe. - tag = deepcopy(st, self->tag, memo); + tmp = Py_NewRef(self->tag); + tag = deepcopy(st, tmp, memo); + Py_DECREF(tmp); assert(self->tag != NULL); if (!tag) return NULL; if (self->extra && self->extra->attrib) { - // While deepcopy(self->extra->attrib) may cause 'self->extra' to be - // NULL, we do not use it afterawrds without checking this, so no need - // to temporarily incref 'self->extra->attrib'. + tmp = Py_NewRef(self->extra->attrib); attrib = deepcopy(st, self->extra->attrib, memo); + Py_DECREF(tmp); if (!attrib) { Py_DECREF(tag); return NULL; @@ -840,16 +841,16 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) if (!element) return NULL; - // Since the 'text' attribute is undeletable, deepcopy() should be safe. - text = deepcopy(st, JOIN_OBJ(self->text), memo); - assert(JOIN_OBJ(self->text) != NULL); + tmp = Py_NewRef(JOIN_OBJ(self->text)); + text = deepcopy(st, tmp, memo); + Py_DECREF(tmp); if (!text) goto error; _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text))); - // Since the 'tail' attribute is undeletable, deepcopy() should be safe. - tail = deepcopy(st, JOIN_OBJ(self->tail), memo); - assert(JOIN_OBJ(self->tail) != NULL); + tmp = Py_NewRef(JOIN_OBJ(self->tail)); + tail = deepcopy(st, tmp, memo); + Py_DECREF(tmp); if (!tail) goto error; _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail))); @@ -859,11 +860,10 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) if (element_resize(element, self->extra->length) < 0) goto error; - // TODO(picnixz): should we protect against mutations from 'memo'? for (i = 0; self->extra && i < self->extra->length; i++) { - PyObject* itemi = Py_NewRef(self->extra->children[i]); - PyObject* child = deepcopy(st, itemi, memo); - Py_DECREF(itemi); + tmp = Py_NewRef(self->extra->children[i]); + PyObject* child = deepcopy(st, tmp, memo); + Py_DECREF(tmp); if (!child || !Element_Check(st, child)) { if (child) { raise_type_error(child); From cef51469dbdb4b54ce3488092acc06b96a357caf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 27 Apr 2025 11:48:51 +0200 Subject: [PATCH 3/7] remove bad assertion --- Modules/_elementtree.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 63945fa26adfa0..951253550d8adf 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -817,7 +817,6 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) tmp = Py_NewRef(self->tag); tag = deepcopy(st, tmp, memo); Py_DECREF(tmp); - assert(self->tag != NULL); if (!tag) return NULL; From 8297c8a5508c345424a1bf829806467471262862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 28 Apr 2025 01:12:35 +0200 Subject: [PATCH 4/7] Update Modules/_elementtree.c --- Modules/_elementtree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 951253550d8adf..1b1952858ef40e 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -822,7 +822,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) if (self->extra && self->extra->attrib) { tmp = Py_NewRef(self->extra->attrib); - attrib = deepcopy(st, self->extra->attrib, memo); + attrib = deepcopy(st, tmp, memo); Py_DECREF(tmp); if (!attrib) { Py_DECREF(tag); From 7f8ed289617d7da1f35ebff579a218ec1ca283be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 28 Apr 2025 02:10:21 +0200 Subject: [PATCH 5/7] use `Py_CLEAR` to be on the safe side --- Modules/_elementtree.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 1b1952858ef40e..91cf66ebf007fb 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -816,14 +816,14 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) elementtreestate *st = get_elementtree_state_by_type(tp); tmp = Py_NewRef(self->tag); tag = deepcopy(st, tmp, memo); - Py_DECREF(tmp); + Py_CLEAR(tmp); if (!tag) return NULL; if (self->extra && self->extra->attrib) { tmp = Py_NewRef(self->extra->attrib); attrib = deepcopy(st, tmp, memo); - Py_DECREF(tmp); + Py_CLEAR(tmp); if (!attrib) { Py_DECREF(tag); return NULL; @@ -842,14 +842,14 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) tmp = Py_NewRef(JOIN_OBJ(self->text)); text = deepcopy(st, tmp, memo); - Py_DECREF(tmp); + Py_CLEAR(tmp); if (!text) goto error; _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text))); tmp = Py_NewRef(JOIN_OBJ(self->tail)); tail = deepcopy(st, tmp, memo); - Py_DECREF(tmp); + Py_CLEAR(tmp); if (!tail) goto error; _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail))); @@ -862,7 +862,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) for (i = 0; self->extra && i < self->extra->length; i++) { tmp = Py_NewRef(self->extra->children[i]); PyObject* child = deepcopy(st, tmp, memo); - Py_DECREF(tmp); + Py_CLEAR(tmp); if (!child || !Element_Check(st, child)) { if (child) { raise_type_error(child); From c5badac0f046fe7eac207eac8920dbe5aa40d7c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 6 May 2025 11:39:36 +0200 Subject: [PATCH 6/7] avoid killing deepcopy() optimization --- Modules/_elementtree.c | 46 ++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 91cf66ebf007fb..153fc838aa9c25 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -805,7 +805,6 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) { Py_ssize_t i; ElementObject* element; - PyObject* tmp; PyObject* tag; PyObject* attrib; PyObject* text; @@ -814,16 +813,14 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) PyTypeObject *tp = Py_TYPE(self); elementtreestate *st = get_elementtree_state_by_type(tp); - tmp = Py_NewRef(self->tag); - tag = deepcopy(st, tmp, memo); - Py_CLEAR(tmp); + // The deepcopy() helper takes care of incrementing the refcount + // of the object to copy so to avoid use-after-frees. + tag = deepcopy(st, self->tag, memo); if (!tag) return NULL; if (self->extra && self->extra->attrib) { - tmp = Py_NewRef(self->extra->attrib); - attrib = deepcopy(st, tmp, memo); - Py_CLEAR(tmp); + attrib = deepcopy(st, self->extra->attrib, memo); if (!attrib) { Py_DECREF(tag); return NULL; @@ -840,16 +837,12 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) if (!element) return NULL; - tmp = Py_NewRef(JOIN_OBJ(self->text)); - text = deepcopy(st, tmp, memo); - Py_CLEAR(tmp); + text = deepcopy(st, JOIN_OBJ(self->text), memo); if (!text) goto error; _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text))); - tmp = Py_NewRef(JOIN_OBJ(self->tail)); - tail = deepcopy(st, tmp, memo); - Py_CLEAR(tmp); + tail = deepcopy(st, JOIN_OBJ(self->tail), memo); if (!tail) goto error; _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail))); @@ -860,9 +853,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) goto error; for (i = 0; self->extra && i < self->extra->length; i++) { - tmp = Py_NewRef(self->extra->children[i]); - PyObject* child = deepcopy(st, tmp, memo); - Py_CLEAR(tmp); + PyObject* child = deepcopy(st, self->extra->children[i], memo); if (!child || !Element_Check(st, child)) { if (child) { raise_type_error(child); @@ -877,7 +868,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) assert(!element->extra->length); /* * The original 'self->extra' may be gone at this point if deepcopy() - * had side-effects. However, 'i' is the number of copied items that + * has side-effects. However, 'i' is the number of copied items that * we were able to successfully copy. */ element->extra->length = i; @@ -914,6 +905,8 @@ deepcopy(elementtreestate *st, PyObject *object, PyObject *memo) if (Py_REFCNT(object) == 1) { if (PyDict_CheckExact(object)) { + // Exact dictionaries do not execute arbitrary code as it's + // impossible to override their __iter__() method. PyObject *key, *value; Py_ssize_t pos = 0; int simple = 1; @@ -923,13 +916,20 @@ deepcopy(elementtreestate *st, PyObject *object, PyObject *memo) break; } } - if (simple) + if (simple) { + // This does not call object.__copy__(), so it's still safe. return PyDict_Copy(object); + } /* Fall through to general case */ } else if (Element_CheckExact(st, object)) { - return _elementtree_Element___deepcopy___impl( - (ElementObject *)object, memo); + // The __deepcopy__() call may call arbitrary code even if the + // object to copy is a built-in XML element (one of its children + // may mutate 'object' in its own __deepcopy__() implementation). + ElementObject *tmp = (ElementObject *)Py_NewRef(object); + PyObject *res = _elementtree_Element___deepcopy___impl(tmp, memo); + Py_DECREF(tmp); + return res; } } @@ -940,8 +940,10 @@ deepcopy(elementtreestate *st, PyObject *object, PyObject *memo) return NULL; } - PyObject *args[2] = {object, memo}; - return PyObject_Vectorcall(st->deepcopy_obj, args, 2, NULL); + PyObject *args[2] = {Py_NewRef(object), memo}; + PyObject *res = PyObject_Vectorcall(st->deepcopy_obj, args, 2, NULL); + Py_DECREF(args[0]); + return res; } From d585b098a6545cf0c1444ffb48e89795fd5575d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 9 May 2025 14:42:16 +0200 Subject: [PATCH 7/7] handle more evil mutations --- Lib/test/test_xml_etree.py | 30 ++++++++++++++++++++++++++-- Modules/_elementtree.c | 40 ++++++++++++++++++++++++-------------- 2 files changed, 53 insertions(+), 17 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 07c97e5cb566d4..8f2779520070d2 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2960,8 +2960,8 @@ def element_factory(x, y): del b gc_collect() - def test_deepcopy(self): - # Prevent crashes when __deepcopy__() mutates the element being copied. + def test_deepcopy_clear(self): + # Prevent crashes when __deepcopy__() clears the children list. # See https://github.com/python/cpython/issues/133009. class X(ET.Element): def __deepcopy__(self, memo): @@ -2972,12 +2972,38 @@ def __deepcopy__(self, memo): evil = X('x') root.extend([evil, ET.Element('y')]) if is_python_implementation(): + # Mutating a list over which we iterate raises an error. self.assertRaises(RuntimeError, copy.deepcopy, root) else: c = copy.deepcopy(root) # In the C implementation, we can still copy the evil element. self.assertListEqual(list(c), [evil]) + def test_deepcopy_grow(self): + # Prevent crashes when __deepcopy__() mutates the children list. + # See https://github.com/python/cpython/issues/133009. + a = ET.Element('a') + b = ET.Element('b') + c = ET.Element('c') + + class X(ET.Element): + def __deepcopy__(self, memo): + root.append(a) + root.append(b) + return self + + root = ET.Element('top') + evil1, evil2 = X('1'), X('2') + root.extend([evil1, c, evil2]) + children = list(copy.deepcopy(root)) + # mock deep copies + self.assertIs(children[0], evil1) + self.assertIs(children[2], evil2) + # true deep copies + self.assertEqual(children[1].tag, c.tag) + self.assertEqual([c.tag for c in children[3:]], + [a.tag, b.tag, a.tag, b.tag]) + class MutationDeleteElementPath(str): def __new__(cls, elem, *args): diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index f3493f6466da7c..fe24629f9f6f70 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -847,8 +847,11 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) assert(!element->extra || !element->extra->length); if (self->extra) { - if (element_resize(element, self->extra->length) < 0) + Py_ssize_t expected_count = self->extra->length; + if (element_resize(element, expected_count) < 0) { + assert(!element->extra->length); goto error; + } for (i = 0; self->extra && i < self->extra->length; i++) { PyObject* child = deepcopy(st, self->extra->children[i], memo); @@ -860,15 +863,23 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) element->extra->length = i; goto error; } + if (self->extra && expected_count != self->extra->length) { + // 'self->extra' got mutated and 'element' may not have + // sufficient space to hold the next iteration's item. + expected_count = self->extra->length; + if (element_resize(element, expected_count) < 0) { + Py_DECREF(child); + element->extra->length = i; + goto error; + } + } element->extra->children[i] = child; } assert(!element->extra->length); - /* - * The original 'self->extra' may be gone at this point if deepcopy() - * has side-effects. However, 'i' is the number of copied items that - * we were able to successfully copy. - */ + // The original 'self->extra' may be gone at this point if deepcopy() + // has side-effects. However, 'i' is the number of copied items that + // we were able to successfully copy. element->extra->length = i; } @@ -903,8 +914,6 @@ deepcopy(elementtreestate *st, PyObject *object, PyObject *memo) if (Py_REFCNT(object) == 1) { if (PyDict_CheckExact(object)) { - // Exact dictionaries do not execute arbitrary code as it's - // impossible to override their __iter__() method. PyObject *key, *value; Py_ssize_t pos = 0; int simple = 1; @@ -915,7 +924,6 @@ deepcopy(elementtreestate *st, PyObject *object, PyObject *memo) } } if (simple) { - // This does not call object.__copy__(), so it's still safe. return PyDict_Copy(object); } /* Fall through to general case */ @@ -923,10 +931,11 @@ deepcopy(elementtreestate *st, PyObject *object, PyObject *memo) else if (Element_CheckExact(st, object)) { // The __deepcopy__() call may call arbitrary code even if the // object to copy is a built-in XML element (one of its children - // may mutate 'object' in its own __deepcopy__() implementation). - ElementObject *tmp = (ElementObject *)Py_NewRef(object); - PyObject *res = _elementtree_Element___deepcopy___impl(tmp, memo); - Py_DECREF(tmp); + // any of its parents in its own __deepcopy__() implementation). + Py_INCREF(object); + PyObject *res = _elementtree_Element___deepcopy___impl( + (ElementObject *)object, memo); + Py_DECREF(object); return res; } } @@ -938,9 +947,10 @@ deepcopy(elementtreestate *st, PyObject *object, PyObject *memo) return NULL; } - PyObject *args[2] = {Py_NewRef(object), memo}; + Py_INCREF(object); + PyObject *args[2] = {object, memo}; PyObject *res = PyObject_Vectorcall(st->deepcopy_obj, args, 2, NULL); - Py_DECREF(args[0]); + Py_DECREF(object); return res; }