Skip to content

Use-after-free in _Py_strhex_impl via re-entrant sep.__len__ in bytearray.hex #143195

@jackfromeast

Description

@jackfromeast

What happened?

_Py_strhex_impl calls PyObject_Length(sep) before hexlifying, and a crafted separator with __len__ that clears the bytearray frees its storage while the loop continues to read from the original buffer, triggering a use-after-free.

Proof of Concept:

t = bytearray(b'\xAA')

class S(bytes):
    def __len__(self):
        t.clear()
        return 1

t.hex(S(b':'))
ba = bytearray(b'A' * 1024)
mv = memoryview(ba)

class BadSep(bytes):
    def __len__(self):
        mv.release()
        ba.clear()
        return 1

mv.hex(BadSep(b':'))
Affected Versions
Python Version Status Exit Code
Python 3.9.24+ (heads/3.9:111bbc15b26, Oct 27 2025, 21:34:13) ASAN 1
Python 3.10.19+ (heads/3.10:014261980b1, Oct 27 2025, 21:19:00) [Clang 18.1.3 (1ubuntu1)] ASAN 1
Python 3.11.14+ (heads/3.11:88f3f5b5f11, Oct 27 2025, 21:20:35) [Clang 18.1.3 (1ubuntu1)] ASAN 1
Python 3.12.12+ (heads/3.12:8cb2092bd8c, Oct 27 2025, 21:27:07) [Clang 18.1.3 (1ubuntu1)] ASAN 1
Python 3.13.9+ (heads/3.13:9c8eade20c6, Oct 27 2025, 21:28:49) [Clang 18.1.3 (1ubuntu1)] ASAN 1
Python 3.14.0+ (heads/3.14:2e216728038, Oct 27 2025, 21:30:55) [Clang 18.1.3 (1ubuntu1)] ASAN 1
Python 3.15.0a1+ (heads/main:f5394c257ce, Oct 27 2025, 21:32:37) [Clang 18.1.3 (1ubuntu1)] ASAN 1
Vulnerable Code
static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen,
                                 PyObject* sep, int bytes_per_sep_group,
                                 const int return_bytes)
{
    assert(arglen >= 0);

    Py_UCS1 sep_char = 0;
    if (sep) {
        // Reentrant Call to obj's __len__ method
        Py_ssize_t seplen = PyObject_Length((PyObject*)sep);
        if (seplen < 0) {
            return NULL;
        }
        if (seplen != 1) {
            PyErr_SetString(PyExc_ValueError, "sep must be length 1.");
            return NULL;
        }
        if (PyUnicode_Check(sep)) {
            if (PyUnicode_KIND(sep) != PyUnicode_1BYTE_KIND) {
                PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
                return NULL;
            }
            sep_char = PyUnicode_READ_CHAR(sep, 0);
        }
        else if (PyBytes_Check(sep)) {
            sep_char = PyBytes_AS_STRING(sep)[0];
        }
        else {
            PyErr_SetString(PyExc_TypeError, "sep must be str or bytes.");
            return NULL;
        }
        if (sep_char > 127 && !return_bytes) {
            PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
            return NULL;
        }
    }
    else {
        bytes_per_sep_group = 0;
    }

    unsigned int abs_bytes_per_sep = Py_ABS(bytes_per_sep_group);
    Py_ssize_t resultlen = 0;
    if (bytes_per_sep_group && arglen > 0) {
        /* How many sep characters we'll be inserting. */
        resultlen = (arglen - 1) / abs_bytes_per_sep;
    }
    /* Bounds checking for our Py_ssize_t indices. */
    if (arglen >= PY_SSIZE_T_MAX / 2 - resultlen) {
        return PyErr_NoMemory();
    }
    resultlen += arglen * 2;

    if ((size_t)abs_bytes_per_sep >= (size_t)arglen) {
        bytes_per_sep_group = 0;
        abs_bytes_per_sep = 0;
    }

    PyObject *retval;
    Py_UCS1 *retbuf;
    if (return_bytes) {
        /* If _PyBytes_FromSize() were public we could avoid malloc+copy. */
        retval = PyBytes_FromStringAndSize(NULL, resultlen);
        if (!retval) {
            return NULL;
        }
        retbuf = (Py_UCS1 *)PyBytes_AS_STRING(retval);
    }
    else {
        retval = PyUnicode_New(resultlen, 127);
        if (!retval) {
            return NULL;
        }
        retbuf = PyUnicode_1BYTE_DATA(retval);
    }

    /* Hexlify */
    Py_ssize_t i, j;
    unsigned char c;

    if (bytes_per_sep_group == 0) {
        for (i = j = 0; i < arglen; ++i) {
            assert((j + 1) < resultlen);
            c = argbuf[i];
            retbuf[j++] = Py_hexdigits[c >> 4];
            retbuf[j++] = Py_hexdigits[c & 0x0f];
        }
        assert(j == resultlen);
    }
    else {
        /* The number of complete chunk+sep periods */
        Py_ssize_t chunks = (arglen - 1) / abs_bytes_per_sep;
        Py_ssize_t chunk;
        unsigned int k;

        if (bytes_per_sep_group < 0) {
            i = j = 0;
            for (chunk = 0; chunk < chunks; chunk++) {
                for (k = 0; k < abs_bytes_per_sep; k++) {
                    c = argbuf[i++];
                    retbuf[j++] = Py_hexdigits[c >> 4];
                    retbuf[j++] = Py_hexdigits[c & 0x0f];
                }
                retbuf[j++] = sep_char;
            }
            while (i < arglen) {
                c = argbuf[i++];
                retbuf[j++] = Py_hexdigits[c >> 4];
                retbuf[j++] = Py_hexdigits[c & 0x0f];
            }
            assert(j == resultlen);
        }
        else {
            i = arglen - 1;
            j = resultlen - 1;
            for (chunk = 0; chunk < chunks; chunk++) {
                for (k = 0; k < abs_bytes_per_sep; k++) {
                    // Crash: argbuf has been freed
                    c = argbuf[i--];
                    retbuf[j--] = Py_hexdigits[c & 0x0f];
                    retbuf[j--] = Py_hexdigits[c >> 4];
                }
                retbuf[j--] = sep_char;
            }
            while (i >= 0) {
                c = argbuf[i--];
                retbuf[j--] = Py_hexdigits[c & 0x0f];
                retbuf[j--] = Py_hexdigits[c >> 4];
            }
            assert(j == -1);
        }
    }

#ifdef Py_DEBUG
    if (!return_bytes) {
        assert(_PyUnicode_CheckConsistency(retval, 1));
    }
#endif

    return retval;
}
Sanitizer Output
==3528017==ERROR: AddressSanitizer: heap-use-after-free on address 0x51900003838f at pc 0x5d3d20ff03ec bp 0x7ffc59d34240 sp 0x7ffc59d34230
READ of size 1 at 0x51900003838f thread T0
    #0 0x5d3d20ff03eb in _Py_strhex_impl Python/pystrhex.c:122
    #1 0x5d3d20ff075d in _Py_strhex_with_sep Python/pystrhex.c:163
    #2 0x5d3d20bd3e00 in bytearray_hex_impl Objects/bytearrayobject.c:2534
    #3 0x5d3d20bd3f94 in bytearray_hex Objects/clinic/bytearrayobject.c.h:1714
    #4 0x5d3d20c17703 in method_vectorcall_FASTCALL_KEYWORDS Objects/descrobject.c:421
    #5 0x5d3d20bf7f19 in _PyObject_VectorcallTstate Include/internal/pycore_call.h:169
    #6 0x5d3d20bf800c in PyObject_Vectorcall Objects/call.c:327
    #7 0x5d3d20e7628e in _PyEval_EvalFrameDefault Python/generated_cases.c.h:1620
    #8 0x5d3d20eba08c in _PyEval_EvalFrame Include/internal/pycore_ceval.h:121
    #9 0x5d3d20eba380 in _PyEval_Vector Python/ceval.c:2001
    #10 0x5d3d20eba630 in PyEval_EvalCode Python/ceval.c:884
    #11 0x5d3d20fb183c in run_eval_code_obj Python/pythonrun.c:1365
    #12 0x5d3d20fb1a58 in run_mod Python/pythonrun.c:1459
    #13 0x5d3d20fb28af in pyrun_file Python/pythonrun.c:1293
    #14 0x5d3d20fb5555 in _PyRun_SimpleFileObject Python/pythonrun.c:521
    #15 0x5d3d20fb582b in _PyRun_AnyFileObject Python/pythonrun.c:81
    #16 0x5d3d21006a82 in pymain_run_file_obj Modules/main.c:410
    #17 0x5d3d21006ce9 in pymain_run_file Modules/main.c:429
    #18 0x5d3d210084e7 in pymain_run_python Modules/main.c:691
    #19 0x5d3d21008b77 in Py_RunMain Modules/main.c:772
    #20 0x5d3d21008d63 in pymain_main Modules/main.c:802
    #21 0x5d3d210090e8 in Py_BytesMain Modules/main.c:826
    #22 0x5d3d20a8c655 in main Programs/python.c:15
    #23 0x74a66442a1c9 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58
    #24 0x74a66442a28a in __libc_start_main_impl ../csu/libc-start.c:360
    #25 0x5d3d20a8c584 in _start (/home/jackfromeast/Desktop/entropy/targets/cpythonxx/3.15/python+0x2df584) (BuildId: f7e252f8868b92f2840a64868d70018a726f8bd2)

0x51900003838f is located 1039 bytes inside of 1049-byte region [0x519000037f80,0x519000038399)
freed by thread T0 here:
    #0 0x74a6648fc778 in realloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:85
    #1 0x5d3d20cbf3b9 in _PyMem_RawRealloc Objects/obmalloc.c:85
    #2 0x5d3d20cc15cb in _PyMem_DebugRawRealloc Objects/obmalloc.c:3010
    #3 0x5d3d20cc190a in _PyMem_DebugRealloc Objects/obmalloc.c:3108
    #4 0x5d3d20ce83ce in PyMem_Realloc Objects/obmalloc.c:1063
    #5 0x5d3d20bd0c9e in bytearray_resize_lock_held Objects/bytearrayobject.c:258
    #6 0x5d3d20bde2e6 in PyByteArray_Resize Objects/bytearrayobject.c:278
    #7 0x5d3d20be0028 in bytearray_clear_impl Objects/bytearrayobject.c:1260
    #8 0x5d3d20be0049 in bytearray_clear Objects/clinic/bytearrayobject.c.h:227
    #9 0x5d3d20c1760b in method_vectorcall_NOARGS Objects/descrobject.c:448
    #10 0x5d3d20bf7f19 in _PyObject_VectorcallTstate Include/internal/pycore_call.h:169
    #11 0x5d3d20bf800c in PyObject_Vectorcall Objects/call.c:327
    #12 0x5d3d20e7628e in _PyEval_EvalFrameDefault Python/generated_cases.c.h:1620
    #13 0x5d3d20eba08c in _PyEval_EvalFrame Include/internal/pycore_ceval.h:121
    #14 0x5d3d20eba380 in _PyEval_Vector Python/ceval.c:2001
    #15 0x5d3d20bf7a52 in _PyFunction_Vectorcall Objects/call.c:413
    #16 0x5d3d20d0a652 in _PyObject_VectorcallTstate Include/internal/pycore_call.h:169
    #17 0x5d3d20d0a767 in vectorcall_unbound Objects/typeobject.c:3033
    #18 0x5d3d20d2ba5d in vectorcall_method Objects/typeobject.c:3104
    #19 0x5d3d20d2c302 in slot_sq_length Objects/typeobject.c:10279
    #20 0x5d3d20bc889f in PyObject_Size Objects/abstract.c:66
    #21 0x5d3d20fefc16 in _Py_strhex_impl Python/pystrhex.c:15
    #22 0x5d3d20ff075d in _Py_strhex_with_sep Python/pystrhex.c:163
    #23 0x5d3d20bd3e00 in bytearray_hex_impl Objects/bytearrayobject.c:2534
    #24 0x5d3d20bd3f94 in bytearray_hex Objects/clinic/bytearrayobject.c.h:1714
    #25 0x5d3d20c17703 in method_vectorcall_FASTCALL_KEYWORDS Objects/descrobject.c:421
    #26 0x5d3d20bf7f19 in _PyObject_VectorcallTstate Include/internal/pycore_call.h:169
    #27 0x5d3d20bf800c in PyObject_Vectorcall Objects/call.c:327
    #28 0x5d3d20e7628e in _PyEval_EvalFrameDefault Python/generated_cases.c.h:1620
    #29 0x5d3d20eba08c in _PyEval_EvalFrame Include/internal/pycore_ceval.h:121

previously allocated by thread T0 here:
    #0 0x74a6648fd9c7 in malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69
    #1 0x5d3d20cbf36b in _PyMem_RawMalloc Objects/obmalloc.c:63
    #2 0x5d3d20cbe73c in _PyMem_DebugRawAlloc Objects/obmalloc.c:2887
    #3 0x5d3d20cc1736 in _PyMem_DebugRawRealloc Objects/obmalloc.c:2963
    #4 0x5d3d20cc190a in _PyMem_DebugRealloc Objects/obmalloc.c:3108
    #5 0x5d3d20ce83ce in PyMem_Realloc Objects/obmalloc.c:1063
    #6 0x5d3d20bd0c9e in bytearray_resize_lock_held Objects/bytearrayobject.c:258
    #7 0x5d3d20bde2e6 in PyByteArray_Resize Objects/bytearrayobject.c:278
    #8 0x5d3d20bde8ea in bytearray___init___impl Objects/bytearrayobject.c:978
    #9 0x5d3d20bdf363 in bytearray___init__ Objects/clinic/bytearrayobject.c.h:102
    #10 0x5d3d20d1e4a7 in type_call Objects/typeobject.c:2460
    #11 0x5d3d20bf7d0b in _PyObject_MakeTpCall Objects/call.c:242
    #12 0x5d3d20bf7fb3 in _PyObject_VectorcallTstate Include/internal/pycore_call.h:167
    #13 0x5d3d20bf800c in PyObject_Vectorcall Objects/call.c:327
    #14 0x5d3d20e7628e in _PyEval_EvalFrameDefault Python/generated_cases.c.h:1620
    #15 0x5d3d20eba08c in _PyEval_EvalFrame Include/internal/pycore_ceval.h:121
    #16 0x5d3d20eba380 in _PyEval_Vector Python/ceval.c:2001
    #17 0x5d3d20eba630 in PyEval_EvalCode Python/ceval.c:884
    #18 0x5d3d20fb183c in run_eval_code_obj Python/pythonrun.c:1365
    #19 0x5d3d20fb1a58 in run_mod Python/pythonrun.c:1459
    #20 0x5d3d20fb28af in pyrun_file Python/pythonrun.c:1293
    #21 0x5d3d20fb5555 in _PyRun_SimpleFileObject Python/pythonrun.c:521
    #22 0x5d3d20fb582b in _PyRun_AnyFileObject Python/pythonrun.c:81
    #23 0x5d3d21006a82 in pymain_run_file_obj Modules/main.c:410
    #24 0x5d3d21006ce9 in pymain_run_file Modules/main.c:429
    #25 0x5d3d210084e7 in pymain_run_python Modules/main.c:691
    #26 0x5d3d21008b77 in Py_RunMain Modules/main.c:772
    #27 0x5d3d21008d63 in pymain_main Modules/main.c:802
    #28 0x5d3d210090e8 in Py_BytesMain Modules/main.c:826
    #29 0x5d3d20a8c655 in main Programs/python.c:15

SUMMARY: AddressSanitizer: heap-use-after-free Python/pystrhex.c:122 in _Py_strhex_impl
Shadow bytes around the buggy address:
  0x519000038100: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
  0x519000038180: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
  0x519000038200: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
  0x519000038280: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
  0x519000038300: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
=>0x519000038380: fd[fd]fd fd fa fa fa fa fa fa fa fa fa fa fa fa
  0x519000038400: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x519000038480: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x519000038500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x519000038580: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x519000038600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
Shadow byte legend (one shadow byte represents 8 application bytes):
  Addressable:           00
  Partially addressable: 01 02 03 04 05 06 07 
  Heap left redzone:       fa
  Freed heap region:       fd
  Stack left redzone:      f1
  Stack mid redzone:       f2
  Stack right redzone:     f3
  Stack after return:      f5
  Stack use after scope:   f8
  Global redzone:          f9
  Global init order:       f6
  Poisoned by user:        f7
  Container overflow:      fc
  Array cookie:            ac
  Intra object redzone:    bb
  ASan internal:           fe
  Left alloca redzone:     ca
  Right alloca redzone:    cb
==3528017==ABORTING

Linked PRs

Metadata

Metadata

Assignees

Labels

interpreter-core(Objects, Python, Grammar, and Parser dirs)type-crashA hard crash of the interpreter, possibly with a core dump

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions