From 469f3c7b5c0e344265cf989615f1afb569820391 Mon Sep 17 00:00:00 2001 From: "Ilya (Marshal)" Date: Tue, 17 Jun 2025 22:50:44 +0200 Subject: [PATCH] Fix panic on non UTF-8 string --- pytests/test_dag_cbor.py | 8 ++++++++ src/lib.rs | 10 ++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/pytests/test_dag_cbor.py b/pytests/test_dag_cbor.py index f099e3f..44dfcf7 100644 --- a/pytests/test_dag_cbor.py +++ b/pytests/test_dag_cbor.py @@ -135,3 +135,11 @@ def test_recursion_limit_exceed_on_nested_maps() -> None: libipld.decode_dag_cbor(dag_cbor) assert 'in DAG-CBOR decoding' in str(exc_info.value) + + +def test_dag_cbor_decode_invalid_utf8() -> None: + with pytest.raises(ValueError) as exc_info: + libipld.decode_dag_cbor(bytes.fromhex('62c328')) + + + assert 'Invalid UTF-8 string' in str(exc_info.value) diff --git a/src/lib.rs b/src/lib.rs index 290eb2d..de050ad 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -96,11 +96,13 @@ fn get_bytes_from_py_any<'py>(obj: &'py Bound<'py, PyAny>) -> PyResult<&'py [u8] } } -fn string_new_bound<'py>(py: Python<'py>, s: &[u8]) -> Bound<'py, PyString> { +fn string_new_bound<'py>(py: Python<'py>, s: &[u8]) -> Result<Bound<'py, PyString>> { + std::str::from_utf8(s).map_err(|e| anyhow!("Invalid UTF-8 string: {}", e))?; + let ptr = s.as_ptr() as *const c_char; let len = s.len() as ffi::Py_ssize_t; unsafe { - Bound::from_owned_ptr(py, ffi::PyUnicode_FromStringAndSize(ptr, len)).downcast_into_unchecked() + Ok(Bound::from_owned_ptr(py, ffi::PyUnicode_FromStringAndSize(ptr, len)).downcast_into_unchecked()) } } @@ -129,7 +131,7 @@ fn decode_dag_cbor_to_pyobject( } MajorKind::TextString => { let len = decode::read_uint(r, major)?; - string_new_bound(py, &decode::read_bytes(r, len)?).into_pyobject(py)?.into() + string_new_bound(py, &decode::read_bytes(r, len)?)?.into_pyobject(py)?.into() } MajorKind::Array => { let len: ffi::Py_ssize_t = decode_len(decode::read_uint(r, major)?)?.try_into()?; @@ -167,7 +169,7 @@ fn decode_dag_cbor_to_pyobject( } } - 
let key_py = string_new_bound(py, key.as_slice()).into_pyobject(py)?; + let key_py = string_new_bound(py, key.as_slice())?.into_pyobject(py)?; prev_key = Some(key); let value_py = decode_dag_cbor_to_pyobject(py, r, depth + 1)?;