# Patch: reject invalid UTF-8 in DAG-CBOR text strings.
#
# - src/lib.rs: string_new_bound() previously passed the raw bytes straight to
#   ffi::PyUnicode_FromStringAndSize. It now checks them with
#   std::str::from_utf8 first and returns a Result; on failure the error text
#   starts with "Invalid UTF-8 string". Both call sites in
#   decode_dag_cbor_to_pyobject() (the TextString arm and the `key_py`
#   construction) propagate the error with `?`.
# - pytests/test_dag_cbor.py: new test decodes hex 62c328 (CBOR text string of
#   length 2 holding the bytes c3 28, which are not valid UTF-8) and expects a
#   ValueError whose message contains 'Invalid UTF-8 string'.
#
# NOTE(review): the return type is written here as Result<Bound<'py, PyString>>.
#   The copy this was rebuilt from showed only `Result>`; the type argument is
#   inferred from the removed signature and the new Ok(...) wrapper -- confirm.
# NOTE(review): line breaks, indentation and blank context lines were
#   reconstructed from a whitespace-collapsed copy using the hunk counts;
#   verify against the repository before applying this patch.
# NOTE(review): the Python hunks appear to add an extra blank line before the
#   new test, two blank lines inside its body, and a trailing blank line at end
#   of file (flake8 E303 / W391) -- consider trimming in the source commit.
# NOTE(review): Bound::from_owned_ptr is still used on the pointer returned by
#   PyUnicode_FromStringAndSize; presumably a null return (e.g. allocation
#   failure) would panic rather than surface as an error -- confirm whether
#   from_owned_ptr_or_err is preferable now that the function returns Result.
diff --git a/pytests/test_dag_cbor.py b/pytests/test_dag_cbor.py
index b5d1dd1..9157d29 100644
--- a/pytests/test_dag_cbor.py
+++ b/pytests/test_dag_cbor.py
@@ -137,6 +137,15 @@ def test_recursion_limit_exceed_on_nested_maps() -> None:
     assert 'in DAG-CBOR decoding' in str(exc_info.value)
 
 
+
+def test_dag_cbor_decode_invalid_utf8() -> None:
+    with pytest.raises(ValueError) as exc_info:
+        libipld.decode_dag_cbor(bytes.fromhex('62c328'))
+
+
+    assert 'Invalid UTF-8 string' in str(exc_info.value)
+
+
 def test_dab_cbor_decode_map_int_key() -> None:
     dag_cbor = bytes.fromhex('a10000')
     with pytest.raises(ValueError) as exc_info:
@@ -151,3 +160,4 @@ def test_dab_cbor_encode_map_int_key() -> None:
         libipld.encode_dag_cbor(obj)
 
     assert 'Map keys must be strings' in str(exc_info.value)
+
diff --git a/src/lib.rs b/src/lib.rs
index 483c906..0e2e511 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -102,11 +102,13 @@ fn get_bytes_from_py_any<'py>(obj: &'py Bound<'py, PyAny>) -> PyResult<&'py [u8]
     }
 }
 
-fn string_new_bound<'py>(py: Python<'py>, s: &[u8]) -> Bound<'py, PyString> {
+fn string_new_bound<'py>(py: Python<'py>, s: &[u8]) -> Result<Bound<'py, PyString>> {
+    std::str::from_utf8(s).map_err(|e| anyhow!("Invalid UTF-8 string: {}", e))?;
+
     let ptr = s.as_ptr() as *const c_char;
     let len = s.len() as ffi::Py_ssize_t;
     unsafe {
-        Bound::from_owned_ptr(py, ffi::PyUnicode_FromStringAndSize(ptr, len)).downcast_into_unchecked()
+        Ok(Bound::from_owned_ptr(py, ffi::PyUnicode_FromStringAndSize(ptr, len)).downcast_into_unchecked())
     }
 }
 
@@ -135,7 +137,7 @@ fn decode_dag_cbor_to_pyobject(
         }
         MajorKind::TextString => {
             let len = decode::read_uint(r, major)?;
-            string_new_bound(py, &decode::read_bytes(r, len)?).into_pyobject(py)?.into()
+            string_new_bound(py, &decode::read_bytes(r, len)?)?.into_pyobject(py)?.into()
         }
         MajorKind::Array => {
             let len: ffi::Py_ssize_t = decode_len(decode::read_uint(r, major)?)?.try_into()?;
@@ -173,7 +175,7 @@ fn decode_dag_cbor_to_pyobject(
                     }
                 }
 
-                let key_py = string_new_bound(py, key.as_slice()).into_pyobject(py)?;
+                let key_py = string_new_bound(py, key.as_slice())?.into_pyobject(py)?;
                 prev_key = Some(key);
                 let value_py = decode_dag_cbor_to_pyobject(py, r, depth + 1)?;
 