diff --git a/Cargo.toml b/Cargo.toml index 01e1dbb..998a1d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "libipld" -version = "3.3.1" +version = "3.3.2" edition = "2021" license = "MIT" description = "Python binding to the Rust IPLD library" diff --git a/pytests/test_dag_cbor.py b/pytests/test_dag_cbor.py index d2b5046..5c2bff1 100644 --- a/pytests/test_dag_cbor.py +++ b/pytests/test_dag_cbor.py @@ -267,3 +267,13 @@ def test_encode_tag_negative_bignum() -> None: libipld.encode_dag_cbor(bignum) assert 'number out of range' in str(exc_info.value).lower() + + +def test_roundtrip_valid_cid_with_short_tag() -> None: + encoded_hex = 'd82a582500015512205891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03' + encoded_bytes = bytes.fromhex(encoded_hex) + + decoded = libipld.decode_dag_cbor(encoded_bytes) + encoded = libipld.encode_dag_cbor(decoded) + + assert encoded == encoded_bytes diff --git a/src/lib.rs b/src/lib.rs index 8b19c16..0dfbff2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -64,6 +64,18 @@ impl<'de> dec::Read<'de> for SliceReader<'de> { } } +struct PrefixedCidBytes<'a>(&'a [u8]); + +impl<'a> Encode for PrefixedCidBytes<'a> { + fn encode(&self, w: &mut W) -> Result<(), enc::Error> { + // length prefix for bytes: 1 (leading 0) + payload + types::Bytes::bounded(1 + self.0.len(), w)?; + w.push(&[0x00])?; + w.push(self.0)?; + Ok(()) + } +} + fn cid_hash_to_pydict<'py>(py: Python<'py>, cid: &Cid) -> PyResult> { let hash = cid.hash(); let dict_obj = PyDict::new(py); @@ -251,14 +263,19 @@ where let cid = >::decode(r)?.0; - if cid.len() <= 1 { - return Err(anyhow!("CID is empty or too short")); - } else if Cid::try_from(&cid[1..]).is_err() { - // Parse the CID for validation. They have a zero byte at the front, strip it off. + // we expect CIDs to have a leading zero byte + if cid.len() <= 1 || cid[0] != 0 { + return Err(anyhow!("Invalid CID")); + } + + let cid_without_prefix = &cid[1..]; + if Cid::try_from(cid_without_prefix).is_err() { return Err(anyhow!("Invalid CID")); } - PyBytes::new(py, cid).into_pyobject(py)?.into() + PyBytes::new(py, cid_without_prefix) + .into_pyobject(py)? + .into() } major::SIMPLE => match byte { // FIXME(MarshalX): should be more clear for bool? @@ -384,11 +401,13 @@ where Ok(()) } else if let Ok(b) = obj.cast::() { // FIXME (MarshalX): it's not efficient to try to parse it as CID - let cid = Cid::try_from(b.as_bytes()); + let bytes = b.as_bytes(); + let cid = Cid::try_from(bytes); if cid.is_ok() { - types::Tag(42, b.as_bytes()).encode(w)?; + // by providing custom encoding we avoid extra allocation + types::Tag(42, PrefixedCidBytes(bytes)).encode(w)?; } else { - types::Bytes(b.as_bytes()).encode(w)?; + types::Bytes(bytes).encode(w)?; } Ok(())