diff --git a/vortex-error/Cargo.toml b/vortex-error/Cargo.toml index 859183b1bdd..88d9850bfbc 100644 --- a/vortex-error/Cargo.toml +++ b/vortex-error/Cargo.toml @@ -18,7 +18,6 @@ all-features = true [features] flatbuffers = ["dep:flatbuffers"] -python = ["dep:pyo3"] serde = ["dep:serde_json"] [dependencies] diff --git a/vortex-error/src/lib.rs b/vortex-error/src/lib.rs index 85c83782416..51d0264d13b 100644 --- a/vortex-error/src/lib.rs +++ b/vortex-error/src/lib.rs @@ -6,9 +6,6 @@ //! This crate defines error & result types for Vortex. //! It also contains a variety of useful macros for error handling. -#[cfg(feature = "python")] -pub mod python; - use std::backtrace::Backtrace; use std::borrow::Cow; use std::convert::Infallible; diff --git a/vortex-error/src/python.rs b/vortex-error/src/python.rs deleted file mode 100644 index 6d3dad0af5c..00000000000 --- a/vortex-error/src/python.rs +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -//! Python bindings for Vortex errors. - -use std::backtrace::Backtrace; - -use pyo3::PyErr; -#[cfg(feature = "python")] -use pyo3::exceptions::PyValueError; - -use crate::VortexError; - -impl From for PyErr { - fn from(value: VortexError) -> Self { - PyValueError::new_err(value.to_string()) - } -} - -impl From for VortexError { - fn from(value: PyErr) -> Self { - VortexError::InvalidArgument(value.to_string().into(), Box::new(Backtrace::capture())) - } -} diff --git a/vortex-python/Cargo.toml b/vortex-python/Cargo.toml index 5f9a85d0835..47986dc7fb1 100644 --- a/vortex-python/Cargo.toml +++ b/vortex-python/Cargo.toml @@ -43,7 +43,7 @@ tokio = { workspace = true, features = ["fs", "rt-multi-thread"] } # This feature makes the underlying tracing logs to be emitted as `log` events tracing = { workspace = true, features = ["std", "log"] } url = { workspace = true } -vortex = { workspace = true, features = ["object_store", "python", "tokio"] } +vortex = { workspace = true, features = ["object_store", "tokio"] } [dev-dependencies] vortex-array = { workspace = true, features = ["_test-harness"] } diff --git a/vortex-python/src/arrays/builtins/struct_.rs b/vortex-python/src/arrays/builtins/struct_.rs index 53547a25132..0098948eb3d 100644 --- a/vortex-python/src/arrays/builtins/struct_.rs +++ b/vortex-python/src/arrays/builtins/struct_.rs @@ -12,6 +12,7 @@ use crate::arrays::PyArrayRef; use crate::arrays::native::AsArrayRef; use crate::arrays::native::EncodingSubclass; use crate::arrays::native::PyNativeArray; +use crate::error::PyVortexError; /// Concrete class for arrays with `vortex.struct` encoding. #[pyclass(name = "StructArray", module = "vortex", extends=PyNativeArray, frozen)] @@ -24,7 +25,7 @@ impl EncodingSubclass for PyStructArray { #[pymethods] impl PyStructArray { /// Returns the given field of the struct array. - pub fn field(self_: PyRef<'_, Self>, name: &str) -> PyResult { + pub fn field(self_: PyRef<'_, Self>, name: &str) -> Result { let field = self_.as_array_ref().field_by_name(name)?.clone(); Ok(PyArrayRef::from(field)) } diff --git a/vortex-python/src/arrays/compressed.rs b/vortex-python/src/arrays/compressed.rs index c294d7ee5c0..ba68a56b834 100644 --- a/vortex-python/src/arrays/compressed.rs +++ b/vortex-python/src/arrays/compressed.rs @@ -19,6 +19,7 @@ use crate::PyVortex; use crate::arrays::PyArrayRef; use crate::arrays::native::EncodingSubclass; use crate::arrays::native::PyNativeArray; +use crate::error::PyVortexError; /// Concrete class for arrays with `vortex.alp` encoding. #[pyclass(name = "AlpArray", module = "vortex", extends=PyNativeArray, frozen)] @@ -87,7 +88,7 @@ impl EncodingSubclass for PyZigZagArray { #[pymethods] impl PyZigZagArray { #[staticmethod] - pub fn encode(array: PyArrayRef) -> PyResult { + pub fn encode(array: PyArrayRef) -> Result { Ok(PyVortex( zigzag_encode(array.inner().clone().to_primitive())?.into_array(), )) diff --git a/vortex-python/src/arrays/from_arrow.rs b/vortex-python/src/arrays/from_arrow.rs index 0578cd9f0bc..14a57db5940 100644 --- a/vortex-python/src/arrays/from_arrow.rs +++ b/vortex-python/src/arrays/from_arrow.rs @@ -7,7 +7,6 @@ use arrow_array::make_array; use arrow_data::ArrayData as ArrowArrayData; use arrow_schema::DataType; use arrow_schema::Field; -use itertools::Itertools; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use vortex::array::ArrayRef; @@ -17,13 +16,13 @@ use vortex::array::arrow::FromArrowArray; use vortex::dtype::DType; use vortex::dtype::arrow::FromArrowType; use vortex::error::VortexError; -use vortex::error::VortexResult; use crate::arrays::PyArrayRef; use crate::arrow::FromPyArrow; +use crate::error::PyVortexError; /// Convert an Arrow object to a Vortex array. -pub(super) fn from_arrow(obj: &Borrowed<'_, '_, PyAny>) -> PyResult { +pub(super) fn from_arrow(obj: &Borrowed<'_, '_, PyAny>) -> Result { let pa = obj.py().import("pyarrow")?; let pa_array = pa.getattr("Array")?; let chunked_array = pa.getattr("ChunkedArray")?; @@ -56,15 +55,14 @@ pub(super) fn from_arrow(obj: &Borrowed<'_, '_, PyAny>) -> PyResult let dtype = DType::from_arrow(array_stream.schema()); let chunks = array_stream .into_iter() - .map(|b| b.map_err(VortexError::from)) - .map_ok(|b| ArrayRef::from_arrow(b, false)) - .collect::>>()?; + .map(|b| -> Result<_, PyVortexError> { + Ok(ArrayRef::from_arrow(b.map_err(VortexError::from)?, false)) + }) + .collect::, _>>()?; Ok(PyArrayRef::from( ChunkedArray::try_new(chunks, dtype)?.into_array(), )) } else { - Err(PyValueError::new_err( - "Cannot convert object to Vortex array", - )) + Err(PyValueError::new_err("Cannot convert object to Vortex array").into()) } } diff --git a/vortex-python/src/arrays/into_array.rs b/vortex-python/src/arrays/into_array.rs index 83e42794463..65c32bf1ceb 100644 --- a/vortex-python/src/arrays/into_array.rs +++ b/vortex-python/src/arrays/into_array.rs @@ -9,6 +9,7 @@ use pyo3::Borrowed; use pyo3::FromPyObject; use pyo3::PyAny; use pyo3::PyErr; +use pyo3::exceptions::PyRuntimeError; use pyo3::exceptions::PyTypeError; use pyo3::types::PyAnyMethods; use vortex::array::ArrayRef; @@ -17,6 +18,7 @@ use vortex::array::iter::ArrayIteratorAdapter; use vortex::array::iter::ArrayIteratorExt; use vortex::dtype::DType; use vortex::dtype::arrow::FromArrowType as _; +use vortex::error::VortexError; use vortex::error::VortexResult; use crate::PyVortex; @@ -64,9 +66,14 @@ impl<'py> FromPyObject<'_, 'py> for PyIntoArray { let vortex_iter = arrow_stream .into_iter() .map(|batch_result| -> VortexResult<_> { - Ok(ArrayRef::from_arrow(batch_result?, false)) + Ok(ArrayRef::from_arrow( + batch_result.map_err(VortexError::from)?, + false, + )) }); - let array = ArrayIteratorAdapter::new(dtype, vortex_iter).read_all()?; + let array = ArrayIteratorAdapter::new(dtype, vortex_iter) + .read_all() + .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; return Ok(PyIntoArray(PyVortex(array))); } diff --git a/vortex-python/src/arrays/mod.rs b/vortex-python/src/arrays/mod.rs index d33af824064..6a9a907a5e7 100644 --- a/vortex-python/src/arrays/mod.rs +++ b/vortex-python/src/arrays/mod.rs @@ -33,7 +33,6 @@ use vortex::dtype::DType; use vortex::dtype::Nullability; use vortex::dtype::PType; use vortex::dtype::match_each_integer_ptype; -use vortex::error::VortexError; use vortex::ipc::messages::EncoderMessage; use vortex::ipc::messages::MessageEncoder; @@ -43,6 +42,7 @@ use crate::arrays::py::PyPythonArray; use crate::arrays::py::PythonArray; use crate::arrow::ToPyArrow; use crate::dtype::PyDType; +use crate::error::PyVortexError; use crate::install_module; use crate::python_repr::PythonRepr; use crate::scalar::PyScalar; @@ -112,7 +112,7 @@ impl<'py> FromPyObject<'_, 'py> for PyArrayRef { impl<'py> IntoPyObject<'py> for PyArrayRef { type Target = PyAny; type Output = Bound<'py, PyAny>; - type Error = VortexError; + type Error = PyVortexError; fn into_pyobject(self, py: Python<'py>) -> Result { // If the ArrayRef is a PyArrayInstance, extract the Python object. @@ -214,7 +214,7 @@ impl PyArray { /// ------- /// :class:`~vortex.Array` #[staticmethod] - fn from_arrow(obj: Bound<'_, PyAny>) -> PyResult { + fn from_arrow(obj: Bound<'_, PyAny>) -> Result { from_arrow::from_arrow(&obj.as_borrowed()) } @@ -257,7 +257,10 @@ impl PyArray { /// ``` #[staticmethod] #[pyo3(signature = (range, *, dtype = None))] - fn from_range(range: Bound, dtype: Option>) -> PyResult { + fn from_range( + range: Bound, + dtype: Option>, + ) -> Result { let range = range.cast::()?; let start = range.start()?; let stop = range.stop()?; @@ -268,7 +271,8 @@ impl PyArray { let DType::Primitive(ptype, ..) = &dtype else { return Err(PyValueError::new_err( "Cannot construct non-numeric array from a range.", - )); + ) + .into()); }; (*ptype, dtype) } else { @@ -313,7 +317,9 @@ impl PyArray { /// ] /// ``` /// - fn to_arrow_array<'py>(self_: &'py Bound<'py, Self>) -> PyResult> { + fn to_arrow_array<'py>( + self_: &'py Bound<'py, Self>, + ) -> Result, PyVortexError> { // NOTE(ngates): for struct arrays, we could also return a RecordBatchStreamReader. let array = PyArrayRef::extract(self_.as_any().as_borrowed())?.into_inner(); let py = self_.py(); @@ -326,8 +332,10 @@ impl PyArray { let chunks = chunked_array .chunks() .iter() - .map(|chunk| PyResult::Ok(chunk.clone().into_arrow(&arrow_dtype)?)) - .collect::>>()?; + .map(|chunk| -> Result<_, PyVortexError> { + Ok(chunk.clone().into_arrow(&arrow_dtype)?) + }) + .collect::, _>>()?; let pa_data_type = arrow_dtype.clone().to_pyarrow(py)?; let chunks = chunks @@ -339,11 +347,11 @@ impl PyArray { PyDict::from_sequence(&PyList::new(py, vec![("type", pa_data_type)])?.into_any())?; // Combine into a chunked array - PyModule::import(py, "pyarrow")?.call_method( + Ok(PyModule::import(py, "pyarrow")?.call_method( "chunked_array", (PyList::new(py, chunks)?,), Some(&kwargs), - ) + )?) } else { Ok(array .clone() @@ -417,42 +425,42 @@ impl PyArray { } ///Rust docs are *not* copied into Python for __lt__: https://github.com/PyO3/pyo3/issues/4326 - fn __lt__(slf: Bound, other: PyArrayRef) -> PyResult { + fn __lt__(slf: Bound, other: PyArrayRef) -> Result { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&slf, &*other, Operator::Lt)?; Ok(PyArrayRef::from(inner)) } ///Rust docs are *not* copied into Python for __le__: https://github.com/PyO3/pyo3/issues/4326 - fn __le__(slf: Bound, other: PyArrayRef) -> PyResult { + fn __le__(slf: Bound, other: PyArrayRef) -> Result { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&*slf, &*other, Operator::Lte)?; Ok(PyArrayRef::from(inner)) } ///Rust docs are *not* copied into Python for __eq__: https://github.com/PyO3/pyo3/issues/4326 - fn __eq__(slf: Bound, other: PyArrayRef) -> PyResult { + fn __eq__(slf: Bound, other: PyArrayRef) -> Result { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&*slf, &*other, Operator::Eq)?; Ok(PyArrayRef::from(inner)) } ///Rust docs are *not* copied into Python for __ne__: https://github.com/PyO3/pyo3/issues/4326 - fn __ne__(slf: Bound, other: PyArrayRef) -> PyResult { + fn __ne__(slf: Bound, other: PyArrayRef) -> Result { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&*slf, &*other, Operator::NotEq)?; Ok(PyArrayRef::from(inner)) } ///Rust docs are *not* copied into Python for __ge__: https://github.com/PyO3/pyo3/issues/4326 - fn __ge__(slf: Bound, other: PyArrayRef) -> PyResult { + fn __ge__(slf: Bound, other: PyArrayRef) -> Result { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&*slf, &*other, Operator::Gte)?; Ok(PyArrayRef::from(inner)) } ///Rust docs are *not* copied into Python for __gt__: https://github.com/PyO3/pyo3/issues/4326 - fn __gt__(slf: Bound, other: PyArrayRef) -> PyResult { + fn __gt__(slf: Bound, other: PyArrayRef) -> Result { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let inner = compare(&*slf, &*other, Operator::Gt)?; Ok(PyArrayRef::from(inner)) @@ -486,7 +494,7 @@ impl PyArray { /// 5 /// ] /// ``` - fn filter(slf: Bound, mask: PyArrayRef) -> PyResult { + fn filter(slf: Bound, mask: PyArrayRef) -> Result { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); let mask = (&*mask as &dyn Array).to_bool().to_mask_fill_null_false(); let inner = vortex::compute::filter(&*slf, &mask)?; @@ -612,14 +620,15 @@ impl PyArray { /// "a" /// ] /// ``` - fn take(slf: Bound, indices: PyArrayRef) -> PyResult { + fn take(slf: Bound, indices: PyArrayRef) -> Result { let slf = PyArrayRef::extract(slf.as_any().as_borrowed())?.into_inner(); if !indices.dtype().is_int() { return Err(PyValueError::new_err(format!( "indices: expected int or uint array, but found: {}", indices.dtype().python_repr() - ))); + )) + .into()); } let inner = take(&slf, &*indices)?; @@ -669,7 +678,7 @@ impl PyArray { .to_string()) } - fn serialize(slf: &Bound, ctx: &PyArrayContext) -> PyResult>> { + fn serialize(slf: &Bound, ctx: &PyArrayContext) -> Result>, PyVortexError> { // FIXME(ngates): do not copy to vec, use buffer protocol let array = PyArrayRef::extract(slf.as_any().as_borrowed())?; Ok(array diff --git a/vortex-python/src/arrays/py/array.rs b/vortex-python/src/arrays/py/array.rs index 3d471ec52de..ab65e3fece1 100644 --- a/vortex-python/src/arrays/py/array.rs +++ b/vortex-python/src/arrays/py/array.rs @@ -11,9 +11,9 @@ use pyo3::prelude::*; use vortex::array::stats::ArrayStats; use vortex::array::vtable::ArrayId; use vortex::dtype::DType; -use vortex::error::VortexError; use crate::arrays::py::PyPythonArray; +use crate::error::PyVortexError; /// Wrapper struct encapsulating a Vortex array implemented using a Python object. /// @@ -48,7 +48,7 @@ impl<'py> FromPyObject<'_, 'py> for PythonArray { impl<'py> IntoPyObject<'py> for PythonArray { type Target = PyAny; type Output = Bound<'py, PyAny>; - type Error = VortexError; + type Error = PyVortexError; fn into_pyobject(self, py: Python<'py>) -> Result { Ok(self.object.bind(py).to_owned()) diff --git a/vortex-python/src/arrays/py/mod.rs b/vortex-python/src/arrays/py/mod.rs index de02a866c2d..0b2f88dd20b 100644 --- a/vortex-python/src/arrays/py/mod.rs +++ b/vortex-python/src/arrays/py/mod.rs @@ -12,11 +12,12 @@ use pyo3::exceptions::PyValueError; use pyo3::prelude::PyAnyMethods; pub(crate) use python::*; use vortex::array::vtable::ArrayId; -use vortex::error::VortexResult; pub(crate) use vtable::*; +use crate::error::PyVortexError; + /// Extract the array id from a Python class `id` attribute. -pub fn id_from_obj(cls: &Bound) -> VortexResult { +pub fn id_from_obj(cls: &Bound) -> Result { Ok(ArrayId::new_arc( cls.getattr("id") .map_err(|_| { diff --git a/vortex-python/src/arrays/py/python.rs b/vortex-python/src/arrays/py/python.rs index e5e5c5c1859..c8160255d66 100644 --- a/vortex-python/src/arrays/py/python.rs +++ b/vortex-python/src/arrays/py/python.rs @@ -10,6 +10,7 @@ use vortex::dtype::DType; use crate::arrays::PyArray; use crate::arrays::py::id_from_obj; use crate::dtype::PyDType; +use crate::error::PyVortexError; /// Base class for implementing a Vortex encoding in Python. /// @@ -31,7 +32,7 @@ impl PyPythonArray { cls: &Bound<'_, PyType>, len: usize, dtype: PyDType, - ) -> PyResult> { + ) -> Result, PyVortexError> { let id = id_from_obj(cls)?; Ok(PyClassInitializer::from(PyArray).add_subclass(Self { id, diff --git a/vortex-python/src/arrays/py/vtable.rs b/vortex-python/src/arrays/py/vtable.rs index 7469f244f68..db10203c4c9 100644 --- a/vortex-python/src/arrays/py/vtable.rs +++ b/vortex-python/src/arrays/py/vtable.rs @@ -65,13 +65,17 @@ impl VTable for PythonVTable { fn metadata(array: &PythonArray) -> VortexResult { Python::attach(|py| { let obj = array.object.bind(py); - if !obj.hasattr(intern!(py, "metadata"))? { + if !obj + .hasattr(intern!(py, "metadata")) + .map_err(|e| vortex_err!("{}", e))? + { // The class does not have a metadata attribute so does not support serialization. return Ok(RawMetadata(vec![])); } let bytes = obj - .call_method("__vx_metadata__", (), None)? + .call_method("__vx_metadata__", (), None) + .map_err(|e| vortex_err!("{}", e))? .cast::() .map_err(|_| vortex_err!("Expected array metadata to be Python bytes"))? .as_bytes() diff --git a/vortex-python/src/compress.rs b/vortex-python/src/compress.rs index 093fdba7512..5d2c3bd93c9 100644 --- a/vortex-python/src/compress.rs +++ b/vortex-python/src/compress.rs @@ -5,6 +5,7 @@ use pyo3::prelude::*; use vortex::compressor::BtrBlocksCompressor; use crate::arrays::PyArrayRef; +use crate::error::PyVortexError; use crate::install_module; pub(crate) fn init(py: Python, parent: &Bound) -> PyResult<()> { @@ -49,7 +50,7 @@ pub(crate) fn init(py: Python, parent: &Bound) -> PyResult<()> { /// >>> str(vx.compress(a)) /// 'vortex.alp(f64?, len=1000)' #[pyfunction] -pub fn compress(array: PyArrayRef) -> PyResult { +pub fn compress(array: PyArrayRef) -> Result { let compressed = BtrBlocksCompressor::default().compress(array.inner())?; Ok(PyArrayRef::from(compressed)) } diff --git a/vortex-python/src/dataset.rs b/vortex-python/src/dataset.rs index b959d3b5362..b8f769974b8 100644 --- a/vortex-python/src/dataset.rs +++ b/vortex-python/src/dataset.rs @@ -29,6 +29,7 @@ use crate::TOKIO_RUNTIME; use crate::arrays::PyArrayRef; use crate::arrow::IntoPyArrow; use crate::arrow::ToPyArrow; +use crate::error::PyVortexError; use crate::expr::PyExpr; use crate::install_module; use crate::object_store_urls::object_store_from_url; @@ -133,7 +134,7 @@ impl PyVortexDataset { row_filter: Option<&Bound<'py, PyExpr>>, indices: Option, row_range: Option<(u64, u64)>, - ) -> PyResult { + ) -> Result { let array = read_array_from_reader( &self.vxf, projection_from_python(columns)?, @@ -151,7 +152,7 @@ impl PyVortexDataset { row_filter: Option<&Bound<'_, PyExpr>>, split_by: Option, row_range: Option<(u64, u64)>, - ) -> PyResult> { + ) -> Result, PyVortexError> { let mut scan = self_ .vxf .scan()? @@ -167,7 +168,7 @@ impl PyVortexDataset { let reader: Box = Box::new(scan.into_record_batch_reader(schema, &*RUNTIME)?); - reader.into_pyarrow(self_.py()) + Ok(reader.into_pyarrow(self_.py())?) } /// The number of rows matching the filter. @@ -177,13 +178,15 @@ impl PyVortexDataset { row_filter: Option<&Bound<'_, PyExpr>>, split_by: Option, row_range: Option<(u64, u64)>, - ) -> PyResult { + ) -> Result { if row_filter.is_none() { let row_count = match row_range { Some(range) => range.1 - range.0, None => self_.vxf.row_count(), }; - return row_count.try_into().map_err(PyValueError::new_err); + return row_count + .try_into() + .map_err(|e| PyValueError::new_err(e).into()); } let mut scan = self_ @@ -200,15 +203,14 @@ impl PyVortexDataset { let n_rows: usize = scan .into_array_iter(&*RUNTIME)? .map_ok(|array| array.len()) - .process_results(|iter| iter.sum()) - .map_err(|err| PyValueError::new_err(format!("vortex error: {}", err)))?; + .process_results(|iter| iter.sum())?; Ok(n_rows) } /// The natural splits of this Dataset. #[pyo3(signature = (*))] - pub fn splits(&self) -> VortexResult> { + pub fn splits(&self) -> Result, PyVortexError> { Ok(self .vxf .splits()? @@ -219,6 +221,6 @@ impl PyVortexDataset { } #[pyfunction] -pub fn dataset_from_url(py: Python, url: &str) -> PyResult { +pub fn dataset_from_url(py: Python, url: &str) -> Result { Ok(py.detach(|| TOKIO_RUNTIME.block_on(PyVortexDataset::from_url(url)))?) } diff --git a/vortex-python/src/dtype/factory.rs b/vortex-python/src/dtype/factory.rs index 07200ad720a..200ac642c4a 100644 --- a/vortex-python/src/dtype/factory.rs +++ b/vortex-python/src/dtype/factory.rs @@ -21,6 +21,7 @@ use vortex::dtype::PType; use vortex::dtype::StructFields; use crate::dtype::PyDType; +use crate::error::PyVortexError; /// Construct the data type for a column containing only the null value. /// @@ -260,9 +261,9 @@ pub(super) fn dtype_decimal( precision: u8, scale: i8, nullable: bool, -) -> PyResult> { +) -> Result, PyVortexError> { let decimal_type = DType::Decimal(DecimalDType::try_new(precision, scale)?, nullable.into()); - PyDType::init(py, decimal_type) + Ok(PyDType::init(py, decimal_type)?) } /// Construct a UTF-8-encoded string data type. diff --git a/vortex-python/src/dtype/mod.rs b/vortex-python/src/dtype/mod.rs index 405d08a9280..1d3e42f63b0 100644 --- a/vortex-python/src/dtype/mod.rs +++ b/vortex-python/src/dtype/mod.rs @@ -47,6 +47,7 @@ use crate::dtype::null::PyNullDType; use crate::dtype::primitive::PyPrimitiveDType; use crate::dtype::struct_::PyStructDType; use crate::dtype::utf8::PyUtf8DType; +use crate::error::PyVortexError; use crate::install_module; use crate::python_repr::PythonRepr; @@ -153,12 +154,12 @@ impl PyDType { #[pymethods] impl PyDType { - fn to_arrow_type(&self, py: Python) -> PyResult> { - self.0.to_arrow_dtype()?.to_pyarrow(py) + fn to_arrow_type(&self, py: Python) -> Result, PyVortexError> { + Ok(self.0.to_arrow_dtype()?.to_pyarrow(py)?) } - fn to_arrow_schema(&self, py: Python) -> PyResult> { - self.0.to_arrow_schema()?.to_pyarrow(py) + fn to_arrow_schema(&self, py: Python) -> Result, PyVortexError> { + Ok(self.0.to_arrow_schema()?.to_pyarrow(py)?) } fn __str__(&self) -> String { diff --git a/vortex-python/src/error.rs b/vortex-python/src/error.rs new file mode 100644 index 00000000000..ff283472e8f --- /dev/null +++ b/vortex-python/src/error.rs @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use pyo3::CastError; +use pyo3::PyErr; +use pyo3::exceptions::PyRuntimeError; +use vortex::error::VortexError; + +/// Error type to merge [`VortexError`] and [`PyErr`]. +pub enum PyVortexError { + Py(PyErr), + Vortex(VortexError), +} + +impl From for PyVortexError { + fn from(value: PyErr) -> Self { + Self::Py(value) + } +} + +impl From for PyVortexError { + fn from(value: VortexError) -> Self { + Self::Vortex(value) + } +} + +impl<'py, 'a> From> for PyVortexError { + fn from(value: CastError<'py, 'a>) -> Self { + Self::Py(value.into()) + } +} + +impl From for PyErr { + fn from(value: PyVortexError) -> Self { + match value { + PyVortexError::Py(py) => py, + PyVortexError::Vortex(vx) => PyRuntimeError::new_err(vx.to_string()), + } + } +} diff --git a/vortex-python/src/expr/mod.rs b/vortex-python/src/expr/mod.rs index 3d74c005361..cf47f260844 100644 --- a/vortex-python/src/expr/mod.rs +++ b/vortex-python/src/expr/mod.rs @@ -22,6 +22,7 @@ use vortex::expr::not; use crate::arrays::PyArrayRef; use crate::arrays::into_array::PyIntoArray; use crate::dtype::PyDType; +use crate::error::PyVortexError; use crate::install_module; use crate::scalar::factory::scalar_helper; @@ -213,7 +214,7 @@ impl PyExpr { /// vortex.open : Open an on-disk Vortex array for scanning with an expression. /// vortex.VortexFile : An on-disk Vortex array ready to scan with an expression. /// vortex.VortexFile.scan : Scan an on-disk Vortex array with an expression. - fn evaluate(self_: PyRef<'_, Self>, array: PyIntoArray) -> PyResult { + fn evaluate(self_: PyRef<'_, Self>, array: PyIntoArray) -> Result { Ok(PyArrayRef::from(self_.evaluate(array.inner())?)) } } diff --git a/vortex-python/src/file.rs b/vortex-python/src/file.rs index 1b3f1f94eb4..3cefde9f0d0 100644 --- a/vortex-python/src/file.rs +++ b/vortex-python/src/file.rs @@ -31,6 +31,7 @@ use crate::arrays::PyArrayRef; use crate::arrow::IntoPyArrow; use crate::dataset::PyVortexDataset; use crate::dtype::PyDType; +use crate::error::PyVortexError; use crate::expr::PyExpr; use crate::install_module; use crate::iter::PyArrayIterator; @@ -49,7 +50,11 @@ pub(crate) fn init(py: Python, parent: &Bound) -> PyResult<()> { #[pyfunction] #[pyo3(signature = (path, *, without_segment_cache = false))] -pub fn open(py: Python, path: &str, without_segment_cache: bool) -> PyResult { +pub fn open( + py: Python, + path: &str, + without_segment_cache: bool, +) -> Result { let vxf = py.detach(|| { TOKIO_RUNTIME.block_on(async move { let mut options = SESSION.open_options(); @@ -89,7 +94,7 @@ impl PyVortexFile { expr: Option, indices: Option, batch_size: Option, - ) -> PyResult { + ) -> Result { let builder = slf.get().scan_builder( projection.map(|p| p.0), expr.map(|e| e.into_inner()), @@ -109,7 +114,7 @@ impl PyVortexFile { expr: Option, indices: Option, batch_size: Option, - ) -> PyResult { + ) -> Result { let builder = slf.get().scan_builder( projection.map(|p| p.0), expr.map(|e| e.into_inner()), @@ -131,7 +136,7 @@ impl PyVortexFile { projection: Option, expr: Option, batch_size: Option, - ) -> PyResult> { + ) -> Result, PyVortexError> { let vxf = slf.get().vxf.clone(); let reader = slf.py().detach(|| { @@ -149,15 +154,15 @@ impl PyVortexFile { })?; let rbr: Box = Box::new(reader); - rbr.into_pyarrow(slf.py()) + Ok(rbr.into_pyarrow(slf.py())?) } - fn to_dataset(slf: Bound) -> PyResult { + fn to_dataset(slf: Bound) -> Result { Ok(PyVortexDataset::try_new(slf.get().vxf.clone())?) } #[pyo3(signature = (*))] - pub fn splits(&self) -> VortexResult> { + pub fn splits(&self) -> Result, PyVortexError> { Ok(self .vxf .splits()? diff --git a/vortex-python/src/io.rs b/vortex-python/src/io.rs index 4b70ff366d6..4f42504cb0b 100644 --- a/vortex-python/src/io.rs +++ b/vortex-python/src/io.rs @@ -29,6 +29,7 @@ use crate::arrays::PyArray; use crate::arrays::PyArrayRef; use crate::arrow::FromPyArrow; use crate::dataset::PyVortexDataset; +use crate::error::PyVortexError; use crate::expr::PyExpr; use crate::install_module; use crate::iter::PyArrayIterator; @@ -111,7 +112,7 @@ pub fn read_url<'py>( row_filter: Option<&Bound<'py, PyExpr>>, indices: Option, row_range: Option<(u64, u64)>, -) -> PyResult { +) -> Result { let dataset = py.detach(|| TOKIO_RUNTIME.block_on(PyVortexDataset::from_url(url)))?; dataset.to_array(projection, row_filter, indices, row_range) } @@ -168,7 +169,7 @@ pub fn read_url<'py>( /// :func:`vortex.io.VortexWriteOptions` #[pyfunction] #[pyo3(signature = (iter, path))] -pub fn write(py: Python, iter: PyIntoArrayIterator, path: &str) -> PyResult<()> { +pub fn write(py: Python, iter: PyIntoArrayIterator, path: &str) -> Result<(), PyVortexError> { py.detach(|| { TOKIO_RUNTIME.block_on(async move { let file = File::create(path).await?; @@ -282,7 +283,12 @@ impl PyVortexWriteOptions { /// /// :func:`vortex.io.write` #[pyo3(signature = (iter, path))] - pub fn write_path(&self, py: Python, iter: PyIntoArrayIterator, path: &str) -> PyResult<()> { + pub fn write_path( + &self, + py: Python, + iter: PyIntoArrayIterator, + path: &str, + ) -> Result<(), PyVortexError> { py.detach(|| { TOKIO_RUNTIME.block_on(async move { let file = File::create(path).await?; diff --git a/vortex-python/src/iter/mod.rs b/vortex-python/src/iter/mod.rs index 77db4badf58..23a108114e6 100644 --- a/vortex-python/src/iter/mod.rs +++ b/vortex-python/src/iter/mod.rs @@ -29,6 +29,7 @@ use vortex::dtype::DType; use crate::arrays::PyArrayRef; use crate::arrow::IntoPyArrow; use crate::dtype::PyDType; +use crate::error::PyVortexError; use crate::install_module; use crate::iter::python::PythonArrayIterator; @@ -81,7 +82,7 @@ impl PyArrayIterator { } /// Returns the next chunk from the iterator. - fn __next__(&self, py: Python) -> PyResult> { + fn __next__(&self, py: Python) -> Result, PyVortexError> { py.detach(|| { Ok(self .iter @@ -95,7 +96,7 @@ impl PyArrayIterator { /// Read all chunks into a single :class:`vortex.Array`. If there are multiple chunks, /// this will be a :class:`vortex.ChunkedArray`, otherwise it will be a single array. - fn read_all(&self, py: Python) -> PyResult { + fn read_all(&self, py: Python) -> Result { let array = py.detach(|| { if let Some(iter) = self.iter.lock().take() { iter.read_all() @@ -110,7 +111,7 @@ impl PyArrayIterator { /// Convert the :class:`vortex.ArrayIterator` into a :class:`pyarrow.RecordBatchReader`. /// /// Note that this performs the conversion on the current thread. - fn to_arrow(slf: Bound) -> PyResult> { + fn to_arrow(slf: Bound) -> Result, PyVortexError> { let schema = Arc::new(slf.get().dtype().to_arrow_schema()?); let data_type = DataType::Struct(schema.fields().clone()); @@ -132,7 +133,7 @@ impl PyArrayIterator { schema, )); - record_batch_reader.into_pyarrow(slf.py()) + Ok(record_batch_reader.into_pyarrow(slf.py())?) } /// Create a :class:`vortex.ArrayIterator` from an iterator of :class:`vortex.Array`. diff --git a/vortex-python/src/iter/python.rs b/vortex-python/src/iter/python.rs index 194bb2c7a5b..2c946f70367 100644 --- a/vortex-python/src/iter/python.rs +++ b/vortex-python/src/iter/python.rs @@ -9,6 +9,7 @@ use vortex::array::ArrayRef; use vortex::array::iter::ArrayIterator; use vortex::dtype::DType; use vortex::error::VortexResult; +use vortex::error::vortex_err; use crate::arrays::PyArrayRef; @@ -51,7 +52,7 @@ impl Iterator for PythonArrayIterator { Ok(array) } }) - .map_err(|pyerr| pyerr.into()) + .map_err(|pyerr| vortex_err!("{}", pyerr)) }) }) } diff --git a/vortex-python/src/lib.rs b/vortex-python/src/lib.rs index 48f0f59f083..e47814be016 100644 --- a/vortex-python/src/lib.rs +++ b/vortex-python/src/lib.rs @@ -12,6 +12,7 @@ mod arrow; mod compress; mod dataset; pub(crate) mod dtype; +mod error; mod expr; mod file; mod io; diff --git a/vortex-python/src/registry.rs b/vortex-python/src/registry.rs index 3e9edebfddd..d93fb79fa86 100644 --- a/vortex-python/src/registry.rs +++ b/vortex-python/src/registry.rs @@ -10,6 +10,7 @@ use vortex::array::session::ArraySessionExt; use crate::SESSION; use crate::arrays::py::PythonVTable; use crate::arrays::py::id_from_obj; +use crate::error::PyVortexError; use crate::install_module; /// Register serde functions and classes. @@ -27,7 +28,7 @@ pub(crate) fn init(py: Python, parent: &Bound) -> PyResult<()> { /// /// It's not currently possible to register a layout encoding from Python. #[pyfunction] -pub(crate) fn register(cls: &Bound) -> PyResult<()> { +pub(crate) fn register(cls: &Bound) -> Result<(), PyVortexError> { let id = id_from_obj(cls)?; // TODO(ngates): we would need to register the Python class object in a PyVortexSession // to call back into it during deserialize operations. diff --git a/vortex-python/src/scalar/factory.rs b/vortex-python/src/scalar/factory.rs index db7176fc0de..f590a56f0c4 100644 --- a/vortex-python/src/scalar/factory.rs +++ b/vortex-python/src/scalar/factory.rs @@ -21,6 +21,7 @@ use vortex::dtype::StructFields; use vortex::scalar::Scalar; use crate::dtype::PyDType; +use crate::error::PyVortexError; use crate::scalar::PyScalar; use crate::scalar::bool; @@ -38,7 +39,10 @@ pub fn scalar<'py>( ) } -pub fn scalar_helper(value: &Bound<'_, PyAny>, dtype: Option<&DType>) -> PyResult { +pub fn scalar_helper( + value: &Bound<'_, PyAny>, + dtype: Option<&DType>, +) -> Result { let scalar = scalar_helper_inner(value, dtype)?; // If a dtype was provided, attempt to cast the scalar to that dtype. diff --git a/vortex-python/src/scalar/struct_.rs b/vortex-python/src/scalar/struct_.rs index 64924361753..0197de9d2f9 100644 --- a/vortex-python/src/scalar/struct_.rs +++ b/vortex-python/src/scalar/struct_.rs @@ -5,13 +5,13 @@ use pyo3::IntoPyObject; use pyo3::Py; use pyo3::PyAny; use pyo3::PyRef; -use pyo3::PyResult; use pyo3::pyclass; use pyo3::pymethods; use vortex::error::vortex_err; use vortex::scalar::StructScalar; use crate::PyVortex; +use crate::error::PyVortexError; use crate::scalar::AsScalarRef; use crate::scalar::PyScalar; use crate::scalar::ScalarSubclass; @@ -27,11 +27,13 @@ impl ScalarSubclass for PyStructScalar { #[pymethods] impl PyStructScalar { /// Return the child scalar with the given field name. - pub fn field(self_: PyRef<'_, Self>, name: &str) -> PyResult> { + pub fn field(self_: PyRef<'_, Self>, name: &str) -> Result, PyVortexError> { let scalar = self_.as_scalar_ref(); let child = scalar .field(name) .ok_or_else(|| vortex_err!("No field {name}"))?; - PyVortex(&child).into_pyobject(self_.py()).map(|v| v.into()) + Ok(PyVortex(&child) + .into_pyobject(self_.py()) + .map(|v| v.into())?) } } diff --git a/vortex-python/src/scan.rs b/vortex-python/src/scan.rs index 53427372da2..db302178312 100644 --- a/vortex-python/src/scan.rs +++ b/vortex-python/src/scan.rs @@ -8,6 +8,7 @@ use vortex::array::ArrayRef; use vortex::scan::RepeatedScan; use crate::RUNTIME; +use crate::error::PyVortexError; use crate::install_module; use crate::iter::PyArrayIterator; use crate::scalar::PyScalar; @@ -35,7 +36,7 @@ impl PyRepeatedScan { slf: Bound, start: Option, stop: Option, - ) -> PyResult { + ) -> Result { let row_count = slf.get().row_count; let row_range = match (start, stop) { (Some(start), Some(stop)) => Some(start..stop), @@ -49,13 +50,14 @@ impl PyRepeatedScan { ))) } - fn scalar_at(slf: Bound, index: u64) -> PyResult> { + fn scalar_at(slf: Bound, index: u64) -> Result, PyVortexError> { let row_count = slf.get().row_count; if index >= row_count { return Err(PyIndexError::new_err(format!( "Index out of bounds: {} >= {}", index, row_count - ))); + )) + .into()); } for batch in slf @@ -68,12 +70,9 @@ impl PyRepeatedScan { continue; } let scalar = array.scalar_at(0); - return PyScalar::init(slf.py(), scalar); + return Ok(PyScalar::init(slf.py(), scalar)?); } - Err(PyIndexError::new_err(format!( - "Index {} not found in the scan", - index - ))) + Err(PyIndexError::new_err(format!("Index {} not found in the scan", index)).into()) } } diff --git a/vortex-python/src/serde/mod.rs b/vortex-python/src/serde/mod.rs index 32d697e6f4b..51017a31ad0 100644 --- a/vortex-python/src/serde/mod.rs +++ b/vortex-python/src/serde/mod.rs @@ -7,6 +7,7 @@ pub(crate) mod parts; use bytes::Bytes; use pyo3::Bound; use pyo3::Python; +use pyo3::buffer::PyBuffer; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use vortex::array::session::ArraySessionExt; @@ -16,6 +17,7 @@ use vortex::ipc::messages::PollRead; use crate::SESSION; use crate::arrays::PyArrayRef; +use crate::error::PyVortexError; use crate::install_module; use crate::serde::context::PyArrayContext; use crate::serde::parts::PyArrayParts; @@ -51,17 +53,20 @@ pub(crate) fn init(py: Python, parent: &Bound) -> PyResult<()> { /// Array /// The decoded Vortex array #[pyfunction] -fn decode_ipc_array(array_bytes: Vec, dtype_bytes: Vec) -> PyResult { +fn decode_ipc_array( + array_bytes: Vec, + dtype_bytes: Vec, +) -> Result { let mut decoder = MessageDecoder::default(); let mut dtype_buf = Bytes::from(dtype_bytes); let dtype = match decoder.read_next(&mut dtype_buf)? { PollRead::Some(DecoderMessage::DType(dtype)) => dtype, PollRead::Some(_) => { - return Err(PyValueError::new_err("Expected DType message")); + return Err(PyValueError::new_err("Expected DType message").into()); } PollRead::NeedMore(_) => { - return Err(PyValueError::new_err("Incomplete DType message")); + return Err(PyValueError::new_err("Incomplete DType message").into()); } }; @@ -71,10 +76,10 @@ fn decode_ipc_array(array_bytes: Vec, dtype_bytes: Vec) -> PyResult { - return Err(PyValueError::new_err("Expected Array message")); + return Err(PyValueError::new_err("Expected Array message").into()); } PollRead::NeedMore(_) => { - return Err(PyValueError::new_err("Incomplete Array message")); + return Err(PyValueError::new_err("Incomplete Array message").into()); } }; @@ -102,9 +107,7 @@ fn decode_ipc_array_buffers<'py>( py: Python<'py>, array_buffers: Vec>, dtype_buffers: Vec>, -) -> PyResult { - use pyo3::buffer::PyBuffer; - +) -> Result { let mut decoder = MessageDecoder::default(); // Concatenate dtype buffers @@ -125,10 +128,10 @@ fn decode_ipc_array_buffers<'py>( let dtype = match decoder.read_next(&mut dtype_buf)? { PollRead::Some(DecoderMessage::DType(dtype)) => dtype, PollRead::Some(_) => { - return Err(PyValueError::new_err("Expected DType message")); + return Err(PyValueError::new_err("Expected DType message").into()); } PollRead::NeedMore(_) => { - return Err(PyValueError::new_err("Incomplete DType message")); + return Err(PyValueError::new_err("Incomplete DType message").into()); } }; @@ -151,10 +154,10 @@ fn decode_ipc_array_buffers<'py>( parts.decode(&dtype, row_count, &ctx, SESSION.arrays().registry())? } PollRead::Some(_) => { - return Err(PyValueError::new_err("Expected Array message")); + return Err(PyValueError::new_err("Expected Array message").into()); } PollRead::NeedMore(_) => { - return Err(PyValueError::new_err("Incomplete Array message")); + return Err(PyValueError::new_err("Incomplete Array message").into()); } }; diff --git a/vortex-python/src/serde/parts.rs b/vortex-python/src/serde/parts.rs index 900ec95ff02..0b62b4b14e7 100644 --- a/vortex-python/src/serde/parts.rs +++ b/vortex-python/src/serde/parts.rs @@ -6,7 +6,6 @@ use std::ops::Deref; use pyo3::Bound; use pyo3::PyAny; use pyo3::PyRef; -use pyo3::PyResult; use pyo3::Python; use pyo3::prelude::PyAnyMethods; use pyo3::pyclass; @@ -18,6 +17,7 @@ use vortex::buffer::ByteBuffer; use crate::SESSION; use crate::arrays::PyArrayRef; use crate::dtype::PyDType; +use crate::error::PyVortexError; use crate::serde::context::PyArrayContext; /// ArrayParts is a parsed representation of a serialized array. @@ -44,7 +44,7 @@ impl From for PyArrayParts { impl PyArrayParts { /// Parse a serialized array into its parts. #[staticmethod] - fn parse(data: &[u8]) -> PyResult { + fn parse(data: &[u8]) -> Result { // TODO(ngates): create a buffer from a slice of bytes? let buffer = ByteBuffer::copy_from(data); Ok(PyArrayParts(ArrayParts::try_from(buffer)?)) @@ -55,7 +55,12 @@ impl PyArrayParts { /// # Returns /// /// The decoded array. - fn decode(&self, ctx: &PyArrayContext, dtype: PyDType, len: usize) -> PyResult { + fn decode( + &self, + ctx: &PyArrayContext, + dtype: PyDType, + len: usize, + ) -> Result { Ok(PyArrayRef::from(self.0.decode( dtype.inner(), len, @@ -79,7 +84,10 @@ impl PyArrayParts { /// Return the buffers of the array, currently as :class:`pyarrow.Buffer`. // TODO(ngates): ideally we'd use the buffer protocol, but that requires the 3.11 ABI. #[getter] - fn buffers<'py>(slf: PyRef<'py, Self>, py: Python<'py>) -> PyResult>> { + fn buffers<'py>( + slf: PyRef<'py, Self>, + py: Python<'py>, + ) -> Result>, PyVortexError> { if slf.nbuffers() == 0 { return Ok(Vec::new()); } diff --git a/vortex/Cargo.toml b/vortex/Cargo.toml index 31f6daf0932..20fff1917f8 100644 --- a/vortex/Cargo.toml +++ b/vortex/Cargo.toml @@ -72,7 +72,6 @@ default = ["files", "zstd"] files = ["dep:vortex-file"] memmap2 = ["vortex-buffer/memmap2"] object_store = ["vortex-file/object_store", "vortex-io/object_store"] -python = ["vortex-error/python"] tokio = ["vortex-file/tokio"] zstd = ["dep:vortex-zstd", "vortex-file/zstd", "vortex-layout/zstd"] pretty = ["vortex-array/table-display"]