From 1510913f5a0b403b69c9dc76ac8b32da41a30ae8 Mon Sep 17 00:00:00 2001 From: Anders Bogsnes Date: Fri, 18 Jul 2025 17:21:49 +0200 Subject: [PATCH 1/2] Add import check for optional dependency on pyiceberg_core Added a NotInstalledException check when pyiceberg_core is imported for pyarrow_transforms This will raise a helpful error message when endusers try to use methods that depend on pyiceberg_core but haven't installed the optional dependency --- pyiceberg/transforms.py | 34 +++++++++++++++++++++++----------- tests/test_transforms.py | 14 ++++++++++++++ 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/pyiceberg/transforms.py b/pyiceberg/transforms.py index 4c461e9ed7..89851c03ef 100644 --- a/pyiceberg/transforms.py +++ b/pyiceberg/transforms.py @@ -17,7 +17,9 @@ import base64 import datetime as py_datetime +import importlib import struct +import types from abc import ABC, abstractmethod from enum import IntEnum from functools import singledispatch @@ -28,6 +30,7 @@ import mmh3 from pydantic import Field, PositiveInt, PrivateAttr +from pyiceberg.exceptions import NotInstalledError from pyiceberg.expressions import ( BoundEqualTo, BoundGreaterThan, @@ -106,6 +109,17 @@ TRUNCATE_PARSER = ParseNumberFromBrackets(TRUNCATE) +def _try_import(module_name: str, extras_name: str | None = None) -> types.ModuleType: + try: + return importlib.import_module(module_name) + except ImportError: + if extras_name: + msg = f"{module_name} needs to be installed. `pip install pyiceberg[{extras_name}]`" + else: + msg = f"{module_name} needs to be installed." + raise NotInstalledError(msg) from None + + def _transform_literal(func: Callable[[L], L], lit: Literal[L]) -> Literal[L]: """Small helper to upwrap the value from the literal, and wrap it again.""" return literal(func(lit.value)) @@ -382,8 +396,7 @@ def __repr__(self) -> str: return f"BucketTransform(num_buckets={self._num_buckets})" def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": - from pyiceberg_core import transform as pyiceberg_core_transform - + pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg_core").transform return _pyiceberg_transform_wrapper(pyiceberg_core_transform.bucket, self._num_buckets) @property @@ -509,9 +522,8 @@ def __repr__(self) -> str: return "YearTransform()" def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": - import pyarrow as pa - from pyiceberg_core import transform as pyiceberg_core_transform - + pa = _try_import("pyarrow") + pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg_core").transform return _pyiceberg_transform_wrapper(pyiceberg_core_transform.year, expected_type=pa.int32()) @@ -570,8 +582,8 @@ def __repr__(self) -> str: return "MonthTransform()" def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": - import pyarrow as pa - from pyiceberg_core import transform as pyiceberg_core_transform + pa = _try_import("pyarrow") + pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg_core").transform return _pyiceberg_transform_wrapper(pyiceberg_core_transform.month, expected_type=pa.int32()) @@ -639,8 +651,8 @@ def __repr__(self) -> str: return "DayTransform()" def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": - import pyarrow as pa - from pyiceberg_core import transform as pyiceberg_core_transform + pa = _try_import("pyarrow") + pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg_core").transform return _pyiceberg_transform_wrapper(pyiceberg_core_transform.day, expected_type=pa.int32()) @@ -692,7 +704,7 @@ def __repr__(self) -> str: return "HourTransform()" def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": - from pyiceberg_core import transform as pyiceberg_core_transform + pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg_core").transform return _pyiceberg_transform_wrapper(pyiceberg_core_transform.hour) @@ -915,7 +927,7 @@ def __repr__(self) -> str: return f"TruncateTransform(width={self._width})" def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": - from pyiceberg_core import transform as pyiceberg_core_transform + pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg_core").transform return _pyiceberg_transform_wrapper(pyiceberg_core_transform.truncate, self._width) diff --git a/tests/test_transforms.py b/tests/test_transforms.py index f8d3ac9d10..7a7d4a6d8e 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -30,7 +30,9 @@ RootModel, WithJsonSchema, ) +from pytest_mock import MockFixture +from pyiceberg.exceptions import NotInstalledError from pyiceberg.expressions import ( AlwaysFalse, BooleanExpression, @@ -1668,3 +1670,15 @@ def test_truncate_pyarrow_transforms( ) -> None: transform: Transform[Any, Any] = TruncateTransform(width=width) assert expected == transform.pyarrow_transform(source_type)(input_arr) + + +@pytest.mark.parametrize( + "transform", [BucketTransform(num_buckets=5), TruncateTransform(width=5), YearTransform(), MonthTransform(), DayTransform()] +) +def test_calling_pyarrow_transform_without_pyiceberg_core_installed_correctly_raises_not_imported_error( + transform, mocker: MockFixture +) -> None: + mocker.patch.dict("sys.modules", {"pyiceberg_core": None}) + + with pytest.raises(NotInstalledError): + transform.pyarrow_transform(StringType()) From 2b82d9f4308ecebc3983261e481db94c2aa8dec9 Mon Sep 17 00:00:00 2001 From: Anders Bogsnes Date: Fri, 18 Jul 2025 23:25:29 +0200 Subject: [PATCH 2/2] Implemented review comments --- pyiceberg/transforms.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pyiceberg/transforms.py b/pyiceberg/transforms.py index 89851c03ef..3f5a8d8998 100644 --- a/pyiceberg/transforms.py +++ b/pyiceberg/transforms.py @@ -109,12 +109,12 @@ TRUNCATE_PARSER = ParseNumberFromBrackets(TRUNCATE) -def _try_import(module_name: str, extras_name: str | None = None) -> types.ModuleType: +def _try_import(module_name: str, extras_name: Optional[str] = None) -> types.ModuleType: try: return importlib.import_module(module_name) except ImportError: if extras_name: - msg = f"{module_name} needs to be installed. `pip install pyiceberg[{extras_name}]`" + msg = f'{module_name} needs to be installed. pip install "pyiceberg[{extras_name}]"' else: msg = f"{module_name} needs to be installed." raise NotInstalledError(msg) from None @@ -396,7 +396,7 @@ def __repr__(self) -> str: return f"BucketTransform(num_buckets={self._num_buckets})" def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": - pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg_core").transform + pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg-core").transform return _pyiceberg_transform_wrapper(pyiceberg_core_transform.bucket, self._num_buckets) @property @@ -523,7 +523,7 @@ def __repr__(self) -> str: def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": pa = _try_import("pyarrow") - pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg_core").transform + pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg-core").transform return _pyiceberg_transform_wrapper(pyiceberg_core_transform.year, expected_type=pa.int32()) @@ -583,7 +583,7 @@ def __repr__(self) -> str: def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": pa = _try_import("pyarrow") - pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg_core").transform + pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg-core").transform return _pyiceberg_transform_wrapper(pyiceberg_core_transform.month, expected_type=pa.int32()) @@ -651,8 +651,8 @@ def __repr__(self) -> str: return "DayTransform()" def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": - pa = _try_import("pyarrow") - pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg_core").transform + pa = _try_import("pyarrow", extras_name="pyarrow") + pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg-core").transform return _pyiceberg_transform_wrapper(pyiceberg_core_transform.day, expected_type=pa.int32()) @@ -704,7 +704,7 @@ def __repr__(self) -> str: return "HourTransform()" def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": - pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg_core").transform + pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg-core").transform return _pyiceberg_transform_wrapper(pyiceberg_core_transform.hour) @@ -927,7 +927,7 @@ def __repr__(self) -> str: return f"TruncateTransform(width={self._width})" def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": - pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg_core").transform + pyiceberg_core_transform = _try_import("pyiceberg_core", extras_name="pyiceberg-core").transform return _pyiceberg_transform_wrapper(pyiceberg_core_transform.truncate, self._width)