From 2c52bf9274facf04c9b21905dd3bb364789fcced Mon Sep 17 00:00:00 2001 From: JE-Chen <33644111+JE-Chen@users.noreply.github.com> Date: Wed, 25 Sep 2024 20:09:34 +0800 Subject: [PATCH 1/6] Add ns TimeType Add ns TimeType - Remove ns warning - Add ns downcast --- pyiceberg/io/pyarrow.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 999813d0c2..a9b702fc26 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -1068,20 +1068,13 @@ def primitive(self, primitive: pa.DataType) -> PrimitiveType: return StringType() elif pa.types.is_date32(primitive): return DateType() - elif isinstance(primitive, pa.Time64Type) and primitive.unit == "us": + elif isinstance(primitive, pa.Time64Type) and primitive.unit in ["us", "ns"]: return TimeType() elif pa.types.is_timestamp(primitive): primitive = cast(pa.TimestampType, primitive) - if primitive.unit in ("s", "ms", "us"): + if primitive.unit in ("s", "ms", "us", "ns"): # Supported types, will be upcast automatically to 'us' pass - elif primitive.unit == "ns": - if self._downcast_ns_timestamp_to_us: - logger.warning("Iceberg does not yet support 'ns' timestamp precision. Downcasting to 'us'.") - else: - raise TypeError( - "Iceberg does not yet support 'ns' timestamp precision. Use 'downcast-ns-timestamp-to-us-on-write' configuration property to automatically downcast 'ns' to 'us' on write." 
- ) else: raise TypeError(f"Unsupported precision for timestamp type: {primitive.unit}") From b3e83cf8ee693f4b1740969dce434f8e533524db Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Wed, 25 Sep 2024 21:17:03 +0800 Subject: [PATCH 2/6] Remove ns raise TypeError test, because it will no longer raise a TypeError Remove ns raise TypeError test, because it will no longer raise a TypeError E Failed: DID NOT RAISE --- tests/io/test_pyarrow_visitor.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py index 9e6df720c6..28ec065bd7 100644 --- a/tests/io/test_pyarrow_visitor.py +++ b/tests/io/test_pyarrow_visitor.py @@ -187,13 +187,6 @@ def test_pyarrow_timestamp_to_iceberg(precision: str) -> None: def test_pyarrow_timestamp_invalid_units() -> None: pyarrow_type = pa.timestamp(unit="ns") - with pytest.raises( - TypeError, - match=re.escape( - "Iceberg does not yet support 'ns' timestamp precision. Use 'downcast-ns-timestamp-to-us-on-write' configuration property to automatically downcast 'ns' to 'us' on write." - ), - ): - visit_pyarrow(pyarrow_type, _ConvertToIceberg()) def test_pyarrow_timestamp_tz_to_iceberg() -> None: @@ -209,19 +202,10 @@ def test_pyarrow_timestamp_tz_to_iceberg() -> None: def test_pyarrow_timestamp_tz_invalid_units() -> None: pyarrow_type = pa.timestamp(unit="ns", tz="UTC") - with pytest.raises( - TypeError, - match=re.escape( - "Iceberg does not yet support 'ns' timestamp precision. Use 'downcast-ns-timestamp-to-us-on-write' configuration property to automatically downcast 'ns' to 'us' on write." 
- ), - ): - visit_pyarrow(pyarrow_type, _ConvertToIceberg()) def test_pyarrow_timestamp_tz_invalid_tz() -> None: pyarrow_type = pa.timestamp(unit="us", tz="US/Pacific") - with pytest.raises(TypeError, match=re.escape("Unsupported type: timestamp[us, tz=US/Pacific]")): - visit_pyarrow(pyarrow_type, _ConvertToIceberg()) def test_pyarrow_string_to_iceberg() -> None: From ee3cd59154c857aba318e168be2a1d2eb10ab1d3 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Wed, 25 Sep 2024 23:41:45 +0800 Subject: [PATCH 3/6] Revert "Remove ns raise TypeError test, because it will no longer raise a TypeError" This reverts commit b3e83cf8ee693f4b1740969dce434f8e533524db. --- tests/io/test_pyarrow_visitor.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py index 28ec065bd7..9e6df720c6 100644 --- a/tests/io/test_pyarrow_visitor.py +++ b/tests/io/test_pyarrow_visitor.py @@ -187,6 +187,13 @@ def test_pyarrow_timestamp_to_iceberg(precision: str) -> None: def test_pyarrow_timestamp_invalid_units() -> None: pyarrow_type = pa.timestamp(unit="ns") + with pytest.raises( + TypeError, + match=re.escape( + "Iceberg does not yet support 'ns' timestamp precision. Use 'downcast-ns-timestamp-to-us-on-write' configuration property to automatically downcast 'ns' to 'us' on write." + ), + ): + visit_pyarrow(pyarrow_type, _ConvertToIceberg()) def test_pyarrow_timestamp_tz_to_iceberg() -> None: @@ -202,10 +209,19 @@ def test_pyarrow_timestamp_tz_to_iceberg() -> None: def test_pyarrow_timestamp_tz_invalid_units() -> None: pyarrow_type = pa.timestamp(unit="ns", tz="UTC") + with pytest.raises( + TypeError, + match=re.escape( + "Iceberg does not yet support 'ns' timestamp precision. Use 'downcast-ns-timestamp-to-us-on-write' configuration property to automatically downcast 'ns' to 'us' on write." 
+ ), + ): + visit_pyarrow(pyarrow_type, _ConvertToIceberg()) def test_pyarrow_timestamp_tz_invalid_tz() -> None: pyarrow_type = pa.timestamp(unit="us", tz="US/Pacific") + with pytest.raises(TypeError, match=re.escape("Unsupported type: timestamp[us, tz=US/Pacific]")): + visit_pyarrow(pyarrow_type, _ConvertToIceberg()) def test_pyarrow_string_to_iceberg() -> None: From a031c0dc5a65527e131737fa2d309b38bfac4ad1 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Fri, 27 Sep 2024 02:13:04 +0800 Subject: [PATCH 4/6] Add downcast condition on Time64Type Add downcast condition on Time64Type --- pyiceberg/io/pyarrow.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index a9b702fc26..7781b96d0f 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -1068,13 +1068,29 @@ def primitive(self, primitive: pa.DataType) -> PrimitiveType: return StringType() elif pa.types.is_date32(primitive): return DateType() - elif isinstance(primitive, pa.Time64Type) and primitive.unit in ["us", "ns"]: - return TimeType() + elif isinstance(primitive, pa.Time64Type): + if primitive.unit =="us": + return TimeType() + elif primitive.unit == "ns": + if self._downcast_ns_timestamp_to_us: + logger.warning("Iceberg does not yet support 'ns' timestamp precision. Downcasting to 'us'.") + else: + raise TypeError( + "Iceberg does not yet support 'ns' timestamp precision. Use 'downcast-ns-timestamp-to-us-on-write' configuration property to automatically downcast 'ns' to 'us' on write." 
+ ) + primitive = cast(pa.Time64Type, primitive) elif pa.types.is_timestamp(primitive): primitive = cast(pa.TimestampType, primitive) - if primitive.unit in ("s", "ms", "us", "ns"): + if primitive.unit in ("s", "ms", "us"): # Supported types, will be upcast automatically to 'us' pass + elif primitive.unit == "ns": + if self._downcast_ns_timestamp_to_us: + logger.warning("Iceberg does not yet support 'ns' timestamp precision. Downcasting to 'us'.") + else: + raise TypeError( + "Iceberg does not yet support 'ns' timestamp precision. Use 'downcast-ns-timestamp-to-us-on-write' configuration property to automatically downcast 'ns' to 'us' on write." + ) else: raise TypeError(f"Unsupported precision for timestamp type: {primitive.unit}") From 37c8198426321ad7938f34e89526cf5df63f3637 Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Tue, 1 Oct 2024 14:18:46 +0800 Subject: [PATCH 5/6] Return TimeType not raise error here Return TimeType not raise error here --- pyiceberg/io/pyarrow.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 7781b96d0f..473815a144 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -1075,10 +1075,7 @@ def primitive(self, primitive: pa.DataType) -> PrimitiveType: if self._downcast_ns_timestamp_to_us: logger.warning("Iceberg does not yet support 'ns' timestamp precision. Downcasting to 'us'.") else: - raise TypeError( - "Iceberg does not yet support 'ns' timestamp precision. Use 'downcast-ns-timestamp-to-us-on-write' configuration property to automatically downcast 'ns' to 'us' on write." 
- ) - primitive = cast(pa.Time64Type, primitive) + return TimeType() elif pa.types.is_timestamp(primitive): primitive = cast(pa.TimestampType, primitive) if primitive.unit in ("s", "ms", "us"): From 2c926387bf3b8ab6009fba2ff590be31b8525f25 Mon Sep 17 00:00:00 2001 From: JE-Chen <33644111+JE-Chen@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:57:32 +0800 Subject: [PATCH 6/6] ruff reformat ruff reformat --- pyiceberg/io/pyarrow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 473815a144..a8e3daf320 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -1069,7 +1069,7 @@ def primitive(self, primitive: pa.DataType) -> PrimitiveType: elif pa.types.is_date32(primitive): return DateType() elif isinstance(primitive, pa.Time64Type): - if primitive.unit =="us": + if primitive.unit == "us": return TimeType() elif primitive.unit == "ns": if self._downcast_ns_timestamp_to_us: