From 6a42c35c1afc6817ebde3d9b4f043cc4e5e84780 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Sat, 30 Mar 2024 19:24:35 -0400 Subject: [PATCH] minor fixes --- pyiceberg/table/__init__.py | 10 ++++++---- tests/integration/test_reads.py | 1 - tests/integration/test_writes.py | 1 - 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 55795db380..0f4e334d8d 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -1056,8 +1056,9 @@ def append(self, df: pa.Table) -> None: _check_schema_compatible(self.schema(), other_schema=df.schema) # cast if the two schemas are compatible but not equal - if schema_to_pyarrow(self.schema()) != df.schema: - df = df.cast(schema_to_pyarrow(self.schema())) + table_arrow_schema = schema_to_pyarrow(self.schema()) + if table_arrow_schema != df.schema: + df = df.cast(table_arrow_schema) merge = _MergingSnapshotProducer(operation=Operation.APPEND, table=self) @@ -1096,8 +1097,9 @@ def overwrite(self, df: pa.Table, overwrite_filter: BooleanExpression = ALWAYS_T _check_schema_compatible(self.schema(), other_schema=df.schema) # cast if the two schemas are compatible but not equal - if schema_to_pyarrow(self.schema()) != df.schema: - df = df.cast(schema_to_pyarrow(self.schema())) + table_arrow_schema = schema_to_pyarrow(self.schema()) + if table_arrow_schema != df.schema: + df = df.cast(table_arrow_schema) merge = _MergingSnapshotProducer( operation=Operation.OVERWRITE if self.current_snapshot() is not None else Operation.APPEND, diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py index c03bc78a18..ebebe3d558 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -230,7 +230,6 @@ def test_ray_nan_rewritten(catalog: Catalog) -> None: def test_ray_not_nan_count(catalog: Catalog) -> None: table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten") ray_dataset = table_test_null_nan_rewritten.scan(row_filter=NotNaN("col_numeric"), selected_fields=("idx",)).to_ray() - print(ray_dataset.take()) assert ray_dataset.count() == 2 diff --git a/tests/integration/test_writes.py b/tests/integration/test_writes.py index b138b2c7e2..887a519437 100644 --- a/tests/integration/test_writes.py +++ b/tests/integration/test_writes.py @@ -481,7 +481,6 @@ def test_write_parquet_other_properties( properties: Dict[str, Any], expected_kwargs: Dict[str, Any], ) -> None: - print(type(mocker)) identifier = "default.test_write_parquet_other_properties" # The properties we test cannot be checked on the resulting Parquet file, so we spy on the ParquetWriter call instead