diff --git a/pyiceberg/table/inspect.py b/pyiceberg/table/inspect.py
index 91bdb2f29d..09d1d7231a 100644
--- a/pyiceberg/table/inspect.py
+++ b/pyiceberg/table/inspect.py
@@ -161,7 +161,7 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
         entries = []
         snapshot = self._get_snapshot(snapshot_id)
         for manifest in snapshot.manifests(self.tbl.io):
-            for entry in manifest.fetch_manifest_entry(io=self.tbl.io):
+            for entry in manifest.fetch_manifest_entry(io=self.tbl.io, discard_deleted=False):
                 column_sizes = entry.data_file.column_sizes or {}
                 value_counts = entry.data_file.value_counts or {}
                 null_value_counts = entry.data_file.null_value_counts or {}
diff --git a/tests/integration/test_inspect_table.py b/tests/integration/test_inspect_table.py
index 75fe92a69a..a2a5fe45bc 100644
--- a/tests/integration/test_inspect_table.py
+++ b/tests/integration/test_inspect_table.py
@@ -164,6 +164,8 @@ def test_inspect_entries(
 
     # Write some data
     tbl.append(arrow_table_with_null)
+    # Generate a DELETE entry
+    tbl.overwrite(arrow_table_with_null)
 
     def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> None:
         assert df.column_names == [
@@ -185,6 +187,8 @@ def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> Non
 
         lhs = df.to_pandas()
         rhs = spark_df.toPandas()
+        assert len(lhs) == len(rhs)
+
         for column in df.column_names:
             for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
                 if column == "data_file":
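
For context, a minimal sketch of the behavior this patch enables. The catalog name `default` and table name `default.t` are hypothetical, and it assumes a configured catalog plus an Arrow table whose schema matches the Iceberg table: after an overwrite, the current snapshot's manifests carry DELETED entries for the replaced file, and `tbl.inspect.entries()` should now surface them instead of silently dropping them.

```python
import pyarrow as pa
from pyiceberg.catalog import load_catalog

# Hypothetical catalog/table names; assumes a configured catalog and a
# table whose schema matches the Arrow data below.
catalog = load_catalog("default")
tbl = catalog.load_table("default.t")

data = pa.table({"n": pa.array([1, 2, 3], type=pa.int64())})
tbl.append(data)     # produces ADDED (status 1) manifest entries
tbl.overwrite(data)  # rewrites the table, marking the old file DELETED (status 2)

entries = tbl.inspect.entries()
# Previously, fetch_manifest_entry() defaulted to discard_deleted=True, so
# status 2 rows never appeared here; with this patch both statuses show up,
# matching Spark's entries metadata table.
print(entries.column("status").to_pylist())
```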