Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyiceberg/table/inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
entries = []
snapshot = self._get_snapshot(snapshot_id)
for manifest in snapshot.manifests(self.tbl.io):
-for entry in manifest.fetch_manifest_entry(io=self.tbl.io):
+for entry in manifest.fetch_manifest_entry(io=self.tbl.io, discard_deleted=False):
column_sizes = entry.data_file.column_sizes or {}
value_counts = entry.data_file.value_counts or {}
null_value_counts = entry.data_file.null_value_counts or {}
Expand Down
4 changes: 4 additions & 0 deletions tests/integration/test_inspect_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ def test_inspect_entries(

# Write some data
tbl.append(arrow_table_with_null)
+# Generate a DELETE entry
+tbl.overwrite(arrow_table_with_null)

def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> None:
assert df.column_names == [
Expand All @@ -185,6 +187,8 @@ def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> Non

lhs = df.to_pandas()
rhs = spark_df.toPandas()
+assert len(lhs) == len(rhs)

for column in df.column_names:
for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
if column == "data_file":
Expand Down