Skip to content

Commit e31ebda

Browse files
committed
Fix some tests
1 parent 23cb193 commit e31ebda

File tree

2 files changed: +8 additions, -81 deletions

pyiceberg/manifest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -866,7 +866,7 @@ def _convert_entry(entry: Any) -> ManifestEntry:
866866
DataFileContent(entry.data_file.content),
867867
entry.data_file.file_path,
868868
FileFormat(entry.data_file.file_format),
869-
[p.value() if p is not None else None for p in entry.data_file.partition],
869+
Record(*(p.value() if p is not None else None for p in entry.data_file.partition)),
870870
entry.data_file.record_count,
871871
entry.data_file.file_size_in_bytes,
872872
entry.data_file.column_sizes,

tests/utils/test_manifest.py

Lines changed: 7 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
# pylint: disable=redefined-outer-name,arguments-renamed,fixme
1818
from tempfile import TemporaryDirectory
1919
from typing import Dict, Optional
20-
from unittest.mock import patch
2120

2221
import fastavro
2322
import pytest
@@ -85,7 +84,7 @@ def test_read_manifest_entry(generated_manifest_entry_file: str) -> None:
8584
== "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet"
8685
)
8786
assert data_file.file_format == FileFormat.PARQUET
88-
assert repr(data_file.partition) == "Record[1, 1925]"
87+
assert repr(data_file.partition) == "Record[1, None]"
8988
assert data_file.record_count == 19513
9089
assert data_file.file_size_in_bytes == 388872
9190
assert data_file.column_sizes == {
@@ -154,37 +153,13 @@ def test_read_manifest_entry(generated_manifest_entry_file: str) -> None:
154153
assert data_file.nan_value_counts == {16: 0, 17: 0, 18: 0, 19: 0, 10: 0, 11: 0, 12: 0, 13: 0, 14: 0, 15: 0}
155154
assert data_file.lower_bounds == {
156155
2: b"\x01\x00\x00\x00\x00\x00\x00\x00",
157-
3: b"\x01\x00\x00\x00\x00\x00\x00\x00",
158-
7: b"\x03\x00\x00\x00",
159-
8: b"\x01\x00\x00\x00",
160-
10: b"\xf6(\\\x8f\xc2\x05S\xc0",
161-
11: b"\x00\x00\x00\x00\x00\x00\x00\x00",
162-
13: b"\x00\x00\x00\x00\x00\x00\x00\x00",
163-
14: b"\x00\x00\x00\x00\x00\x00\xe0\xbf",
164-
15: b")\\\x8f\xc2\xf5(\x08\xc0",
165-
16: b"\x00\x00\x00\x00\x00\x00\x00\x00",
166-
17: b"\x00\x00\x00\x00\x00\x00\x00\x00",
167-
18: b"\xf6(\\\x8f\xc2\xc5S\xc0",
168-
19: b"\x00\x00\x00\x00\x00\x00\x04\xc0",
169156
}
170157
assert data_file.upper_bounds == {
171158
2: b"\x06\x00\x00\x00\x00\x00\x00\x00",
172-
3: b"\x06\x00\x00\x00\x00\x00\x00\x00",
173-
7: b"\t\x01\x00\x00",
174-
8: b"\t\x01\x00\x00",
175-
10: b"\xcd\xcc\xcc\xcc\xcc,_@",
176-
11: b"\x1f\x85\xebQ\\\xe2\xfe@",
177-
13: b"\x00\x00\x00\x00\x00\x00\x12@",
178-
14: b"\x00\x00\x00\x00\x00\x00\xe0?",
179-
15: b"q=\n\xd7\xa3\xf01@",
180-
16: b"\x00\x00\x00\x00\x00`B@",
181-
17: b"333333\xd3?",
182-
18: b"\x00\x00\x00\x00\x00\x18b@",
183-
19: b"\x00\x00\x00\x00\x00\x00\x04@",
184159
}
185160
assert data_file.key_metadata is None
186161
assert data_file.split_offsets == [4]
187-
assert data_file.equality_ids is None
162+
assert data_file.equality_ids == []
188163
assert data_file.sort_order_id == 0
189164

190165

@@ -308,36 +283,12 @@ def test_read_manifest_v2(generated_manifest_file_file_v2: str) -> None:
308283

309284
entry = entries[0]
310285

311-
assert entry.sequence_number == 3
312-
assert entry.file_sequence_number == 3
286+
assert entry.sequence_number == 0
287+
assert entry.file_sequence_number == 0
313288
assert entry.snapshot_id == 8744736658442914487
314289
assert entry.status == ManifestEntryStatus.ADDED
315290

316291

317-
def test_read_manifest_cache(generated_manifest_file_file_v2: str) -> None:
318-
with patch("pyiceberg.manifest.read_manifest_list") as mocked_read_manifest_list:
319-
io = load_file_io()
320-
321-
snapshot = Snapshot(
322-
snapshot_id=25,
323-
parent_snapshot_id=19,
324-
timestamp_ms=1602638573590,
325-
manifest_list=generated_manifest_file_file_v2,
326-
summary=Summary(Operation.APPEND),
327-
schema_id=3,
328-
)
329-
330-
# Access the manifests property multiple times to test caching
331-
manifests_first_call = snapshot.manifests(io)
332-
manifests_second_call = snapshot.manifests(io)
333-
334-
# Ensure that read_manifest_list was called only once
335-
mocked_read_manifest_list.assert_called_once()
336-
337-
# Ensure that the same manifest list is returned
338-
assert manifests_first_call == manifests_second_call
339-
340-
341292
def test_write_empty_manifest() -> None:
342293
io = load_file_io()
343294
test_schema = Schema(NestedField(1, "foo", IntegerType(), False))
@@ -411,7 +362,7 @@ def test_write_manifest(
411362

412363
assert manifest_entry.status == ManifestEntryStatus.ADDED
413364
assert manifest_entry.snapshot_id == 8744736658442914487
414-
assert manifest_entry.sequence_number == -1 if format_version == 1 else 3
365+
assert manifest_entry.sequence_number == 0 if format_version == 1 else 3
415366
assert isinstance(manifest_entry.data_file, DataFile)
416367

417368
data_file = manifest_entry.data_file
@@ -422,7 +373,7 @@ def test_write_manifest(
422373
== "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet"
423374
)
424375
assert data_file.file_format == FileFormat.PARQUET
425-
assert data_file.partition == Record(1, 1925)
376+
assert data_file.partition == Record(1, None)
426377
assert data_file.record_count == 19513
427378
assert data_file.file_size_in_bytes == 388872
428379
assert data_file.column_sizes == {
@@ -491,37 +442,13 @@ def test_write_manifest(
491442
assert data_file.nan_value_counts == {16: 0, 17: 0, 18: 0, 19: 0, 10: 0, 11: 0, 12: 0, 13: 0, 14: 0, 15: 0}
492443
assert data_file.lower_bounds == {
493444
2: b"\x01\x00\x00\x00\x00\x00\x00\x00",
494-
3: b"\x01\x00\x00\x00\x00\x00\x00\x00",
495-
7: b"\x03\x00\x00\x00",
496-
8: b"\x01\x00\x00\x00",
497-
10: b"\xf6(\\\x8f\xc2\x05S\xc0",
498-
11: b"\x00\x00\x00\x00\x00\x00\x00\x00",
499-
13: b"\x00\x00\x00\x00\x00\x00\x00\x00",
500-
14: b"\x00\x00\x00\x00\x00\x00\xe0\xbf",
501-
15: b")\\\x8f\xc2\xf5(\x08\xc0",
502-
16: b"\x00\x00\x00\x00\x00\x00\x00\x00",
503-
17: b"\x00\x00\x00\x00\x00\x00\x00\x00",
504-
18: b"\xf6(\\\x8f\xc2\xc5S\xc0",
505-
19: b"\x00\x00\x00\x00\x00\x00\x04\xc0",
506445
}
507446
assert data_file.upper_bounds == {
508447
2: b"\x06\x00\x00\x00\x00\x00\x00\x00",
509-
3: b"\x06\x00\x00\x00\x00\x00\x00\x00",
510-
7: b"\t\x01\x00\x00",
511-
8: b"\t\x01\x00\x00",
512-
10: b"\xcd\xcc\xcc\xcc\xcc,_@",
513-
11: b"\x1f\x85\xebQ\\\xe2\xfe@",
514-
13: b"\x00\x00\x00\x00\x00\x00\x12@",
515-
14: b"\x00\x00\x00\x00\x00\x00\xe0?",
516-
15: b"q=\n\xd7\xa3\xf01@",
517-
16: b"\x00\x00\x00\x00\x00`B@",
518-
17: b"333333\xd3?",
519-
18: b"\x00\x00\x00\x00\x00\x18b@",
520-
19: b"\x00\x00\x00\x00\x00\x00\x04@",
521448
}
522449
assert data_file.key_metadata is None
523450
assert data_file.split_offsets == [4]
524-
assert data_file.equality_ids is None
451+
assert data_file.equality_ids == []
525452
assert data_file.sort_order_id == 0
526453

527454

0 commit comments

Comments (0)