1717# pylint: disable=redefined-outer-name,arguments-renamed,fixme
1818from tempfile import TemporaryDirectory
1919from typing import Dict , Optional
20- from unittest .mock import patch
2120
2221import fastavro
2322import pytest
@@ -85,7 +84,7 @@ def test_read_manifest_entry(generated_manifest_entry_file: str) -> None:
8584 == "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet"
8685 )
8786 assert data_file .file_format == FileFormat .PARQUET
88- assert repr (data_file .partition ) == "Record[1, 1925 ]"
87+ assert repr (data_file .partition ) == "Record[1, None ]"
8988 assert data_file .record_count == 19513
9089 assert data_file .file_size_in_bytes == 388872
9190 assert data_file .column_sizes == {
@@ -154,37 +153,13 @@ def test_read_manifest_entry(generated_manifest_entry_file: str) -> None:
154153 assert data_file .nan_value_counts == {16 : 0 , 17 : 0 , 18 : 0 , 19 : 0 , 10 : 0 , 11 : 0 , 12 : 0 , 13 : 0 , 14 : 0 , 15 : 0 }
155154 assert data_file .lower_bounds == {
156155 2 : b"\x01 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " ,
157- 3 : b"\x01 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " ,
158- 7 : b"\x03 \x00 \x00 \x00 " ,
159- 8 : b"\x01 \x00 \x00 \x00 " ,
160- 10 : b"\xf6 (\\ \x8f \xc2 \x05 S\xc0 " ,
161- 11 : b"\x00 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " ,
162- 13 : b"\x00 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " ,
163- 14 : b"\x00 \x00 \x00 \x00 \x00 \x00 \xe0 \xbf " ,
164- 15 : b")\\ \x8f \xc2 \xf5 (\x08 \xc0 " ,
165- 16 : b"\x00 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " ,
166- 17 : b"\x00 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " ,
167- 18 : b"\xf6 (\\ \x8f \xc2 \xc5 S\xc0 " ,
168- 19 : b"\x00 \x00 \x00 \x00 \x00 \x00 \x04 \xc0 " ,
169156 }
170157 assert data_file .upper_bounds == {
171158 2 : b"\x06 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " ,
172- 3 : b"\x06 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " ,
173- 7 : b"\t \x01 \x00 \x00 " ,
174- 8 : b"\t \x01 \x00 \x00 " ,
175- 10 : b"\xcd \xcc \xcc \xcc \xcc ,_@" ,
176- 11 : b"\x1f \x85 \xeb Q\\ \xe2 \xfe @" ,
177- 13 : b"\x00 \x00 \x00 \x00 \x00 \x00 \x12 @" ,
178- 14 : b"\x00 \x00 \x00 \x00 \x00 \x00 \xe0 ?" ,
179- 15 : b"q=\n \xd7 \xa3 \xf0 1@" ,
180- 16 : b"\x00 \x00 \x00 \x00 \x00 `B@" ,
181- 17 : b"333333\xd3 ?" ,
182- 18 : b"\x00 \x00 \x00 \x00 \x00 \x18 b@" ,
183- 19 : b"\x00 \x00 \x00 \x00 \x00 \x00 \x04 @" ,
184159 }
185160 assert data_file .key_metadata is None
186161 assert data_file .split_offsets == [4 ]
187- assert data_file .equality_ids is None
162+ assert data_file .equality_ids == []
188163 assert data_file .sort_order_id == 0
189164
190165
@@ -308,36 +283,12 @@ def test_read_manifest_v2(generated_manifest_file_file_v2: str) -> None:
308283
309284 entry = entries [0 ]
310285
311- assert entry .sequence_number == 3
312- assert entry .file_sequence_number == 3
286+ assert entry .sequence_number == 0
287+ assert entry .file_sequence_number == 0
313288 assert entry .snapshot_id == 8744736658442914487
314289 assert entry .status == ManifestEntryStatus .ADDED
315290
316291
317- def test_read_manifest_cache (generated_manifest_file_file_v2 : str ) -> None :
318- with patch ("pyiceberg.manifest.read_manifest_list" ) as mocked_read_manifest_list :
319- io = load_file_io ()
320-
321- snapshot = Snapshot (
322- snapshot_id = 25 ,
323- parent_snapshot_id = 19 ,
324- timestamp_ms = 1602638573590 ,
325- manifest_list = generated_manifest_file_file_v2 ,
326- summary = Summary (Operation .APPEND ),
327- schema_id = 3 ,
328- )
329-
330- # Access the manifests property multiple times to test caching
331- manifests_first_call = snapshot .manifests (io )
332- manifests_second_call = snapshot .manifests (io )
333-
334- # Ensure that read_manifest_list was called only once
335- mocked_read_manifest_list .assert_called_once ()
336-
337- # Ensure that the same manifest list is returned
338- assert manifests_first_call == manifests_second_call
339-
340-
341292def test_write_empty_manifest () -> None :
342293 io = load_file_io ()
343294 test_schema = Schema (NestedField (1 , "foo" , IntegerType (), False ))
@@ -411,7 +362,7 @@ def test_write_manifest(
411362
412363 assert manifest_entry .status == ManifestEntryStatus .ADDED
413364 assert manifest_entry .snapshot_id == 8744736658442914487
414- assert manifest_entry .sequence_number == - 1 if format_version == 1 else 3
365+ assert manifest_entry .sequence_number == 0 if format_version == 1 else 3
415366 assert isinstance (manifest_entry .data_file , DataFile )
416367
417368 data_file = manifest_entry .data_file
@@ -422,7 +373,7 @@ def test_write_manifest(
422373 == "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet"
423374 )
424375 assert data_file .file_format == FileFormat .PARQUET
425- assert data_file .partition == Record (1 , 1925 )
376+ assert data_file .partition == Record (1 , None )
426377 assert data_file .record_count == 19513
427378 assert data_file .file_size_in_bytes == 388872
428379 assert data_file .column_sizes == {
@@ -491,37 +442,13 @@ def test_write_manifest(
491442 assert data_file .nan_value_counts == {16 : 0 , 17 : 0 , 18 : 0 , 19 : 0 , 10 : 0 , 11 : 0 , 12 : 0 , 13 : 0 , 14 : 0 , 15 : 0 }
492443 assert data_file .lower_bounds == {
493444 2 : b"\x01 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " ,
494- 3 : b"\x01 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " ,
495- 7 : b"\x03 \x00 \x00 \x00 " ,
496- 8 : b"\x01 \x00 \x00 \x00 " ,
497- 10 : b"\xf6 (\\ \x8f \xc2 \x05 S\xc0 " ,
498- 11 : b"\x00 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " ,
499- 13 : b"\x00 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " ,
500- 14 : b"\x00 \x00 \x00 \x00 \x00 \x00 \xe0 \xbf " ,
501- 15 : b")\\ \x8f \xc2 \xf5 (\x08 \xc0 " ,
502- 16 : b"\x00 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " ,
503- 17 : b"\x00 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " ,
504- 18 : b"\xf6 (\\ \x8f \xc2 \xc5 S\xc0 " ,
505- 19 : b"\x00 \x00 \x00 \x00 \x00 \x00 \x04 \xc0 " ,
506445 }
507446 assert data_file .upper_bounds == {
508447 2 : b"\x06 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " ,
509- 3 : b"\x06 \x00 \x00 \x00 \x00 \x00 \x00 \x00 " ,
510- 7 : b"\t \x01 \x00 \x00 " ,
511- 8 : b"\t \x01 \x00 \x00 " ,
512- 10 : b"\xcd \xcc \xcc \xcc \xcc ,_@" ,
513- 11 : b"\x1f \x85 \xeb Q\\ \xe2 \xfe @" ,
514- 13 : b"\x00 \x00 \x00 \x00 \x00 \x00 \x12 @" ,
515- 14 : b"\x00 \x00 \x00 \x00 \x00 \x00 \xe0 ?" ,
516- 15 : b"q=\n \xd7 \xa3 \xf0 1@" ,
517- 16 : b"\x00 \x00 \x00 \x00 \x00 `B@" ,
518- 17 : b"333333\xd3 ?" ,
519- 18 : b"\x00 \x00 \x00 \x00 \x00 \x18 b@" ,
520- 19 : b"\x00 \x00 \x00 \x00 \x00 \x00 \x04 @" ,
521448 }
522449 assert data_file .key_metadata is None
523450 assert data_file .split_offsets == [4 ]
524- assert data_file .equality_ids is None
451+ assert data_file .equality_ids == []
525452 assert data_file .sort_order_id == 0
526453
527454
0 commit comments