3737from cachetools .keys import hashkey
3838from pydantic_core import to_json
3939
40+ from pyiceberg .avro .codecs import AVRO_CODEC_KEY , AvroCompressionCodec
4041from pyiceberg .avro .file import AvroFile , AvroOutputFile
4142from pyiceberg .conversions import to_bytes
4243from pyiceberg .exceptions import ValidationError
@@ -799,8 +800,16 @@ class ManifestWriter(ABC):
799800 _min_sequence_number : Optional [int ]
800801 _partitions : List [Record ]
801802 _reused_entry_wrapper : ManifestEntry
803+ _compression : AvroCompressionCodec
802804
803- def __init__ (self , spec : PartitionSpec , schema : Schema , output_file : OutputFile , snapshot_id : int ) -> None :
805+ def __init__ (
806+ self ,
807+ spec : PartitionSpec ,
808+ schema : Schema ,
809+ output_file : OutputFile ,
810+ snapshot_id : int ,
811+ avro_compression : AvroCompressionCodec ,
812+ ) -> None :
804813 self .closed = False
805814 self ._spec = spec
806815 self ._schema = schema
@@ -815,6 +824,11 @@ def __init__(self, spec: PartitionSpec, schema: Schema, output_file: OutputFile,
815824 self ._deleted_rows = 0
816825 self ._min_sequence_number = None
817826 self ._partitions = []
827+ < << << << Updated upstream
828+ == == == =
829+ self ._reused_entry_wrapper = ManifestEntry ()
830+ self ._compression = avro_compression
831+ >> >> >> > Stashed changes
818832
819833 def __enter__ (self ) -> ManifestWriter :
820834 """Open the writer."""
@@ -850,6 +864,7 @@ def _meta(self) -> Dict[str, str]:
850864 "partition-spec" : to_json (self ._spec .fields ).decode ("utf-8" ),
851865 "partition-spec-id" : str (self ._spec .spec_id ),
852866 "format-version" : str (self .version ),
867+ AVRO_CODEC_KEY : self ._compression ,
853868 }
854869
855870 def _with_partition (self , format_version : TableVersion ) -> Schema :
@@ -961,13 +976,15 @@ def existing(self, entry: ManifestEntry) -> ManifestWriter:
961976
962977
963978class ManifestWriterV1 (ManifestWriter ):
964- def __init__ (self , spec : PartitionSpec , schema : Schema , output_file : OutputFile , snapshot_id : int ):
965- super ().__init__ (
966- spec ,
967- schema ,
968- output_file ,
969- snapshot_id ,
970- )
979+ def __init__ (
980+ self ,
981+ spec : PartitionSpec ,
982+ schema : Schema ,
983+ output_file : OutputFile ,
984+ snapshot_id : int ,
985+ avro_compression : AvroCompressionCodec ,
986+ ):
987+ super ().__init__ (spec , schema , output_file , snapshot_id , avro_compression )
971988
972989 def content (self ) -> ManifestContent :
973990 return ManifestContent .DATA
@@ -981,8 +998,15 @@ def prepare_entry(self, entry: ManifestEntry) -> ManifestEntry:
981998
982999
9831000class ManifestWriterV2 (ManifestWriter ):
984- def __init__ (self , spec : PartitionSpec , schema : Schema , output_file : OutputFile , snapshot_id : int ):
985- super ().__init__ (spec , schema , output_file , snapshot_id )
1001+ def __init__ (
1002+ self ,
1003+ spec : PartitionSpec ,
1004+ schema : Schema ,
1005+ output_file : OutputFile ,
1006+ snapshot_id : int ,
1007+ avro_compression : AvroCompressionCodec ,
1008+ ):
1009+ super ().__init__ (spec , schema , output_file , snapshot_id , avro_compression )
9861010
9871011 def content (self ) -> ManifestContent :
9881012 return ManifestContent .DATA
@@ -1008,12 +1032,17 @@ def prepare_entry(self, entry: ManifestEntry) -> ManifestEntry:
10081032
10091033
def write_manifest(
    format_version: TableVersion,
    spec: PartitionSpec,
    schema: Schema,
    output_file: OutputFile,
    snapshot_id: int,
    avro_compression: AvroCompressionCodec,
) -> ManifestWriter:
    """Create the manifest writer that matches a table format version.

    Args:
        format_version: Table format version; 1 and 2 are supported.
        spec: Partition spec passed to the writer.
        schema: Schema passed to the writer.
        output_file: Output file passed to the writer.
        snapshot_id: Snapshot id passed to the writer.
        avro_compression: Avro compression codec passed to the writer.

    Returns:
        A ``ManifestWriterV1`` for version 1, a ``ManifestWriterV2`` for version 2.

    Raises:
        ValueError: If ``format_version`` is neither 1 nor 2.
    """
    # Reject unsupported versions up front so only the two known writers remain.
    if format_version not in (1, 2):
        raise ValueError(f"Cannot write manifest for table version: {format_version}")

    writer_cls = ManifestWriterV1 if format_version == 1 else ManifestWriterV2
    return writer_cls(spec, schema, output_file, snapshot_id, avro_compression)
10191048
@@ -1063,14 +1092,21 @@ def add_manifests(self, manifest_files: List[ManifestFile]) -> ManifestListWrite
10631092
10641093
10651094class ManifestListWriterV1 (ManifestListWriter ):
1066- def __init__ (self , output_file : OutputFile , snapshot_id : int , parent_snapshot_id : Optional [int ]):
1095+ def __init__ (
1096+ self ,
1097+ output_file : OutputFile ,
1098+ snapshot_id : int ,
1099+ parent_snapshot_id : Optional [int ],
1100+ compression : AvroCompressionCodec ,
1101+ ):
10671102 super ().__init__ (
10681103 format_version = 1 ,
10691104 output_file = output_file ,
10701105 meta = {
10711106 "snapshot-id" : str (snapshot_id ),
10721107 "parent-snapshot-id" : str (parent_snapshot_id ) if parent_snapshot_id is not None else "null" ,
10731108 "format-version" : "1" ,
1109+ AVRO_CODEC_KEY : compression ,
10741110 },
10751111 )
10761112
@@ -1084,7 +1120,14 @@ class ManifestListWriterV2(ManifestListWriter):
10841120 _commit_snapshot_id : int
10851121 _sequence_number : int
10861122
1087- def __init__ (self , output_file : OutputFile , snapshot_id : int , parent_snapshot_id : Optional [int ], sequence_number : int ):
1123+ def __init__ (
1124+ self ,
1125+ output_file : OutputFile ,
1126+ snapshot_id : int ,
1127+ parent_snapshot_id : Optional [int ],
1128+ sequence_number : int ,
1129+ compression : AvroCompressionCodec ,
1130+ ):
10881131 super ().__init__ (
10891132 format_version = 2 ,
10901133 output_file = output_file ,
@@ -1093,6 +1136,7 @@ def __init__(self, output_file: OutputFile, snapshot_id: int, parent_snapshot_id
10931136 "parent-snapshot-id" : str (parent_snapshot_id ) if parent_snapshot_id is not None else "null" ,
10941137 "sequence-number" : str (sequence_number ),
10951138 "format-version" : "2" ,
1139+ AVRO_CODEC_KEY : compression ,
10961140 },
10971141 )
10981142 self ._commit_snapshot_id = snapshot_id
@@ -1127,12 +1171,13 @@ def write_manifest_list(
11271171 snapshot_id : int ,
11281172 parent_snapshot_id : Optional [int ],
11291173 sequence_number : Optional [int ],
1174+ avro_compression : AvroCompressionCodec ,
11301175) -> ManifestListWriter :
11311176 if format_version == 1 :
1132- return ManifestListWriterV1 (output_file , snapshot_id , parent_snapshot_id )
1177+ return ManifestListWriterV1 (output_file , snapshot_id , parent_snapshot_id , avro_compression )
11331178 elif format_version == 2 :
11341179 if sequence_number is None :
11351180 raise ValueError (f"Sequence-number is required for V2 tables: { sequence_number } " )
1136- return ManifestListWriterV2 (output_file , snapshot_id , parent_snapshot_id , sequence_number )
1181+ return ManifestListWriterV2 (output_file , snapshot_id , parent_snapshot_id , sequence_number , avro_compression )
11371182 else :
11381183 raise ValueError (f"Cannot write manifest list for table version: { format_version } " )
0 commit comments