@@ -38,7 +38,7 @@ def run_spark_commands(spark: SparkSession, sqls: List[str]) -> None:
 @pytest.mark.integration
 @pytest.mark.parametrize("format_version", [1, 2])
 def test_partitioned_table_delete_full_file(spark: SparkSession, session_catalog: RestCatalog, format_version: int) -> None:
-    identifier = 'default.table_partitioned_delete'
+    identifier = "default.table_partitioned_delete"
 
     run_spark_commands(
         spark,
@@ -66,14 +66,14 @@ def test_partitioned_table_delete_full_file(spark: SparkSession, session_catalog
     tbl.delete(EqualTo("number_partitioned", 10))
 
     # No overwrite operation
-    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ['append', 'append', 'delete']
-    assert tbl.scan().to_arrow().to_pydict() == {'number_partitioned': [11, 11], 'number': [20, 30]}
+    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ["append", "append", "delete"]
+    assert tbl.scan().to_arrow().to_pydict() == {"number_partitioned": [11, 11], "number": [20, 30]}
 
 
 @pytest.mark.integration
 @pytest.mark.parametrize("format_version", [1, 2])
 def test_partitioned_table_rewrite(spark: SparkSession, session_catalog: RestCatalog, format_version: int) -> None:
-    identifier = 'default.table_partitioned_delete'
+    identifier = "default.table_partitioned_delete"
 
     run_spark_commands(
         spark,
@@ -101,14 +101,14 @@ def test_partitioned_table_rewrite(spark: SparkSession, session_catalog: RestCat
     tbl.delete(EqualTo("number", 20))
 
     # We don't delete a whole partition, so there is only an overwrite
-    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ['append', 'append', 'overwrite']
-    assert tbl.scan().to_arrow().to_pydict() == {'number_partitioned': [11, 10], 'number': [30, 30]}
+    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ["append", "append", "overwrite"]
+    assert tbl.scan().to_arrow().to_pydict() == {"number_partitioned": [11, 10], "number": [30, 30]}
 
 
 @pytest.mark.integration
 @pytest.mark.parametrize("format_version", [1, 2])
 def test_partitioned_table_no_match(spark: SparkSession, session_catalog: RestCatalog, format_version: int) -> None:
-    identifier = 'default.table_partitioned_delete'
+    identifier = "default.table_partitioned_delete"
 
     run_spark_commands(
         spark,
@@ -132,13 +132,13 @@ def test_partitioned_table_no_match(spark: SparkSession, session_catalog: RestCa
     tbl = session_catalog.load_table(identifier)
     tbl.delete(EqualTo("number_partitioned", 22))  # Does not affect any data
 
-    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ['append']
-    assert tbl.scan().to_arrow().to_pydict() == {'number_partitioned': [10, 10], 'number': [20, 30]}
+    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ["append"]
+    assert tbl.scan().to_arrow().to_pydict() == {"number_partitioned": [10, 10], "number": [20, 30]}
 
 
 @pytest.mark.integration
 def test_partitioned_table_positional_deletes(spark: SparkSession, session_catalog: RestCatalog) -> None:
-    identifier = 'default.table_partitioned_delete'
+    identifier = "default.table_partitioned_delete"
 
     run_spark_commands(
         spark,
@@ -180,13 +180,13 @@ def test_partitioned_table_positional_deletes(spark: SparkSession, session_catal
 
     # One positional delete has been added, but an OVERWRITE status is set
     # https://github.com/apache/iceberg/issues/10122
-    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ['append', 'overwrite', 'overwrite']
-    assert tbl.scan().to_arrow().to_pydict() == {'number_partitioned': [10], 'number': [20]}
+    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ["append", "overwrite", "overwrite"]
+    assert tbl.scan().to_arrow().to_pydict() == {"number_partitioned": [10], "number": [20]}
 
 
 @pytest.mark.integration
 def test_partitioned_table_positional_deletes_sequence_number(spark: SparkSession, session_catalog: RestCatalog) -> None:
-    identifier = 'default.table_partitioned_delete_sequence_number'
+    identifier = "default.table_partitioned_delete_sequence_number"
 
     # This test case is a bit more complex. Here we run a MoR delete on a file, make sure that
     # the manifest gets rewritten (but not the data file, since this is a MoR delete), and check if the delete is still there
@@ -234,40 +234,40 @@ def test_partitioned_table_positional_deletes_sequence_number(spark: SparkSessio
     assert len(snapshots) == 3
 
     # Snapshots produced by Spark
-    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()[0:2]] == ['append', 'overwrite']
+    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()[0:2]] == ["append", "overwrite"]
 
     # Will rewrite one parquet file
     assert snapshots[2].summary == Summary(
         Operation.OVERWRITE,
         **{
-            'added-files-size': '1145',
-            'added-data-files': '1',
-            'added-records': '2',
-            'changed-partition-count': '1',
-            'total-files-size': snapshots[2].summary['total-files-size'],
-            'total-delete-files': '0',
-            'total-data-files': '1',
-            'total-position-deletes': '0',
-            'total-records': '2',
-            'total-equality-deletes': '0',
-            'deleted-data-files': '2',
-            'removed-delete-files': '1',
-            'deleted-records': '5',
-            'removed-files-size': snapshots[2].summary['removed-files-size'],
-            'removed-position-deletes': '1',
+            "added-files-size": "1145",
+            "added-data-files": "1",
+            "added-records": "2",
+            "changed-partition-count": "1",
+            "total-files-size": snapshots[2].summary["total-files-size"],
+            "total-delete-files": "0",
+            "total-data-files": "1",
+            "total-position-deletes": "0",
+            "total-records": "2",
+            "total-equality-deletes": "0",
+            "deleted-data-files": "2",
+            "removed-delete-files": "1",
+            "deleted-records": "5",
+            "removed-files-size": snapshots[2].summary["removed-files-size"],
+            "removed-position-deletes": "1",
         },
     )
 
-    assert tbl.scan().to_arrow().to_pydict() == {'number_partitioned': [20, 20, 10], 'number': [200, 202, 100]}
+    assert tbl.scan().to_arrow().to_pydict() == {"number_partitioned": [20, 20, 10], "number": [200, 202, 100]}
 
 
 @pytest.mark.integration
 def test_delete_no_match(session_catalog: RestCatalog) -> None:
     arrow_schema = pa.schema([pa.field("ints", pa.int32())])
     arrow_tbl = pa.Table.from_pylist(
         [
-            {'ints': 1},
-            {'ints': 3},
+            {"ints": 1},
+            {"ints": 3},
         ],
         schema=arrow_schema,
     )
@@ -286,7 +286,7 @@ def test_delete_no_match(session_catalog: RestCatalog) -> None:
 
     assert [snapshot.summary.operation for snapshot in tbl.snapshots()] == [Operation.APPEND]
 
-    tbl.delete('ints == 2')  # Only 1 and 3 in the file, but 2 is between the lower and upper bound
+    tbl.delete("ints == 2")  # Only 1 and 3 in the file, but 2 is between the lower and upper bound
 
     assert [snapshot.summary.operation for snapshot in tbl.snapshots()] == [Operation.APPEND]
 
@@ -296,8 +296,8 @@ def test_delete_overwrite(session_catalog: RestCatalog) -> None:
     arrow_schema = pa.schema([pa.field("ints", pa.int32())])
     arrow_tbl = pa.Table.from_pylist(
         [
-            {'ints': 1},
-            {'ints': 2},
+            {"ints": 1},
+            {"ints": 2},
         ],
         schema=arrow_schema,
     )
@@ -318,28 +318,28 @@ def test_delete_overwrite(session_catalog: RestCatalog) -> None:
 
     arrow_tbl_overwrite = pa.Table.from_pylist(
         [
-            {'ints': 3},
-            {'ints': 4},
+            {"ints": 3},
+            {"ints": 4},
         ],
         schema=arrow_schema,
     )
-    tbl.overwrite(arrow_tbl_overwrite, 'ints == 2')  # Should rewrite one file
+    tbl.overwrite(arrow_tbl_overwrite, "ints == 2")  # Should rewrite one file
 
     assert [snapshot.summary.operation for snapshot in tbl.snapshots()] == [
         Operation.APPEND,
         Operation.OVERWRITE,
         Operation.APPEND,
     ]
 
-    assert tbl.scan().to_arrow()['ints'].to_pylist() == [3, 4, 1]
+    assert tbl.scan().to_arrow()["ints"].to_pylist() == [3, 4, 1]
 
 
 @pytest.mark.integration
 def test_delete_truncate(session_catalog: RestCatalog) -> None:
     arrow_schema = pa.schema([pa.field("ints", pa.int32())])
     arrow_tbl = pa.Table.from_pylist(
         [
-            {'ints': 1},
+            {"ints": 1},
         ],
         schema=arrow_schema,
     )