@@ -100,10 +100,8 @@ def _inspect_files_asserts(df: pa.Table, spark_df: DataFrame) -> None:
         assert isinstance(value.as_py(), int)

     for split_offsets in df["split_offsets"]:
-        assert isinstance(split_offsets.as_py(), list)
-
-    for file_format in df["file_format"]:
-        assert file_format.as_py() == "PARQUET"
+        if split_offsets.as_py() is not None:
+            assert isinstance(split_offsets.as_py(), list)

     for file_path in df["file_path"]:
         assert file_path.as_py().startswith("s3://")
@@ -985,3 +983,49 @@ def test_inspect_all_files(
     _inspect_files_asserts(all_files_df, spark.table(f"{identifier}.all_files"))
     _inspect_files_asserts(all_data_files_df, spark.table(f"{identifier}.all_data_files"))
     _inspect_files_asserts(all_delete_files_df, spark.table(f"{identifier}.all_delete_files"))
+
+
+@pytest.mark.integration
+def test_inspect_files_format_version_3(spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None:
+    identifier = "default.table_metadata_files"
+
+    tbl = _create_table(
+        session_catalog,
+        identifier,
+        properties={
+            "format-version": "3",
+            "write.delete.mode": "merge-on-read",
+            "write.update.mode": "merge-on-read",
+            "write.merge.mode": "merge-on-read",
+        },
+    )
+
+    insert_data_sql = f"""INSERT INTO {identifier} VALUES
+        (false, 'a', 'aaaaaaaaaaaaaaaaaaaaaa', 1, 1, 0.0, 0.0, TIMESTAMP('2023-01-01 19:25:00'), TIMESTAMP('2023-01-01 19:25:00+00:00'), DATE('2023-01-01'), X'01', X'00000000000000000000000000000000'),
+        (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL),
+        (true, 'z', 'zzzzzzzzzzzzzzzzzzzzzz', 9, 9, 0.9, 0.9, TIMESTAMP('2023-03-01 19:25:00'), TIMESTAMP('2023-03-01 19:25:00+00:00'), DATE('2023-03-01'), X'12', X'11111111111111111111111111111111');
+    """
+
+    spark.sql(insert_data_sql)
+    spark.sql(insert_data_sql)
+    spark.sql(f"UPDATE {identifier} SET int = 2 WHERE int = 1")
+    spark.sql(f"DELETE FROM {identifier} WHERE int = 9")
+    spark.table(identifier).show(20, False)
+
+    tbl.refresh()
+
+    files_df = tbl.inspect.files()
+    data_files_df = tbl.inspect.data_files()
+    delete_files_df = tbl.inspect.delete_files()
+
+    all_files_df = tbl.inspect.all_files()
+    all_data_files_df = tbl.inspect.all_data_files()
+    all_delete_files_df = tbl.inspect.all_delete_files()
+
+    _inspect_files_asserts(files_df, spark.table(f"{identifier}.files"))
+    _inspect_files_asserts(data_files_df, spark.table(f"{identifier}.data_files"))
+    _inspect_files_asserts(delete_files_df, spark.table(f"{identifier}.delete_files"))
+
+    _inspect_files_asserts(all_files_df, spark.table(f"{identifier}.all_files"))
+    _inspect_files_asserts(all_data_files_df, spark.table(f"{identifier}.all_data_files"))
+    _inspect_files_asserts(all_delete_files_df, spark.table(f"{identifier}.all_delete_files"))
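Context for the relaxed assertions in the first hunk: with format-version 3 and merge-on-read writes, delete files are presumably no longer guaranteed to be Parquet (e.g. deletion vectors), so the shared helper can no longer assert file_format == "PARQUET", and split_offsets can come back null for such files. A minimal sketch of reading the files metadata table with that in mind; the helper name is illustrative and not part of this PR:

# Illustrative sketch (not from this PR): count entries in the "files"
# metadata table that report no split offsets, which is expected for
# delete files written by a format-version 3 table.
import pyarrow as pa

def files_missing_split_offsets(files_df: pa.Table) -> int:
    # Nulls may appear alongside regular lists, so check before using the value.
    return sum(1 for value in files_df["split_offsets"] if value.as_py() is None)

# Example usage against the table created in the new test:
#     files_missing_split_offsets(tbl.inspect.files())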