@@ -728,19 +728,15 @@ def test_spark_writes_orc_pyiceberg_reads(spark: SparkSession, session_catalog:
728728 # Create Spark DataFrame
729729 spark_df = spark .createDataFrame (test_data , ["id" , "name" , "age" , "is_active" ])
730730
731- # Create table with Spark using ORC format
732- spark_df . writeTo ( identifier ). using ( "iceberg" ). createOrReplace ( )
731+ # Ensure a clean slate to avoid replacing a v2 table with v1
732+ spark . sql ( f"DROP TABLE IF EXISTS { identifier } " )
733733
734- # Configure table to use ORC format
735- spark .sql (
736- f"""
737- ALTER TABLE { identifier }
738- SET TBLPROPERTIES (
739- 'write.format.default' = 'orc',
740- 'format-version' = '{ format_version } '
741- )
742- """
743- )
734+ # Create table with Spark using ORC format and desired format-version
735+ spark_df .writeTo (identifier ) \
736+ .using ("iceberg" ) \
737+ .tableProperty ("write.format.default" , "orc" ) \
738+ .tableProperty ("format-version" , str (format_version )) \
739+ .createOrReplace ()
744740
745741 # Write data with ORC format using Spark
746742 spark_df .writeTo (identifier ).using ("iceberg" ).append ()
@@ -774,8 +770,11 @@ def test_spark_writes_orc_pyiceberg_reads(spark: SparkSession, session_catalog:
774770 assert pyiceberg_df ["age" ].dtype == "int64"
775771 assert pyiceberg_df ["is_active" ].dtype == "bool"
776772
777- # Cross-validate with Spark to ensure consistency
773+ # Cross-validate with Spark; sort both frames first so the comparison does not depend on row order
778774 spark_result = spark .sql (f"SELECT * FROM { identifier } " ).toPandas ()
775+ sort_cols = ["id" , "name" , "age" , "is_active" ]
776+ spark_result = spark_result .sort_values (by = sort_cols ).reset_index (drop = True )
777+ pyiceberg_df = pyiceberg_df .sort_values (by = sort_cols ).reset_index (drop = True )
779778 pandas .testing .assert_frame_equal (spark_result , pyiceberg_df , check_dtype = False )
780779
781780 # Verify the files are actually ORC format
0 commit comments