Skip to content

Commit 3649411

Browse files
author
Tom McCormick
committed
fix integration test
1 parent 5f171a2 commit 3649411

File tree

1 file changed

+12
-13
lines changed

1 file changed

+12
-13
lines changed

tests/integration/test_writes/test_writes.py

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -728,19 +728,15 @@ def test_spark_writes_orc_pyiceberg_reads(spark: SparkSession, session_catalog:
728728
# Create Spark DataFrame
729729
spark_df = spark.createDataFrame(test_data, ["id", "name", "age", "is_active"])
730730

731-
# Create table with Spark using ORC format
732-
spark_df.writeTo(identifier).using("iceberg").createOrReplace()
731+
# Ensure a clean slate to avoid replacing a v2 table with v1
732+
spark.sql(f"DROP TABLE IF EXISTS {identifier}")
733733

734-
# Configure table to use ORC format
735-
spark.sql(
736-
f"""
737-
ALTER TABLE {identifier}
738-
SET TBLPROPERTIES (
739-
'write.format.default' = 'orc',
740-
'format-version' = '{format_version}'
741-
)
742-
"""
743-
)
734+
# Create table with Spark using ORC format and desired format-version
735+
spark_df.writeTo(identifier) \
736+
.using("iceberg") \
737+
.tableProperty("write.format.default", "orc") \
738+
.tableProperty("format-version", str(format_version)) \
739+
.createOrReplace()
744740

745741
# Write data with ORC format using Spark
746742
spark_df.writeTo(identifier).using("iceberg").append()
@@ -774,8 +770,11 @@ def test_spark_writes_orc_pyiceberg_reads(spark: SparkSession, session_catalog:
774770
assert pyiceberg_df["age"].dtype == "int64"
775771
assert pyiceberg_df["is_active"].dtype == "bool"
776772

777-
# Cross-validate with Spark to ensure consistency
773+
# Cross-validate with Spark to ensure consistency (ensure deterministic ordering)
778774
spark_result = spark.sql(f"SELECT * FROM {identifier}").toPandas()
775+
sort_cols = ["id", "name", "age", "is_active"]
776+
spark_result = spark_result.sort_values(by=sort_cols).reset_index(drop=True)
777+
pyiceberg_df = pyiceberg_df.sort_values(by=sort_cols).reset_index(drop=True)
779778
pandas.testing.assert_frame_equal(spark_result, pyiceberg_df, check_dtype=False)
780779

781780
# Verify the files are actually ORC format

0 commit comments

Comments (0)