diff --git a/pyiceberg/schema.py b/pyiceberg/schema.py index b2739d8618..984883a84d 100644 --- a/pyiceberg/schema.py +++ b/pyiceberg/schema.py @@ -121,7 +121,10 @@ def __eq__(self, other: Any) -> bool: return False identifier_field_ids_is_equal = self.identifier_field_ids == other.identifier_field_ids - schema_is_equal = all(lhs == rhs for lhs, rhs in zip(self.columns, other.columns)) + + left = sorted(self.columns, key=lambda field: field.field_id) + right = sorted(other.columns, key=lambda field: field.field_id) + schema_is_equal = all(lhs == rhs for lhs, rhs in zip(left, right)) return identifier_field_ids_is_equal and schema_is_equal diff --git a/pyiceberg/types.py b/pyiceberg/types.py index 7e3862b7d3..98ce0a7c9c 100644 --- a/pyiceberg/types.py +++ b/pyiceberg/types.py @@ -393,7 +393,15 @@ def __hash__(self) -> int: def __eq__(self, other: Any) -> bool: """Compare the object if it is equal to another object.""" - return self.fields == other.fields if isinstance(other, StructType) else False + if not isinstance(other, StructType): + return False + + if len(self.fields) != len(other.fields): + return False + + left = sorted(self.fields, key=lambda field: field.field_id) + right = sorted(other.fields, key=lambda field: field.field_id) + return all(lhs == rhs for lhs, rhs in zip(left, right)) class ListType(IcebergType): diff --git a/tests/avro/test_resolver.py b/tests/avro/test_resolver.py index decd9060a4..ee1e444684 100644 --- a/tests/avro/test_resolver.py +++ b/tests/avro/test_resolver.py @@ -372,7 +372,7 @@ def test_writer_ordering() -> None: ), ) - expected = StructWriter(((1, DoubleWriter()), (0, StringWriter()))) + expected = StructWriter(((0, DoubleWriter()), (1, StringWriter()))) assert actual == expected diff --git a/tests/integration/test_rest_schema.py b/tests/integration/test_rest_schema.py index ac5d1ce050..5aced3f128 100644 --- a/tests/integration/test_rest_schema.py +++ b/tests/integration/test_rest_schema.py @@ -1730,19 +1730,17 @@ def test_move_nested_field_after_first(catalog: Catalog) -> None: with tbl.update_schema() as schema_update: schema_update.move_before("struct.data", "struct.count") - assert str(tbl.schema()) == str( - Schema( - NestedField(field_id=1, name="id", field_type=LongType(), required=True), - NestedField( - field_id=2, - name="struct", - field_type=StructType( - NestedField(field_id=4, name="data", field_type=StringType(), required=True), - NestedField(field_id=3, name="count", field_type=LongType(), required=True), - ), - required=True, + assert tbl.schema() == Schema( + NestedField(field_id=1, name="id", field_type=LongType(), required=True), + NestedField( + field_id=2, + name="struct", + field_type=StructType( + NestedField(field_id=4, name="data", field_type=StringType(), required=True), + NestedField(field_id=3, name="count", field_type=LongType(), required=True), ), - ) + required=True, + ), ) diff --git a/tests/test_schema.py b/tests/test_schema.py index 96109ce9c2..5f2626fd14 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -123,6 +123,15 @@ def test_schema_raise_on_duplicate_names() -> None: assert "Invalid schema, multiple fields for name baz: 3 and 4" in str(exc_info.value) +def test_schema_field_order_irrelevant() -> None: + foo = NestedField(field_id=1, name="foo", field_type=StringType()) + bar = NestedField(field_id=2, name="bar", field_type=IntegerType(), required=False) + left = schema.Schema(foo, bar) + right = schema.Schema(bar, foo) + assert left == right + assert left.as_struct() == right.as_struct() + + def test_schema_index_by_id_visitor(table_schema_nested: Schema) -> None: """Test index_by_id visitor function""" index = schema.index_by_id(table_schema_nested)