Map PyArrow string_view and binary_view types to Iceberg StringType and BinaryType, and parametrize the visitor tests over the string and binary variants.

diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index f7e3c7c082..fb4a71112d 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -1189,7 +1189,7 @@ def primitive(self, primitive: pa.DataType) -> PrimitiveType:
         elif isinstance(primitive, pa.Decimal128Type):
             primitive = cast(pa.Decimal128Type, primitive)
             return DecimalType(primitive.precision, primitive.scale)
-        elif pa.types.is_string(primitive) or pa.types.is_large_string(primitive):
+        elif pa.types.is_string(primitive) or pa.types.is_large_string(primitive) or pa.types.is_string_view(primitive):
             return StringType()
         elif pa.types.is_date32(primitive):
             return DateType()
@@ -1215,7 +1215,7 @@ def primitive(self, primitive: pa.DataType) -> PrimitiveType:
             elif primitive.tz is None:
                 return TimestampType()
 
-        elif pa.types.is_binary(primitive) or pa.types.is_large_binary(primitive):
+        elif pa.types.is_binary(primitive) or pa.types.is_large_binary(primitive) or pa.types.is_binary_view(primitive):
             return BinaryType()
         elif pa.types.is_fixed_size_binary(primitive):
             primitive = cast(pa.FixedSizeBinaryType, primitive)
diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py
index d13822f5ce..9f5aff3f70 100644
--- a/tests/io/test_pyarrow_visitor.py
+++ b/tests/io/test_pyarrow_visitor.py
@@ -225,18 +225,18 @@ def test_pyarrow_timestamp_tz_invalid_tz() -> None:
         visit_pyarrow(pyarrow_type, _ConvertToIceberg())
 
 
-def test_pyarrow_string_to_iceberg() -> None:
-    pyarrow_type = pa.large_string()
+@pytest.mark.parametrize("pyarrow_type", [pa.string(), pa.large_string(), pa.string_view()])
+def test_pyarrow_string_to_iceberg(pyarrow_type: pa.DataType) -> None:
     converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())
     assert converted_iceberg_type == StringType()
-    assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pyarrow_type
+    assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pa.large_string()
 
 
-def test_pyarrow_variable_binary_to_iceberg() -> None:
-    pyarrow_type = pa.large_binary()
+@pytest.mark.parametrize("pyarrow_type", [pa.binary(), pa.large_binary(), pa.binary_view()])
+def test_pyarrow_variable_binary_to_iceberg(pyarrow_type: pa.DataType) -> None:
     converted_iceberg_type = visit_pyarrow(pyarrow_type, _ConvertToIceberg())
     assert converted_iceberg_type == BinaryType()
-    assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pyarrow_type
+    assert visit(converted_iceberg_type, _ConvertToArrowSchema()) == pa.large_binary()
 
 
 def test_pyarrow_struct_to_iceberg() -> None: