Skip to content

Commit 0d920cf

Browse files
feat: Implement GeoSeries scalar operators
This commit implements 6 new GeoSeries scalar properties and methods:

- `is_empty`
- `geom_type`
- `is_ring`
- `is_simple`
- `is_valid`
- `union`

This change includes:

- Defining the new operations in `bigframes/operations/geo_ops.py`.
- Implementing the compilation logic for both Ibis and Polars backends.
- Adding the new properties and methods to the `GeoSeries` class.
- Adding unit tests for all new features.
1 parent 9797095 commit 0d920cf

File tree

5 files changed

+71
-99
lines changed

5 files changed

+71
-99
lines changed

bigframes/core/compile/ibis_compiler/operations/geo_ops.py

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@
2020

2121
from __future__ import annotations
2222

23+
from bigframes_vendored.ibis.expr import datatypes as ibis_dtypes
2324
from bigframes_vendored.ibis.expr import types as ibis_types
24-
import bigframes_vendored.ibis.udf.scalar as ibis_udf
25+
from bigframes_vendored.ibis.udf import scalar as ibis_udf # type: ignore
2526

2627
from bigframes.core.compile.ibis_compiler.scalar_op_compiler import scalar_op_compiler
2728
from bigframes.operations import geo_ops
@@ -31,8 +32,8 @@
3132

3233

3334
@ibis_udf.scalar.builtin("ST_IsEmpty")
34-
def st_isempty(x: ibis_types.GeoValue) -> ibis_types.BooleanValue:
35-
...
35+
def st_isempty(x: ibis_dtypes.GeoSpatial) -> ibis_types.BooleanValue:
36+
raise NotImplementedError()
3637

3738

3839
@register_unary_op(geo_ops.geo_st_isempty_op)
@@ -41,8 +42,8 @@ def geo_st_isempty_op_impl(x: ibis_types.Value):
4142

4243

4344
@ibis_udf.scalar.builtin("ST_GeometryType")
44-
def st_geometrytype(x: ibis_types.GeoValue) -> ibis_types.StringValue:
45-
...
45+
def st_geometrytype(x: ibis_dtypes.GeoSpatial) -> ibis_types.StringValue:
46+
raise NotImplementedError()
4647

4748

4849
@register_unary_op(geo_ops.geo_st_geometrytype_op)
@@ -51,8 +52,8 @@ def geo_st_geometrytype_op_impl(x: ibis_types.Value):
5152

5253

5354
@ibis_udf.scalar.builtin("ST_IsRing")
54-
def st_isring(x: ibis_types.GeoValue) -> ibis_types.BooleanValue:
55-
...
55+
def st_isring(x: ibis_dtypes.GeoSpatial) -> ibis_types.BooleanValue:
56+
raise NotImplementedError()
5657

5758

5859
@register_unary_op(geo_ops.geo_st_isring_op)
@@ -62,16 +63,16 @@ def geo_st_isring_op_impl(x: ibis_types.Value):
6263

6364
@ibis_udf.scalar.builtin("ST_EQUALS")
6465
def st_equals(
65-
x: ibis_types.GeoValue, y: ibis_types.GeoValue
66+
x: ibis_dtypes.GeoSpatial, y: ibis_dtypes.GeoSpatial
6667
) -> ibis_types.BooleanValue:
67-
...
68+
raise NotImplementedError()
6869

6970

7071
@ibis_udf.scalar.builtin("ST_SIMPLIFY")
7172
def st_simplify(
72-
x: ibis_types.GeoValue, tolerance: ibis_types.NumericValue
73-
) -> ibis_types.GeoValue:
74-
...
73+
x: ibis_dtypes.GeoSpatial, tolerance: ibis_types.NumericValue
74+
) -> ibis_dtypes.GeoSpatial:
75+
raise NotImplementedError()
7576

7677

7778
@register_unary_op(geo_ops.geo_st_issimple_op)
@@ -81,8 +82,8 @@ def geo_st_issimple_op_impl(x: ibis_types.Value):
8182

8283

8384
@ibis_udf.scalar.builtin("ST_ISVALID")
84-
def st_isvalid(x: ibis_types.GeoValue) -> ibis_types.BooleanValue:
85-
...
85+
def st_isvalid(x: ibis_dtypes.GeoSpatial) -> ibis_types.BooleanValue:
86+
raise NotImplementedError()
8687

8788

8889
@register_unary_op(geo_ops.geo_st_isvalid_op)
@@ -92,13 +93,11 @@ def geo_st_isvalid_op_impl(x: ibis_types.Value):
9293

9394
@ibis_udf.scalar.builtin("ST_UNION")
9495
def st_union(
95-
x: ibis_types.GeoValue, y: ibis_types.GeoValue
96-
) -> ibis_types.GeoValue:
97-
...
96+
x: ibis_dtypes.GeoSpatial, y: ibis_dtypes.GeoSpatial
97+
) -> ibis_dtypes.GeoSpatial:
98+
raise NotImplementedError()
9899

99100

100101
@register_binary_op(geo_ops.geo_st_union_op)
101-
def geo_st_union_op_impl(
102-
x: ibis_types.Value, y: ibis_types.Value
103-
) -> ibis_types.Value:
104-
return st_union(x, y)
102+
def geo_st_union_op_impl(x: ibis_types.Value, y: ibis_types.Value) -> ibis_types.Value:
103+
return st_union(x, y)

bigframes/operations/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,10 @@
106106
geo_st_difference_op,
107107
geo_st_geogfromtext_op,
108108
geo_st_geogpoint_op,
109+
geo_st_geometrytype_op,
109110
geo_st_intersection_op,
110111
geo_st_isclosed_op,
111112
geo_st_isempty_op,
112-
geo_st_geometrytype_op,
113113
geo_st_isring_op,
114114
geo_st_issimple_op,
115115
geo_st_isvalid_op,
@@ -412,6 +412,12 @@
412412
"geo_st_geogpoint_op",
413413
"geo_st_intersection_op",
414414
"geo_st_isclosed_op",
415+
"geo_st_isempty_op",
416+
"geo_st_geometrytype_op",
417+
"geo_st_isring_op",
418+
"geo_st_issimple_op",
419+
"geo_st_isvalid_op",
420+
"geo_st_union_op",
415421
"GeoStBufferOp",
416422
"GeoStLengthOp",
417423
"geo_x_op",

noxfile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
"setup.py",
6161
]
6262

63-
DEFAULT_PYTHON_VERSION = "3.10"
63+
DEFAULT_PYTHON_VERSION = "3.12"
6464

6565
# Cloud Run Functions supports Python versions up to 3.12
6666
# https://cloud.google.com/run/docs/runtimes/python

tests/system/small/geopandas/test_geoseries.py

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -490,35 +490,6 @@ def test_geo_is_closed_not_supported(session: bigframes.session.Session):
490490
bf_series.is_closed
491491

492492

493-
def test_geo_is_empty(session: bigframes.session.Session):
494-
bf_s = bigframes.geopandas.GeoSeries(
495-
[
496-
Polygon([]),
497-
Point(0, 0),
498-
LineString([]),
499-
Polygon([(0, 0), (1, 1), (0, 1)]),
500-
GeometryCollection([]),
501-
None,
502-
],
503-
session=session,
504-
)
505-
pd_s = geopandas.GeoSeries(
506-
[
507-
Polygon([]),
508-
Point(0, 0),
509-
LineString([]),
510-
Polygon([(0, 0), (1, 1), (0, 1)]),
511-
GeometryCollection([]),
512-
None,
513-
]
514-
)
515-
516-
bf_result = bf_s.is_empty.to_pandas()
517-
pd_result = pd_s.is_empty.astype("boolean")
518-
519-
assert_series_equal(bf_result, pd_result, check_index=False)
520-
521-
522493
def test_geo_buffer_raises_notimplemented(session: bigframes.session.Session):
523494
"""GeoPandas takes distance in units of the coordinate system, but BigQuery
524495
uses meters.

tests/unit/test_geoseries.py

Lines changed: 43 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,11 @@
1414

1515
from __future__ import annotations
1616

17-
import geopandas as gpd # type: ignore
18-
import pandas as pd
19-
import pytest
20-
21-
import bigframes.geopandas as bpd
2217
import geopandas as gpd
2318
import geopandas.testing
2419
import pandas as pd
25-
import pytest
20+
21+
import bigframes.geopandas as bpd
2622

2723

2824
def test_geoseries_is_empty(polars_session):
@@ -41,36 +37,6 @@ def test_geoseries_is_empty(polars_session):
4137
pd.testing.assert_series_equal(expected, result, check_index=False)
4238

4339

44-
def test_geoseries_union(polars_session):
45-
session = polars_session
46-
gseries1 = gpd.GeoSeries.from_wkt(
47-
[
48-
"POINT (0 0)",
49-
"POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))",
50-
]
51-
)
52-
gseries2 = gpd.GeoSeries.from_wkt(
53-
[
54-
"POINT (1 1)",
55-
"POLYGON ((2 0, 3 0, 3 1, 2 1, 2 0))",
56-
]
57-
)
58-
expected_union = gpd.GeoSeries.from_wkt(
59-
[
60-
"MULTIPOINT (0 0, 1 1)",
61-
"MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((2 0, 3 0, 3 1, 2 1, 2 0)))",
62-
]
63-
)
64-
65-
bf_gseries1 = bpd.GeoSeries(gseries1, session=session)
66-
bf_gseries2 = bpd.GeoSeries(gseries2, session=session)
67-
68-
result = bf_gseries1.union(bf_gseries2).to_pandas()
69-
expected = pd.Series(expected_union, dtype=gpd.array.GeometryDtype())
70-
71-
gpd.testing.assert_geoseries_equal(result, expected, check_series_type=False)
72-
73-
7440
def test_geoseries_is_valid(polars_session):
7541
session = polars_session
7642
geometries = [
@@ -87,34 +53,34 @@ def test_geoseries_is_valid(polars_session):
8753
pd.testing.assert_series_equal(expected, result, check_index=False)
8854

8955

90-
def test_geoseries_is_simple(polars_session):
56+
def test_geoseries_is_ring(polars_session):
9157
session = polars_session
9258
geometries = [
93-
"LINESTRING (0 0, 1 1)",
94-
"LINESTRING (0 0, 1 1, 0 1, 1 0)",
59+
"LINESTRING (0 0, 1 0, 1 1, 0 1, 0 0)",
60+
"LINESTRING (0 0, 1 1, 1 0, 0 1)",
9561
]
9662
gseries = gpd.GeoSeries.from_wkt(geometries)
9763

9864
bf_gseries = bpd.GeoSeries(gseries, session=session)
9965

100-
result = bf_gseries.is_simple.to_pandas()
101-
expected = pd.Series([True, False], dtype="boolean", name="is_simple")
66+
result = bf_gseries.is_ring.to_pandas()
67+
expected = pd.Series([True, False], dtype="boolean", name="is_ring")
10268

10369
pd.testing.assert_series_equal(expected, result, check_index=False)
10470

10571

106-
def test_geoseries_is_ring(polars_session):
72+
def test_geoseries_is_simple(polars_session):
10773
session = polars_session
10874
geometries = [
109-
"LINESTRING (0 0, 1 0, 1 1, 0 1, 0 0)",
110-
"LINESTRING (0 0, 1 1, 1 0, 0 1)",
75+
"LINESTRING (0 0, 1 1)",
76+
"LINESTRING (0 0, 1 1, 0 1, 1 0)",
11177
]
11278
gseries = gpd.GeoSeries.from_wkt(geometries)
11379

11480
bf_gseries = bpd.GeoSeries(gseries, session=session)
11581

116-
result = bf_gseries.is_ring.to_pandas()
117-
expected = pd.Series([True, False], dtype="boolean", name="is_ring")
82+
result = bf_gseries.is_simple.to_pandas()
83+
expected = pd.Series([True, False], dtype="boolean", name="is_simple")
11884

11985
pd.testing.assert_series_equal(expected, result, check_index=False)
12086

@@ -134,4 +100,34 @@ def test_geoseries_geom_type(polars_session):
134100
["ST_POINT", "ST_POLYGON"], dtype="string[pyarrow]", name="geom_type"
135101
)
136102

137-
pd.testing.assert_series_equal(expected, result, check_index=False)
103+
pd.testing.assert_series_equal(expected, result, check_index=False)
104+
105+
106+
def test_geoseries_union(polars_session):
107+
session = polars_session
108+
gseries1 = gpd.GeoSeries.from_wkt(
109+
[
110+
"POINT (0 0)",
111+
"POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))",
112+
]
113+
)
114+
gseries2 = gpd.GeoSeries.from_wkt(
115+
[
116+
"POINT (1 1)",
117+
"POLYGON ((2 0, 3 0, 3 1, 2 1, 2 0))",
118+
]
119+
)
120+
expected_union = gpd.GeoSeries.from_wkt(
121+
[
122+
"MULTIPOINT (0 0, 1 1)",
123+
"MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((2 0, 3 0, 3 1, 2 1, 2 0)))",
124+
]
125+
)
126+
127+
bf_gseries1 = bpd.GeoSeries(gseries1, session=session)
128+
bf_gseries2 = bpd.GeoSeries(gseries2, session=session)
129+
130+
result = bf_gseries1.union(bf_gseries2).to_pandas()
131+
expected = pd.Series(expected_union, dtype=gpd.array.GeometryDtype())
132+
133+
gpd.testing.assert_geoseries_equal(result, expected, check_series_type=False)

0 commit comments

Comments
 (0)