From c21d45881385a5db5c05607974e3f1344a348478 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Wed, 17 Sep 2025 18:29:56 +0000 Subject: [PATCH 1/3] refactor: reorganize the sqlglot scalar compiler layout - part 2 --- bigframes/core/compile/sqlglot/__init__.py | 13 +- .../compile/sqlglot/expressions/array_ops.py | 68 ++ .../compile/sqlglot/expressions/blob_ops.py | 33 + .../sqlglot/expressions/comparison_ops.py | 59 ++ .../compile/sqlglot/expressions/date_ops.py | 61 ++ .../sqlglot/expressions/datetime_ops.py | 99 ++ .../sqlglot/expressions/generic_ops.py | 55 ++ .../compile/sqlglot/expressions/geo_ops.py | 84 ++ .../compile/sqlglot/expressions/json_ops.py | 68 ++ .../sqlglot/expressions/numeric_ops.py | 240 +++++ .../compile/sqlglot/expressions/string_ops.py | 304 ++++++ .../compile/sqlglot/expressions/struct_ops.py | 42 + .../sqlglot/expressions/timedelta_ops.py | 38 + .../sqlglot/expressions/unary_compiler.py | 892 ------------------ 14 files changed, 1163 insertions(+), 893 deletions(-) create mode 100644 bigframes/core/compile/sqlglot/expressions/array_ops.py create mode 100644 bigframes/core/compile/sqlglot/expressions/blob_ops.py create mode 100644 bigframes/core/compile/sqlglot/expressions/comparison_ops.py create mode 100644 bigframes/core/compile/sqlglot/expressions/date_ops.py create mode 100644 bigframes/core/compile/sqlglot/expressions/datetime_ops.py create mode 100644 bigframes/core/compile/sqlglot/expressions/generic_ops.py create mode 100644 bigframes/core/compile/sqlglot/expressions/geo_ops.py create mode 100644 bigframes/core/compile/sqlglot/expressions/json_ops.py create mode 100644 bigframes/core/compile/sqlglot/expressions/numeric_ops.py create mode 100644 bigframes/core/compile/sqlglot/expressions/string_ops.py create mode 100644 bigframes/core/compile/sqlglot/expressions/struct_ops.py create mode 100644 bigframes/core/compile/sqlglot/expressions/timedelta_ops.py delete mode 100644 bigframes/core/compile/sqlglot/expressions/unary_compiler.py 
diff --git a/bigframes/core/compile/sqlglot/__init__.py b/bigframes/core/compile/sqlglot/__init__.py index 8a1172b704..5fe8099043 100644 --- a/bigframes/core/compile/sqlglot/__init__.py +++ b/bigframes/core/compile/sqlglot/__init__.py @@ -14,7 +14,18 @@ from __future__ import annotations from bigframes.core.compile.sqlglot.compiler import SQLGlotCompiler +import bigframes.core.compile.sqlglot.expressions.array_ops # noqa: F401 import bigframes.core.compile.sqlglot.expressions.binary_compiler # noqa: F401 -import bigframes.core.compile.sqlglot.expressions.unary_compiler # noqa: F401 +import bigframes.core.compile.sqlglot.expressions.blob_ops # noqa: F401 +import bigframes.core.compile.sqlglot.expressions.comparison_ops # noqa: F401 +import bigframes.core.compile.sqlglot.expressions.date_ops # noqa: F401 +import bigframes.core.compile.sqlglot.expressions.datetime_ops # noqa: F401 +import bigframes.core.compile.sqlglot.expressions.generic_ops # noqa: F401 +import bigframes.core.compile.sqlglot.expressions.geo_ops # noqa: F401 +import bigframes.core.compile.sqlglot.expressions.json_ops # noqa: F401 +import bigframes.core.compile.sqlglot.expressions.numeric_ops # noqa: F401 +import bigframes.core.compile.sqlglot.expressions.string_ops # noqa: F401 +import bigframes.core.compile.sqlglot.expressions.struct_ops # noqa: F401 +import bigframes.core.compile.sqlglot.expressions.timedelta_ops # noqa: F401 __all__ = ["SQLGlotCompiler"] diff --git a/bigframes/core/compile/sqlglot/expressions/array_ops.py b/bigframes/core/compile/sqlglot/expressions/array_ops.py new file mode 100644 index 0000000000..57ff2ee459 --- /dev/null +++ b/bigframes/core/compile/sqlglot/expressions/array_ops.py @@ -0,0 +1,68 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import typing + +import sqlglot +import sqlglot.expressions as sge + +from bigframes import operations as ops +from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr +import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler + +register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op + + +@register_unary_op(ops.ArrayToStringOp, pass_op=True) +def _(expr: TypedExpr, op: ops.ArrayToStringOp) -> sge.Expression: + return sge.ArrayToString(this=expr.expr, expression=f"'{op.delimiter}'") + + +@register_unary_op(ops.ArrayIndexOp, pass_op=True) +def _(expr: TypedExpr, op: ops.ArrayIndexOp) -> sge.Expression: + return sge.Bracket( + this=expr.expr, + expressions=[sge.Literal.number(op.index)], + safe=True, + offset=False, + ) + + +@register_unary_op(ops.ArraySliceOp, pass_op=True) +def _(expr: TypedExpr, op: ops.ArraySliceOp) -> sge.Expression: + slice_idx = sqlglot.to_identifier("slice_idx") + + conditions: typing.List[sge.Predicate] = [slice_idx >= op.start] + + if op.stop is not None: + conditions.append(slice_idx < op.stop) + + # local name for each element in the array + el = sqlglot.to_identifier("el") + + selected_elements = ( + sge.select(el) + .from_( + sge.Unnest( + expressions=[expr.expr], + alias=sge.TableAlias(columns=[el]), + offset=slice_idx, + ) + ) + .where(*conditions) + ) + + return sge.array(selected_elements) diff --git a/bigframes/core/compile/sqlglot/expressions/blob_ops.py b/bigframes/core/compile/sqlglot/expressions/blob_ops.py new file mode 100644 
index 0000000000..58f905087d --- /dev/null +++ b/bigframes/core/compile/sqlglot/expressions/blob_ops.py @@ -0,0 +1,33 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import sqlglot.expressions as sge + +from bigframes import operations as ops +from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr +import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler + +register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op + + +@register_unary_op(ops.obj_fetch_metadata_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("OBJ.FETCH_METADATA", expr.expr) + + +@register_unary_op(ops.ObjGetAccessUrl) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("OBJ.GET_ACCESS_URL", expr.expr) diff --git a/bigframes/core/compile/sqlglot/expressions/comparison_ops.py b/bigframes/core/compile/sqlglot/expressions/comparison_ops.py new file mode 100644 index 0000000000..3bf94cf8ab --- /dev/null +++ b/bigframes/core/compile/sqlglot/expressions/comparison_ops.py @@ -0,0 +1,59 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import typing + +import pandas as pd +import sqlglot.expressions as sge + +from bigframes import operations as ops +from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr +import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler +import bigframes.dtypes as dtypes + +register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op + + +@register_unary_op(ops.IsInOp, pass_op=True) +def _(expr: TypedExpr, op: ops.IsInOp) -> sge.Expression: + values = [] + is_numeric_expr = dtypes.is_numeric(expr.dtype) + for value in op.values: + if value is None: + continue + dtype = dtypes.bigframes_type(type(value)) + if expr.dtype == dtype or is_numeric_expr and dtypes.is_numeric(dtype): + values.append(sge.convert(value)) + + if op.match_nulls: + contains_nulls = any(_is_null(value) for value in op.values) + if contains_nulls: + return sge.Is(this=expr.expr, expression=sge.Null()) | sge.In( + this=expr.expr, expressions=values + ) + + if len(values) == 0: + return sge.convert(False) + + return sge.func( + "COALESCE", sge.In(this=expr.expr, expressions=values), sge.convert(False) + ) + + +# Helpers +def _is_null(value) -> bool: + # float NaN/inf should be treated as distinct from 'true' null values + return typing.cast(bool, pd.isna(value)) and not isinstance(value, float) diff --git a/bigframes/core/compile/sqlglot/expressions/date_ops.py b/bigframes/core/compile/sqlglot/expressions/date_ops.py new file mode 100644 index 0000000000..f5922ecc8d --- /dev/null +++ 
b/bigframes/core/compile/sqlglot/expressions/date_ops.py @@ -0,0 +1,61 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import sqlglot.expressions as sge + +from bigframes import operations as ops +from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr +import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler + +register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op + + +@register_unary_op(ops.date_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Date(this=expr.expr) + + +@register_unary_op(ops.day_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Extract(this=sge.Identifier(this="DAY"), expression=expr.expr) + + +@register_unary_op(ops.dayofweek_op) +def _(expr: TypedExpr) -> sge.Expression: + # Adjust the 1-based day-of-week index (from SQL) to a 0-based index. 
+ return sge.Extract( + this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr + ) - sge.convert(1) + + +@register_unary_op(ops.dayofyear_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Extract(this=sge.Identifier(this="DAYOFYEAR"), expression=expr.expr) + + +@register_unary_op(ops.iso_day_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Extract(this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr) + + +@register_unary_op(ops.iso_week_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Extract(this=sge.Identifier(this="ISOWEEK"), expression=expr.expr) + + +@register_unary_op(ops.iso_year_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Extract(this=sge.Identifier(this="ISOYEAR"), expression=expr.expr) diff --git a/bigframes/core/compile/sqlglot/expressions/datetime_ops.py b/bigframes/core/compile/sqlglot/expressions/datetime_ops.py new file mode 100644 index 0000000000..77f4233e1c --- /dev/null +++ b/bigframes/core/compile/sqlglot/expressions/datetime_ops.py @@ -0,0 +1,99 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import sqlglot.expressions as sge + +from bigframes import operations as ops +from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr +import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler + +register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op + + +@register_unary_op(ops.FloorDtOp, pass_op=True) +def _(expr: TypedExpr, op: ops.FloorDtOp) -> sge.Expression: + # TODO: Remove this method when it is covered by ops.FloorOp + return sge.TimestampTrunc(this=expr.expr, unit=sge.Identifier(this=op.freq)) + + +@register_unary_op(ops.hour_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Extract(this=sge.Identifier(this="HOUR"), expression=expr.expr) + + +@register_unary_op(ops.minute_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Extract(this=sge.Identifier(this="MINUTE"), expression=expr.expr) + + +@register_unary_op(ops.month_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Extract(this=sge.Identifier(this="MONTH"), expression=expr.expr) + + +@register_unary_op(ops.normalize_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.TimestampTrunc(this=expr.expr, unit=sge.Identifier(this="DAY")) + + +@register_unary_op(ops.quarter_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Extract(this=sge.Identifier(this="QUARTER"), expression=expr.expr) + + +@register_unary_op(ops.second_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Extract(this=sge.Identifier(this="SECOND"), expression=expr.expr) + + +@register_unary_op(ops.StrftimeOp, pass_op=True) +def _(expr: TypedExpr, op: ops.StrftimeOp) -> sge.Expression: + return sge.func("FORMAT_TIMESTAMP", sge.convert(op.date_format), expr.expr) + + +@register_unary_op(ops.time_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("TIME", expr.expr) + + +@register_unary_op(ops.ToDatetimeOp) +def _(expr: TypedExpr) -> sge.Expression: + return 
sge.Cast(this=sge.func("TIMESTAMP_SECONDS", expr.expr), to="DATETIME") + + +@register_unary_op(ops.ToTimestampOp) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("TIMESTAMP_SECONDS", expr.expr) + + +@register_unary_op(ops.UnixMicros) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("UNIX_MICROS", expr.expr) + + +@register_unary_op(ops.UnixMillis) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("UNIX_MILLIS", expr.expr) + + +@register_unary_op(ops.UnixSeconds, pass_op=True) +def _(expr: TypedExpr, op: ops.UnixSeconds) -> sge.Expression: + return sge.func("UNIX_SECONDS", expr.expr) + + +@register_unary_op(ops.year_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Extract(this=sge.Identifier(this="YEAR"), expression=expr.expr) diff --git a/bigframes/core/compile/sqlglot/expressions/generic_ops.py b/bigframes/core/compile/sqlglot/expressions/generic_ops.py new file mode 100644 index 0000000000..5ee4ede94a --- /dev/null +++ b/bigframes/core/compile/sqlglot/expressions/generic_ops.py @@ -0,0 +1,55 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import sqlglot.expressions as sge + +from bigframes import operations as ops +from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr +import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler + +register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op + + +@register_unary_op(ops.AsTypeOp, pass_op=True) +def _(expr: TypedExpr, op: ops.AsTypeOp) -> sge.Expression: + # TODO: Support more types for casting, such as JSON, etc. + return sge.Cast(this=expr.expr, to=op.to_type) + + +@register_unary_op(ops.hash_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("FARM_FINGERPRINT", expr.expr) + + +@register_unary_op(ops.isnull_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Is(this=expr.expr, expression=sge.Null()) + + +@register_unary_op(ops.MapOp, pass_op=True) +def _(expr: TypedExpr, op: ops.MapOp) -> sge.Expression: + return sge.Case( + this=expr.expr, + ifs=[ + sge.If(this=sge.convert(key), true=sge.convert(value)) + for key, value in op.mappings + ], + ) + + +@register_unary_op(ops.notnull_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Not(this=sge.Is(this=expr.expr, expression=sge.Null())) diff --git a/bigframes/core/compile/sqlglot/expressions/geo_ops.py b/bigframes/core/compile/sqlglot/expressions/geo_ops.py new file mode 100644 index 0000000000..53a50fab47 --- /dev/null +++ b/bigframes/core/compile/sqlglot/expressions/geo_ops.py @@ -0,0 +1,84 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import sqlglot.expressions as sge + +from bigframes import operations as ops +from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr +import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler + +register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op + + +@register_unary_op(ops.geo_area_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("ST_AREA", expr.expr) + + +@register_unary_op(ops.geo_st_astext_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("ST_ASTEXT", expr.expr) + + +@register_unary_op(ops.geo_st_boundary_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("ST_BOUNDARY", expr.expr) + + +@register_unary_op(ops.GeoStBufferOp, pass_op=True) +def _(expr: TypedExpr, op: ops.GeoStBufferOp) -> sge.Expression: + return sge.func( + "ST_BUFFER", + expr.expr, + sge.convert(op.buffer_radius), + sge.convert(op.num_seg_quarter_circle), + sge.convert(op.use_spheroid), + ) + + +@register_unary_op(ops.geo_st_centroid_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("ST_CENTROID", expr.expr) + + +@register_unary_op(ops.geo_st_convexhull_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("ST_CONVEXHULL", expr.expr) + + +@register_unary_op(ops.geo_st_geogfromtext_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("SAFE.ST_GEOGFROMTEXT", expr.expr) + + +@register_unary_op(ops.geo_st_isclosed_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("ST_ISCLOSED", expr.expr) + + +@register_unary_op(ops.GeoStLengthOp, pass_op=True) +def _(expr: TypedExpr, op: ops.GeoStLengthOp) -> sge.Expression: + return sge.func("ST_LENGTH", expr.expr) + + +@register_unary_op(ops.geo_x_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("SAFE.ST_X", expr.expr) + + 
+@register_unary_op(ops.geo_y_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("SAFE.ST_Y", expr.expr) diff --git a/bigframes/core/compile/sqlglot/expressions/json_ops.py b/bigframes/core/compile/sqlglot/expressions/json_ops.py new file mode 100644 index 0000000000..754e8d80eb --- /dev/null +++ b/bigframes/core/compile/sqlglot/expressions/json_ops.py @@ -0,0 +1,68 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import sqlglot.expressions as sge + +from bigframes import operations as ops +from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr +import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler + +register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op + + +@register_unary_op(ops.JSONExtract, pass_op=True) +def _(expr: TypedExpr, op: ops.JSONExtract) -> sge.Expression: + return sge.func("JSON_EXTRACT", expr.expr, sge.convert(op.json_path)) + + +@register_unary_op(ops.JSONExtractArray, pass_op=True) +def _(expr: TypedExpr, op: ops.JSONExtractArray) -> sge.Expression: + return sge.func("JSON_EXTRACT_ARRAY", expr.expr, sge.convert(op.json_path)) + + +@register_unary_op(ops.JSONExtractStringArray, pass_op=True) +def _(expr: TypedExpr, op: ops.JSONExtractStringArray) -> sge.Expression: + return sge.func("JSON_EXTRACT_STRING_ARRAY", expr.expr, sge.convert(op.json_path)) + + +@register_unary_op(ops.JSONQuery, pass_op=True) +def 
_(expr: TypedExpr, op: ops.JSONQuery) -> sge.Expression: + return sge.func("JSON_QUERY", expr.expr, sge.convert(op.json_path)) + + +@register_unary_op(ops.JSONQueryArray, pass_op=True) +def _(expr: TypedExpr, op: ops.JSONQueryArray) -> sge.Expression: + return sge.func("JSON_QUERY_ARRAY", expr.expr, sge.convert(op.json_path)) + + +@register_unary_op(ops.JSONValue, pass_op=True) +def _(expr: TypedExpr, op: ops.JSONValue) -> sge.Expression: + return sge.func("JSON_VALUE", expr.expr, sge.convert(op.json_path)) + + +@register_unary_op(ops.JSONValueArray, pass_op=True) +def _(expr: TypedExpr, op: ops.JSONValueArray) -> sge.Expression: + return sge.func("JSON_VALUE_ARRAY", expr.expr, sge.convert(op.json_path)) + + +@register_unary_op(ops.ParseJSON) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("PARSE_JSON", expr.expr) + + +@register_unary_op(ops.ToJSONString) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("TO_JSON_STRING", expr.expr) diff --git a/bigframes/core/compile/sqlglot/expressions/numeric_ops.py b/bigframes/core/compile/sqlglot/expressions/numeric_ops.py new file mode 100644 index 0000000000..09c08e2095 --- /dev/null +++ b/bigframes/core/compile/sqlglot/expressions/numeric_ops.py @@ -0,0 +1,240 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import sqlglot.expressions as sge + +from bigframes import operations as ops +import bigframes.core.compile.sqlglot.expressions.constants as constants +from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr +import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler + +register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op + + +@register_unary_op(ops.abs_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Abs(this=expr.expr) + + +@register_unary_op(ops.arccosh_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Case( + ifs=[ + sge.If( + this=expr.expr < sge.convert(1), + true=constants._NAN, + ) + ], + default=sge.func("ACOSH", expr.expr), + ) + + +@register_unary_op(ops.arccos_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Case( + ifs=[ + sge.If( + this=sge.func("ABS", expr.expr) > sge.convert(1), + true=constants._NAN, + ) + ], + default=sge.func("ACOS", expr.expr), + ) + + +@register_unary_op(ops.arcsin_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Case( + ifs=[ + sge.If( + this=sge.func("ABS", expr.expr) > sge.convert(1), + true=constants._NAN, + ) + ], + default=sge.func("ASIN", expr.expr), + ) + + +@register_unary_op(ops.arcsinh_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("ASINH", expr.expr) + + +@register_unary_op(ops.arctan_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("ATAN", expr.expr) + + +@register_unary_op(ops.arctanh_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Case( + ifs=[ + sge.If( + this=sge.func("ABS", expr.expr) > sge.convert(1), + true=constants._NAN, + ) + ], + default=sge.func("ATANH", expr.expr), + ) + + +@register_unary_op(ops.ceil_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Ceil(this=expr.expr) + + +@register_unary_op(ops.cos_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("COS", expr.expr) + + 
+@register_unary_op(ops.cosh_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Case( + ifs=[ + sge.If( + this=sge.func("ABS", expr.expr) > sge.convert(709.78), + true=constants._INF, + ) + ], + default=sge.func("COSH", expr.expr), + ) + + +@register_unary_op(ops.exp_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Case( + ifs=[ + sge.If( + this=expr.expr > constants._FLOAT64_EXP_BOUND, + true=constants._INF, + ) + ], + default=sge.func("EXP", expr.expr), + ) + + +@register_unary_op(ops.expm1_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Case( + ifs=[ + sge.If( + this=expr.expr > constants._FLOAT64_EXP_BOUND, + true=constants._INF, + ) + ], + default=sge.func("EXP", expr.expr), + ) - sge.convert(1) + + +@register_unary_op(ops.floor_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Floor(this=expr.expr) + + +@register_unary_op(ops.invert_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.BitwiseNot(this=expr.expr) + + +@register_unary_op(ops.ln_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Case( + ifs=[ + sge.If( + this=expr.expr < sge.convert(0), + true=constants._NAN, + ) + ], + default=sge.Ln(this=expr.expr), + ) + + +@register_unary_op(ops.log10_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Case( + ifs=[ + sge.If( + this=expr.expr < sge.convert(0), + true=constants._NAN, + ) + ], + default=sge.Log(this=expr.expr, expression=sge.convert(10)), + ) + + +@register_unary_op(ops.log1p_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Case( + ifs=[ + sge.If( + this=expr.expr < sge.convert(-1), + true=constants._NAN, + ) + ], + default=sge.Ln(this=sge.convert(1) + expr.expr), + ) + + +@register_unary_op(ops.neg_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Neg(this=expr.expr) + + +@register_unary_op(ops.pos_op) +def _(expr: TypedExpr) -> sge.Expression: + return expr.expr + + +@register_unary_op(ops.sqrt_op) +def _(expr: TypedExpr) -> sge.Expression: + return 
sge.Case( + ifs=[ + sge.If( + this=expr.expr < sge.convert(0), + true=constants._NAN, + ) + ], + default=sge.Sqrt(this=expr.expr), + ) + + +@register_unary_op(ops.sin_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("SIN", expr.expr) + + +@register_unary_op(ops.sinh_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Case( + ifs=[ + sge.If( + this=sge.func("ABS", expr.expr) > constants._FLOAT64_EXP_BOUND, + true=sge.func("SIGN", expr.expr) * constants._INF, + ) + ], + default=sge.func("SINH", expr.expr), + ) + + +@register_unary_op(ops.tan_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("TAN", expr.expr) + + +@register_unary_op(ops.tanh_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("TANH", expr.expr) diff --git a/bigframes/core/compile/sqlglot/expressions/string_ops.py b/bigframes/core/compile/sqlglot/expressions/string_ops.py new file mode 100644 index 0000000000..403cf403f5 --- /dev/null +++ b/bigframes/core/compile/sqlglot/expressions/string_ops.py @@ -0,0 +1,304 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import functools + +import sqlglot.expressions as sge + +from bigframes import operations as ops +from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr +import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler + +register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op + + +@register_unary_op(ops.capitalize_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Initcap(this=expr.expr) + + +@register_unary_op(ops.StrContainsOp, pass_op=True) +def _(expr: TypedExpr, op: ops.StrContainsOp) -> sge.Expression: + return sge.Like(this=expr.expr, expression=sge.convert(f"%{op.pat}%")) + + +@register_unary_op(ops.StrContainsRegexOp, pass_op=True) +def _(expr: TypedExpr, op: ops.StrContainsRegexOp) -> sge.Expression: + return sge.RegexpLike(this=expr.expr, expression=sge.convert(op.pat)) + + +@register_unary_op(ops.StrExtractOp, pass_op=True) +def _(expr: TypedExpr, op: ops.StrExtractOp) -> sge.Expression: + return sge.RegexpExtract( + this=expr.expr, expression=sge.convert(op.pat), group=sge.convert(op.n) + ) + + +@register_unary_op(ops.StrFindOp, pass_op=True) +def _(expr: TypedExpr, op: ops.StrFindOp) -> sge.Expression: + # INSTR is 1-based, so we need to adjust the start position. + start = sge.convert(op.start + 1) if op.start is not None else sge.convert(1) + if op.end is not None: + # BigQuery's INSTR doesn't support `end`, so we need to use SUBSTR. 
+ return sge.func( + "INSTR", + sge.Substring( + this=expr.expr, + start=start, + length=sge.convert(op.end - (op.start or 0)), + ), + sge.convert(op.substr), + ) - sge.convert(1) + else: + return sge.func( + "INSTR", + expr.expr, + sge.convert(op.substr), + start, + ) - sge.convert(1) + + +@register_unary_op(ops.StrLstripOp, pass_op=True) +def _(expr: TypedExpr, op: ops.StrLstripOp) -> sge.Expression: + return sge.Trim(this=expr.expr, expression=sge.convert(op.to_strip), side="LEFT") + + +@register_unary_op(ops.StrPadOp, pass_op=True) +def _(expr: TypedExpr, op: ops.StrPadOp) -> sge.Expression: + pad_length = sge.func( + "GREATEST", sge.Length(this=expr.expr), sge.convert(op.length) + ) + if op.side == "left": + return sge.func( + "LPAD", + expr.expr, + pad_length, + sge.convert(op.fillchar), + ) + elif op.side == "right": + return sge.func( + "RPAD", + expr.expr, + pad_length, + sge.convert(op.fillchar), + ) + else: # side == both + lpad_amount = sge.Cast( + this=sge.func( + "SAFE_DIVIDE", + sge.Sub(this=pad_length, expression=sge.Length(this=expr.expr)), + sge.convert(2), + ), + to="INT64", + ) + sge.Length(this=expr.expr) + return sge.func( + "RPAD", + sge.func( + "LPAD", + expr.expr, + lpad_amount, + sge.convert(op.fillchar), + ), + pad_length, + sge.convert(op.fillchar), + ) + + +@register_unary_op(ops.StrRepeatOp, pass_op=True) +def _(expr: TypedExpr, op: ops.StrRepeatOp) -> sge.Expression: + return sge.Repeat(this=expr.expr, times=sge.convert(op.repeats)) + + +@register_unary_op(ops.EndsWithOp, pass_op=True) +def _(expr: TypedExpr, op: ops.EndsWithOp) -> sge.Expression: + if not op.pat: + return sge.false() + + def to_endswith(pat: str) -> sge.Expression: + return sge.func("ENDS_WITH", expr.expr, sge.convert(pat)) + + conditions = [to_endswith(pat) for pat in op.pat] + return functools.reduce(lambda x, y: sge.Or(this=x, expression=y), conditions) + + +@register_unary_op(ops.isalnum_op) +def _(expr: TypedExpr) -> sge.Expression: + return 
sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^(\p{N}|\p{L})+$")) + + +@register_unary_op(ops.isalpha_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\p{L}+$")) + + +@register_unary_op(ops.isdecimal_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\d+$")) + + +@register_unary_op(ops.isdigit_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\p{Nd}+$")) + + +@register_unary_op(ops.islower_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.And( + this=sge.EQ( + this=sge.Lower(this=expr.expr), + expression=expr.expr, + ), + expression=sge.NEQ( + this=sge.Upper(this=expr.expr), + expression=expr.expr, + ), + ) + + +@register_unary_op(ops.isnumeric_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\pN+$")) + + +@register_unary_op(ops.isspace_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\s+$")) + + +@register_unary_op(ops.isupper_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.And( + this=sge.EQ( + this=sge.Upper(this=expr.expr), + expression=expr.expr, + ), + expression=sge.NEQ( + this=sge.Lower(this=expr.expr), + expression=expr.expr, + ), + ) + + +@register_unary_op(ops.len_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Length(this=expr.expr) + + +@register_unary_op(ops.lower_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Lower(this=expr.expr) + + +@register_unary_op(ops.ReplaceStrOp, pass_op=True) +def _(expr: TypedExpr, op: ops.ReplaceStrOp) -> sge.Expression: + return sge.func("REPLACE", expr.expr, sge.convert(op.pat), sge.convert(op.repl)) + + +@register_unary_op(ops.RegexReplaceStrOp, pass_op=True) +def _(expr: TypedExpr, op: ops.RegexReplaceStrOp) -> sge.Expression: + return sge.func( + 
"REGEXP_REPLACE", expr.expr, sge.convert(op.pat), sge.convert(op.repl) + ) + + +@register_unary_op(ops.reverse_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.func("REVERSE", expr.expr) + + +@register_unary_op(ops.StrRstripOp, pass_op=True) +def _(expr: TypedExpr, op: ops.StrRstripOp) -> sge.Expression: + return sge.Trim(this=expr.expr, expression=sge.convert(op.to_strip), side="RIGHT") + + +@register_unary_op(ops.StartsWithOp, pass_op=True) +def _(expr: TypedExpr, op: ops.StartsWithOp) -> sge.Expression: + if not op.pat: + return sge.false() + + def to_startswith(pat: str) -> sge.Expression: + return sge.func("STARTS_WITH", expr.expr, sge.convert(pat)) + + conditions = [to_startswith(pat) for pat in op.pat] + return functools.reduce(lambda x, y: sge.Or(this=x, expression=y), conditions) + + +@register_unary_op(ops.StrStripOp, pass_op=True) +def _(expr: TypedExpr, op: ops.StrStripOp) -> sge.Expression: + return sge.Trim(this=sge.convert(op.to_strip), expression=expr.expr) + + +@register_unary_op(ops.StringSplitOp, pass_op=True) +def _(expr: TypedExpr, op: ops.StringSplitOp) -> sge.Expression: + return sge.Split(this=expr.expr, expression=sge.convert(op.pat)) + + +@register_unary_op(ops.StrGetOp, pass_op=True) +def _(expr: TypedExpr, op: ops.StrGetOp) -> sge.Expression: + return sge.Substring( + this=expr.expr, + start=sge.convert(op.i + 1), + length=sge.convert(1), + ) + + +@register_unary_op(ops.StrSliceOp, pass_op=True) +def _(expr: TypedExpr, op: ops.StrSliceOp) -> sge.Expression: + start = op.start + 1 if op.start is not None else None + if op.end is None: + length = None + elif op.start is None: + length = op.end + else: + length = op.end - op.start + return sge.Substring( + this=expr.expr, + start=sge.convert(start) if start is not None else None, + length=sge.convert(length) if length is not None else None, + ) + + +@register_unary_op(ops.upper_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Upper(this=expr.expr) + + 
+@register_unary_op(ops.ZfillOp, pass_op=True) +def _(expr: TypedExpr, op: ops.ZfillOp) -> sge.Expression: + return sge.Case( + ifs=[ + sge.If( + this=sge.EQ( + this=sge.Substring( + this=expr.expr, start=sge.convert(1), length=sge.convert(1) + ), + expression=sge.convert("-"), + ), + true=sge.Concat( + expressions=[ + sge.convert("-"), + sge.func( + "LPAD", + sge.Substring(this=expr.expr, start=sge.convert(1)), + sge.convert(op.width - 1), + sge.convert("0"), + ), + ] + ), + ) + ], + default=sge.func("LPAD", expr.expr, sge.convert(op.width), sge.convert("0")), + ) diff --git a/bigframes/core/compile/sqlglot/expressions/struct_ops.py b/bigframes/core/compile/sqlglot/expressions/struct_ops.py new file mode 100644 index 0000000000..ebd3a38397 --- /dev/null +++ b/bigframes/core/compile/sqlglot/expressions/struct_ops.py @@ -0,0 +1,42 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import typing + +import pandas as pd +import pyarrow as pa +import sqlglot.expressions as sge + +from bigframes import operations as ops +from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr +import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler + +register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op + + +@register_unary_op(ops.StructFieldOp, pass_op=True) +def _(expr: TypedExpr, op: ops.StructFieldOp) -> sge.Expression: + if isinstance(op.name_or_index, str): + name = op.name_or_index + else: + pa_type = typing.cast(pd.ArrowDtype, expr.dtype) + pa_struct_type = typing.cast(pa.StructType, pa_type.pyarrow_dtype) + name = pa_struct_type.field(op.name_or_index).name + + return sge.Column( + this=sge.to_identifier(name, quoted=True), + catalog=expr.expr, + ) diff --git a/bigframes/core/compile/sqlglot/expressions/timedelta_ops.py b/bigframes/core/compile/sqlglot/expressions/timedelta_ops.py new file mode 100644 index 0000000000..667c828b13 --- /dev/null +++ b/bigframes/core/compile/sqlglot/expressions/timedelta_ops.py @@ -0,0 +1,38 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import sqlglot.expressions as sge + +from bigframes import operations as ops +from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS +from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr +import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler + +register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op + + +@register_unary_op(ops.timedelta_floor_op) +def _(expr: TypedExpr) -> sge.Expression: + return sge.Floor(this=expr.expr) + + +@register_unary_op(ops.ToTimedeltaOp, pass_op=True) +def _(expr: TypedExpr, op: ops.ToTimedeltaOp) -> sge.Expression: + value = expr.expr + factor = UNIT_TO_US_CONVERSION_FACTORS[op.unit] + if factor != 1: + value = sge.Mul(this=value, expression=sge.convert(factor)) + return value diff --git a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py b/bigframes/core/compile/sqlglot/expressions/unary_compiler.py deleted file mode 100644 index d93b1e681c..0000000000 --- a/bigframes/core/compile/sqlglot/expressions/unary_compiler.py +++ /dev/null @@ -1,892 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import annotations - -import functools -import typing - -import pandas as pd -import pyarrow as pa -import sqlglot -import sqlglot.expressions as sge - -from bigframes import operations as ops -from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS -import bigframes.core.compile.sqlglot.expressions.constants as constants -from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr -import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler -import bigframes.dtypes as dtypes - -register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op - - -@register_unary_op(ops.abs_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Abs(this=expr.expr) - - -@register_unary_op(ops.arccosh_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Case( - ifs=[ - sge.If( - this=expr.expr < sge.convert(1), - true=constants._NAN, - ) - ], - default=sge.func("ACOSH", expr.expr), - ) - - -@register_unary_op(ops.arccos_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Case( - ifs=[ - sge.If( - this=sge.func("ABS", expr.expr) > sge.convert(1), - true=constants._NAN, - ) - ], - default=sge.func("ACOS", expr.expr), - ) - - -@register_unary_op(ops.arcsin_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Case( - ifs=[ - sge.If( - this=sge.func("ABS", expr.expr) > sge.convert(1), - true=constants._NAN, - ) - ], - default=sge.func("ASIN", expr.expr), - ) - - -@register_unary_op(ops.arcsinh_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("ASINH", expr.expr) - - -@register_unary_op(ops.arctan_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("ATAN", expr.expr) - - -@register_unary_op(ops.arctanh_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Case( - ifs=[ - sge.If( - this=sge.func("ABS", expr.expr) > sge.convert(1), - true=constants._NAN, - ) - ], - default=sge.func("ATANH", expr.expr), - ) - - -@register_unary_op(ops.AsTypeOp, pass_op=True) 
-def _(expr: TypedExpr, op: ops.AsTypeOp) -> sge.Expression: - # TODO: Support more types for casting, such as JSON, etc. - return sge.Cast(this=expr.expr, to=op.to_type) - - -@register_unary_op(ops.ArrayToStringOp, pass_op=True) -def _(expr: TypedExpr, op: ops.ArrayToStringOp) -> sge.Expression: - return sge.ArrayToString(this=expr.expr, expression=f"'{op.delimiter}'") - - -@register_unary_op(ops.ArrayIndexOp, pass_op=True) -def _(expr: TypedExpr, op: ops.ArrayIndexOp) -> sge.Expression: - return sge.Bracket( - this=expr.expr, - expressions=[sge.Literal.number(op.index)], - safe=True, - offset=False, - ) - - -@register_unary_op(ops.ArraySliceOp, pass_op=True) -def _(expr: TypedExpr, op: ops.ArraySliceOp) -> sge.Expression: - slice_idx = sqlglot.to_identifier("slice_idx") - - conditions: typing.List[sge.Predicate] = [slice_idx >= op.start] - - if op.stop is not None: - conditions.append(slice_idx < op.stop) - - # local name for each element in the array - el = sqlglot.to_identifier("el") - - selected_elements = ( - sge.select(el) - .from_( - sge.Unnest( - expressions=[expr.expr], - alias=sge.TableAlias(columns=[el]), - offset=slice_idx, - ) - ) - .where(*conditions) - ) - - return sge.array(selected_elements) - - -@register_unary_op(ops.capitalize_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Initcap(this=expr.expr) - - -@register_unary_op(ops.ceil_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Ceil(this=expr.expr) - - -@register_unary_op(ops.cos_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("COS", expr.expr) - - -@register_unary_op(ops.cosh_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Case( - ifs=[ - sge.If( - this=sge.func("ABS", expr.expr) > sge.convert(709.78), - true=constants._INF, - ) - ], - default=sge.func("COSH", expr.expr), - ) - - -@register_unary_op(ops.StrContainsOp, pass_op=True) -def _(expr: TypedExpr, op: ops.StrContainsOp) -> sge.Expression: - return sge.Like(this=expr.expr, 
expression=sge.convert(f"%{op.pat}%")) - - -@register_unary_op(ops.StrContainsRegexOp, pass_op=True) -def _(expr: TypedExpr, op: ops.StrContainsRegexOp) -> sge.Expression: - return sge.RegexpLike(this=expr.expr, expression=sge.convert(op.pat)) - - -@register_unary_op(ops.StrExtractOp, pass_op=True) -def _(expr: TypedExpr, op: ops.StrExtractOp) -> sge.Expression: - return sge.RegexpExtract( - this=expr.expr, expression=sge.convert(op.pat), group=sge.convert(op.n) - ) - - -@register_unary_op(ops.StrFindOp, pass_op=True) -def _(expr: TypedExpr, op: ops.StrFindOp) -> sge.Expression: - # INSTR is 1-based, so we need to adjust the start position. - start = sge.convert(op.start + 1) if op.start is not None else sge.convert(1) - if op.end is not None: - # BigQuery's INSTR doesn't support `end`, so we need to use SUBSTR. - return sge.func( - "INSTR", - sge.Substring( - this=expr.expr, - start=start, - length=sge.convert(op.end - (op.start or 0)), - ), - sge.convert(op.substr), - ) - sge.convert(1) - else: - return sge.func( - "INSTR", - expr.expr, - sge.convert(op.substr), - start, - ) - sge.convert(1) - - -@register_unary_op(ops.StrLstripOp, pass_op=True) -def _(expr: TypedExpr, op: ops.StrLstripOp) -> sge.Expression: - return sge.Trim(this=expr.expr, expression=sge.convert(op.to_strip), side="LEFT") - - -@register_unary_op(ops.StrPadOp, pass_op=True) -def _(expr: TypedExpr, op: ops.StrPadOp) -> sge.Expression: - pad_length = sge.func( - "GREATEST", sge.Length(this=expr.expr), sge.convert(op.length) - ) - if op.side == "left": - return sge.func( - "LPAD", - expr.expr, - pad_length, - sge.convert(op.fillchar), - ) - elif op.side == "right": - return sge.func( - "RPAD", - expr.expr, - pad_length, - sge.convert(op.fillchar), - ) - else: # side == both - lpad_amount = sge.Cast( - this=sge.func( - "SAFE_DIVIDE", - sge.Sub(this=pad_length, expression=sge.Length(this=expr.expr)), - sge.convert(2), - ), - to="INT64", - ) + sge.Length(this=expr.expr) - return sge.func( - "RPAD", - 
sge.func( - "LPAD", - expr.expr, - lpad_amount, - sge.convert(op.fillchar), - ), - pad_length, - sge.convert(op.fillchar), - ) - - -@register_unary_op(ops.StrRepeatOp, pass_op=True) -def _(expr: TypedExpr, op: ops.StrRepeatOp) -> sge.Expression: - return sge.Repeat(this=expr.expr, times=sge.convert(op.repeats)) - - -@register_unary_op(ops.date_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Date(this=expr.expr) - - -@register_unary_op(ops.day_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Extract(this=sge.Identifier(this="DAY"), expression=expr.expr) - - -@register_unary_op(ops.dayofweek_op) -def _(expr: TypedExpr) -> sge.Expression: - # Adjust the 1-based day-of-week index (from SQL) to a 0-based index. - return sge.Extract( - this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr - ) - sge.convert(1) - - -@register_unary_op(ops.dayofyear_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Extract(this=sge.Identifier(this="DAYOFYEAR"), expression=expr.expr) - - -@register_unary_op(ops.EndsWithOp, pass_op=True) -def _(expr: TypedExpr, op: ops.EndsWithOp) -> sge.Expression: - if not op.pat: - return sge.false() - - def to_endswith(pat: str) -> sge.Expression: - return sge.func("ENDS_WITH", expr.expr, sge.convert(pat)) - - conditions = [to_endswith(pat) for pat in op.pat] - return functools.reduce(lambda x, y: sge.Or(this=x, expression=y), conditions) - - -@register_unary_op(ops.exp_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Case( - ifs=[ - sge.If( - this=expr.expr > constants._FLOAT64_EXP_BOUND, - true=constants._INF, - ) - ], - default=sge.func("EXP", expr.expr), - ) - - -@register_unary_op(ops.expm1_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Case( - ifs=[ - sge.If( - this=expr.expr > constants._FLOAT64_EXP_BOUND, - true=constants._INF, - ) - ], - default=sge.func("EXP", expr.expr), - ) - sge.convert(1) - - -@register_unary_op(ops.FloorDtOp, pass_op=True) -def _(expr: TypedExpr, op: 
ops.FloorDtOp) -> sge.Expression: - # TODO: Remove this method when it is covered by ops.FloorOp - return sge.TimestampTrunc(this=expr.expr, unit=sge.Identifier(this=op.freq)) - - -@register_unary_op(ops.floor_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Floor(this=expr.expr) - - -@register_unary_op(ops.geo_area_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("ST_AREA", expr.expr) - - -@register_unary_op(ops.geo_st_astext_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("ST_ASTEXT", expr.expr) - - -@register_unary_op(ops.geo_st_boundary_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("ST_BOUNDARY", expr.expr) - - -@register_unary_op(ops.GeoStBufferOp, pass_op=True) -def _(expr: TypedExpr, op: ops.GeoStBufferOp) -> sge.Expression: - return sge.func( - "ST_BUFFER", - expr.expr, - sge.convert(op.buffer_radius), - sge.convert(op.num_seg_quarter_circle), - sge.convert(op.use_spheroid), - ) - - -@register_unary_op(ops.geo_st_centroid_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("ST_CENTROID", expr.expr) - - -@register_unary_op(ops.geo_st_convexhull_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("ST_CONVEXHULL", expr.expr) - - -@register_unary_op(ops.geo_st_geogfromtext_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("SAFE.ST_GEOGFROMTEXT", expr.expr) - - -@register_unary_op(ops.geo_st_isclosed_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("ST_ISCLOSED", expr.expr) - - -@register_unary_op(ops.GeoStLengthOp, pass_op=True) -def _(expr: TypedExpr, op: ops.GeoStLengthOp) -> sge.Expression: - return sge.func("ST_LENGTH", expr.expr) - - -@register_unary_op(ops.geo_x_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("SAFE.ST_X", expr.expr) - - -@register_unary_op(ops.geo_y_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("SAFE.ST_Y", expr.expr) - - -@register_unary_op(ops.hash_op) -def _(expr: TypedExpr) -> 
sge.Expression: - return sge.func("FARM_FINGERPRINT", expr.expr) - - -@register_unary_op(ops.hour_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Extract(this=sge.Identifier(this="HOUR"), expression=expr.expr) - - -@register_unary_op(ops.invert_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.BitwiseNot(this=expr.expr) - - -@register_unary_op(ops.IsInOp, pass_op=True) -def _(expr: TypedExpr, op: ops.IsInOp) -> sge.Expression: - values = [] - is_numeric_expr = dtypes.is_numeric(expr.dtype) - for value in op.values: - if value is None: - continue - dtype = dtypes.bigframes_type(type(value)) - if expr.dtype == dtype or is_numeric_expr and dtypes.is_numeric(dtype): - values.append(sge.convert(value)) - - if op.match_nulls: - contains_nulls = any(_is_null(value) for value in op.values) - if contains_nulls: - return sge.Is(this=expr.expr, expression=sge.Null()) | sge.In( - this=expr.expr, expressions=values - ) - - if len(values) == 0: - return sge.convert(False) - - return sge.func( - "COALESCE", sge.In(this=expr.expr, expressions=values), sge.convert(False) - ) - - -@register_unary_op(ops.isalnum_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^(\p{N}|\p{L})+$")) - - -@register_unary_op(ops.isalpha_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\p{L}+$")) - - -@register_unary_op(ops.isdecimal_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\d+$")) - - -@register_unary_op(ops.isdigit_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\p{Nd}+$")) - - -@register_unary_op(ops.islower_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.And( - this=sge.EQ( - this=sge.Lower(this=expr.expr), - expression=expr.expr, - ), - expression=sge.NEQ( - this=sge.Upper(this=expr.expr), - 
expression=expr.expr, - ), - ) - - -@register_unary_op(ops.iso_day_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Extract(this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr) - - -@register_unary_op(ops.iso_week_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Extract(this=sge.Identifier(this="ISOWEEK"), expression=expr.expr) - - -@register_unary_op(ops.iso_year_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Extract(this=sge.Identifier(this="ISOYEAR"), expression=expr.expr) - - -@register_unary_op(ops.isnull_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Is(this=expr.expr, expression=sge.Null()) - - -@register_unary_op(ops.isnumeric_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\pN+$")) - - -@register_unary_op(ops.isspace_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\s+$")) - - -@register_unary_op(ops.isupper_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.And( - this=sge.EQ( - this=sge.Upper(this=expr.expr), - expression=expr.expr, - ), - expression=sge.NEQ( - this=sge.Lower(this=expr.expr), - expression=expr.expr, - ), - ) - - -@register_unary_op(ops.len_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Length(this=expr.expr) - - -@register_unary_op(ops.ln_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Case( - ifs=[ - sge.If( - this=expr.expr < sge.convert(0), - true=constants._NAN, - ) - ], - default=sge.Ln(this=expr.expr), - ) - - -@register_unary_op(ops.log10_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Case( - ifs=[ - sge.If( - this=expr.expr < sge.convert(0), - true=constants._NAN, - ) - ], - default=sge.Log(this=expr.expr, expression=sge.convert(10)), - ) - - -@register_unary_op(ops.log1p_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Case( - ifs=[ - sge.If( - this=expr.expr < sge.convert(-1), - 
true=constants._NAN, - ) - ], - default=sge.Ln(this=sge.convert(1) + expr.expr), - ) - - -@register_unary_op(ops.lower_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Lower(this=expr.expr) - - -@register_unary_op(ops.MapOp, pass_op=True) -def _(expr: TypedExpr, op: ops.MapOp) -> sge.Expression: - return sge.Case( - this=expr.expr, - ifs=[ - sge.If(this=sge.convert(key), true=sge.convert(value)) - for key, value in op.mappings - ], - ) - - -@register_unary_op(ops.minute_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Extract(this=sge.Identifier(this="MINUTE"), expression=expr.expr) - - -@register_unary_op(ops.month_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Extract(this=sge.Identifier(this="MONTH"), expression=expr.expr) - - -@register_unary_op(ops.neg_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Neg(this=expr.expr) - - -@register_unary_op(ops.normalize_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.TimestampTrunc(this=expr.expr, unit=sge.Identifier(this="DAY")) - - -@register_unary_op(ops.notnull_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Not(this=sge.Is(this=expr.expr, expression=sge.Null())) - - -@register_unary_op(ops.obj_fetch_metadata_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("OBJ.FETCH_METADATA", expr.expr) - - -@register_unary_op(ops.ObjGetAccessUrl) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("OBJ.GET_ACCESS_URL", expr.expr) - - -@register_unary_op(ops.pos_op) -def _(expr: TypedExpr) -> sge.Expression: - return expr.expr - - -@register_unary_op(ops.quarter_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Extract(this=sge.Identifier(this="QUARTER"), expression=expr.expr) - - -@register_unary_op(ops.ReplaceStrOp, pass_op=True) -def _(expr: TypedExpr, op: ops.ReplaceStrOp) -> sge.Expression: - return sge.func("REPLACE", expr.expr, sge.convert(op.pat), sge.convert(op.repl)) - - -@register_unary_op(ops.RegexReplaceStrOp, 
pass_op=True) -def _(expr: TypedExpr, op: ops.RegexReplaceStrOp) -> sge.Expression: - return sge.func( - "REGEXP_REPLACE", expr.expr, sge.convert(op.pat), sge.convert(op.repl) - ) - - -@register_unary_op(ops.reverse_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("REVERSE", expr.expr) - - -@register_unary_op(ops.second_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Extract(this=sge.Identifier(this="SECOND"), expression=expr.expr) - - -@register_unary_op(ops.StrRstripOp, pass_op=True) -def _(expr: TypedExpr, op: ops.StrRstripOp) -> sge.Expression: - return sge.Trim(this=expr.expr, expression=sge.convert(op.to_strip), side="RIGHT") - - -@register_unary_op(ops.sqrt_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Case( - ifs=[ - sge.If( - this=expr.expr < sge.convert(0), - true=constants._NAN, - ) - ], - default=sge.Sqrt(this=expr.expr), - ) - - -@register_unary_op(ops.StartsWithOp, pass_op=True) -def _(expr: TypedExpr, op: ops.StartsWithOp) -> sge.Expression: - if not op.pat: - return sge.false() - - def to_startswith(pat: str) -> sge.Expression: - return sge.func("STARTS_WITH", expr.expr, sge.convert(pat)) - - conditions = [to_startswith(pat) for pat in op.pat] - return functools.reduce(lambda x, y: sge.Or(this=x, expression=y), conditions) - - -@register_unary_op(ops.StrStripOp, pass_op=True) -def _(expr: TypedExpr, op: ops.StrStripOp) -> sge.Expression: - return sge.Trim(this=sge.convert(op.to_strip), expression=expr.expr) - - -@register_unary_op(ops.sin_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("SIN", expr.expr) - - -@register_unary_op(ops.sinh_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Case( - ifs=[ - sge.If( - this=sge.func("ABS", expr.expr) > constants._FLOAT64_EXP_BOUND, - true=sge.func("SIGN", expr.expr) * constants._INF, - ) - ], - default=sge.func("SINH", expr.expr), - ) - - -@register_unary_op(ops.StringSplitOp, pass_op=True) -def _(expr: TypedExpr, op: ops.StringSplitOp) -> 
sge.Expression: - return sge.Split(this=expr.expr, expression=sge.convert(op.pat)) - - -@register_unary_op(ops.StrGetOp, pass_op=True) -def _(expr: TypedExpr, op: ops.StrGetOp) -> sge.Expression: - return sge.Substring( - this=expr.expr, - start=sge.convert(op.i + 1), - length=sge.convert(1), - ) - - -@register_unary_op(ops.StrSliceOp, pass_op=True) -def _(expr: TypedExpr, op: ops.StrSliceOp) -> sge.Expression: - start = op.start + 1 if op.start is not None else None - if op.end is None: - length = None - elif op.start is None: - length = op.end - else: - length = op.end - op.start - return sge.Substring( - this=expr.expr, - start=sge.convert(start) if start is not None else None, - length=sge.convert(length) if length is not None else None, - ) - - -@register_unary_op(ops.StrftimeOp, pass_op=True) -def _(expr: TypedExpr, op: ops.StrftimeOp) -> sge.Expression: - return sge.func("FORMAT_TIMESTAMP", sge.convert(op.date_format), expr.expr) - - -@register_unary_op(ops.StructFieldOp, pass_op=True) -def _(expr: TypedExpr, op: ops.StructFieldOp) -> sge.Expression: - if isinstance(op.name_or_index, str): - name = op.name_or_index - else: - pa_type = typing.cast(pd.ArrowDtype, expr.dtype) - pa_struct_type = typing.cast(pa.StructType, pa_type.pyarrow_dtype) - name = pa_struct_type.field(op.name_or_index).name - - return sge.Column( - this=sge.to_identifier(name, quoted=True), - catalog=expr.expr, - ) - - -@register_unary_op(ops.tan_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("TAN", expr.expr) - - -@register_unary_op(ops.tanh_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("TANH", expr.expr) - - -@register_unary_op(ops.time_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("TIME", expr.expr) - - -@register_unary_op(ops.timedelta_floor_op) -def _(expr: TypedExpr) -> sge.Expression: - return sge.Floor(this=expr.expr) - - -@register_unary_op(ops.ToDatetimeOp) -def _(expr: TypedExpr) -> sge.Expression: - return 
sge.Cast(this=sge.func("TIMESTAMP_SECONDS", expr.expr), to="DATETIME") - - -@register_unary_op(ops.ToTimestampOp) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("TIMESTAMP_SECONDS", expr.expr) - - -@register_unary_op(ops.ToTimedeltaOp, pass_op=True) -def _(expr: TypedExpr, op: ops.ToTimedeltaOp) -> sge.Expression: - value = expr.expr - factor = UNIT_TO_US_CONVERSION_FACTORS[op.unit] - if factor != 1: - value = sge.Mul(this=value, expression=sge.convert(factor)) - return value - - -@register_unary_op(ops.UnixMicros) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("UNIX_MICROS", expr.expr) - - -@register_unary_op(ops.UnixMillis) -def _(expr: TypedExpr) -> sge.Expression: - return sge.func("UNIX_MILLIS", expr.expr) - - -@register_unary_op(ops.UnixSeconds, pass_op=True) -def _(expr: TypedExpr, op: ops.UnixSeconds) -> sge.Expression: - return sge.func("UNIX_SECONDS", expr.expr) - - -@register_unary_op(ops.JSONExtract, pass_op=True) -def _(expr: TypedExpr, op: ops.JSONExtract) -> sge.Expression: - return sge.func("JSON_EXTRACT", expr.expr, sge.convert(op.json_path)) - - -@register_unary_op(ops.JSONExtractArray, pass_op=True) -def _(expr: TypedExpr, op: ops.JSONExtractArray) -> sge.Expression: - return sge.func("JSON_EXTRACT_ARRAY", expr.expr, sge.convert(op.json_path)) - - -@register_unary_op(ops.JSONExtractStringArray, pass_op=True) -def _(expr: TypedExpr, op: ops.JSONExtractStringArray) -> sge.Expression: - return sge.func("JSON_EXTRACT_STRING_ARRAY", expr.expr, sge.convert(op.json_path)) - - -@register_unary_op(ops.JSONQuery, pass_op=True) -def _(expr: TypedExpr, op: ops.JSONQuery) -> sge.Expression: - return sge.func("JSON_QUERY", expr.expr, sge.convert(op.json_path)) - - -@register_unary_op(ops.JSONQueryArray, pass_op=True) -def _(expr: TypedExpr, op: ops.JSONQueryArray) -> sge.Expression: - return sge.func("JSON_QUERY_ARRAY", expr.expr, sge.convert(op.json_path)) - - -@register_unary_op(ops.JSONValue, pass_op=True) -def _(expr: TypedExpr, 
op: ops.JSONValue) -> sge.Expression:
-    return sge.func("JSON_VALUE", expr.expr, sge.convert(op.json_path))
-
-
-@register_unary_op(ops.JSONValueArray, pass_op=True)
-def _(expr: TypedExpr, op: ops.JSONValueArray) -> sge.Expression:
-    return sge.func("JSON_VALUE_ARRAY", expr.expr, sge.convert(op.json_path))
-
-
-@register_unary_op(ops.ParseJSON)
-def _(expr: TypedExpr) -> sge.Expression:
-    return sge.func("PARSE_JSON", expr.expr)
-
-
-@register_unary_op(ops.ToJSONString)
-def _(expr: TypedExpr) -> sge.Expression:
-    return sge.func("TO_JSON_STRING", expr.expr)
-
-
-@register_unary_op(ops.upper_op)
-def _(expr: TypedExpr) -> sge.Expression:
-    return sge.Upper(this=expr.expr)
-
-
-@register_unary_op(ops.year_op)
-def _(expr: TypedExpr) -> sge.Expression:
-    return sge.Extract(this=sge.Identifier(this="YEAR"), expression=expr.expr)
-
-
-@register_unary_op(ops.ZfillOp, pass_op=True)
-def _(expr: TypedExpr, op: ops.ZfillOp) -> sge.Expression:
-    return sge.Case(
-        ifs=[
-            sge.If(
-                this=sge.EQ(
-                    this=sge.Substring(
-                        this=expr.expr, start=sge.convert(1), length=sge.convert(1)
-                    ),
-                    expression=sge.convert("-"),
-                ),
-                true=sge.Concat(
-                    expressions=[
-                        sge.convert("-"),
-                        sge.func(
-                            "LPAD",
-                            sge.Substring(this=expr.expr, start=sge.convert(1)),
-                            sge.convert(op.width - 1),
-                            sge.convert("0"),
-                        ),
-                    ]
-                ),
-            )
-        ],
-        default=sge.func("LPAD", expr.expr, sge.convert(op.width), sge.convert("0")),
-    )
-
-
-# Helpers
-def _is_null(value) -> bool:
-    # float NaN/inf should be treated as distinct from 'true' null values
-    return typing.cast(bool, pd.isna(value)) and not isinstance(value, float)

From 9f3a0504c4608d2319fec91c19e1d4ea6972bbd0 Mon Sep 17 00:00:00 2001
From: Chelsea Lin
Date: Wed, 17 Sep 2025 20:13:32 +0000
Subject: [PATCH 2/3] split test_unary_compiler.py into multiple files

---
 .../test_array_index/out.sql                  |   0
 .../test_array_slice_with_only_start/out.sql  |   0
 .../out.sql                                   |   0
 .../test_array_to_string/out.sql              |   0
 .../test_floordiv_numeric/out.sql             | 154 ---
.../test_obj_fetch_metadata/out.sql | 0 .../test_obj_get_access_url/out.sql | 0 .../test_is_in/out.sql | 0 .../test_date/out.sql | 0 .../test_day/out.sql | 0 .../test_dayofweek/out.sql | 0 .../test_dayofyear/out.sql | 0 .../test_floor_dt/out.sql | 0 .../test_hour/out.sql | 0 .../test_iso_day/out.sql | 0 .../test_iso_week/out.sql | 0 .../test_iso_year/out.sql | 0 .../test_minute/out.sql | 0 .../test_month/out.sql | 0 .../test_normalize/out.sql | 0 .../test_quarter/out.sql | 0 .../test_second/out.sql | 0 .../test_strftime/out.sql | 0 .../test_time/out.sql | 0 .../test_to_datetime/out.sql | 0 .../test_to_timestamp/out.sql | 0 .../test_unix_micros/out.sql | 0 .../test_unix_millis/out.sql | 0 .../test_unix_seconds/out.sql | 0 .../test_year/out.sql | 0 .../test_hash/out.sql | 0 .../test_isnull/out.sql | 0 .../test_map/out.sql | 0 .../test_notnull/out.sql | 0 .../test_geo_area/out.sql | 0 .../test_geo_st_astext/out.sql | 0 .../test_geo_st_boundary/out.sql | 0 .../test_geo_st_buffer/out.sql | 0 .../test_geo_st_centroid/out.sql | 0 .../test_geo_st_convexhull/out.sql | 0 .../test_geo_st_geogfromtext/out.sql | 0 .../test_geo_st_isclosed/out.sql | 0 .../test_geo_st_length/out.sql | 0 .../test_geo_x/out.sql | 0 .../test_geo_y/out.sql | 0 .../test_json_extract/out.sql | 0 .../test_json_extract_array/out.sql | 0 .../test_json_extract_string_array/out.sql | 0 .../test_json_query/out.sql | 0 .../test_json_query_array/out.sql | 0 .../test_json_value/out.sql | 0 .../test_parse_json/out.sql | 0 .../test_to_json_string/out.sql | 0 .../test_abs/out.sql | 0 .../test_arccos/out.sql | 0 .../test_arccosh/out.sql | 0 .../test_arcsin/out.sql | 0 .../test_arcsinh/out.sql | 0 .../test_arctan/out.sql | 0 .../test_arctanh/out.sql | 0 .../test_ceil/out.sql | 0 .../test_cos/out.sql | 0 .../test_cosh/out.sql | 0 .../test_exp/out.sql | 0 .../test_expm1/out.sql | 0 .../test_floor/out.sql | 0 .../test_invert/out.sql | 0 .../test_ln/out.sql | 0 .../test_log10/out.sql | 0 .../test_log1p/out.sql | 0 
.../test_neg/out.sql | 0 .../test_pos/out.sql | 0 .../test_sin/out.sql | 0 .../test_sinh/out.sql | 0 .../test_sqrt/out.sql | 0 .../test_tan/out.sql | 0 .../test_tanh/out.sql | 0 .../test_capitalize/out.sql | 0 .../test_endswith/out.sql | 0 .../test_isalnum/out.sql | 0 .../test_isalpha/out.sql | 0 .../test_isdecimal/out.sql | 0 .../test_isdigit/out.sql | 0 .../test_islower/out.sql | 0 .../test_isnumeric/out.sql | 0 .../test_isspace/out.sql | 0 .../test_isupper/out.sql | 0 .../test_len/out.sql | 0 .../test_lower/out.sql | 0 .../test_lstrip/out.sql | 0 .../test_regex_replace_str/out.sql | 0 .../test_replace_str/out.sql | 0 .../test_reverse/out.sql | 0 .../test_rstrip/out.sql | 0 .../test_startswith/out.sql | 0 .../test_str_contains/out.sql | 0 .../test_str_contains_regex/out.sql | 0 .../test_str_extract/out.sql | 0 .../test_str_find/out.sql | 0 .../test_str_get/out.sql | 0 .../test_str_pad/out.sql | 0 .../test_str_repeat/out.sql | 0 .../test_str_slice/out.sql | 0 .../test_string_split/out.sql | 0 .../test_strip/out.sql | 0 .../test_upper/out.sql | 0 .../test_zfill/out.sql | 0 .../test_struct_field/out.sql | 0 .../test_timedelta_floor/out.sql | 0 .../test_to_timedelta/out.sql | 0 .../out.sql | 16 - .../test_compile_string_add/out.sql | 16 - .../sqlglot/expressions/test_array_ops.py | 60 ++ .../sqlglot/expressions/test_blob_ops.py | 31 + .../expressions/test_comparison_ops.py | 44 + .../sqlglot/expressions/test_datetime_ops.py | 199 ++++ .../sqlglot/expressions/test_generic_ops.py | 57 + .../sqlglot/expressions/test_geo_ops.py | 123 +++ .../sqlglot/expressions/test_json_ops.py | 97 ++ .../sqlglot/expressions/test_numeric_ops.py | 213 ++++ .../sqlglot/expressions/test_string_ops.py | 287 +++++ .../sqlglot/expressions/test_struct_ops.py | 36 + .../sqlglot/expressions/test_timedelta_ops.py | 40 + .../expressions/test_unary_compiler.py | 998 ------------------ .../core/compile/sqlglot/expressions/utils.py | 35 + 125 files changed, 1222 insertions(+), 1184 deletions(-) 
rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_array_ops}/test_array_index/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_array_ops}/test_array_slice_with_only_start/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_array_ops}/test_array_slice_with_start_and_stop/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_array_ops}/test_array_to_string/out.sql (100%) delete mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_floordiv_numeric/out.sql rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_blob_ops}/test_obj_fetch_metadata/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_blob_ops}/test_obj_get_access_url/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_comparison_ops}/test_is_in/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_date/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_day/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_dayofweek/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_dayofyear/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_floor_dt/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_hour/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => 
test_datetime_ops}/test_iso_day/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_iso_week/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_iso_year/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_minute/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_month/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_normalize/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_quarter/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_second/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_strftime/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_time/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_to_datetime/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_to_timestamp/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_unix_micros/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_unix_millis/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_datetime_ops}/test_unix_seconds/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => 
test_datetime_ops}/test_year/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_generic_ops}/test_hash/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_generic_ops}/test_isnull/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_generic_ops}/test_map/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_generic_ops}/test_notnull/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_geo_ops}/test_geo_area/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_geo_ops}/test_geo_st_astext/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_geo_ops}/test_geo_st_boundary/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_geo_ops}/test_geo_st_buffer/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_geo_ops}/test_geo_st_centroid/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_geo_ops}/test_geo_st_convexhull/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_geo_ops}/test_geo_st_geogfromtext/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_geo_ops}/test_geo_st_isclosed/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_geo_ops}/test_geo_st_length/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_geo_ops}/test_geo_x/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => 
test_geo_ops}/test_geo_y/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_json_ops}/test_json_extract/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_json_ops}/test_json_extract_array/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_json_ops}/test_json_extract_string_array/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_json_ops}/test_json_query/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_json_ops}/test_json_query_array/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_json_ops}/test_json_value/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_json_ops}/test_parse_json/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_json_ops}/test_to_json_string/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_abs/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_arccos/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_arccosh/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_arcsin/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_arcsinh/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_arctan/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => 
test_numeric_ops}/test_arctanh/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_ceil/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_cos/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_cosh/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_exp/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_expm1/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_floor/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_invert/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_ln/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_log10/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_log1p/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_neg/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_pos/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_sin/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_sinh/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_sqrt/out.sql (100%) rename 
tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_tan/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_numeric_ops}/test_tanh/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_capitalize/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_endswith/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_isalnum/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_isalpha/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_isdecimal/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_isdigit/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_islower/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_isnumeric/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_isspace/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_isupper/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_len/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_lower/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_lstrip/out.sql (100%) rename 
tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_regex_replace_str/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_replace_str/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_reverse/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_rstrip/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_startswith/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_str_contains/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_str_contains_regex/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_str_extract/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_str_find/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_str_get/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_str_pad/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_str_repeat/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_str_slice/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_string_split/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_strip/out.sql (100%) rename 
tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_upper/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_string_ops}/test_zfill/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_struct_ops}/test_struct_field/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_timedelta_ops}/test_timedelta_floor/out.sql (100%) rename tests/unit/core/compile/sqlglot/expressions/snapshots/{test_unary_compiler => test_timedelta_ops}/test_to_timedelta/out.sql (100%) delete mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_compile_numerical_add_w_scalar/out.sql delete mode 100644 tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_compile_string_add/out.sql create mode 100644 tests/unit/core/compile/sqlglot/expressions/test_array_ops.py create mode 100644 tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py create mode 100644 tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py create mode 100644 tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py create mode 100644 tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py create mode 100644 tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py create mode 100644 tests/unit/core/compile/sqlglot/expressions/test_json_ops.py create mode 100644 tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py create mode 100644 tests/unit/core/compile/sqlglot/expressions/test_string_ops.py create mode 100644 tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py create mode 100644 tests/unit/core/compile/sqlglot/expressions/test_timedelta_ops.py delete mode 100644 tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py create mode 100644 tests/unit/core/compile/sqlglot/expressions/utils.py diff --git 
a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_array_index/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_index/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_array_index/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_index/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_array_slice_with_only_start/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_slice_with_only_start/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_array_slice_with_only_start/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_slice_with_only_start/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_array_slice_with_start_and_stop/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_slice_with_start_and_stop/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_array_slice_with_start_and_stop/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_slice_with_start_and_stop/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_array_to_string/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_to_string/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_array_to_string/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_array_ops/test_array_to_string/out.sql diff --git 
a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_floordiv_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_floordiv_numeric/out.sql deleted file mode 100644 index c38bc18523..0000000000 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_floordiv_numeric/out.sql +++ /dev/null @@ -1,154 +0,0 @@ -WITH `bfcte_0` AS ( - SELECT - `bool_col` AS `bfcol_0`, - `int64_col` AS `bfcol_1`, - `float64_col` AS `bfcol_2`, - `rowindex` AS `bfcol_3` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` -), `bfcte_1` AS ( - SELECT - *, - `bfcol_3` AS `bfcol_8`, - `bfcol_1` AS `bfcol_9`, - `bfcol_0` AS `bfcol_10`, - `bfcol_2` AS `bfcol_11`, - CASE - WHEN `bfcol_1` = CAST(0 AS INT64) - THEN CAST(0 AS INT64) * `bfcol_1` - ELSE CAST(FLOOR(IEEE_DIVIDE(`bfcol_1`, `bfcol_1`)) AS INT64) - END AS `bfcol_12` - FROM `bfcte_0` -), `bfcte_2` AS ( - SELECT - *, - `bfcol_8` AS `bfcol_18`, - `bfcol_9` AS `bfcol_19`, - `bfcol_10` AS `bfcol_20`, - `bfcol_11` AS `bfcol_21`, - `bfcol_12` AS `bfcol_22`, - CASE - WHEN 1 = CAST(0 AS INT64) - THEN CAST(0 AS INT64) * `bfcol_9` - ELSE CAST(FLOOR(IEEE_DIVIDE(`bfcol_9`, 1)) AS INT64) - END AS `bfcol_23` - FROM `bfcte_1` -), `bfcte_3` AS ( - SELECT - *, - `bfcol_18` AS `bfcol_30`, - `bfcol_19` AS `bfcol_31`, - `bfcol_20` AS `bfcol_32`, - `bfcol_21` AS `bfcol_33`, - `bfcol_22` AS `bfcol_34`, - `bfcol_23` AS `bfcol_35`, - CASE - WHEN 0.0 = CAST(0 AS INT64) - THEN CAST('Infinity' AS FLOAT64) * `bfcol_19` - ELSE CAST(FLOOR(IEEE_DIVIDE(`bfcol_19`, 0.0)) AS INT64) - END AS `bfcol_36` - FROM `bfcte_2` -), `bfcte_4` AS ( - SELECT - *, - `bfcol_30` AS `bfcol_44`, - `bfcol_31` AS `bfcol_45`, - `bfcol_32` AS `bfcol_46`, - `bfcol_33` AS `bfcol_47`, - `bfcol_34` AS `bfcol_48`, - `bfcol_35` AS `bfcol_49`, - `bfcol_36` AS `bfcol_50`, - CASE - WHEN `bfcol_33` = CAST(0 AS INT64) - THEN CAST('Infinity' AS FLOAT64) * `bfcol_31` - ELSE 
CAST(FLOOR(IEEE_DIVIDE(`bfcol_31`, `bfcol_33`)) AS INT64) - END AS `bfcol_51` - FROM `bfcte_3` -), `bfcte_5` AS ( - SELECT - *, - `bfcol_44` AS `bfcol_60`, - `bfcol_45` AS `bfcol_61`, - `bfcol_46` AS `bfcol_62`, - `bfcol_47` AS `bfcol_63`, - `bfcol_48` AS `bfcol_64`, - `bfcol_49` AS `bfcol_65`, - `bfcol_50` AS `bfcol_66`, - `bfcol_51` AS `bfcol_67`, - CASE - WHEN `bfcol_45` = CAST(0 AS INT64) - THEN CAST('Infinity' AS FLOAT64) * `bfcol_47` - ELSE CAST(FLOOR(IEEE_DIVIDE(`bfcol_47`, `bfcol_45`)) AS INT64) - END AS `bfcol_68` - FROM `bfcte_4` -), `bfcte_6` AS ( - SELECT - *, - `bfcol_60` AS `bfcol_78`, - `bfcol_61` AS `bfcol_79`, - `bfcol_62` AS `bfcol_80`, - `bfcol_63` AS `bfcol_81`, - `bfcol_64` AS `bfcol_82`, - `bfcol_65` AS `bfcol_83`, - `bfcol_66` AS `bfcol_84`, - `bfcol_67` AS `bfcol_85`, - `bfcol_68` AS `bfcol_86`, - CASE - WHEN 0.0 = CAST(0 AS INT64) - THEN CAST('Infinity' AS FLOAT64) * `bfcol_63` - ELSE CAST(FLOOR(IEEE_DIVIDE(`bfcol_63`, 0.0)) AS INT64) - END AS `bfcol_87` - FROM `bfcte_5` -), `bfcte_7` AS ( - SELECT - *, - `bfcol_78` AS `bfcol_98`, - `bfcol_79` AS `bfcol_99`, - `bfcol_80` AS `bfcol_100`, - `bfcol_81` AS `bfcol_101`, - `bfcol_82` AS `bfcol_102`, - `bfcol_83` AS `bfcol_103`, - `bfcol_84` AS `bfcol_104`, - `bfcol_85` AS `bfcol_105`, - `bfcol_86` AS `bfcol_106`, - `bfcol_87` AS `bfcol_107`, - CASE - WHEN CAST(`bfcol_80` AS INT64) = CAST(0 AS INT64) - THEN CAST(0 AS INT64) * `bfcol_79` - ELSE CAST(FLOOR(IEEE_DIVIDE(`bfcol_79`, CAST(`bfcol_80` AS INT64))) AS INT64) - END AS `bfcol_108` - FROM `bfcte_6` -), `bfcte_8` AS ( - SELECT - *, - `bfcol_98` AS `bfcol_120`, - `bfcol_99` AS `bfcol_121`, - `bfcol_100` AS `bfcol_122`, - `bfcol_101` AS `bfcol_123`, - `bfcol_102` AS `bfcol_124`, - `bfcol_103` AS `bfcol_125`, - `bfcol_104` AS `bfcol_126`, - `bfcol_105` AS `bfcol_127`, - `bfcol_106` AS `bfcol_128`, - `bfcol_107` AS `bfcol_129`, - `bfcol_108` AS `bfcol_130`, - CASE - WHEN `bfcol_99` = CAST(0 AS INT64) - THEN CAST(0 AS INT64) * CAST(`bfcol_100` AS 
INT64) - ELSE CAST(FLOOR(IEEE_DIVIDE(CAST(`bfcol_100` AS INT64), `bfcol_99`)) AS INT64) - END AS `bfcol_131` - FROM `bfcte_7` -) -SELECT - `bfcol_120` AS `rowindex`, - `bfcol_121` AS `int64_col`, - `bfcol_122` AS `bool_col`, - `bfcol_123` AS `float64_col`, - `bfcol_124` AS `int_div_int`, - `bfcol_125` AS `int_div_1`, - `bfcol_126` AS `int_div_0`, - `bfcol_127` AS `int_div_float`, - `bfcol_128` AS `float_div_int`, - `bfcol_129` AS `float_div_0`, - `bfcol_130` AS `int_div_bool`, - `bfcol_131` AS `bool_div_int` -FROM `bfcte_8` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_obj_fetch_metadata/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_fetch_metadata/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_obj_fetch_metadata/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_fetch_metadata/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_obj_get_access_url/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_obj_get_access_url/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_blob_ops/test_obj_get_access_url/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_is_in/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_is_in/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_is_in/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_comparison_ops/test_is_in/out.sql diff --git 
a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_date/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_date/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_date/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_date/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_day/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_day/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_day/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_day/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_dayofweek/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_dayofweek/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_dayofweek/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_dayofweek/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_dayofyear/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_dayofyear/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_dayofyear/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_dayofyear/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_floor_dt/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_floor_dt/out.sql similarity index 100% rename from 
tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_floor_dt/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_floor_dt/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_hour/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_hour/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_hour/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_hour/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_iso_day/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_iso_day/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_iso_day/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_iso_day/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_iso_week/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_iso_week/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_iso_week/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_iso_week/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_iso_year/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_iso_year/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_iso_year/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_iso_year/out.sql diff --git 
a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_minute/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_minute/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_minute/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_minute/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_month/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_month/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_month/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_month/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_normalize/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_normalize/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_normalize/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_normalize/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_quarter/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_quarter/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_quarter/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_quarter/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_second/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_second/out.sql similarity index 100% rename from 
tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_second/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_second/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_strftime/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_strftime/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_strftime/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_strftime/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_time/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_time/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_time/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_time/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_to_datetime/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_datetime/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_to_datetime/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_datetime/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_to_timestamp/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_timestamp/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_to_timestamp/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_timestamp/out.sql diff --git 
a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_unix_micros/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_unix_micros/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_unix_micros/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_unix_micros/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_unix_millis/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_unix_millis/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_unix_millis/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_unix_millis/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_unix_seconds/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_unix_seconds/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_unix_seconds/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_unix_seconds/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_year/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_year/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_year/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_year/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_hash/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_hash/out.sql similarity index 100% rename from 
tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_hash/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_hash/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isnull/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_isnull/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isnull/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_isnull/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_map/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_map/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_map/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_map/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_notnull/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_notnull/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_notnull/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_notnull/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_area/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_area/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_area/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_area/out.sql diff --git 
a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_astext/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_st_astext/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_astext/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_st_astext/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_boundary/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_st_boundary/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_boundary/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_st_boundary/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_buffer/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_st_buffer/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_buffer/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_st_buffer/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_centroid/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_st_centroid/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_centroid/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_st_centroid/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_convexhull/out.sql 
b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_st_convexhull/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_convexhull/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_st_convexhull/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_geogfromtext/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_st_geogfromtext/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_geogfromtext/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_st_geogfromtext/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_isclosed/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_st_isclosed/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_isclosed/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_st_isclosed/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_length/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_st_length/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_st_length/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_st_length/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_x/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_x/out.sql similarity index 100% rename from 
tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_x/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_x/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_y/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_y/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_geo_y/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_y/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_json_extract/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_json_extract/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_json_extract/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_json_extract/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_json_extract_array/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_json_extract_array/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_json_extract_array/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_json_extract_array/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_json_extract_string_array/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_json_extract_string_array/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_json_extract_string_array/out.sql rename to 
tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_json_extract_string_array/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_json_query/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_json_query/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_json_query/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_json_query/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_json_query_array/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_json_query_array/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_json_query_array/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_json_query_array/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_json_value/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_json_value/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_json_value/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_json_value/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_parse_json/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_parse_json/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_parse_json/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_parse_json/out.sql diff --git 
a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_to_json_string/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_to_json_string/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_to_json_string/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_json_ops/test_to_json_string/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_abs/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_abs/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_abs/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_abs/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arccos/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arccos/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arccos/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arccos/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arccosh/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arccosh/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arccosh/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arccosh/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arcsin/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arcsin/out.sql similarity index 100% rename from 
tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arcsin/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arcsin/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arcsinh/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arcsinh/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arcsinh/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arcsinh/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arctan/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arctan/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arctan/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arctan/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arctanh/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arctanh/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_arctanh/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_arctanh/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_ceil/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_ceil/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_ceil/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_ceil/out.sql diff --git 
a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_cos/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_cos/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_cos/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_cos/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_cosh/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_cosh/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_cosh/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_cosh/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_exp/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_exp/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_exp/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_exp/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_expm1/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_expm1/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_expm1/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_expm1/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_floor/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_floor/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_floor/out.sql 
rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_floor/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_invert/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_invert/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_invert/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_invert/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_ln/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_ln/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_ln/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_ln/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_log10/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log10/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_log10/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log10/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_log1p/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log1p/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_log1p/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_log1p/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_neg/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_neg/out.sql similarity 
index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_neg/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_neg/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_pos/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_pos/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_pos/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_pos/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sin/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_sin/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sin/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_sin/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sinh/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_sinh/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sinh/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_sinh/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sqrt/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_sqrt/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_sqrt/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_sqrt/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_tan/out.sql 
b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_tan/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_tan/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_tan/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_tanh/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_tanh/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_tanh/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_numeric_ops/test_tanh/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_capitalize/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_capitalize/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_capitalize/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_capitalize/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_endswith/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_endswith/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_endswith/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isalnum/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_isalnum/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isalnum/out.sql rename to 
tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_isalnum/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isalpha/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_isalpha/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isalpha/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_isalpha/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isdecimal/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_isdecimal/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isdecimal/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_isdecimal/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isdigit/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_isdigit/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isdigit/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_isdigit/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_islower/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_islower/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_islower/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_islower/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isnumeric/out.sql 
b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_isnumeric/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isnumeric/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_isnumeric/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isspace/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_isspace/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isspace/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_isspace/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isupper/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_isupper/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_isupper/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_isupper/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_len/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_len/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_len/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_len/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_lower/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_lower/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_lower/out.sql rename to 
tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_lower/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_lstrip/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_lstrip/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_lstrip/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_lstrip/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_regex_replace_str/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_regex_replace_str/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_regex_replace_str/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_regex_replace_str/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_replace_str/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_replace_str/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_replace_str/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_replace_str/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_reverse/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_reverse/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_reverse/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_reverse/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_rstrip/out.sql 
b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_rstrip/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_rstrip/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_rstrip/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_startswith/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_startswith/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_startswith/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_startswith/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_contains/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_contains/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_contains/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_contains/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_contains_regex/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_contains_regex/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_contains_regex/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_contains_regex/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_extract/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_extract/out.sql similarity index 100% rename from 
tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_extract/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_extract/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_find/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_find/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_find/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_find/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_get/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_get/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_get/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_get/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_pad/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_pad/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_pad/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_repeat/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_repeat/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_repeat/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_repeat/out.sql diff --git 
a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_slice/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_slice/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_str_slice/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_str_slice/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_string_split/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_string_split/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_string_split/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_string_split/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_strip/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_strip/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_strip/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_strip/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_upper/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_upper/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_upper/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_upper/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_zfill/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_zfill/out.sql similarity index 100% rename from 
tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_zfill/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_string_ops/test_zfill/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_struct_field/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_struct_ops/test_struct_field/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_struct_field/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_struct_ops/test_struct_field/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_timedelta_floor/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_timedelta_ops/test_timedelta_floor/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_timedelta_floor/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_timedelta_ops/test_timedelta_floor/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_to_timedelta/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_timedelta_ops/test_to_timedelta/out.sql similarity index 100% rename from tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_to_timedelta/out.sql rename to tests/unit/core/compile/sqlglot/expressions/snapshots/test_timedelta_ops/test_to_timedelta/out.sql diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_compile_numerical_add_w_scalar/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_compile_numerical_add_w_scalar/out.sql deleted file mode 100644 index 9c4b01a6df..0000000000 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_compile_numerical_add_w_scalar/out.sql 
+++ /dev/null @@ -1,16 +0,0 @@ -WITH `bfcte_0` AS ( - SELECT - `int64_col` AS `bfcol_0`, - `rowindex` AS `bfcol_1` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` -), `bfcte_1` AS ( - SELECT - *, - `bfcol_1` AS `bfcol_4`, - `bfcol_0` + 1 AS `bfcol_5` - FROM `bfcte_0` -) -SELECT - `bfcol_4` AS `rowindex`, - `bfcol_5` AS `int64_col` -FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_compile_string_add/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_compile_string_add/out.sql deleted file mode 100644 index 7a8ab83df1..0000000000 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_unary_compiler/test_compile_string_add/out.sql +++ /dev/null @@ -1,16 +0,0 @@ -WITH `bfcte_0` AS ( - SELECT - `rowindex` AS `bfcol_0`, - `string_col` AS `bfcol_1` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` -), `bfcte_1` AS ( - SELECT - *, - `bfcol_0` AS `bfcol_4`, - CONCAT(`bfcol_1`, 'a') AS `bfcol_5` - FROM `bfcte_0` -) -SELECT - `bfcol_4` AS `rowindex`, - `bfcol_5` AS `string_col` -FROM `bfcte_1` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/expressions/test_array_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_array_ops.py new file mode 100644 index 0000000000..ee089e7e26 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/test_array_ops.py @@ -0,0 +1,60 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from bigframes import operations as ops +from bigframes.operations._op_converters import convert_index, convert_slice +import bigframes.pandas as bpd +from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops + +pytest.importorskip("pytest_snapshot") + + +def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot): + col_name = "string_list_col" + bf_df = repeated_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.ArrayToStringOp(delimiter=".").as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_array_index(repeated_types_df: bpd.DataFrame, snapshot): + col_name = "string_list_col" + bf_df = repeated_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [convert_index(1).as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_array_slice_with_only_start(repeated_types_df: bpd.DataFrame, snapshot): + col_name = "string_list_col" + bf_df = repeated_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [convert_slice(slice(1, None)).as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_array_slice_with_start_and_stop(repeated_types_df: bpd.DataFrame, snapshot): + col_name = "string_list_col" + bf_df = repeated_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [convert_slice(slice(1, 5)).as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py new file mode 100644 index 0000000000..7876a754ee --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/test_blob_ops.py @@ -0,0 +1,31 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with 
the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +import bigframes.pandas as bpd + +pytest.importorskip("pytest_snapshot") + + +def test_obj_fetch_metadata(scalar_types_df: bpd.DataFrame, snapshot): + blob_s = scalar_types_df["string_col"].str.to_blob() + sql = blob_s.blob.version().to_frame().sql + snapshot.assert_match(sql, "out.sql") + + +def test_obj_get_access_url(scalar_types_df: bpd.DataFrame, snapshot): + blob_s = scalar_types_df["string_col"].str.to_blob() + sql = blob_s.blob.read_url().to_frame().sql + snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py new file mode 100644 index 0000000000..e6aafcc960 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py @@ -0,0 +1,44 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from bigframes import operations as ops +import bigframes.pandas as bpd +from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops + +pytest.importorskip("pytest_snapshot") + + +def test_is_in(scalar_types_df: bpd.DataFrame, snapshot): + int_col = "int64_col" + float_col = "float64_col" + bf_df = scalar_types_df[[int_col, float_col]] + ops_map = { + "ints": ops.IsInOp(values=(1, 2, 3)).as_expr(int_col), + "ints_w_null": ops.IsInOp(values=(None, 123456)).as_expr(int_col), + "floats": ops.IsInOp(values=(1.0, 2.0, 3.0), match_nulls=False).as_expr( + int_col + ), + "strings": ops.IsInOp(values=("1.0", "2.0")).as_expr(int_col), + "mixed": ops.IsInOp(values=("1.0", 2.5, 3)).as_expr(int_col), + "empty": ops.IsInOp(values=()).as_expr(int_col), + "ints_wo_match_nulls": ops.IsInOp( + values=(None, 123456), match_nulls=False + ).as_expr(int_col), + "float_in_ints": ops.IsInOp(values=(1, 2, 3, None)).as_expr(float_col), + } + + sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) + snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py new file mode 100644 index 0000000000..88e6ec6c2f --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py @@ -0,0 +1,199 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from bigframes import operations as ops +import bigframes.pandas as bpd +from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops + +pytest.importorskip("pytest_snapshot") + + +def test_date(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.date_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_day(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.day_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_dayofweek(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.dayofweek_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_dayofyear(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.dayofyear_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_floor_dt(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.FloorDtOp("D").as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_hour(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.hour_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_minute(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.minute_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def 
test_month(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.month_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_normalize(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.normalize_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_quarter(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.quarter_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_second(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.second_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_strftime(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.StrftimeOp("%Y-%m-%d").as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_time(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.time_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "int64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.ToDatetimeOp().as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "int64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.ToTimestampOp().as_expr(col_name)], 
[col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_unix_micros(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.UnixMicros().as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_unix_millis(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.UnixMillis().as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_unix_seconds(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.UnixSeconds().as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_year(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.year_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_iso_day(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.iso_day_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_iso_week(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.iso_week_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_iso_year(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "timestamp_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.iso_year_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py new 
file mode 100644 index 0000000000..aec425394e --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py @@ -0,0 +1,57 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from bigframes import operations as ops +import bigframes.pandas as bpd +from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops + +pytest.importorskip("pytest_snapshot") + + +def test_hash(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.hash_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_isnull(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.isnull_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_notnull(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.notnull_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_map(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, + [ops.MapOp(mappings=(("value1", "mapped1"),)).as_expr(col_name)], + [col_name], + ) + + snapshot.assert_match(sql, "out.sql") diff --git 
a/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py new file mode 100644 index 0000000000..676328545a --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py @@ -0,0 +1,123 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from bigframes import operations as ops +import bigframes.pandas as bpd +from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops + +pytest.importorskip("pytest_snapshot") + + +def test_geo_area(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "geography_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.geo_area_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_geo_st_astext(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "geography_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.geo_st_astext_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_geo_st_boundary(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "geography_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.geo_st_boundary_op.as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_geo_st_buffer(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "geography_col" + bf_df = 
scalar_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.GeoStBufferOp(1.0, 8.0, False).as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_geo_st_centroid(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "geography_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.geo_st_centroid_op.as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_geo_st_convexhull(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "geography_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.geo_st_convexhull_op.as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_geo_st_geogfromtext(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.geo_st_geogfromtext_op.as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_geo_st_isclosed(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "geography_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.geo_st_isclosed_op.as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_geo_st_length(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "geography_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.GeoStLengthOp(True).as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_geo_x(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "geography_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.geo_x_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_geo_y(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "geography_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, 
[ops.geo_y_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py new file mode 100644 index 0000000000..5064f047c7 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py @@ -0,0 +1,97 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from bigframes import operations as ops +import bigframes.pandas as bpd +from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops + +pytest.importorskip("pytest_snapshot") + + +def test_json_extract(json_types_df: bpd.DataFrame, snapshot): + col_name = "json_col" + bf_df = json_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.JSONExtract(json_path="$").as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_json_extract_array(json_types_df: bpd.DataFrame, snapshot): + col_name = "json_col" + bf_df = json_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.JSONExtractArray(json_path="$").as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_json_extract_string_array(json_types_df: bpd.DataFrame, snapshot): + col_name = "json_col" + bf_df = json_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.JSONExtractStringArray(json_path="$").as_expr(col_name)], [col_name] + ) + + 
snapshot.assert_match(sql, "out.sql") + + +def test_json_query(json_types_df: bpd.DataFrame, snapshot): + col_name = "json_col" + bf_df = json_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.JSONQuery(json_path="$").as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_json_query_array(json_types_df: bpd.DataFrame, snapshot): + col_name = "json_col" + bf_df = json_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.JSONQueryArray(json_path="$").as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_json_value(json_types_df: bpd.DataFrame, snapshot): + col_name = "json_col" + bf_df = json_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.JSONValue(json_path="$").as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_parse_json(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.ParseJSON().as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_to_json_string(json_types_df: bpd.DataFrame, snapshot): + col_name = "json_col" + bf_df = json_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.ToJSONString().as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py new file mode 100644 index 0000000000..fae7e6dc0c --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py @@ -0,0 +1,213 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from bigframes import operations as ops +import bigframes.pandas as bpd +from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops + +pytest.importorskip("pytest_snapshot") + + +def test_arccosh(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.arccosh_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_arccos(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.arccos_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_arcsin(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.arcsin_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_arcsinh(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.arcsinh_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_arctan(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.arctan_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_arctanh(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = 
scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.arctanh_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_abs(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.abs_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_ceil(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.ceil_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_cos(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.cos_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_cosh(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.cosh_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_exp(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.exp_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_expm1(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.expm1_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_floor(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.floor_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_invert(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "int64_col" + bf_df = scalar_types_df[[col_name]] + 
sql = _apply_unary_ops(bf_df, [ops.invert_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_ln(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.ln_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_log10(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.log10_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_log1p(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.log1p_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_neg(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.neg_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_pos(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.pos_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_sqrt(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.sqrt_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_sin(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.sin_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_sinh(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, 
[ops.sinh_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_tan(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.tan_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_tanh(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "float64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.tanh_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py new file mode 100644 index 0000000000..22dc9e8398 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py @@ -0,0 +1,287 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from bigframes import operations as ops +import bigframes.pandas as bpd +from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops + +pytest.importorskip("pytest_snapshot") + + +def test_capitalize(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.capitalize_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_endswith(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + ops_map = { + "single": ops.EndsWithOp(pat=("ab",)).as_expr(col_name), + "double": ops.EndsWithOp(pat=("ab", "cd")).as_expr(col_name), + "empty": ops.EndsWithOp(pat=()).as_expr(col_name), + } + sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) + snapshot.assert_match(sql, "out.sql") + + +def test_isalnum(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.isalnum_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_isalpha(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.isalpha_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_isdecimal(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.isdecimal_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_isdigit(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.isdigit_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_islower(scalar_types_df: bpd.DataFrame, snapshot): + col_name = 
"string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.islower_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_isnumeric(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.isnumeric_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_isspace(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.isspace_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_isupper(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.isupper_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_len(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.len_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_lower(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.lower_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_lstrip(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.StrLstripOp(" ").as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_replace_str(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.ReplaceStrOp("e", "a").as_expr(col_name)], [col_name] + ) + snapshot.assert_match(sql, "out.sql") + + +def test_regex_replace_str(scalar_types_df: 
bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.RegexReplaceStrOp(r"e", "a").as_expr(col_name)], [col_name] + ) + snapshot.assert_match(sql, "out.sql") + + +def test_reverse(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.reverse_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_rstrip(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.StrRstripOp(" ").as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_startswith(scalar_types_df: bpd.DataFrame, snapshot): + + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + ops_map = { + "single": ops.StartsWithOp(pat=("ab",)).as_expr(col_name), + "double": ops.StartsWithOp(pat=("ab", "cd")).as_expr(col_name), + "empty": ops.StartsWithOp(pat=()).as_expr(col_name), + } + sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) + snapshot.assert_match(sql, "out.sql") + + +def test_str_get(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.StrGetOp(1).as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_str_pad(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + ops_map = { + "left": ops.StrPadOp(length=10, fillchar="-", side="left").as_expr(col_name), + "right": ops.StrPadOp(length=10, fillchar="-", side="right").as_expr(col_name), + "both": ops.StrPadOp(length=10, fillchar="-", side="both").as_expr(col_name), + } + sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) + snapshot.assert_match(sql, "out.sql") + + +def test_str_slice(scalar_types_df: 
bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.StrSliceOp(1, 3).as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_strip(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.StrStripOp(" ").as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_str_contains(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.StrContainsOp("e").as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_str_contains_regex(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.StrContainsRegexOp("e").as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_str_extract(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.StrExtractOp(r"([a-z]*)", 1).as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") + + +def test_str_repeat(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.StrRepeatOp(2).as_expr(col_name)], [col_name]) + snapshot.assert_match(sql, "out.sql") + + +def test_str_find(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + ops_map = { + "none_none": ops.StrFindOp("e", start=None, end=None).as_expr(col_name), + "start_none": ops.StrFindOp("e", start=2, end=None).as_expr(col_name), + "none_end": ops.StrFindOp("e", start=None, end=5).as_expr(col_name), + "start_end": ops.StrFindOp("e", start=2, end=5).as_expr(col_name), + } + sql 
= _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) + + snapshot.assert_match(sql, "out.sql") + + +def test_string_split(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.StringSplitOp(pat=",").as_expr(col_name)], [col_name] + ) + snapshot.assert_match(sql, "out.sql") + + +def test_upper(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.upper_op.as_expr(col_name)], [col_name]) + + snapshot.assert_match(sql, "out.sql") + + +def test_zfill(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "string_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops(bf_df, [ops.ZfillOp(width=10).as_expr(col_name)], [col_name]) + snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py new file mode 100644 index 0000000000..76bee6756d --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py @@ -0,0 +1,36 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from bigframes import operations as ops +import bigframes.pandas as bpd +from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops + +pytest.importorskip("pytest_snapshot") + + +def test_struct_field(nested_structs_types_df: bpd.DataFrame, snapshot): + col_name = "people" + bf_df = nested_structs_types_df[[col_name]] + + ops_map = { + # When a name string is provided. + "string": ops.StructFieldOp("name").as_expr(col_name), + # When an index integer is provided. + "int": ops.StructFieldOp(0).as_expr(col_name), + } + sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) + + snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_timedelta_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_timedelta_ops.py new file mode 100644 index 0000000000..cac6e801a9 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/test_timedelta_ops.py @@ -0,0 +1,40 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from bigframes import operations as ops +import bigframes.pandas as bpd +from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops + +pytest.importorskip("pytest_snapshot") + + +def test_to_timedelta(scalar_types_df: bpd.DataFrame, snapshot): + bf_df = scalar_types_df[["int64_col"]] + bf_df["duration_us"] = bpd.to_timedelta(bf_df["int64_col"], "us") + bf_df["duration_s"] = bpd.to_timedelta(bf_df["int64_col"], "s") + bf_df["duration_w"] = bpd.to_timedelta(bf_df["int64_col"], "W") + + snapshot.assert_match(bf_df.sql, "out.sql") + + +def test_timedelta_floor(scalar_types_df: bpd.DataFrame, snapshot): + col_name = "int64_col" + bf_df = scalar_types_df[[col_name]] + sql = _apply_unary_ops( + bf_df, [ops.timedelta_floor_op.as_expr(col_name)], [col_name] + ) + + snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py b/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py deleted file mode 100644 index fced18f5be..0000000000 --- a/tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py +++ /dev/null @@ -1,998 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import typing - -import pytest - -from bigframes import operations as ops -from bigframes.core import expression as expr -from bigframes.operations._op_converters import convert_index, convert_slice -import bigframes.pandas as bpd - -pytest.importorskip("pytest_snapshot") - - -def _apply_unary_ops( - obj: bpd.DataFrame, - ops_list: typing.Sequence[expr.Expression], - new_names: typing.Sequence[str], -) -> str: - array_value = obj._block.expr - result, old_names = array_value.compute_values(ops_list) - - # Rename columns for deterministic golden SQL results. - assert len(old_names) == len(new_names) - col_ids = {old_name: new_name for old_name, new_name in zip(old_names, new_names)} - result = result.rename_columns(col_ids).select_columns(new_names) - - sql = result.session._executor.to_sql(result, enable_cache=False) - return sql - - -def test_arccosh(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.arccosh_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_arccos(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.arccos_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_arcsin(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.arcsin_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_arcsinh(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.arcsinh_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_arctan(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = 
_apply_unary_ops(bf_df, [ops.arctan_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_arctanh(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.arctanh_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_abs(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.abs_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_capitalize(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.capitalize_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_ceil(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.ceil_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_date(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.date_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_day(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.day_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_dayofweek(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.dayofweek_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_dayofyear(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - 
sql = _apply_unary_ops(bf_df, [ops.dayofyear_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_endswith(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - ops_map = { - "single": ops.EndsWithOp(pat=("ab",)).as_expr(col_name), - "double": ops.EndsWithOp(pat=("ab", "cd")).as_expr(col_name), - "empty": ops.EndsWithOp(pat=()).as_expr(col_name), - } - sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) - snapshot.assert_match(sql, "out.sql") - - -def test_exp(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.exp_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_expm1(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.expm1_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_floor_dt(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.FloorDtOp("D").as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_floor(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.floor_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_geo_area(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "geography_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.geo_area_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_geo_st_astext(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "geography_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, 
[ops.geo_st_astext_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_geo_st_boundary(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "geography_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.geo_st_boundary_op.as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_geo_st_buffer(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "geography_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.GeoStBufferOp(1.0, 8.0, False).as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_geo_st_centroid(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "geography_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.geo_st_centroid_op.as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_geo_st_convexhull(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "geography_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.geo_st_convexhull_op.as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_geo_st_geogfromtext(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.geo_st_geogfromtext_op.as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_geo_st_isclosed(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "geography_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.geo_st_isclosed_op.as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_geo_st_length(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "geography_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.GeoStLengthOp(True).as_expr(col_name)], [col_name] 
- ) - - snapshot.assert_match(sql, "out.sql") - - -def test_geo_x(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "geography_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.geo_x_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_geo_y(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "geography_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.geo_y_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot): - col_name = "string_list_col" - bf_df = repeated_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.ArrayToStringOp(delimiter=".").as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_array_index(repeated_types_df: bpd.DataFrame, snapshot): - col_name = "string_list_col" - bf_df = repeated_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [convert_index(1).as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_array_slice_with_only_start(repeated_types_df: bpd.DataFrame, snapshot): - col_name = "string_list_col" - bf_df = repeated_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [convert_slice(slice(1, None)).as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_array_slice_with_start_and_stop(repeated_types_df: bpd.DataFrame, snapshot): - col_name = "string_list_col" - bf_df = repeated_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [convert_slice(slice(1, 5)).as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_cos(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.cos_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_cosh(scalar_types_df: 
bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.cosh_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_hash(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.hash_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_hour(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.hour_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_invert(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "int64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.invert_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_is_in(scalar_types_df: bpd.DataFrame, snapshot): - int_col = "int64_col" - float_col = "float64_col" - bf_df = scalar_types_df[[int_col, float_col]] - ops_map = { - "ints": ops.IsInOp(values=(1, 2, 3)).as_expr(int_col), - "ints_w_null": ops.IsInOp(values=(None, 123456)).as_expr(int_col), - "floats": ops.IsInOp(values=(1.0, 2.0, 3.0), match_nulls=False).as_expr( - int_col - ), - "strings": ops.IsInOp(values=("1.0", "2.0")).as_expr(int_col), - "mixed": ops.IsInOp(values=("1.0", 2.5, 3)).as_expr(int_col), - "empty": ops.IsInOp(values=()).as_expr(int_col), - "ints_wo_match_nulls": ops.IsInOp( - values=(None, 123456), match_nulls=False - ).as_expr(int_col), - "float_in_ints": ops.IsInOp(values=(1, 2, 3, None)).as_expr(float_col), - } - - sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) - snapshot.assert_match(sql, "out.sql") - - -def test_isalnum(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, 
[ops.isalnum_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_isalpha(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.isalpha_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_isdecimal(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.isdecimal_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_isdigit(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.isdigit_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_islower(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.islower_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_isnumeric(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.isnumeric_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_isspace(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.isspace_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_isupper(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.isupper_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_len(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = 
_apply_unary_ops(bf_df, [ops.len_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_ln(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.ln_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_log10(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.log10_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_log1p(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.log1p_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_lower(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.lower_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_map(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, - [ops.MapOp(mappings=(("value1", "mapped1"),)).as_expr(col_name)], - [col_name], - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_lstrip(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.StrLstripOp(" ").as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_minute(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.minute_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_month(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = 
scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.month_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_neg(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.neg_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_normalize(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.normalize_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_obj_fetch_metadata(scalar_types_df: bpd.DataFrame, snapshot): - blob_s = scalar_types_df["string_col"].str.to_blob() - sql = blob_s.blob.version().to_frame().sql - snapshot.assert_match(sql, "out.sql") - - -def test_obj_get_access_url(scalar_types_df: bpd.DataFrame, snapshot): - blob_s = scalar_types_df["string_col"].str.to_blob() - sql = blob_s.blob.read_url().to_frame().sql - snapshot.assert_match(sql, "out.sql") - - -def test_pos(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.pos_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_quarter(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.quarter_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_replace_str(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.ReplaceStrOp("e", "a").as_expr(col_name)], [col_name] - ) - snapshot.assert_match(sql, "out.sql") - - -def test_regex_replace_str(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = 
_apply_unary_ops( - bf_df, [ops.RegexReplaceStrOp(r"e", "a").as_expr(col_name)], [col_name] - ) - snapshot.assert_match(sql, "out.sql") - - -def test_reverse(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.reverse_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_second(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.second_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_rstrip(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.StrRstripOp(" ").as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_sqrt(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.sqrt_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_startswith(scalar_types_df: bpd.DataFrame, snapshot): - - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - ops_map = { - "single": ops.StartsWithOp(pat=("ab",)).as_expr(col_name), - "double": ops.StartsWithOp(pat=("ab", "cd")).as_expr(col_name), - "empty": ops.StartsWithOp(pat=()).as_expr(col_name), - } - sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) - snapshot.assert_match(sql, "out.sql") - - -def test_str_get(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.StrGetOp(1).as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_str_pad(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - ops_map = { - "left": 
ops.StrPadOp(length=10, fillchar="-", side="left").as_expr(col_name), - "right": ops.StrPadOp(length=10, fillchar="-", side="right").as_expr(col_name), - "both": ops.StrPadOp(length=10, fillchar="-", side="both").as_expr(col_name), - } - sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) - snapshot.assert_match(sql, "out.sql") - - -def test_str_slice(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.StrSliceOp(1, 3).as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_strftime(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.StrftimeOp("%Y-%m-%d").as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_struct_field(nested_structs_types_df: bpd.DataFrame, snapshot): - col_name = "people" - bf_df = nested_structs_types_df[[col_name]] - - ops_map = { - # When a name string is provided. - "string": ops.StructFieldOp("name").as_expr(col_name), - # When an index integer is provided. 
- "int": ops.StructFieldOp(0).as_expr(col_name), - } - sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) - - snapshot.assert_match(sql, "out.sql") - - -def test_str_contains(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.StrContainsOp("e").as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_str_contains_regex(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.StrContainsRegexOp("e").as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_str_extract(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.StrExtractOp(r"([a-z]*)", 1).as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_str_repeat(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.StrRepeatOp(2).as_expr(col_name)], [col_name]) - snapshot.assert_match(sql, "out.sql") - - -def test_str_find(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - ops_map = { - "none_none": ops.StrFindOp("e", start=None, end=None).as_expr(col_name), - "start_none": ops.StrFindOp("e", start=2, end=None).as_expr(col_name), - "none_end": ops.StrFindOp("e", start=None, end=5).as_expr(col_name), - "start_end": ops.StrFindOp("e", start=2, end=5).as_expr(col_name), - } - sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) - - snapshot.assert_match(sql, "out.sql") - - -def test_strip(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.StrStripOp(" 
").as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_iso_day(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.iso_day_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_iso_week(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.iso_week_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_iso_year(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.iso_year_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_isnull(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.isnull_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_notnull(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.notnull_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_sin(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.sin_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_sinh(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.sinh_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_string_split(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( - 
bf_df, [ops.StringSplitOp(pat=",").as_expr(col_name)], [col_name] - ) - snapshot.assert_match(sql, "out.sql") - - -def test_tan(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.tan_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_tanh(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "float64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.tanh_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_time(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.time_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "int64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.ToDatetimeOp().as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "int64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.ToTimestampOp().as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_to_timedelta(scalar_types_df: bpd.DataFrame, snapshot): - bf_df = scalar_types_df[["int64_col"]] - bf_df["duration_us"] = bpd.to_timedelta(bf_df["int64_col"], "us") - bf_df["duration_s"] = bpd.to_timedelta(bf_df["int64_col"], "s") - bf_df["duration_w"] = bpd.to_timedelta(bf_df["int64_col"], "W") - - snapshot.assert_match(bf_df.sql, "out.sql") - - -def test_unix_micros(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.UnixMicros().as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def 
test_unix_millis(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.UnixMillis().as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_unix_seconds(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.UnixSeconds().as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_timedelta_floor(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "int64_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.timedelta_floor_op.as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_json_extract(json_types_df: bpd.DataFrame, snapshot): - col_name = "json_col" - bf_df = json_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.JSONExtract(json_path="$").as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_json_extract_array(json_types_df: bpd.DataFrame, snapshot): - col_name = "json_col" - bf_df = json_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.JSONExtractArray(json_path="$").as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_json_extract_string_array(json_types_df: bpd.DataFrame, snapshot): - col_name = "json_col" - bf_df = json_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.JSONExtractStringArray(json_path="$").as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_json_query(json_types_df: bpd.DataFrame, snapshot): - col_name = "json_col" - bf_df = json_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.JSONQuery(json_path="$").as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_json_query_array(json_types_df: bpd.DataFrame, snapshot): - col_name = "json_col" 
- bf_df = json_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.JSONQueryArray(json_path="$").as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_json_value(json_types_df: bpd.DataFrame, snapshot): - col_name = "json_col" - bf_df = json_types_df[[col_name]] - sql = _apply_unary_ops( - bf_df, [ops.JSONValue(json_path="$").as_expr(col_name)], [col_name] - ) - - snapshot.assert_match(sql, "out.sql") - - -def test_parse_json(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.ParseJSON().as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_to_json_string(json_types_df: bpd.DataFrame, snapshot): - col_name = "json_col" - bf_df = json_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.ToJSONString().as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_upper(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.upper_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_year(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "timestamp_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.year_op.as_expr(col_name)], [col_name]) - - snapshot.assert_match(sql, "out.sql") - - -def test_zfill(scalar_types_df: bpd.DataFrame, snapshot): - col_name = "string_col" - bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.ZfillOp(width=10).as_expr(col_name)], [col_name]) - snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/utils.py b/tests/unit/core/compile/sqlglot/expressions/utils.py new file mode 100644 index 0000000000..b212160ed3 --- /dev/null +++ b/tests/unit/core/compile/sqlglot/expressions/utils.py @@ -0,0 +1,35 @@ +# Copyright 2025 Google 
LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +from bigframes.core import expression as expr +import bigframes.pandas as bpd + + +def _apply_unary_ops( + obj: bpd.DataFrame, + ops_list: typing.Sequence[expr.Expression], + new_names: typing.Sequence[str], +) -> str: + array_value = obj._block.expr + result, old_names = array_value.compute_values(ops_list) + + # Rename columns for deterministic golden SQL results. + assert len(old_names) == len(new_names) + col_ids = {old_name: new_name for old_name, new_name in zip(old_names, new_names)} + result = result.rename_columns(col_ids).select_columns(new_names) + + sql = result.session._executor.to_sql(result, enable_cache=False) + return sql From 6a3a1c328315214a0f20549dcb57dbdab49d2532 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Wed, 17 Sep 2025 21:29:30 +0000 Subject: [PATCH 3/3] put util to bigframes.testing to avoid the importing error because of sqlglot module names matching with folder name --- bigframes/testing/utils.py | 22 ++++- .../sqlglot/expressions/test_array_ops.py | 12 +-- .../expressions/test_comparison_ops.py | 4 +- .../sqlglot/expressions/test_datetime_ops.py | 64 +++++++++------ .../sqlglot/expressions/test_generic_ops.py | 10 +-- .../sqlglot/expressions/test_geo_ops.py | 26 +++--- .../sqlglot/expressions/test_json_ops.py | 20 ++--- .../sqlglot/expressions/test_numeric_ops.py | 50 ++++++------ .../sqlglot/expressions/test_string_ops.py | 80 ++++++++++++------- 
.../sqlglot/expressions/test_struct_ops.py | 4 +- .../sqlglot/expressions/test_timedelta_ops.py | 4 +- .../core/compile/sqlglot/expressions/utils.py | 35 -------- 12 files changed, 179 insertions(+), 152 deletions(-) delete mode 100644 tests/unit/core/compile/sqlglot/expressions/utils.py diff --git a/bigframes/testing/utils.py b/bigframes/testing/utils.py index 5da24c5b9b..d38e323d57 100644 --- a/bigframes/testing/utils.py +++ b/bigframes/testing/utils.py @@ -14,7 +14,7 @@ import base64 import decimal -from typing import Iterable, Optional, Set, Union +from typing import Iterable, Optional, Sequence, Set, Union import geopandas as gpd # type: ignore import google.api_core.operation @@ -25,6 +25,7 @@ import pyarrow as pa # type: ignore import pytest +from bigframes.core import expression as expr import bigframes.functions._utils as bff_utils import bigframes.pandas @@ -448,3 +449,22 @@ def get_function_name(func, package_requirements=None, is_row_processor=False): function_hash = bff_utils.get_hash(func, package_requirements) return f"bigframes_{function_hash}" + + +def _apply_unary_ops( + obj: bigframes.pandas.DataFrame, + ops_list: Sequence[expr.Expression], + new_names: Sequence[str], +) -> str: + """Applies a list of unary ops to the given DataFrame and returns the SQL + representing the resulting DataFrames.""" + array_value = obj._block.expr + result, old_names = array_value.compute_values(ops_list) + + # Rename columns for deterministic golden SQL results. 
+ assert len(old_names) == len(new_names) + col_ids = {old_name: new_name for old_name, new_name in zip(old_names, new_names)} + result = result.rename_columns(col_ids).select_columns(new_names) + + sql = result.session._executor.to_sql(result, enable_cache=False) + return sql diff --git a/tests/unit/core/compile/sqlglot/expressions/test_array_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_array_ops.py index ee089e7e26..407c7bbb3c 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_array_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_array_ops.py @@ -17,7 +17,7 @@ from bigframes import operations as ops from bigframes.operations._op_converters import convert_index, convert_slice import bigframes.pandas as bpd -from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops +from bigframes.testing import utils pytest.importorskip("pytest_snapshot") @@ -25,7 +25,7 @@ def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot): col_name = "string_list_col" bf_df = repeated_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.ArrayToStringOp(delimiter=".").as_expr(col_name)], [col_name] ) @@ -35,7 +35,9 @@ def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot): def test_array_index(repeated_types_df: bpd.DataFrame, snapshot): col_name = "string_list_col" bf_df = repeated_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [convert_index(1).as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [convert_index(1).as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -43,7 +45,7 @@ def test_array_index(repeated_types_df: bpd.DataFrame, snapshot): def test_array_slice_with_only_start(repeated_types_df: bpd.DataFrame, snapshot): col_name = "string_list_col" bf_df = repeated_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [convert_slice(slice(1, None)).as_expr(col_name)], [col_name] ) @@ 
-53,7 +55,7 @@ def test_array_slice_with_only_start(repeated_types_df: bpd.DataFrame, snapshot) def test_array_slice_with_start_and_stop(repeated_types_df: bpd.DataFrame, snapshot): col_name = "string_list_col" bf_df = repeated_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [convert_slice(slice(1, 5)).as_expr(col_name)], [col_name] ) diff --git a/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py index e6aafcc960..9a901687fa 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py @@ -16,7 +16,7 @@ from bigframes import operations as ops import bigframes.pandas as bpd -from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops +from bigframes.testing import utils pytest.importorskip("pytest_snapshot") @@ -40,5 +40,5 @@ def test_is_in(scalar_types_df: bpd.DataFrame, snapshot): "float_in_ints": ops.IsInOp(values=(1, 2, 3, None)).as_expr(float_col), } - sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) + sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py index 88e6ec6c2f..0a8aa320bb 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py @@ -16,7 +16,7 @@ from bigframes import operations as ops import bigframes.pandas as bpd -from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops +from bigframes.testing import utils pytest.importorskip("pytest_snapshot") @@ -24,7 +24,7 @@ def test_date(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = 
scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.date_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.date_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -32,7 +32,7 @@ def test_date(scalar_types_df: bpd.DataFrame, snapshot): def test_day(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.day_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.day_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -40,7 +40,9 @@ def test_day(scalar_types_df: bpd.DataFrame, snapshot): def test_dayofweek(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.dayofweek_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.dayofweek_op.as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -48,7 +50,9 @@ def test_dayofweek(scalar_types_df: bpd.DataFrame, snapshot): def test_dayofyear(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.dayofyear_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.dayofyear_op.as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -56,7 +60,9 @@ def test_dayofyear(scalar_types_df: bpd.DataFrame, snapshot): def test_floor_dt(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.FloorDtOp("D").as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.FloorDtOp("D").as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -64,7 +70,7 @@ def test_floor_dt(scalar_types_df: bpd.DataFrame, snapshot): def test_hour(scalar_types_df: bpd.DataFrame, 
snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.hour_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.hour_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -72,7 +78,7 @@ def test_hour(scalar_types_df: bpd.DataFrame, snapshot): def test_minute(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.minute_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.minute_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -80,7 +86,7 @@ def test_minute(scalar_types_df: bpd.DataFrame, snapshot): def test_month(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.month_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.month_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -88,7 +94,9 @@ def test_month(scalar_types_df: bpd.DataFrame, snapshot): def test_normalize(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.normalize_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.normalize_op.as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -96,7 +104,7 @@ def test_normalize(scalar_types_df: bpd.DataFrame, snapshot): def test_quarter(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.quarter_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.quarter_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -104,7 +112,7 @@ def test_quarter(scalar_types_df: bpd.DataFrame, snapshot): def 
test_second(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.second_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.second_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -112,7 +120,7 @@ def test_second(scalar_types_df: bpd.DataFrame, snapshot): def test_strftime(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.StrftimeOp("%Y-%m-%d").as_expr(col_name)], [col_name] ) @@ -122,7 +130,7 @@ def test_strftime(scalar_types_df: bpd.DataFrame, snapshot): def test_time(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.time_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.time_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -130,7 +138,9 @@ def test_time(scalar_types_df: bpd.DataFrame, snapshot): def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot): col_name = "int64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.ToDatetimeOp().as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.ToDatetimeOp().as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -138,7 +148,9 @@ def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot): def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot): col_name = "int64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.ToTimestampOp().as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.ToTimestampOp().as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -146,7 +158,9 @@ def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot): def 
test_unix_micros(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.UnixMicros().as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.UnixMicros().as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -154,7 +168,9 @@ def test_unix_micros(scalar_types_df: bpd.DataFrame, snapshot): def test_unix_millis(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.UnixMillis().as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.UnixMillis().as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -162,7 +178,9 @@ def test_unix_millis(scalar_types_df: bpd.DataFrame, snapshot): def test_unix_seconds(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.UnixSeconds().as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.UnixSeconds().as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -170,7 +188,7 @@ def test_unix_seconds(scalar_types_df: bpd.DataFrame, snapshot): def test_year(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.year_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.year_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -178,7 +196,7 @@ def test_year(scalar_types_df: bpd.DataFrame, snapshot): def test_iso_day(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.iso_day_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.iso_day_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ 
-186,7 +204,7 @@ def test_iso_day(scalar_types_df: bpd.DataFrame, snapshot): def test_iso_week(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.iso_week_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.iso_week_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -194,6 +212,6 @@ def test_iso_week(scalar_types_df: bpd.DataFrame, snapshot): def test_iso_year(scalar_types_df: bpd.DataFrame, snapshot): col_name = "timestamp_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.iso_year_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.iso_year_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py index aec425394e..130d34a2fa 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py @@ -16,7 +16,7 @@ from bigframes import operations as ops import bigframes.pandas as bpd -from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops +from bigframes.testing import utils pytest.importorskip("pytest_snapshot") @@ -24,7 +24,7 @@ def test_hash(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.hash_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.hash_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -32,7 +32,7 @@ def test_hash(scalar_types_df: bpd.DataFrame, snapshot): def test_isnull(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.isnull_op.as_expr(col_name)], [col_name]) + sql = 
utils._apply_unary_ops(bf_df, [ops.isnull_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -40,7 +40,7 @@ def test_isnull(scalar_types_df: bpd.DataFrame, snapshot): def test_notnull(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.notnull_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.notnull_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -48,7 +48,7 @@ def test_notnull(scalar_types_df: bpd.DataFrame, snapshot): def test_map(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.MapOp(mappings=(("value1", "mapped1"),)).as_expr(col_name)], [col_name], diff --git a/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py index 676328545a..e136d172f6 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py @@ -16,7 +16,7 @@ from bigframes import operations as ops import bigframes.pandas as bpd -from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops +from bigframes.testing import utils pytest.importorskip("pytest_snapshot") @@ -24,7 +24,7 @@ def test_geo_area(scalar_types_df: bpd.DataFrame, snapshot): col_name = "geography_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.geo_area_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.geo_area_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -32,7 +32,9 @@ def test_geo_area(scalar_types_df: bpd.DataFrame, snapshot): def test_geo_st_astext(scalar_types_df: bpd.DataFrame, snapshot): col_name = "geography_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, 
[ops.geo_st_astext_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.geo_st_astext_op.as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -40,7 +42,7 @@ def test_geo_st_astext(scalar_types_df: bpd.DataFrame, snapshot): def test_geo_st_boundary(scalar_types_df: bpd.DataFrame, snapshot): col_name = "geography_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.geo_st_boundary_op.as_expr(col_name)], [col_name] ) @@ -50,7 +52,7 @@ def test_geo_st_boundary(scalar_types_df: bpd.DataFrame, snapshot): def test_geo_st_buffer(scalar_types_df: bpd.DataFrame, snapshot): col_name = "geography_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.GeoStBufferOp(1.0, 8.0, False).as_expr(col_name)], [col_name] ) @@ -60,7 +62,7 @@ def test_geo_st_buffer(scalar_types_df: bpd.DataFrame, snapshot): def test_geo_st_centroid(scalar_types_df: bpd.DataFrame, snapshot): col_name = "geography_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.geo_st_centroid_op.as_expr(col_name)], [col_name] ) @@ -70,7 +72,7 @@ def test_geo_st_centroid(scalar_types_df: bpd.DataFrame, snapshot): def test_geo_st_convexhull(scalar_types_df: bpd.DataFrame, snapshot): col_name = "geography_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.geo_st_convexhull_op.as_expr(col_name)], [col_name] ) @@ -80,7 +82,7 @@ def test_geo_st_convexhull(scalar_types_df: bpd.DataFrame, snapshot): def test_geo_st_geogfromtext(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.geo_st_geogfromtext_op.as_expr(col_name)], [col_name] ) @@ -90,7 +92,7 @@ def test_geo_st_geogfromtext(scalar_types_df: bpd.DataFrame, snapshot): def 
test_geo_st_isclosed(scalar_types_df: bpd.DataFrame, snapshot): col_name = "geography_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.geo_st_isclosed_op.as_expr(col_name)], [col_name] ) @@ -100,7 +102,7 @@ def test_geo_st_isclosed(scalar_types_df: bpd.DataFrame, snapshot): def test_geo_st_length(scalar_types_df: bpd.DataFrame, snapshot): col_name = "geography_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.GeoStLengthOp(True).as_expr(col_name)], [col_name] ) @@ -110,7 +112,7 @@ def test_geo_st_length(scalar_types_df: bpd.DataFrame, snapshot): def test_geo_x(scalar_types_df: bpd.DataFrame, snapshot): col_name = "geography_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.geo_x_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.geo_x_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -118,6 +120,6 @@ def test_geo_x(scalar_types_df: bpd.DataFrame, snapshot): def test_geo_y(scalar_types_df: bpd.DataFrame, snapshot): col_name = "geography_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.geo_y_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.geo_y_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py index 5064f047c7..ecbac10ef2 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py @@ -16,7 +16,7 @@ from bigframes import operations as ops import bigframes.pandas as bpd -from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops +from bigframes.testing import utils pytest.importorskip("pytest_snapshot") @@ -24,7 +24,7 @@ def 
test_json_extract(json_types_df: bpd.DataFrame, snapshot): col_name = "json_col" bf_df = json_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.JSONExtract(json_path="$").as_expr(col_name)], [col_name] ) @@ -34,7 +34,7 @@ def test_json_extract(json_types_df: bpd.DataFrame, snapshot): def test_json_extract_array(json_types_df: bpd.DataFrame, snapshot): col_name = "json_col" bf_df = json_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.JSONExtractArray(json_path="$").as_expr(col_name)], [col_name] ) @@ -44,7 +44,7 @@ def test_json_extract_array(json_types_df: bpd.DataFrame, snapshot): def test_json_extract_string_array(json_types_df: bpd.DataFrame, snapshot): col_name = "json_col" bf_df = json_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.JSONExtractStringArray(json_path="$").as_expr(col_name)], [col_name] ) @@ -54,7 +54,7 @@ def test_json_extract_string_array(json_types_df: bpd.DataFrame, snapshot): def test_json_query(json_types_df: bpd.DataFrame, snapshot): col_name = "json_col" bf_df = json_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.JSONQuery(json_path="$").as_expr(col_name)], [col_name] ) @@ -64,7 +64,7 @@ def test_json_query(json_types_df: bpd.DataFrame, snapshot): def test_json_query_array(json_types_df: bpd.DataFrame, snapshot): col_name = "json_col" bf_df = json_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.JSONQueryArray(json_path="$").as_expr(col_name)], [col_name] ) @@ -74,7 +74,7 @@ def test_json_query_array(json_types_df: bpd.DataFrame, snapshot): def test_json_value(json_types_df: bpd.DataFrame, snapshot): col_name = "json_col" bf_df = json_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.JSONValue(json_path="$").as_expr(col_name)], [col_name] ) @@ -84,7 +84,7 @@ def 
test_json_value(json_types_df: bpd.DataFrame, snapshot): def test_parse_json(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.ParseJSON().as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.ParseJSON().as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -92,6 +92,8 @@ def test_parse_json(scalar_types_df: bpd.DataFrame, snapshot): def test_to_json_string(json_types_df: bpd.DataFrame, snapshot): col_name = "json_col" bf_df = json_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.ToJSONString().as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.ToJSONString().as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py index fae7e6dc0c..10fd4b2427 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py @@ -16,7 +16,7 @@ from bigframes import operations as ops import bigframes.pandas as bpd -from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops +from bigframes.testing import utils pytest.importorskip("pytest_snapshot") @@ -24,7 +24,7 @@ def test_arccosh(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.arccosh_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.arccosh_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -32,7 +32,7 @@ def test_arccosh(scalar_types_df: bpd.DataFrame, snapshot): def test_arccos(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.arccos_op.as_expr(col_name)], [col_name]) + sql = 
utils._apply_unary_ops(bf_df, [ops.arccos_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -40,7 +40,7 @@ def test_arccos(scalar_types_df: bpd.DataFrame, snapshot): def test_arcsin(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.arcsin_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.arcsin_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -48,7 +48,7 @@ def test_arcsin(scalar_types_df: bpd.DataFrame, snapshot): def test_arcsinh(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.arcsinh_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.arcsinh_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -56,7 +56,7 @@ def test_arcsinh(scalar_types_df: bpd.DataFrame, snapshot): def test_arctan(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.arctan_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.arctan_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -64,7 +64,7 @@ def test_arctan(scalar_types_df: bpd.DataFrame, snapshot): def test_arctanh(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.arctanh_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.arctanh_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -72,7 +72,7 @@ def test_arctanh(scalar_types_df: bpd.DataFrame, snapshot): def test_abs(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.abs_op.as_expr(col_name)], [col_name]) + sql = 
utils._apply_unary_ops(bf_df, [ops.abs_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -80,7 +80,7 @@ def test_abs(scalar_types_df: bpd.DataFrame, snapshot): def test_ceil(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.ceil_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.ceil_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -88,7 +88,7 @@ def test_ceil(scalar_types_df: bpd.DataFrame, snapshot): def test_cos(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.cos_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.cos_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -96,7 +96,7 @@ def test_cos(scalar_types_df: bpd.DataFrame, snapshot): def test_cosh(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.cosh_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.cosh_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -104,7 +104,7 @@ def test_cosh(scalar_types_df: bpd.DataFrame, snapshot): def test_exp(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.exp_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.exp_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -112,7 +112,7 @@ def test_exp(scalar_types_df: bpd.DataFrame, snapshot): def test_expm1(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.expm1_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, 
[ops.expm1_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -120,7 +120,7 @@ def test_expm1(scalar_types_df: bpd.DataFrame, snapshot): def test_floor(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.floor_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.floor_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -128,7 +128,7 @@ def test_floor(scalar_types_df: bpd.DataFrame, snapshot): def test_invert(scalar_types_df: bpd.DataFrame, snapshot): col_name = "int64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.invert_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.invert_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -136,7 +136,7 @@ def test_invert(scalar_types_df: bpd.DataFrame, snapshot): def test_ln(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.ln_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.ln_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -144,7 +144,7 @@ def test_ln(scalar_types_df: bpd.DataFrame, snapshot): def test_log10(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.log10_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.log10_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -152,7 +152,7 @@ def test_log10(scalar_types_df: bpd.DataFrame, snapshot): def test_log1p(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.log1p_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, 
[ops.log1p_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -160,7 +160,7 @@ def test_log1p(scalar_types_df: bpd.DataFrame, snapshot): def test_neg(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.neg_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.neg_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -168,7 +168,7 @@ def test_neg(scalar_types_df: bpd.DataFrame, snapshot): def test_pos(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.pos_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.pos_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -176,7 +176,7 @@ def test_pos(scalar_types_df: bpd.DataFrame, snapshot): def test_sqrt(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.sqrt_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.sqrt_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -184,7 +184,7 @@ def test_sqrt(scalar_types_df: bpd.DataFrame, snapshot): def test_sin(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.sin_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.sin_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -192,7 +192,7 @@ def test_sin(scalar_types_df: bpd.DataFrame, snapshot): def test_sinh(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.sinh_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.sinh_op.as_expr(col_name)], [col_name]) 
snapshot.assert_match(sql, "out.sql") @@ -200,7 +200,7 @@ def test_sinh(scalar_types_df: bpd.DataFrame, snapshot): def test_tan(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.tan_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.tan_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -208,6 +208,6 @@ def test_tan(scalar_types_df: bpd.DataFrame, snapshot): def test_tanh(scalar_types_df: bpd.DataFrame, snapshot): col_name = "float64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.tanh_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.tanh_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py index 22dc9e8398..79c67a09ca 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py @@ -16,7 +16,7 @@ from bigframes import operations as ops import bigframes.pandas as bpd -from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops +from bigframes.testing import utils pytest.importorskip("pytest_snapshot") @@ -24,7 +24,9 @@ def test_capitalize(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.capitalize_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.capitalize_op.as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -37,14 +39,14 @@ def test_endswith(scalar_types_df: bpd.DataFrame, snapshot): "double": ops.EndsWithOp(pat=("ab", "cd")).as_expr(col_name), "empty": ops.EndsWithOp(pat=()).as_expr(col_name), } - sql = _apply_unary_ops(bf_df, list(ops_map.values()), 
list(ops_map.keys())) + sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) snapshot.assert_match(sql, "out.sql") def test_isalnum(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.isalnum_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.isalnum_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -52,7 +54,7 @@ def test_isalnum(scalar_types_df: bpd.DataFrame, snapshot): def test_isalpha(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.isalpha_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.isalpha_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -60,7 +62,9 @@ def test_isalpha(scalar_types_df: bpd.DataFrame, snapshot): def test_isdecimal(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.isdecimal_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.isdecimal_op.as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -68,7 +72,7 @@ def test_isdecimal(scalar_types_df: bpd.DataFrame, snapshot): def test_isdigit(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.isdigit_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.isdigit_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -76,7 +80,7 @@ def test_isdigit(scalar_types_df: bpd.DataFrame, snapshot): def test_islower(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.islower_op.as_expr(col_name)], [col_name]) + sql = 
utils._apply_unary_ops(bf_df, [ops.islower_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -84,7 +88,9 @@ def test_islower(scalar_types_df: bpd.DataFrame, snapshot): def test_isnumeric(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.isnumeric_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.isnumeric_op.as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -92,7 +98,7 @@ def test_isnumeric(scalar_types_df: bpd.DataFrame, snapshot): def test_isspace(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.isspace_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.isspace_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -100,7 +106,7 @@ def test_isspace(scalar_types_df: bpd.DataFrame, snapshot): def test_isupper(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.isupper_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.isupper_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -108,7 +114,7 @@ def test_isupper(scalar_types_df: bpd.DataFrame, snapshot): def test_len(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.len_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.len_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -116,7 +122,7 @@ def test_len(scalar_types_df: bpd.DataFrame, snapshot): def test_lower(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.lower_op.as_expr(col_name)], 
[col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.lower_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -124,7 +130,9 @@ def test_lower(scalar_types_df: bpd.DataFrame, snapshot): def test_lstrip(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.StrLstripOp(" ").as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.StrLstripOp(" ").as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -132,7 +140,7 @@ def test_lstrip(scalar_types_df: bpd.DataFrame, snapshot): def test_replace_str(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.ReplaceStrOp("e", "a").as_expr(col_name)], [col_name] ) snapshot.assert_match(sql, "out.sql") @@ -141,7 +149,7 @@ def test_replace_str(scalar_types_df: bpd.DataFrame, snapshot): def test_regex_replace_str(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.RegexReplaceStrOp(r"e", "a").as_expr(col_name)], [col_name] ) snapshot.assert_match(sql, "out.sql") @@ -150,7 +158,7 @@ def test_regex_replace_str(scalar_types_df: bpd.DataFrame, snapshot): def test_reverse(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.reverse_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.reverse_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -158,7 +166,9 @@ def test_reverse(scalar_types_df: bpd.DataFrame, snapshot): def test_rstrip(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.StrRstripOp(" ").as_expr(col_name)], 
[col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.StrRstripOp(" ").as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -172,14 +182,14 @@ def test_startswith(scalar_types_df: bpd.DataFrame, snapshot): "double": ops.StartsWithOp(pat=("ab", "cd")).as_expr(col_name), "empty": ops.StartsWithOp(pat=()).as_expr(col_name), } - sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) + sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) snapshot.assert_match(sql, "out.sql") def test_str_get(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.StrGetOp(1).as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.StrGetOp(1).as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -192,14 +202,16 @@ def test_str_pad(scalar_types_df: bpd.DataFrame, snapshot): "right": ops.StrPadOp(length=10, fillchar="-", side="right").as_expr(col_name), "both": ops.StrPadOp(length=10, fillchar="-", side="both").as_expr(col_name), } - sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) + sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) snapshot.assert_match(sql, "out.sql") def test_str_slice(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.StrSliceOp(1, 3).as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.StrSliceOp(1, 3).as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -207,7 +219,9 @@ def test_str_slice(scalar_types_df: bpd.DataFrame, snapshot): def test_strip(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.StrStripOp(" ").as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, 
[ops.StrStripOp(" ").as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -215,7 +229,7 @@ def test_strip(scalar_types_df: bpd.DataFrame, snapshot): def test_str_contains(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.StrContainsOp("e").as_expr(col_name)], [col_name] ) @@ -225,7 +239,7 @@ def test_str_contains(scalar_types_df: bpd.DataFrame, snapshot): def test_str_contains_regex(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.StrContainsRegexOp("e").as_expr(col_name)], [col_name] ) @@ -235,7 +249,7 @@ def test_str_contains_regex(scalar_types_df: bpd.DataFrame, snapshot): def test_str_extract(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.StrExtractOp(r"([a-z]*)", 1).as_expr(col_name)], [col_name] ) @@ -245,7 +259,9 @@ def test_str_extract(scalar_types_df: bpd.DataFrame, snapshot): def test_str_repeat(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.StrRepeatOp(2).as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.StrRepeatOp(2).as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") @@ -258,7 +274,7 @@ def test_str_find(scalar_types_df: bpd.DataFrame, snapshot): "none_end": ops.StrFindOp("e", start=None, end=5).as_expr(col_name), "start_end": ops.StrFindOp("e", start=2, end=5).as_expr(col_name), } - sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) + sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) snapshot.assert_match(sql, "out.sql") @@ -266,7 +282,7 @@ def 
test_str_find(scalar_types_df: bpd.DataFrame, snapshot): def test_string_split(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.StringSplitOp(pat=",").as_expr(col_name)], [col_name] ) snapshot.assert_match(sql, "out.sql") @@ -275,7 +291,7 @@ def test_string_split(scalar_types_df: bpd.DataFrame, snapshot): def test_upper(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.upper_op.as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops(bf_df, [ops.upper_op.as_expr(col_name)], [col_name]) snapshot.assert_match(sql, "out.sql") @@ -283,5 +299,7 @@ def test_upper(scalar_types_df: bpd.DataFrame, snapshot): def test_zfill(scalar_types_df: bpd.DataFrame, snapshot): col_name = "string_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops(bf_df, [ops.ZfillOp(width=10).as_expr(col_name)], [col_name]) + sql = utils._apply_unary_ops( + bf_df, [ops.ZfillOp(width=10).as_expr(col_name)], [col_name] + ) snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py index 76bee6756d..19156ead99 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py @@ -16,7 +16,7 @@ from bigframes import operations as ops import bigframes.pandas as bpd -from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops +from bigframes.testing import utils pytest.importorskip("pytest_snapshot") @@ -31,6 +31,6 @@ def test_struct_field(nested_structs_types_df: bpd.DataFrame, snapshot): # When an index integer is provided. 
"int": ops.StructFieldOp(0).as_expr(col_name), } - sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) + sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys())) snapshot.assert_match(sql, "out.sql") diff --git a/tests/unit/core/compile/sqlglot/expressions/test_timedelta_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_timedelta_ops.py index cac6e801a9..1f01047ba9 100644 --- a/tests/unit/core/compile/sqlglot/expressions/test_timedelta_ops.py +++ b/tests/unit/core/compile/sqlglot/expressions/test_timedelta_ops.py @@ -16,7 +16,7 @@ from bigframes import operations as ops import bigframes.pandas as bpd -from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops +from bigframes.testing import utils pytest.importorskip("pytest_snapshot") @@ -33,7 +33,7 @@ def test_to_timedelta(scalar_types_df: bpd.DataFrame, snapshot): def test_timedelta_floor(scalar_types_df: bpd.DataFrame, snapshot): col_name = "int64_col" bf_df = scalar_types_df[[col_name]] - sql = _apply_unary_ops( + sql = utils._apply_unary_ops( bf_df, [ops.timedelta_floor_op.as_expr(col_name)], [col_name] ) diff --git a/tests/unit/core/compile/sqlglot/expressions/utils.py b/tests/unit/core/compile/sqlglot/expressions/utils.py deleted file mode 100644 index b212160ed3..0000000000 --- a/tests/unit/core/compile/sqlglot/expressions/utils.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import typing - -from bigframes.core import expression as expr -import bigframes.pandas as bpd - - -def _apply_unary_ops( - obj: bpd.DataFrame, - ops_list: typing.Sequence[expr.Expression], - new_names: typing.Sequence[str], -) -> str: - array_value = obj._block.expr - result, old_names = array_value.compute_values(ops_list) - - # Rename columns for deterministic golden SQL results. - assert len(old_names) == len(new_names) - col_ids = {old_name: new_name for old_name, new_name in zip(old_names, new_names)} - result = result.rename_columns(col_ids).select_columns(new_names) - - sql = result.session._executor.to_sql(result, enable_cache=False) - return sql