Skip to content

Commit 68ff3dd

Browse files
committed
fix: simplify search op
1 parent e7db6ed commit 68ff3dd

File tree

7 files changed

+83
-74
lines changed

7 files changed

+83
-74
lines changed

bigframes/bigquery/_operations/search.py

Lines changed: 12 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,11 @@
2020

2121
import google.cloud.bigquery as bigquery
2222

23+
import bigframes.core.sql
24+
import bigframes.dataframe
2325
import bigframes.ml.utils as utils
2426

2527
if typing.TYPE_CHECKING:
26-
import bigframes.dataframe as dataframe
2728
import bigframes.series as series
2829
import bigframes.session
2930

@@ -91,15 +92,15 @@ def create_vector_index(
9192
def vector_search(
9293
base_table: str,
9394
column_to_search: str,
94-
query: Union["dataframe.DataFrame", "series.Series"],
95+
query: Union[bigframes.dataframe.DataFrame, series.Series],
9596
*,
9697
query_column_to_search: Optional[str] = None,
9798
top_k: Optional[int] = None,
9899
distance_type: Optional[Literal["euclidean", "cosine", "dot_product"]] = None,
99100
fraction_lists_to_search: Optional[float] = None,
100101
use_brute_force: Optional[bool] = None,
101102
allow_large_results: Optional[bool] = None,
102-
) -> dataframe.DataFrame:
103+
) -> bigframes.dataframe.DataFrame:
103104
"""
104105
Conduct a vector search over embeddings to find semantically similar entities.
105106
@@ -108,7 +109,6 @@ def vector_search(
108109
109110
**Examples:**
110111
111-
112112
>>> import bigframes.pandas as bpd
113113
>>> import bigframes.bigquery as bbq
114114
@@ -250,12 +250,8 @@ def vector_search(
250250

251251

252252
def search(
253-
data_to_search: Union["dataframe.DataFrame", "series.Series"],
253+
data_to_search: Union[bigframes.dataframe.DataFrame, series.Series],
254254
search_query: str,
255-
*,
256-
json_scope: Optional[str] = None,
257-
analyzer: Optional[str] = None,
258-
analyzer_options: Optional[str] = None,
259255
) -> series.Series:
260256
"""
261257
The SEARCH function checks to see whether a BigQuery table or other search
@@ -288,46 +284,28 @@ def search(
288284
search_query (str):
289285
A STRING literal, or a STRING constant expression that represents
290286
the terms of the search query.
291-
json_scope (str, optional):
292-
A named argument with a STRING value. Takes one of the following
293-
values to indicate the scope of JSON data to be searched. It has no
294-
effect if data_to_search isn't a JSON value or doesn't contain a
295-
JSON field.
296-
analyzer (str, optional):
297-
A named argument with a STRING value. Takes one of the following
298-
values to indicate the text analyzer to use: 'LOG_ANALYZER',
299-
'NO_OP_ANALYZER', 'PATTERN_ANALYZER'.
300-
analyzer_options (str, optional):
301-
A named argument with a JSON-formatted STRING value. Takes a list
302-
of text analysis rules.
303287
304288
Returns:
305289
bigframes.series.Series: A new Series with the boolean result.
306290
"""
307291
import bigframes.operations.search_ops as search_ops
308292
import bigframes.series
309293

310-
if not isinstance(data_to_search, (bigframes.series.Series, bigframes.dataframe.DataFrame)):
294+
if not isinstance(
295+
data_to_search, (bigframes.series.Series, bigframes.dataframe.DataFrame)
296+
):
311297
raise ValueError("data_to_search must be a Series or DataFrame")
312298

313299
if isinstance(data_to_search, bigframes.dataframe.DataFrame):
314-
# SEARCH on a table (or dataframe) treats it as a STRUCT
315-
# We need to apply the op on the dataframe, which should handle it as a struct or row
316-
# However, unary ops are usually applied on Series.
317-
# But DataFrame can be passed if we convert it to a struct first?
318-
# Or does DataFrame support _apply_unary_op?
319-
# bigframes.dataframe.DataFrame does not have _apply_unary_op.
320-
# We can convert DataFrame to a Series of Structs.
321-
# But SEARCH in BigQuery can take a table reference which is evaluated as a STRUCT.
322-
# So creating a struct from all columns seems correct.
300+
# SEARCH on a table (or DataFrame) treats it as a STRUCT. A scalar unary
301+
# op can only be applied to a Series, so pack all of the DataFrame's
302+
# columns into a single STRUCT Series before applying the op.
323303
import bigframes.bigquery._operations.struct as struct_ops
304+
324305
data_to_search = struct_ops.struct(data_to_search)
325306

326307
return data_to_search._apply_unary_op(
327308
search_ops.SearchOp(
328309
search_query=search_query,
329-
json_scope=json_scope,
330-
analyzer=analyzer,
331-
analyzer_options=analyzer_options,
332310
)
333311
)

bigframes/core/compile/ibis_compiler/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,5 @@
2222

2323
import bigframes.core.compile.ibis_compiler.operations.generic_ops # noqa: F401
2424
import bigframes.core.compile.ibis_compiler.operations.geo_ops # noqa: F401
25+
import bigframes.core.compile.ibis_compiler.operations.search_ops # noqa: F401
2526
import bigframes.core.compile.ibis_compiler.scalar_op_registry # noqa: F401
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
BigFrames -> Ibis compilation for the operations in bigframes.operations.search_ops.
17+
18+
Please keep implementations in sequential order by op name.
19+
"""
20+
21+
from __future__ import annotations
22+
23+
from bigframes_vendored.ibis.expr import types as ibis_types
24+
import bigframes_vendored.ibis.expr.operations.udf as ibis_udf
25+
26+
from bigframes.core.compile.ibis_compiler import scalar_op_compiler
27+
from bigframes.operations import search_ops
28+
29+
register_unary_op = scalar_op_compiler.scalar_op_compiler.register_unary_op
30+
31+
32+
@register_unary_op(search_ops.SearchOp, pass_op=True)
33+
def search_op_impl(x: ibis_types.Value, op: search_ops.SearchOp):
34+
return search(x, op.search_query)
35+
36+
37+
@ibis_udf.scalar.builtin(name="search")
38+
def search(data_to_search, search_query) -> bool:
39+
"""Checks to see whether a table or other search data contains a set of search terms."""
40+
return False # pragma: NO COVER

bigframes/core/compile/ibis_compiler/scalar_op_registry.py

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -2153,43 +2153,6 @@ def str_lstrip_op( # type: ignore[empty-body]
21532153
"""Remove leading characters."""
21542154

21552155

2156-
@scalar_op_compiler.register_unary_op(ops.SearchOp, pass_op=True)
2157-
def search_op_impl(x: ibis_types.Value, op: ops.SearchOp):
2158-
values = [
2159-
typing.cast(ibis_generic.Value, x.op()),
2160-
typing.cast(ibis_generic.Value, ibis_types.literal(op.search_query).op()),
2161-
]
2162-
sql_template = "SEARCH({0}, {1}"
2163-
arg_index = 2
2164-
if op.json_scope is not None:
2165-
values.append(
2166-
typing.cast(ibis_generic.Value, ibis_types.literal(op.json_scope).op())
2167-
)
2168-
sql_template += f", json_scope=>{{{arg_index}}}"
2169-
arg_index += 1
2170-
if op.analyzer is not None:
2171-
values.append(
2172-
typing.cast(ibis_generic.Value, ibis_types.literal(op.analyzer).op())
2173-
)
2174-
sql_template += f", analyzer=>{{{arg_index}}}"
2175-
arg_index += 1
2176-
if op.analyzer_options is not None:
2177-
values.append(
2178-
typing.cast(
2179-
ibis_generic.Value, ibis_types.literal(op.analyzer_options).op()
2180-
)
2181-
)
2182-
sql_template += f", analyzer_options=>{{{arg_index}}}"
2183-
arg_index += 1
2184-
sql_template += ")"
2185-
2186-
return ibis_generic.SqlScalar(
2187-
ibis_generic.Literal(sql_template, dtype=ibis_dtypes.string),
2188-
values=tuple(values),
2189-
output_type=ibis_dtypes.boolean,
2190-
).to_expr()
2191-
2192-
21932156
@ibis_udf.scalar.builtin(name="rtrim")
21942157
def str_rstrip_op( # type: ignore[empty-body]
21952158
x: ibis_dtypes.String, to_strip: ibis_dtypes.String

bigframes/core/compile/sqlglot/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import bigframes.core.compile.sqlglot.expressions.geo_ops # noqa: F401
2626
import bigframes.core.compile.sqlglot.expressions.json_ops # noqa: F401
2727
import bigframes.core.compile.sqlglot.expressions.numeric_ops # noqa: F401
28+
import bigframes.core.compile.sqlglot.expressions.search_ops # noqa: F401
2829
import bigframes.core.compile.sqlglot.expressions.string_ops # noqa: F401
2930
import bigframes.core.compile.sqlglot.expressions.struct_ops # noqa: F401
3031
import bigframes.core.compile.sqlglot.expressions.timedelta_ops # noqa: F401
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import sqlglot.expressions as sge
18+
19+
from bigframes import operations as ops
20+
from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
21+
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
22+
23+
register_nary_op = scalar_compiler.scalar_op_compiler.register_nary_op
24+
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
25+
26+
27+
@register_unary_op(ops.SearchOp, pass_op=True)
28+
def _(expr: TypedExpr, op: ops.SearchOp) -> sge.Expression:
29+
return sge.func("SEARCH", expr.expr, sge.convert(op.search_query))

bigframes/operations/search_ops.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,6 @@
2323
class SearchOp(base_ops.UnaryOp):
2424
name: typing.ClassVar[str] = "search"
2525
search_query: str
26-
json_scope: typing.Optional[str] = None
27-
analyzer: typing.Optional[str] = None
28-
analyzer_options: typing.Optional[str] = None
2926

3027
def output_type(self, *input_types):
3128
return dtypes.BOOL_DTYPE

0 commit comments

Comments
 (0)