Skip to content

Commit ab9e44b

Browse files
authored
Merge branch 'main' into fix-series-input
2 parents cb4ea23 + b711815 commit ab9e44b

File tree

78 files changed

+5244
-1589
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

78 files changed

+5244
-1589
lines changed

CHANGELOG.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,23 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.23.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.22.0...v2.23.0) (2025-09-29)
8+
9+
10+
### Features
11+
12+
* Add ai.generate_double to bigframes.bigquery package ([#2111](https://github.com/googleapis/python-bigquery-dataframes/issues/2111)) ([6b8154c](https://github.com/googleapis/python-bigquery-dataframes/commit/6b8154c578bb1a276e9cf8fe494d91f8cd6260f2))
13+
14+
15+
### Bug Fixes
16+
17+
* Prevent invalid syntax for no-op .replace ops ([#2112](https://github.com/googleapis/python-bigquery-dataframes/issues/2112)) ([c311876](https://github.com/googleapis/python-bigquery-dataframes/commit/c311876b2adbc0b66ae5e463c6e56466c6a6a495))
18+
19+
20+
### Documentation
21+
22+
* Add timedelta notebook sample ([#2124](https://github.com/googleapis/python-bigquery-dataframes/issues/2124)) ([d1a9888](https://github.com/googleapis/python-bigquery-dataframes/commit/d1a9888a2b47de6aca5dddc94d0c8f280344b58a))
23+
724
## [2.22.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.21.0...v2.22.0) (2025-09-25)
825

926

bigframes/bigquery/_operations/ai.py

Lines changed: 294 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,80 @@
3535
]
3636

3737

38+
@log_adapter.method_logger(custom_base_name="bigquery_ai")
def generate(
    prompt: PROMPT_TYPE,
    *,
    connection_id: str | None = None,
    endpoint: str | None = None,
    request_type: Literal["dedicated", "shared", "unspecified"] = "unspecified",
    model_params: Mapping[Any, Any] | None = None,
    # TODO(b/446974666) Add output_schema parameter
) -> series.Series:
    """
    Returns the AI analysis based on the prompt, which can be any combination of text and unstructured data.

    **Examples:**

    >>> import bigframes.pandas as bpd
    >>> import bigframes.bigquery as bbq
    >>> bpd.options.display.progress_bar = None
    >>> country = bpd.Series(["Japan", "Canada"])
    >>> bbq.ai.generate(("What's the capital city of ", country, " one word only"))
    0    {'result': 'Tokyo\\n', 'full_response': '{"cand...
    1    {'result': 'Ottawa\\n', 'full_response': '{"can...
    dtype: struct<result: string, full_response: extension<dbjson<JSONArrowType>>, status: string>[pyarrow]

    >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")).struct.field("result")
    0    Tokyo\\n
    1    Ottawa\\n
    Name: result, dtype: string

    Args:
        prompt (Series | List[str|Series] | Tuple[str|Series, ...]):
            A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series
            or pandas Series.
        connection_id (str, optional):
            Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`.
            If not provided, the connection from the current session will be used.
        endpoint (str, optional):
            Specifies the Vertex AI endpoint to use for the model. For example `"gemini-2.5-flash"`. You can specify any
            generally available or preview Gemini model. If you specify the model name, BigQuery ML automatically identifies and
            uses the full endpoint of the model. If you don't specify an ENDPOINT value, BigQuery ML selects a recent stable
            version of Gemini to use.
        request_type (Literal["dedicated", "shared", "unspecified"]):
            Specifies the type of inference request to send to the Gemini model. The request type determines what quota the request uses.
            * "dedicated": function only uses Provisioned Throughput quota. The function returns the error Provisioned throughput is not
            purchased or is not active if Provisioned Throughput quota isn't available.
            * "shared": the function only uses dynamic shared quota (DSQ), even if you have purchased Provisioned Throughput quota.
            * "unspecified": If you haven't purchased Provisioned Throughput quota, the function uses DSQ quota.
            If you have purchased Provisioned Throughput quota, the function uses the Provisioned Throughput quota first.
            If requests exceed the Provisioned Throughput quota, the overflow traffic uses DSQ quota.
        model_params (Mapping[Any, Any]):
            Provides additional parameters to the model. The MODEL_PARAMS value must conform to the generateContent request body format.

    Returns:
        bigframes.series.Series: A new struct Series with the result data. The struct contains these fields:
            * "result": a STRING value containing the model's response to the prompt. The result is None if the request fails or is filtered by responsible AI.
            * "full_response": a JSON value containing the response from the projects.locations.endpoints.generateContent call to the model.
            The generated text is in the text element.
            * "status": a STRING value that contains the API response status for the corresponding row. This value is empty if the operation was successful.
    """

    # Split the prompt into literal context strings and the Series inputs.
    context, inputs = _separate_context_and_series(prompt)
    assert len(inputs) > 0

    anchor = inputs[0]
    serialized_params = json.dumps(model_params) if model_params else None

    op = ai_ops.AIGenerate(
        prompt_context=tuple(context),
        connection_id=_resolve_connection_id(anchor, connection_id),
        endpoint=endpoint,
        request_type=request_type,
        model_params=serialized_params,
    )

    # Apply the operator with the first series as the anchor and the rest as operands.
    return anchor._apply_nary_op(op, inputs[1:])
110+
111+
38112
@log_adapter.method_logger(custom_base_name="bigquery_ai")
39113
def generate_bool(
40114
prompt: PROMPT_TYPE,
@@ -188,6 +262,226 @@ def generate_int(
188262
return series_list[0]._apply_nary_op(operator, series_list[1:])
189263

190264

265+
@log_adapter.method_logger(custom_base_name="bigquery_ai")
def generate_double(
    prompt: PROMPT_TYPE,
    *,
    connection_id: str | None = None,
    endpoint: str | None = None,
    request_type: Literal["dedicated", "shared", "unspecified"] = "unspecified",
    model_params: Mapping[Any, Any] | None = None,
) -> series.Series:
    """
    Returns the AI analysis based on the prompt, which can be any combination of text and unstructured data.

    **Examples:**

    >>> import bigframes.pandas as bpd
    >>> import bigframes.bigquery as bbq
    >>> bpd.options.display.progress_bar = None
    >>> animal = bpd.Series(["Kangaroo", "Rabbit", "Spider"])
    >>> bbq.ai.generate_double(("How many legs does a ", animal, " have?"))
    0    {'result': 2.0, 'full_response': '{"candidates...
    1    {'result': 4.0, 'full_response': '{"candidates...
    2    {'result': 8.0, 'full_response': '{"candidates...
    dtype: struct<result: double, full_response: extension<dbjson<JSONArrowType>>, status: string>[pyarrow]

    >>> bbq.ai.generate_double(("How many legs does a ", animal, " have?")).struct.field("result")
    0    2.0
    1    4.0
    2    8.0
    Name: result, dtype: Float64

    Args:
        prompt (Series | List[str|Series] | Tuple[str|Series, ...]):
            A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series
            or pandas Series.
        connection_id (str, optional):
            Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`.
            If not provided, the connection from the current session will be used.
        endpoint (str, optional):
            Specifies the Vertex AI endpoint to use for the model. For example `"gemini-2.5-flash"`. You can specify any
            generally available or preview Gemini model. If you specify the model name, BigQuery ML automatically identifies and
            uses the full endpoint of the model. If you don't specify an ENDPOINT value, BigQuery ML selects a recent stable
            version of Gemini to use.
        request_type (Literal["dedicated", "shared", "unspecified"]):
            Specifies the type of inference request to send to the Gemini model. The request type determines what quota the request uses.
            * "dedicated": function only uses Provisioned Throughput quota. The function returns the error Provisioned throughput is not
            purchased or is not active if Provisioned Throughput quota isn't available.
            * "shared": the function only uses dynamic shared quota (DSQ), even if you have purchased Provisioned Throughput quota.
            * "unspecified": If you haven't purchased Provisioned Throughput quota, the function uses DSQ quota.
            If you have purchased Provisioned Throughput quota, the function uses the Provisioned Throughput quota first.
            If requests exceed the Provisioned Throughput quota, the overflow traffic uses DSQ quota.
        model_params (Mapping[Any, Any]):
            Provides additional parameters to the model. The MODEL_PARAMS value must conform to the generateContent request body format.

    Returns:
        bigframes.series.Series: A new struct Series with the result data. The struct contains these fields:
            * "result": a DOUBLE value containing the model's response to the prompt. The result is None if the request fails or is filtered by responsible AI.
            * "full_response": a JSON value containing the response from the projects.locations.endpoints.generateContent call to the model.
            The generated text is in the text element.
            * "status": a STRING value that contains the API response status for the corresponding row. This value is empty if the operation was successful.
    """

    prompt_context, series_list = _separate_context_and_series(prompt)
    assert len(series_list) > 0

    operator = ai_ops.AIGenerateDouble(
        prompt_context=tuple(prompt_context),
        connection_id=_resolve_connection_id(series_list[0], connection_id),
        endpoint=endpoint,
        request_type=request_type,
        model_params=json.dumps(model_params) if model_params else None,
    )

    return series_list[0]._apply_nary_op(operator, series_list[1:])
338+
339+
340+
@log_adapter.method_logger(custom_base_name="bigquery_ai")
def if_(
    prompt: PROMPT_TYPE,
    *,
    connection_id: str | None = None,
) -> series.Series:
    """
    Evaluates the prompt to True or False. Compared to `ai.generate_bool()`, this function
    provides optimization such that not all rows are evaluated with the LLM.

    **Examples:**
    >>> import bigframes.pandas as bpd
    >>> import bigframes.bigquery as bbq
    >>> bpd.options.display.progress_bar = None
    >>> us_state = bpd.Series(["Massachusetts", "Illinois", "Hawaii"])
    >>> bbq.ai.if_((us_state, " has a city called Springfield"))
    0     True
    1     True
    2    False
    dtype: boolean

    >>> us_state[bbq.ai.if_((us_state, " has a city called Springfield"))]
    0    Massachusetts
    1         Illinois
    dtype: string

    Args:
        prompt (Series | List[str|Series] | Tuple[str|Series, ...]):
            A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series
            or pandas Series.
        connection_id (str, optional):
            Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`.
            If not provided, the connection from the current session will be used.

    Returns:
        bigframes.series.Series: A new series of bools.
    """

    # Split the prompt into literal context strings and the Series inputs.
    context, inputs = _separate_context_and_series(prompt)
    assert len(inputs) > 0

    anchor = inputs[0]
    op = ai_ops.AIIf(
        prompt_context=tuple(context),
        connection_id=_resolve_connection_id(anchor, connection_id),
    )

    # Apply the operator with the first series as the anchor and the rest as operands.
    return anchor._apply_nary_op(op, inputs[1:])
387+
388+
389+
@log_adapter.method_logger(custom_base_name="bigquery_ai")
def classify(
    input: PROMPT_TYPE,
    categories: tuple[str, ...] | list[str],
    *,
    connection_id: str | None = None,
) -> series.Series:
    """
    Classifies a given input into one of the specified categories. It always returns the
    provided category that best fits the prompt input.

    **Examples:**

    >>> import bigframes.pandas as bpd
    >>> import bigframes.bigquery as bbq
    >>> bpd.options.display.progress_bar = None
    >>> df = bpd.DataFrame({'creature': ['Cat', 'Salmon']})
    >>> df['type'] = bbq.ai.classify(df['creature'], ['Mammal', 'Fish'])
    >>> df
      creature    type
    0      Cat  Mammal
    1   Salmon    Fish
    <BLANKLINE>
    [2 rows x 2 columns]

    Args:
        input (Series | List[str|Series] | Tuple[str|Series, ...]):
            A mixture of Series and string literals that specifies the input to send to the model. The Series can be BigFrames Series
            or pandas Series.
        categories (tuple[str, ...] | list[str]):
            Categories to classify the input into.
        connection_id (str, optional):
            Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`.
            If not provided, the connection from the current session will be used.

    Returns:
        bigframes.series.Series: A new series of strings.
    """

    # Split the input into literal context strings and the Series inputs.
    context, inputs = _separate_context_and_series(input)
    assert len(inputs) > 0

    anchor = inputs[0]
    op = ai_ops.AIClassify(
        prompt_context=tuple(context),
        categories=tuple(categories),
        connection_id=_resolve_connection_id(anchor, connection_id),
    )

    # Apply the operator with the first series as the anchor and the rest as operands.
    return anchor._apply_nary_op(op, inputs[1:])
437+
438+
439+
@log_adapter.method_logger(custom_base_name="bigquery_ai")
def score(
    prompt: PROMPT_TYPE,
    *,
    connection_id: str | None = None,
) -> series.Series:
    """
    Computes a score based on rubrics described in natural language. It will return a double value.
    There is no fixed range for the score returned. To get high quality results, provide a scoring
    rubric with examples in the prompt.

    **Examples:**

    >>> import bigframes.pandas as bpd
    >>> import bigframes.bigquery as bbq
    >>> bpd.options.display.progress_bar = None
    >>> animal = bpd.Series(["Tiger", "Rabbit", "Blue Whale"])
    >>> bbq.ai.score(("Rank the relative weights of ", animal, " on the scale from 1 to 3")) # doctest: +SKIP
    0    2.0
    1    1.0
    2    3.0
    dtype: Float64

    Args:
        prompt (Series | List[str|Series] | Tuple[str|Series, ...]):
            A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series
            or pandas Series.
        connection_id (str, optional):
            Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`.
            If not provided, the connection from the current session will be used.

    Returns:
        bigframes.series.Series: A new series of double (float) values.
    """

    # Split the prompt into literal context strings and the Series inputs.
    context, inputs = _separate_context_and_series(prompt)
    assert len(inputs) > 0

    anchor = inputs[0]
    op = ai_ops.AIScore(
        prompt_context=tuple(context),
        connection_id=_resolve_connection_id(anchor, connection_id),
    )

    # Apply the operator with the first series as the anchor and the rest as operands.
    return anchor._apply_nary_op(op, inputs[1:])
483+
484+
191485
def _separate_context_and_series(
192486
prompt: PROMPT_TYPE,
193487
) -> Tuple[List[str | None], List[series.Series]]:

0 commit comments

Comments
 (0)