|
35 | 35 | ] |
36 | 36 |
|
37 | 37 |
|
@log_adapter.method_logger(custom_base_name="bigquery_ai")
def generate(
    prompt: PROMPT_TYPE,
    *,
    connection_id: str | None = None,
    endpoint: str | None = None,
    request_type: Literal["dedicated", "shared", "unspecified"] = "unspecified",
    model_params: Mapping[Any, Any] | None = None,
    # TODO(b/446974666) Add output_schema parameter
) -> series.Series:
    """
    Returns the AI analysis based on the prompt, which can be any combination of text and unstructured data.

    **Examples:**

    >>> import bigframes.pandas as bpd
    >>> import bigframes.bigquery as bbq
    >>> bpd.options.display.progress_bar = None
    >>> country = bpd.Series(["Japan", "Canada"])
    >>> bbq.ai.generate(("What's the capital city of ", country, " one word only"))
    0    {'result': 'Tokyo\\n', 'full_response': '{"cand...
    1    {'result': 'Ottawa\\n', 'full_response': '{"can...
    dtype: struct<result: string, full_response: extension<dbjson<JSONArrowType>>, status: string>[pyarrow]

    >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")).struct.field("result")
    0    Tokyo\\n
    1    Ottawa\\n
    Name: result, dtype: string

    Args:
        prompt (Series | List[str|Series] | Tuple[str|Series, ...]):
            A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series
            or pandas Series.
        connection_id (str, optional):
            Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`.
            If not provided, the connection from the current session will be used.
        endpoint (str, optional):
            Specifies the Vertex AI endpoint to use for the model. For example `"gemini-2.5-flash"`. You can specify any
            generally available or preview Gemini model. If you specify the model name, BigQuery ML automatically identifies and
            uses the full endpoint of the model. If you don't specify an ENDPOINT value, BigQuery ML selects a recent stable
            version of Gemini to use.
        request_type (Literal["dedicated", "shared", "unspecified"]):
            Specifies the type of inference request to send to the Gemini model. The request type determines what quota the request uses.
            * "dedicated": function only uses Provisioned Throughput quota. The function returns the error Provisioned throughput is not
              purchased or is not active if Provisioned Throughput quota isn't available.
            * "shared": the function only uses dynamic shared quota (DSQ), even if you have purchased Provisioned Throughput quota.
            * "unspecified": If you haven't purchased Provisioned Throughput quota, the function uses DSQ quota.
              If you have purchased Provisioned Throughput quota, the function uses the Provisioned Throughput quota first.
              If requests exceed the Provisioned Throughput quota, the overflow traffic uses DSQ quota.
        model_params (Mapping[Any, Any]):
            Provides additional parameters to the model. The MODEL_PARAMS value must conform to the generateContent request body format.

    Returns:
        bigframes.series.Series: A new struct Series with the result data. The struct contains these fields:
            * "result": a STRING value containing the model's response to the prompt. The result is None if the request fails or is filtered by responsible AI.
            * "full_response": a JSON value containing the response from the projects.locations.endpoints.generateContent call to the model.
              The generated text is in the text element.
            * "status": a STRING value that contains the API response status for the corresponding row. This value is empty if the operation was successful.
    """

    # Split the prompt into its literal-string context and the Series operands.
    context, operands = _separate_context_and_series(prompt)
    assert len(operands) > 0

    anchor, rest = operands[0], operands[1:]
    serialized_params = json.dumps(model_params) if model_params else None

    op = ai_ops.AIGenerate(
        prompt_context=tuple(context),
        connection_id=_resolve_connection_id(anchor, connection_id),
        endpoint=endpoint,
        request_type=request_type,
        model_params=serialized_params,
    )

    return anchor._apply_nary_op(op, rest)
| 110 | + |
| 111 | + |
38 | 112 | @log_adapter.method_logger(custom_base_name="bigquery_ai") |
39 | 113 | def generate_bool( |
40 | 114 | prompt: PROMPT_TYPE, |
@@ -188,6 +262,226 @@ def generate_int( |
188 | 262 | return series_list[0]._apply_nary_op(operator, series_list[1:]) |
189 | 263 |
|
190 | 264 |
|
@log_adapter.method_logger(custom_base_name="bigquery_ai")
def generate_double(
    prompt: PROMPT_TYPE,
    *,
    connection_id: str | None = None,
    endpoint: str | None = None,
    request_type: Literal["dedicated", "shared", "unspecified"] = "unspecified",
    model_params: Mapping[Any, Any] | None = None,
) -> series.Series:
    """
    Returns the AI analysis based on the prompt, which can be any combination of text and unstructured data.

    **Examples:**

    >>> import bigframes.pandas as bpd
    >>> import bigframes.bigquery as bbq
    >>> bpd.options.display.progress_bar = None
    >>> animal = bpd.Series(["Kangaroo", "Rabbit", "Spider"])
    >>> bbq.ai.generate_double(("How many legs does a ", animal, " have?"))
    0    {'result': 2.0, 'full_response': '{"candidates...
    1    {'result': 4.0, 'full_response': '{"candidates...
    2    {'result': 8.0, 'full_response': '{"candidates...
    dtype: struct<result: double, full_response: extension<dbjson<JSONArrowType>>, status: string>[pyarrow]

    >>> bbq.ai.generate_double(("How many legs does a ", animal, " have?")).struct.field("result")
    0    2.0
    1    4.0
    2    8.0
    Name: result, dtype: Float64

    Args:
        prompt (Series | List[str|Series] | Tuple[str|Series, ...]):
            A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series
            or pandas Series.
        connection_id (str, optional):
            Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`.
            If not provided, the connection from the current session will be used.
        endpoint (str, optional):
            Specifies the Vertex AI endpoint to use for the model. For example `"gemini-2.5-flash"`. You can specify any
            generally available or preview Gemini model. If you specify the model name, BigQuery ML automatically identifies and
            uses the full endpoint of the model. If you don't specify an ENDPOINT value, BigQuery ML selects a recent stable
            version of Gemini to use.
        request_type (Literal["dedicated", "shared", "unspecified"]):
            Specifies the type of inference request to send to the Gemini model. The request type determines what quota the request uses.
            * "dedicated": function only uses Provisioned Throughput quota. The function returns the error Provisioned throughput is not
              purchased or is not active if Provisioned Throughput quota isn't available.
            * "shared": the function only uses dynamic shared quota (DSQ), even if you have purchased Provisioned Throughput quota.
            * "unspecified": If you haven't purchased Provisioned Throughput quota, the function uses DSQ quota.
              If you have purchased Provisioned Throughput quota, the function uses the Provisioned Throughput quota first.
              If requests exceed the Provisioned Throughput quota, the overflow traffic uses DSQ quota.
        model_params (Mapping[Any, Any]):
            Provides additional parameters to the model. The MODEL_PARAMS value must conform to the generateContent request body format.

    Returns:
        bigframes.series.Series: A new struct Series with the result data. The struct contains these fields:
            * "result": a DOUBLE value containing the model's response to the prompt. The result is None if the request fails or is filtered by responsible AI.
            * "full_response": a JSON value containing the response from the projects.locations.endpoints.generateContent call to the model.
              The generated text is in the text element.
            * "status": a STRING value that contains the API response status for the corresponding row. This value is empty if the operation was successful.
    """

    # Split the prompt into its literal-string context and the Series operands
    # that are interleaved into the AI query.
    prompt_context, series_list = _separate_context_and_series(prompt)
    assert len(series_list) > 0

    operator = ai_ops.AIGenerateDouble(
        prompt_context=tuple(prompt_context),
        connection_id=_resolve_connection_id(series_list[0], connection_id),
        endpoint=endpoint,
        request_type=request_type,
        # Params are forwarded to BigQuery as a JSON string; an empty or None
        # mapping is treated as "no extra parameters".
        model_params=json.dumps(model_params) if model_params else None,
    )

    return series_list[0]._apply_nary_op(operator, series_list[1:])
| 338 | + |
| 339 | + |
@log_adapter.method_logger(custom_base_name="bigquery_ai")
def if_(
    prompt: PROMPT_TYPE,
    *,
    connection_id: str | None = None,
) -> series.Series:
    """
    Evaluates the prompt to True or False. Compared to `ai.generate_bool()`, this function
    provides optimization such that not all rows are evaluated with the LLM.

    **Examples:**
    >>> import bigframes.pandas as bpd
    >>> import bigframes.bigquery as bbq
    >>> bpd.options.display.progress_bar = None
    >>> us_state = bpd.Series(["Massachusetts", "Illinois", "Hawaii"])
    >>> bbq.ai.if_((us_state, " has a city called Springfield"))
    0     True
    1     True
    2    False
    dtype: boolean

    >>> us_state[bbq.ai.if_((us_state, " has a city called Springfield"))]
    0    Massachusetts
    1         Illinois
    dtype: string

    Args:
        prompt (Series | List[str|Series] | Tuple[str|Series, ...]):
            A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series
            or pandas Series.
        connection_id (str, optional):
            Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`.
            If not provided, the connection from the current session will be used.

    Returns:
        bigframes.series.Series: A new series of bools.
    """

    # Split the prompt into its literal-string context and the Series operands.
    context, operands = _separate_context_and_series(prompt)
    assert len(operands) > 0

    anchor, rest = operands[0], operands[1:]
    op = ai_ops.AIIf(
        prompt_context=tuple(context),
        connection_id=_resolve_connection_id(anchor, connection_id),
    )

    return anchor._apply_nary_op(op, rest)
| 387 | + |
| 388 | + |
@log_adapter.method_logger(custom_base_name="bigquery_ai")
def classify(
    # NOTE(review): `input` shadows the builtin, but renaming it would break
    # callers that pass it by keyword, so the public name is kept.
    input: PROMPT_TYPE,
    categories: tuple[str, ...] | list[str],
    *,
    connection_id: str | None = None,
) -> series.Series:
    """
    Classifies a given input into one of the specified categories. It will always return the provided category that best fits the prompt input.

    **Examples:**

    >>> import bigframes.pandas as bpd
    >>> import bigframes.bigquery as bbq
    >>> bpd.options.display.progress_bar = None
    >>> df = bpd.DataFrame({'creature': ['Cat', 'Salmon']})
    >>> df['type'] = bbq.ai.classify(df['creature'], ['Mammal', 'Fish'])
    >>> df
      creature    type
    0      Cat  Mammal
    1   Salmon    Fish
    <BLANKLINE>
    [2 rows x 2 columns]

    Args:
        input (Series | List[str|Series] | Tuple[str|Series, ...]):
            A mixture of Series and string literals that specifies the input to send to the model. The Series can be BigFrames Series
            or pandas Series.
        categories (tuple[str, ...] | list[str]):
            Categories to classify the input into.
        connection_id (str, optional):
            Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`.
            If not provided, the connection from the current session will be used.

    Returns:
        bigframes.series.Series: A new series of strings.
    """

    # Split the input into its literal-string context and the Series operands.
    prompt_context, series_list = _separate_context_and_series(input)
    assert len(series_list) > 0

    operator = ai_ops.AIClassify(
        prompt_context=tuple(prompt_context),
        # Normalize to a tuple so the operator holds an immutable category set.
        categories=tuple(categories),
        connection_id=_resolve_connection_id(series_list[0], connection_id),
    )

    return series_list[0]._apply_nary_op(operator, series_list[1:])
| 437 | + |
| 438 | + |
@log_adapter.method_logger(custom_base_name="bigquery_ai")
def score(
    prompt: PROMPT_TYPE,
    *,
    connection_id: str | None = None,
) -> series.Series:
    """
    Computes a score based on rubrics described in natural language. It will return a double value.
    There is no fixed range for the score returned. To get high quality results, provide a scoring
    rubric with examples in the prompt.

    **Examples:**

    >>> import bigframes.pandas as bpd
    >>> import bigframes.bigquery as bbq
    >>> bpd.options.display.progress_bar = None
    >>> animal = bpd.Series(["Tiger", "Rabbit", "Blue Whale"])
    >>> bbq.ai.score(("Rank the relative weights of ", animal, " on the scale from 1 to 3")) # doctest: +SKIP
    0    2.0
    1    1.0
    2    3.0
    dtype: Float64

    Args:
        prompt (Series | List[str|Series] | Tuple[str|Series, ...]):
            A mixture of Series and string literals that specifies the prompt to send to the model. The Series can be BigFrames Series
            or pandas Series.
        connection_id (str, optional):
            Specifies the connection to use to communicate with the model. For example, `myproject.us.myconnection`.
            If not provided, the connection from the current session will be used.

    Returns:
        bigframes.series.Series: A new series of double (float) values.
    """

    # Split the prompt into its literal-string context and the Series operands.
    context, operands = _separate_context_and_series(prompt)
    assert len(operands) > 0

    anchor, rest = operands[0], operands[1:]
    op = ai_ops.AIScore(
        prompt_context=tuple(context),
        connection_id=_resolve_connection_id(anchor, connection_id),
    )

    return anchor._apply_nary_op(op, rest)
| 483 | + |
| 484 | + |
191 | 485 | def _separate_context_and_series( |
192 | 486 | prompt: PROMPT_TYPE, |
193 | 487 | ) -> Tuple[List[str | None], List[series.Series]]: |
|
0 commit comments