googleapis
diff --git a/‎bigframes/operations/blob.py‎
Lines changed: 33 additions & 0 deletions b/‎bigframes/operations/blob.py‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎bigframes/testing/mocks.py‎
Lines changed: 3 additions & 2 deletions b/‎bigframes/testing/mocks.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎notebooks/multimodal/multimodal_dataframe.ipynb‎
Lines changed: 5 additions & 4 deletions b/‎notebooks/multimodal/multimodal_dataframe.ipynb‎
Lines changed: 5 additions & 4 deletions
diff --git a/‎samples/snippets/multimodal_test.py‎
Lines changed: 5 additions & 4 deletions b/‎samples/snippets/multimodal_test.py‎
Lines changed: 5 additions & 4 deletions
@@ -303,6 +303,7 @@ def get_runtime_json_str(
     def exif(
         self,
         *,
+        engine: Literal[None, "pillow"] = None,
         connection: Optional[str] = None,
         max_batching_rows: int = 8192,
         container_cpu: Union[float, int] = 0.33,
@@ -311,6 +312,7 @@ def exif(
         """Extract EXIF data. Now only support image types.
 
         Args:
+            engine ('pillow' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified.
             connection (str or None, default None): BQ connection used for function internet transactions, and the output blob if "dst" is str. If None, uses default connection of the session.
             max_batching_rows (int, default 8,192): Max number of rows per batch sent to Cloud Run to execute the function.
             container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to integers.
@@ -319,6 +321,8 @@ def exif(
         Returns:
             bigframes.series.Series: JSON series of key-value pairs.
         """
+        if engine is None or engine.casefold() != "pillow":
+            raise ValueError("Must specify the engine, supported value is 'pillow'.")
 
         import bigframes.bigquery as bbq
         import bigframes.blob._functions as blob_func
@@ -344,6 +348,7 @@ def image_blur(
         self,
         ksize: tuple[int, int],
         *,
+        engine: Literal[None, "opencv"] = None,
         dst: Optional[Union[str, bigframes.series.Series]] = None,
         connection: Optional[str] = None,
         max_batching_rows: int = 8192,
@@ -354,6 +359,7 @@ def image_blur(
 
         Args:
             ksize (tuple(int, int)): Kernel size.
+            engine ('opencv' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified.
             dst (str or bigframes.series.Series or None, default None): Output destination. Can be one of:
                 str: GCS folder str. The output filenames are the same as the input files.
                 blob Series: The output file paths are determined by the uris of the blob Series.
@@ -367,6 +373,9 @@ def image_blur(
         Returns:
             bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ.
         """
+        if engine is None or engine.casefold() != "opencv":
+            raise ValueError("Must specify the engine, supported value is 'opencv'.")
+
         import bigframes.blob._functions as blob_func
 
         connection = self._resolve_connection(connection)
@@ -424,6 +433,7 @@ def image_resize(
         self,
         dsize: tuple[int, int] = (0, 0),
         *,
+        engine: Literal[None, "opencv"] = None,
         fx: float = 0.0,
         fy: float = 0.0,
         dst: Optional[Union[str, bigframes.series.Series]] = None,
@@ -436,6 +446,7 @@ def image_resize(
 
         Args:
             dsize (tuple(int, int), default (0, 0)): Destination size. If set to 0, fx and fy parameters determine the size.
+            engine ('opencv' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified.
             fx (float, default 0.0): scale factor along the horizontal axis. If set to 0.0, dsize parameter determines the output size.
             fy (float, default 0.0): scale factor along the vertical axis. If set to 0.0, dsize parameter determines the output size.
             dst (str or bigframes.series.Series or None, default None): Output destination. Can be one of:
@@ -451,6 +462,9 @@ def image_resize(
         Returns:
             bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ.
         """
+        if engine is None or engine.casefold() != "opencv":
+            raise ValueError("Must specify the engine, supported value is 'opencv'.")
+
         dsize_set = dsize[0] > 0 and dsize[1] > 0
         fsize_set = fx > 0.0 and fy > 0.0
         if not dsize_set ^ fsize_set:
@@ -516,6 +530,7 @@ def image_resize(
     def image_normalize(
         self,
         *,
+        engine: Literal[None, "opencv"] = None,
         alpha: float = 1.0,
         beta: float = 0.0,
         norm_type: str = "l2",
@@ -528,6 +543,7 @@ def image_normalize(
         """Normalize images.
 
         Args:
+            engine ('opencv' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified.
             alpha (float, default 1.0): Norm value to normalize to or the lower range boundary in case of the range normalization.
             beta (float, default 0.0): Upper range boundary in case of the range normalization; it is not used for the norm normalization.
             norm_type (str, default "l2"): Normalization type. Accepted values are "inf", "l1", "l2" and "minmax".
@@ -544,6 +560,9 @@ def image_normalize(
         Returns:
             bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ.
         """
+        if engine is None or engine.casefold() != "opencv":
+            raise ValueError("Must specify the engine, supported value is 'opencv'.")
+
         import bigframes.blob._functions as blob_func
 
         connection = self._resolve_connection(connection)
@@ -604,6 +623,7 @@ def image_normalize(
     def pdf_extract(
         self,
         *,
+        engine: Literal[None, "pypdf"] = None,
         connection: Optional[str] = None,
         max_batching_rows: int = 1,
         container_cpu: Union[float, int] = 2,
@@ -613,6 +633,7 @@ def pdf_extract(
         """Extracts text from PDF URLs and saves the text as string.
 
         Args:
+            engine ('pypdf' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified.
             connection (str or None, default None): BQ connection used for
                 function internet transactions, and the output blob if "dst"
                 is str. If None, uses default connection of the session.
@@ -631,6 +652,9 @@ def pdf_extract(
                 Contains the extracted text from the PDF file.
                 Includes error messages if verbosity is enabled.
         """
+        if engine is None or engine.casefold() != "pypdf":
+            raise ValueError("Must specify the engine, supported value is 'pypdf'.")
+
         import bigframes.bigquery as bbq
         import bigframes.blob._functions as blob_func
         import bigframes.pandas as bpd
@@ -663,6 +687,7 @@ def pdf_extract(
     def pdf_chunk(
         self,
         *,
+        engine: Literal[None, "pypdf"] = None,
         connection: Optional[str] = None,
         chunk_size: int = 2000,
         overlap_size: int = 200,
@@ -675,6 +700,7 @@ def pdf_chunk(
            arrays of strings.
 
         Args:
+            engine ('pypdf' or None, default None): The engine (bigquery or third party library) used for the function. The value must be specified.
             connection (str or None, default None): BQ connection used for
                 function internet transactions, and the output blob if "dst"
                 is str. If None, uses default connection of the session.
@@ -698,6 +724,8 @@ def pdf_chunk(
                 where each string is a chunk of text extracted from PDF.
                 Includes error messages if verbosity is enabled.
         """
+        if engine is None or engine.casefold() != "pypdf":
+            raise ValueError("Must specify the engine, supported value is 'pypdf'.")
 
         import bigframes.bigquery as bbq
         import bigframes.blob._functions as blob_func
@@ -740,6 +768,7 @@ def pdf_chunk(
     def audio_transcribe(
         self,
         *,
+        engine: Literal["bigquery"] = "bigquery",
         connection: Optional[str] = None,
         model_name: Optional[
             Literal[
@@ -753,6 +782,7 @@ def audio_transcribe(
         Transcribe audio content using a Gemini multimodal model.
 
         Args:
+            engine ('bigquery'): The engine (bigquery or third party library) used for the function.
             connection (str or None, default None): BQ connection used for
                 function internet transactions, and the output blob if "dst"
                 is str. If None, uses default connection of the session.
@@ -770,6 +800,9 @@ def audio_transcribe(
                 Contains the transcribed text from the audio file.
                 Includes error messages if verbosity is enabled.
         """
+        if engine.casefold() != "bigquery":
+            raise ValueError("Must specify the engine, supported value is 'bigquery'.")
+
         import bigframes.bigquery as bbq
         import bigframes.ml.llm as llm
         import bigframes.pandas as bpd
 
@@ -41,6 +41,7 @@ def create_bigquery_session(
     bqclient: Optional[mock.Mock] = None,
     session_id: str = "abcxyz",
     table_schema: Sequence[google.cloud.bigquery.SchemaField] = TEST_SCHEMA,
+    table_name: str = "test_table",
     anonymous_dataset: Optional[google.cloud.bigquery.DatasetReference] = None,
     location: str = "test-region",
     ordering_mode: Literal["strict", "partial"] = "partial",
@@ -76,7 +77,7 @@ def create_bigquery_session(
         type(table).schema = mock.PropertyMock(return_value=table_schema)
         type(table).project = anonymous_dataset.project
         type(table).dataset_id = anonymous_dataset.dataset_id
-        type(table).table_id = "test_table"
+        type(table).table_id = table_name
         type(table).num_rows = mock.PropertyMock(return_value=1000000000)
         bqclient.get_table.return_value = table
 
@@ -94,7 +95,7 @@ def query_mock(
         query_job = mock.create_autospec(google.cloud.bigquery.QueryJob, instance=True)
         query_job._properties = {}
         type(query_job).destination = mock.PropertyMock(
-            return_value=anonymous_dataset.table("test_table"),
+            return_value=anonymous_dataset.table(table_name),
         )
         type(query_job).statement_type = mock.PropertyMock(return_value="SELECT")
 
 
@@ -254,16 +254,17 @@
       "outputs": [],
       "source": [
         "df_image[\"blurred\"] = df_image[\"image\"].blob.image_blur(\n",
-        "    (20, 20), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\"\n",
+        "    (20, 20), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_transformed/\", engine=\"opencv\"\n",
         ")\n",
         "df_image[\"resized\"] = df_image[\"image\"].blob.image_resize(\n",
-        "    (300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_resize_transformed/\"\n",
+        "    (300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_resize_transformed/\", engine=\"opencv\"\n",
         ")\n",
         "df_image[\"normalized\"] = df_image[\"image\"].blob.image_normalize(\n",
         "    alpha=50.0,\n",
         "    beta=150.0,\n",
         "    norm_type=\"minmax\",\n",
         "    dst=f\"gs://{OUTPUT_BUCKET}/image_normalize_transformed/\",\n",
+        "    engine=\"opencv\",\n",
         ")"
       ]
     },
@@ -280,7 +281,7 @@
       "outputs": [],
       "source": [
         "# You can also chain functions together\n",
-        "df_image[\"blur_resized\"] = df_image[\"blurred\"].blob.image_resize((300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed/\")"
+        "df_image[\"blur_resized\"] = df_image[\"blurred\"].blob.image_resize((300, 200), dst=f\"gs://{OUTPUT_BUCKET}/image_blur_resize_transformed/\", engine=\"opencv\")"
       ]
     },
     {
@@ -419,7 +420,7 @@
       },
       "outputs": [],
       "source": [
-        "df_pdf[\"chunked\"] = df_pdf[\"pdf\"].blob.pdf_chunk()"
+        "df_pdf[\"chunked\"] = df_pdf[\"pdf\"].blob.pdf_chunk(engine=\"pypdf\")"
       ]
     },
     {
 
@@ -56,21 +56,22 @@ def test_multimodal_dataframe(gcs_dst_bucket: str) -> None:
 
     # [START bigquery_dataframes_multimodal_dataframe_image_transform]
     df_image["blurred"] = df_image["image"].blob.image_blur(
-        (20, 20), dst=f"{dst_bucket}/image_blur_transformed/"
+        (20, 20), dst=f"{dst_bucket}/image_blur_transformed/", engine="opencv"
     )
     df_image["resized"] = df_image["image"].blob.image_resize(
-        (300, 200), dst=f"{dst_bucket}/image_resize_transformed/"
+        (300, 200), dst=f"{dst_bucket}/image_resize_transformed/", engine="opencv"
     )
     df_image["normalized"] = df_image["image"].blob.image_normalize(
         alpha=50.0,
         beta=150.0,
         norm_type="minmax",
         dst=f"{dst_bucket}/image_normalize_transformed/",
+        engine="opencv",
     )
 
     # You can also chain functions together
     df_image["blur_resized"] = df_image["blurred"].blob.image_resize(
-        (300, 200), dst=f"{dst_bucket}/image_blur_resize_transformed/"
+        (300, 200), dst=f"{dst_bucket}/image_blur_resize_transformed/", engine="opencv"
     )
     df_image
     # [END bigquery_dataframes_multimodal_dataframe_image_transform]
@@ -113,7 +114,7 @@ def test_multimodal_dataframe(gcs_dst_bucket: str) -> None:
     df_pdf = bpd.from_glob_path(
         "gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*", name="pdf"
     )
-    df_pdf["chunked"] = df_pdf["pdf"].blob.pdf_chunk()
+    df_pdf["chunked"] = df_pdf["pdf"].blob.pdf_chunk(engine="pypdf")
     chunked = df_pdf["chunked"].explode()
     chunked
     # [END bigquery_dataframes_multimodal_dataframe_pdf_chunk]