googleapis
diff --git a/bigframes/blob/_functions.py b/bigframes/blob/_functions.py
Lines changed: 188 additions & 97 deletions
diff --git a/bigframes/core/compile/sqlglot/expressions/comparison_ops.py b/bigframes/core/compile/sqlglot/expressions/comparison_ops.py
Lines changed: 5 additions & 0 deletions
diff --git a/bigframes/core/compile/sqlglot/expressions/numeric_ops.py b/bigframes/core/compile/sqlglot/expressions/numeric_ops.py
Lines changed: 8 additions & 0 deletions
diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py
Lines changed: 107 additions & 30 deletions
@@ -109,6 +109,11 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
     return sge.LTE(this=left_expr, expression=right_expr)
 
 
+@register_binary_op(ops.minimum_op)
+def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
+    return sge.Least(this=left.expr, expressions=right.expr)
+
+
 @register_binary_op(ops.ne_op)
 def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
     left_expr = _coerce_bool_to_int(left)
 
@@ -377,6 +377,14 @@ def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
         return result
 
 
+@register_binary_op(ops.round_op)
+def _(expr: TypedExpr, n_digits: TypedExpr) -> sge.Expression:
+    rounded = sge.Round(this=expr.expr, decimals=n_digits.expr)
+    if expr.dtype == dtypes.INT_DTYPE:
+        return sge.Cast(this=rounded, to="INT64")
+    return rounded
+
+
 @register_binary_op(ops.sub_op)
 def _(left: TypedExpr, right: TypedExpr) -> sge.Expression:
     if dtypes.is_numeric(left.dtype) and dtypes.is_numeric(right.dtype):
 
@@ -193,6 +193,20 @@ def _df_apply_udf(
 
         return s
 
+    def _apply_udf_or_raise_error(
+        self, df: bigframes.dataframe.DataFrame, udf, operation_name: str
+    ) -> bigframes.series.Series:
+        """Helper to apply UDF with consistent error handling."""
+        try:
+            res = self._df_apply_udf(df, udf)
+        except Exception as e:
+            raise RuntimeError(f"{operation_name} UDF execution failed: {e}") from e
+
+        if res is None:
+            raise RuntimeError(f"{operation_name} returned None result")
+
+        return res
+
     def read_url(self) -> bigframes.series.Series:
         """Retrieve the read URL of the Blob.
 
@@ -343,6 +357,10 @@ def exif(
 
         Returns:
             bigframes.series.Series: JSON series of key-value pairs if verbose=False, or struct with status and content if verbose=True.
+
+        Raises:
+            ValueError: If engine is not 'pillow'.
+            RuntimeError: If EXIF extraction fails or returns invalid structure.
         """
         if engine is None or engine.casefold() != "pillow":
             raise ValueError("Must specify the engine, supported value is 'pillow'.")
@@ -364,22 +382,28 @@ def exif(
             container_memory=container_memory,
         ).udf()
 
-        res = self._df_apply_udf(df, exif_udf)
+        res = self._apply_udf_or_raise_error(df, exif_udf, "EXIF extraction")
 
         if verbose:
-            exif_content_series = bbq.parse_json(
-                res._apply_unary_op(ops.JSONValue(json_path="$.content"))
-            ).rename("exif_content")
-            exif_status_series = res._apply_unary_op(
-                ops.JSONValue(json_path="$.status")
-            )
+            try:
+                exif_content_series = bbq.parse_json(
+                    res._apply_unary_op(ops.JSONValue(json_path="$.content"))
+                ).rename("exif_content")
+                exif_status_series = res._apply_unary_op(
+                    ops.JSONValue(json_path="$.status")
+                )
+            except Exception as e:
+                raise RuntimeError(f"Failed to parse EXIF JSON result: {e}") from e
             results_df = bpd.DataFrame(
                 {"status": exif_status_series, "content": exif_content_series}
             )
             results_struct = bbq.struct(results_df).rename("exif_results")
             return results_struct
         else:
-            return bbq.parse_json(res)
+            try:
+                return bbq.parse_json(res)
+            except Exception as e:
+                raise RuntimeError(f"Failed to parse EXIF JSON result: {e}") from e
 
     def image_blur(
         self,
@@ -411,6 +435,10 @@ def image_blur(
 
         Returns:
             bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ. If verbose=True, returns struct with status and content.
+
+        Raises:
+            ValueError: If engine is not 'opencv' or parameters are invalid.
+            RuntimeError: If image blur operation fails.
         """
         if engine is None or engine.casefold() != "opencv":
             raise ValueError("Must specify the engine, supported value is 'opencv'.")
@@ -437,7 +465,7 @@ def image_blur(
             df["ksize_x"], df["ksize_y"] = ksize
             df["ext"] = ext  # type: ignore
             df["verbose"] = verbose
-            res = self._df_apply_udf(df, image_blur_udf)
+            res = self._apply_udf_or_raise_error(df, image_blur_udf, "Image blur")
 
             if verbose:
                 blurred_content_b64_series = res._apply_unary_op(
@@ -486,7 +514,7 @@ def image_blur(
         df["ext"] = ext  # type: ignore
         df["verbose"] = verbose
 
-        res = self._df_apply_udf(df, image_blur_udf)
+        res = self._apply_udf_or_raise_error(df, image_blur_udf, "Image blur")
         res.cache()  # to execute the udf
 
         if verbose:
@@ -540,6 +568,10 @@ def image_resize(
 
         Returns:
             bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ. If verbose=True, returns struct with status and content.
+
+        Raises:
+            ValueError: If engine is not 'opencv' or parameters are invalid.
+            RuntimeError: If image resize operation fails.
         """
         if engine is None or engine.casefold() != "opencv":
             raise ValueError("Must specify the engine, supported value is 'opencv'.")
@@ -570,11 +602,11 @@ def image_resize(
                 container_memory=container_memory,
             ).udf()
 
-            df["dsize_x"], df["dsizye_y"] = dsize
+            df["dsize_x"], df["dsize_y"] = dsize
             df["fx"], df["fy"] = fx, fy
             df["ext"] = ext  # type: ignore
             df["verbose"] = verbose
-            res = self._df_apply_udf(df, image_resize_udf)
+            res = self._apply_udf_or_raise_error(df, image_resize_udf, "Image resize")
 
             if verbose:
                 resized_content_b64_series = res._apply_unary_op(
@@ -620,12 +652,12 @@ def image_resize(
         dst_rt = dst.blob.get_runtime_json_str(mode="RW")
 
         df = df.join(dst_rt, how="outer")
-        df["dsize_x"], df["dsizye_y"] = dsize
+        df["dsize_x"], df["dsize_y"] = dsize
         df["fx"], df["fy"] = fx, fy
         df["ext"] = ext  # type: ignore
         df["verbose"] = verbose
 
-        res = self._df_apply_udf(df, image_resize_udf)
+        res = self._apply_udf_or_raise_error(df, image_resize_udf, "Image resize")
         res.cache()  # to execute the udf
 
         if verbose:
@@ -679,6 +711,10 @@ def image_normalize(
 
         Returns:
             bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ. If verbose=True, returns struct with status and content.
+
+        Raises:
+            ValueError: If engine is not 'opencv' or parameters are invalid.
+            RuntimeError: If image normalize operation fails.
         """
         if engine is None or engine.casefold() != "opencv":
             raise ValueError("Must specify the engine, supported value is 'opencv'.")
@@ -707,7 +743,9 @@ def image_normalize(
             df["norm_type"] = norm_type
             df["ext"] = ext  # type: ignore
             df["verbose"] = verbose
-            res = self._df_apply_udf(df, image_normalize_udf)
+            res = self._apply_udf_or_raise_error(
+                df, image_normalize_udf, "Image normalize"
+            )
 
             if verbose:
                 normalized_content_b64_series = res._apply_unary_op(
@@ -758,7 +796,7 @@ def image_normalize(
         df["ext"] = ext  # type: ignore
         df["verbose"] = verbose
 
-        res = self._df_apply_udf(df, image_normalize_udf)
+        res = self._apply_udf_or_raise_error(df, image_normalize_udf, "Image normalize")
         res.cache()  # to execute the udf
 
         if verbose:
@@ -809,6 +847,10 @@ def pdf_extract(
                 depend on the "verbose" parameter.
                 Contains the extracted text from the PDF file.
                 Includes error messages if verbosity is enabled.
+
+        Raises:
+            ValueError: If engine is not 'pypdf'.
+            RuntimeError: If PDF extraction fails or returns invalid structure.
         """
         if engine is None or engine.casefold() != "pypdf":
             raise ValueError("Must specify the engine, supported value is 'pypdf'.")
@@ -830,18 +872,29 @@ def pdf_extract(
 
         df = self.get_runtime_json_str(mode="R").to_frame()
         df["verbose"] = verbose
-        res = self._df_apply_udf(df, pdf_extract_udf)
+
+        res = self._apply_udf_or_raise_error(df, pdf_extract_udf, "PDF extraction")
 
         if verbose:
-            extracted_content_series = res._apply_unary_op(
-                ops.JSONValue(json_path="$.content")
-            )
-            status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status"))
-            results_df = bpd.DataFrame(
-                {"status": status_series, "content": extracted_content_series}
-            )
-            results_struct = bbq.struct(results_df).rename("extracted_results")
-            return results_struct
+            # Extract content with error handling
+            try:
+                content_series = res._apply_unary_op(
+                    ops.JSONValue(json_path="$.content")
+                )
+            except Exception as e:
+                raise RuntimeError(
+                    f"Failed to extract content field from PDF result: {e}"
+                ) from e
+            try:
+                status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status"))
+            except Exception as e:
+                raise RuntimeError(
+                    f"Failed to extract status field from PDF result: {e}"
+                ) from e
+
+            res_df = bpd.DataFrame({"status": status_series, "content": content_series})
+            struct_series = bbq.struct(res_df).rename("extracted_results")
+            return struct_series
         else:
             return res.rename("extracted_content")
 
@@ -884,6 +937,10 @@ def pdf_chunk(
                 depend on the "verbose" parameter.
                 where each string is a chunk of text extracted from PDF.
                 Includes error messages if verbosity is enabled.
+
+        Raises:
+            ValueError: If engine is not 'pypdf'.
+            RuntimeError: If PDF chunking fails or returns invalid structure.
         """
         if engine is None or engine.casefold() != "pypdf":
             raise ValueError("Must specify the engine, supported value is 'pypdf'.")
@@ -915,13 +972,25 @@ def pdf_chunk(
         df["overlap_size"] = overlap_size
         df["verbose"] = verbose
 
-        res = self._df_apply_udf(df, pdf_chunk_udf)
+        res = self._apply_udf_or_raise_error(df, pdf_chunk_udf, "PDF chunking")
+
+        try:
+            content_series = bbq.json_extract_string_array(res, "$.content")
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to extract content array from PDF chunk result: {e}"
+            ) from e
 
         if verbose:
-            chunked_content_series = bbq.json_extract_string_array(res, "$.content")
-            status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status"))
+            try:
+                status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status"))
+            except Exception as e:
+                raise RuntimeError(
+                    f"Failed to extract status field from PDF chunk result: {e}"
+                ) from e
+
             results_df = bpd.DataFrame(
-                {"status": status_series, "content": chunked_content_series}
+                {"status": status_series, "content": content_series}
             )
             resultes_struct = bbq.struct(results_df).rename("chunked_results")
             return resultes_struct
@@ -962,6 +1031,10 @@ def audio_transcribe(
                 depend on the "verbose" parameter.
                 Contains the transcribed text from the audio file.
                 Includes error messages if verbosity is enabled.
+
+        Raises:
+            ValueError: If engine is not 'bigquery'.
+            RuntimeError: If the transcription result structure is invalid.
         """
         if engine.casefold() != "bigquery":
             raise ValueError("Must specify the engine, supported value is 'bigquery'.")
@@ -984,6 +1057,10 @@ def audio_transcribe(
             model_params={"generationConfig": {"temperature": 0.0}},
         )
 
+        # Validate that the result is not None
+        if transcribed_results is None:
+            raise RuntimeError("Transcription returned None result")
+
         transcribed_content_series = transcribed_results.struct.field("result").rename(
             "transcribed_content"
         )