Add error handling to image_normalize_to_bytes_func; tests are not yet clean

shuoweil · shuoweil · commit 6d001c8adedd · 2025-10-09T06:47:24.000Z
diff --git a/bigframes/blob/_functions.py b/bigframes/blob/_functions.py
@@ -395,40 +395,47 @@ def image_normalize_func(
 
 def image_normalize_to_bytes_func(
     src_obj_ref_rt: str, alpha: float, beta: float, norm_type: str, ext: str
-) -> bytes:
-    import json
+) -> str:
+    try:
+        import base64
+        import json
 
-    import cv2 as cv  # type: ignore
-    import numpy as np
-    import requests
-    from requests import adapters
+        import cv2 as cv  # type: ignore
+        import numpy as np
+        import requests
+        from requests import adapters
 
-    session = requests.Session()
-    session.mount("https://", adapters.HTTPAdapter(max_retries=3))
+        session = requests.Session()
+        session.mount("https://", adapters.HTTPAdapter(max_retries=3))
 
-    ext = ext or ".jpeg"
+        ext = ext or ".jpeg"
 
-    norm_type_mapping = {
-        "inf": cv.NORM_INF,
-        "l1": cv.NORM_L1,
-        "l2": cv.NORM_L2,
-        "minmax": cv.NORM_MINMAX,
-    }
+        norm_type_mapping = {
+            "inf": cv.NORM_INF,
+            "l1": cv.NORM_L1,
+            "l2": cv.NORM_L2,
+            "minmax": cv.NORM_MINMAX,
+        }
 
-    src_obj_ref_rt_json = json.loads(src_obj_ref_rt)
-    src_url = src_obj_ref_rt_json["access_urls"]["read_url"]
+        src_obj_ref_rt_json = json.loads(src_obj_ref_rt)
+        src_url = src_obj_ref_rt_json["access_urls"]["read_url"]
 
-    response = session.get(src_url, timeout=30)
-    bts = response.content
+        response = session.get(src_url, timeout=30)
+        bts = response.content
 
-    nparr = np.frombuffer(bts, np.uint8)
-    img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED)
-    img_normalized = cv.normalize(
-        img, None, alpha=alpha, beta=beta, norm_type=norm_type_mapping[norm_type]
-    )
-    bts = cv.imencode(".jpeg", img_normalized)[1].tobytes()
+        nparr = np.frombuffer(bts, np.uint8)
+        img = cv.imdecode(nparr, cv.IMREAD_UNCHANGED)
+        img_normalized = cv.normalize(
+            img, None, alpha=alpha, beta=beta, norm_type=norm_type_mapping[norm_type]
+        )
+        bts = cv.imencode(".jpeg", img_normalized)[1].tobytes()
+        result_dict = {"status": "", "content": base64.b64encode(bts).decode("utf-8")}
 
-    return bts
+    except Exception as e:
+        result_dict = {"status": str(e), "content": ""}
+
+    result_json = json.dumps(result_dict)
+    return result_json
 
 
 image_normalize_to_bytes_def = FunctionDef(
diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py
@@ -539,6 +539,7 @@ def image_normalize(
         max_batching_rows: int = 8192,
         container_cpu: Union[float, int] = 0.33,
         container_memory: str = "512Mi",
+        verbose: bool = False,
     ) -> bigframes.series.Series:
         """Normalize images.
 
@@ -556,14 +557,28 @@ def image_normalize(
             max_batching_rows (int, default 8,192): Max number of rows per batch send to cloud run to execute the function.
             container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to intergers.
             container_memory (str, default "512Mi"): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
+            verbose (bool, default False): Controls the verbosity of the output.
+                When set to True, both error messages and the normalized image
+                content are returned. Conversely, when set to False, only the
+                normalized image content is returned, suppressing error messages.
 
         Returns:
             bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ.
         """
         if engine is None or engine.casefold() != "opencv":
             raise ValueError("Must specify the engine, supported value is 'opencv'.")
 
+            bigframes.series.Series: blob Series if destination is GCS. Or
+                struct[str, bytes] or bytes Series if destination is BQ,
+                depending on the "verbose" parameter. Contains the normalized image
+                data. Includes error messages if verbosity is enabled.
+
+        """
+        import base64
+
+        import bigframes.bigquery as bbq
         import bigframes.blob._functions as blob_func
+        import bigframes.pandas as bpd
 
         connection = self._resolve_connection(connection)
         df = self.get_runtime_json_str(mode="R").to_frame()
@@ -586,7 +601,27 @@ def image_normalize(
             df["ext"] = ext  # type: ignore
             res = self._df_apply_udf(df, image_normalize_udf)
 
-            return res
+            bq_session = self._block.bq_session
+            encoded_content_series = res._apply_unary_op(
+                ops.JSONValue(json_path="$.content")
+            )
+            base64_decode_udf = bq_session.register_function(
+                "base64_decode_bq",
+                lambda x: bbq.query(f"SELECT TO_BASE64(FROM_BASE64('{x}'))")
+                .to_dataframe()
+                .iloc[0, 0],
+            )
+            decoded_content_series = encoded_content_series.apply(base64_decode_udf)
+
+            if verbose:
+                status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status"))
+                res_df = bpd.DataFrame(
+                    {"status": status_series, "content": decoded_content_series}
+                )
+                struct_series = bbq.struct(res_df)
+                return struct_series
+            else:
+                return decoded_content_series
 
         if isinstance(dst, str):
             dst = os.path.join(dst, "")
@@ -618,7 +653,27 @@ def image_normalize(
         res = self._df_apply_udf(df, image_normalize_udf)
         res.cache()  # to execute the udf
 
-        return dst
+        bq_session = self._block.bq_session
+        encoded_content_series = res._apply_unary_op(
+            ops.JSONValue(json_path="$.content")
+        )
+        base64_decode_udf = bq_session.register_function(
+            "base64_decode_bq",
+            lambda x: bbq.query(f"SELECT TO_BASE64(FROM_BASE64('{x}'))")
+            .to_dataframe()
+            .iloc[0, 0],
+        )
+        decoded_content_series = encoded_content_series.apply(base64_decode_udf)
+
+        if verbose:
+            status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status"))
+            res_df = bpd.DataFrame(
+                {"status": status_series, "content": decoded_content_series}
+            )
+            struct_series = bbq.struct(res_df)
+            return struct_series
+        else:
+            return decoded_content_series
 
     def pdf_extract(
         self,