@@ -539,6 +539,7 @@ def image_normalize(
539539 max_batching_rows : int = 8192 ,
540540 container_cpu : Union [float , int ] = 0.33 ,
541541 container_memory : str = "512Mi" ,
542+ verbose : bool = False ,
542543 ) -> bigframes .series .Series :
543544 """Normalize images.
544545
@@ -556,14 +557,28 @@ def image_normalize(
556557 max_batching_rows (int, default 8,192): Max number of rows per batch sent to Cloud Run to execute the function.
557558 container_cpu (int or float, default 0.33): number of container CPUs. Possible values are [0.33, 8]. Floats larger than 1 are cast to integers.
558559 container_memory (str, default "512Mi"): container memory size. String of the format <number><unit>. Possible values are from 512Mi to 32Gi.
560+ verbose (bool, default False): controls the verbosity of the output.
561+ When set to True, both error messages and the normalized image
562+ content are displayed. Conversely, when set to False, only the
563+ normalized image content is presented, suppressing error messages.
559564
560565 Returns:
561566 bigframes.series.Series: blob Series if destination is GCS. Or bytes Series if destination is BQ.
562567 """
563568 if engine is None or engine .casefold () != "opencv" :
564569 raise ValueError ("Must specify the engine, supported value is 'opencv'." )
565570
571+ bigframes.series.Series: blob Series if destination is GCS. Or
572+ struct[str, bytes] or bytes Series if destination is BQ,
573+ depending on the "verbose" parameter. Contains the normalized image
574+ data. Includes error messages if verbosity is enabled.
575+
576+ """
577+ import base64
578+
579+ import bigframes.bigquery as bbq
566580 import bigframes.blob._functions as blob_func
581+ import bigframes.pandas as bpd
567582
568583 connection = self._resolve_connection(connection)
569584 df = self.get_runtime_json_str(mode="R").to_frame()
@@ -586,7 +601,27 @@ def image_normalize(
586601 df["ext"] = ext # type: ignore
587602 res = self._df_apply_udf(df, image_normalize_udf)
588603
589- return res
604+ bq_session = self._block.bq_session
605+ encoded_content_series = res._apply_unary_op(
606+ ops.JSONValue(json_path="$.content")
607+ )
608+ base64_decode_udf = bq_session.register_function(
609+ "base64_decode_bq",
610+ lambda x: bbq.query(f"SELECT TO_BASE64(FROM_BASE64('{x}'))")
611+ .to_dataframe()
612+ .iloc[0, 0],
613+ )
614+ decoded_content_series = encoded_content_series.apply(base64_decode_udf)
615+
616+ if verbose:
617+ status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status"))
618+ res_df = bpd.DataFrame(
619+ {"status": status_series, "content": decoded_content_series}
620+ )
621+ struct_series = bbq.struct(res_df)
622+ return struct_series
623+ else:
624+ return decoded_content_series
590625
591626 if isinstance(dst, str):
592627 dst = os.path.join(dst, "")
@@ -618,7 +653,27 @@ def image_normalize(
618653 res = self._df_apply_udf(df, image_normalize_udf)
619654 res.cache() # to execute the udf
620655
621- return dst
656+ bq_session = self._block.bq_session
657+ encoded_content_series = res._apply_unary_op(
658+ ops.JSONValue(json_path="$.content")
659+ )
660+ base64_decode_udf = bq_session.register_function(
661+ "base64_decode_bq",
662+ lambda x: bbq.query(f"SELECT TO_BASE64(FROM_BASE64('{x}'))")
663+ .to_dataframe()
664+ .iloc[0, 0],
665+ )
666+ decoded_content_series = encoded_content_series.apply(base64_decode_udf)
667+
668+ if verbose:
669+ status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status"))
670+ res_df = bpd.DataFrame(
671+ {"status": status_series, "content": decoded_content_series}
672+ )
673+ struct_series = bbq.struct(res_df)
674+ return struct_series
675+ else:
676+ return decoded_content_series
622677
623678 def pdf_extract(
624679 self,
0 commit comments