@@ -345,11 +345,10 @@ def exif(
345345
346346 res = self ._df_apply_udf (df , exif_udf )
347347
348- exif_content_series = bbq .parse_json (
349- res ._apply_unary_op (ops .JSONValue (json_path = "$.content" ))
350- ).rename ("exif_content" )
351-
352348 if verbose :
349+ exif_content_series = bbq .parse_json (
350+ res ._apply_unary_op (ops .JSONValue (json_path = "$.content" ))
351+ ).rename ("exif_content" )
353352 exif_status_series = res ._apply_unary_op (
354353 ops .JSONValue (json_path = "$.status" )
355354 )
@@ -359,7 +358,7 @@ def exif(
359358 results_struct = bbq .struct (results_df ).rename ("exif_results" )
360359 return results_struct
361360 else :
362- return exif_content_series
361+ return bbq . parse_json ( res )
363362
364363 def image_blur (
365364 self ,
@@ -416,12 +415,9 @@ def image_blur(
416415
417416 df ["ksize_x" ], df ["ksize_y" ] = ksize
418417 df ["ext" ] = ext # type: ignore
418+ df ["verbose" ] = verbose
419419 res = self ._df_apply_udf (df , image_blur_udf )
420420
421- blurred_content_series = res ._apply_unary_op (
422- ops .JSONValue (json_path = "$.content" )
423- ).rename ("blurred_content" )
424-
425421 if verbose :
426422 blurred_content_b64_series = res ._apply_unary_op (
427423 ops .JSONValue (json_path = "$.content" )
@@ -467,6 +463,7 @@ def image_blur(
467463 df = df .join (dst_rt , how = "outer" )
468464 df ["ksize_x" ], df ["ksize_y" ] = ksize
469465 df ["ext" ] = ext # type: ignore
466+ df ["verbose" ] = verbose
470467
471468 res = self ._df_apply_udf (df , image_blur_udf )
472469 res .cache () # to execute the udf
@@ -475,13 +472,15 @@ def image_blur(
475472 blurred_status_series = res ._apply_unary_op (
476473 ops .JSONValue (json_path = "$.status" )
477474 )
475+ content_series = res ._apply_unary_op (ops .JSONValue (json_path = "$.content" ))
476+ dst_blobs = content_series .str .to_blob (connection = connection )
478477 results_df = bpd .DataFrame (
479- {"status" : blurred_status_series , "content" : dst }
478+ {"status" : blurred_status_series , "content" : dst_blobs }
480479 )
481480 results_struct = bbq .struct (results_df ).rename ("blurred_results" )
482481 return results_struct
483482 else :
484- return dst
483+ return res . str . to_blob ( connection = connection )
485484
486485 def image_resize (
487486 self ,
@@ -550,12 +549,9 @@ def image_resize(
550549 df ["dsize_x" ], df ["dsizye_y" ] = dsize
551550 df ["fx" ], df ["fy" ] = fx , fy
552551 df ["ext" ] = ext # type: ignore
552+ df ["verbose" ] = verbose
553553 res = self ._df_apply_udf (df , image_resize_udf )
554554
555- resized_content_series = res ._apply_unary_op (
556- ops .JSONValue (json_path = "$.content" )
557- ).rename ("resized_content" )
558-
559555 if verbose :
560556 resized_content_b64_series = res ._apply_unary_op (
561557 ops .JSONValue (json_path = "$.content" )
@@ -603,6 +599,7 @@ def image_resize(
603599 df ["dsize_x" ], df ["dsizye_y" ] = dsize
604600 df ["fx" ], df ["fy" ] = fx , fy
605601 df ["ext" ] = ext # type: ignore
602+ df ["verbose" ] = verbose
606603
607604 res = self ._df_apply_udf (df , image_resize_udf )
608605 res .cache () # to execute the udf
@@ -611,13 +608,15 @@ def image_resize(
611608 resized_status_series = res ._apply_unary_op (
612609 ops .JSONValue (json_path = "$.status" )
613610 )
611+ content_series = res ._apply_unary_op (ops .JSONValue (json_path = "$.content" ))
612+ dst_blobs = content_series .str .to_blob (connection = connection )
614613 results_df = bpd .DataFrame (
615- {"status" : resized_status_series , "content" : dst }
614+ {"status" : resized_status_series , "content" : dst_blobs }
616615 )
617616 results_struct = bbq .struct (results_df ).rename ("resized_results" )
618617 return results_struct
619618 else :
620- return dst
619+ return res . str . to_blob ( connection = connection )
621620
622621 def image_normalize (
623622 self ,
@@ -680,15 +679,16 @@ def image_normalize(
680679 df ["beta" ] = beta
681680 df ["norm_type" ] = norm_type
682681 df ["ext" ] = ext # type: ignore
682+ df ["verbose" ] = verbose
683683 res = self ._df_apply_udf (df , image_normalize_udf )
684684
685- normalized_content_b64_series = res ._apply_unary_op (
686- ops .JSONValue (json_path = "$.content" )
687- )
688- normalized_bytes = bbq .sql_scalar (
689- "FROM_BASE64({0})" , columns = [normalized_content_b64_series ]
690- )
691685 if verbose :
686+ normalized_content_b64_series = res ._apply_unary_op (
687+ ops .JSONValue (json_path = "$.content" )
688+ )
689+ normalized_bytes = bbq .sql_scalar (
690+ "FROM_BASE64({0})" , columns = [normalized_content_b64_series ]
691+ )
692692 normalized_status_series = res ._apply_unary_op (
693693 ops .JSONValue (json_path = "$.status" )
694694 )
@@ -698,7 +698,10 @@ def image_normalize(
698698 results_struct = bbq .struct (results_df ).rename ("normalized_results" )
699699 return results_struct
700700 else :
701- return normalized_bytes .rename ("normalized_bytes" )
701+ normalized_bytes = bbq .sql_scalar (
702+ "FROM_BASE64({0})" , columns = [res ]
703+ ).rename ("normalized_bytes" )
704+ return normalized_bytes
702705
703706 if isinstance (dst , str ):
704707 dst = os .path .join (dst , "" )
@@ -726,31 +729,27 @@ def image_normalize(
726729 df ["beta" ] = beta
727730 df ["norm_type" ] = norm_type
728731 df ["ext" ] = ext # type: ignore
732+ df ["verbose" ] = verbose
729733
730734 res = self ._df_apply_udf (df , image_normalize_udf )
731735 res .cache () # to execute the udf
732736
733- normalized_content_series = res ._apply_unary_op (
734- ops .JSONValue (json_path = "$.content" )
735- )
736- normalized_content_blobs = normalized_content_series .str .to_blob (
737- connection = connection
738- )
739-
740737 if verbose :
741738 normalized_status_series = res ._apply_unary_op (
742739 ops .JSONValue (json_path = "$.status" )
743740 )
741+ content_series = res ._apply_unary_op (ops .JSONValue (json_path = "$.content" ))
742+ dst_blobs = content_series .str .to_blob (connection = connection )
744743 results_df = bpd .DataFrame (
745744 {
746745 "status" : normalized_status_series ,
747- "content" : normalized_content_blobs ,
746+ "content" : dst_blobs ,
748747 }
749748 )
750749 results_struct = bbq .struct (results_df ).rename ("normalized_results" )
751750 return results_struct
752751 else :
753- return normalized_content_blobs . rename ( "normalized_content" )
752+ return res . str . to_blob ( connection = connection )
754753
755754 def pdf_extract (
756755 self ,
@@ -802,23 +801,22 @@ def pdf_extract(
802801 container_memory = container_memory ,
803802 ).udf ()
804803
805- src_rt = self .get_runtime_json_str (mode = "R" )
806-
807- res = src_rt .apply (pdf_extract_udf )
808-
809- extracted_content_series = res ._apply_unary_op (
810- ops .JSONValue (json_path = "$.content" )
811- )
804+ df = self .get_runtime_json_str (mode = "R" ).to_frame ()
805+ df ["verbose" ] = verbose
806+ res = self ._df_apply_udf (df , pdf_extract_udf )
812807
813808 if verbose :
809+ extracted_content_series = res ._apply_unary_op (
810+ ops .JSONValue (json_path = "$.content" )
811+ )
814812 status_series = res ._apply_unary_op (ops .JSONValue (json_path = "$.status" ))
815813 results_df = bpd .DataFrame (
816814 {"status" : status_series , "content" : extracted_content_series }
817815 )
818816 results_struct = bbq .struct (results_df ).rename ("extracted_results" )
819817 return results_struct
820818 else :
821- return extracted_content_series .rename ("extracted_content" )
819+ return res .rename ("extracted_content" )
822820
823821 def pdf_chunk (
824822 self ,
@@ -885,24 +883,23 @@ def pdf_chunk(
885883 container_memory = container_memory ,
886884 ).udf ()
887885
888- src_rt = self .get_runtime_json_str (mode = "R" )
889- df = src_rt .to_frame ()
886+ df = self .get_runtime_json_str (mode = "R" ).to_frame ()
890887 df ["chunk_size" ] = chunk_size
891888 df ["overlap_size" ] = overlap_size
889+ df ["verbose" ] = verbose
892890
893891 res = self ._df_apply_udf (df , pdf_chunk_udf )
894892
895- chunked_content_series = bbq .json_extract_string_array (res , "$.content" )
896-
897893 if verbose :
894+ chunked_content_series = bbq .json_extract_string_array (res , "$.content" )
898895 status_series = res ._apply_unary_op (ops .JSONValue (json_path = "$.status" ))
899896 results_df = bpd .DataFrame (
900897 {"status" : status_series , "content" : chunked_content_series }
901898 )
902899 resultes_struct = bbq .struct (results_df ).rename ("chunked_results" )
903900 return resultes_struct
904901 else :
905- return chunked_content_series .rename ("chunked_content" )
902+ return bbq . json_extract_string_array ( res , "$" ) .rename ("chunked_content" )
906903
907904 def audio_transcribe (
908905 self ,
0 commit comments