Skip to content

Commit 3d58a60

Browse files
committed
test fix
1 parent 6439bf5 commit 3d58a60

File tree

2 files changed

+83
-55
lines changed

2 files changed

+83
-55
lines changed

bigframes/blob/_functions.py

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def udf(self):
118118
return self._session.read_gbq_function(udf_name)
119119

120120

121-
def exif_func(src_obj_ref_rt: str) -> str:
121+
def exif_func(src_obj_ref_rt: str, verbose: bool) -> str:
122122
import io
123123
import json
124124

@@ -144,12 +144,18 @@ def exif_func(src_obj_ref_rt: str) -> str:
144144
if exif_data:
145145
for tag, value in exif_data.items():
146146
tag_name = ExifTags.TAGS.get(tag, tag)
147+
# Pillow may return EXIF values as bytes, which are not JSON-serializable; decode them to text.
148+
if isinstance(value, bytes):
149+
value = value.decode("utf-8", "replace")
147150
exif_dict[tag_name] = value
148151
result_dict["content"] = json.dumps(exif_dict)
149152
except Exception as e:
150153
result_dict["status"] = str(e)
151154

152-
return json.dumps(result_dict)
155+
if verbose:
156+
return json.dumps(result_dict)
157+
else:
158+
return result_dict["content"]
153159

154160

155161
exif_func_def = FunctionDef(exif_func, ["pillow", "requests"])
@@ -162,6 +168,7 @@ def image_blur_func(
162168
ksize_x: int,
163169
ksize_y: int,
164170
ext: str,
171+
verbose: bool,
165172
) -> str:
166173
import json
167174

@@ -210,14 +217,17 @@ def image_blur_func(
210217
except Exception as e:
211218
result_dict["status"] = str(e)
212219

213-
return json.dumps(result_dict)
220+
if verbose:
221+
return json.dumps(result_dict)
222+
else:
223+
return result_dict["content"]
214224

215225

216226
image_blur_def = FunctionDef(image_blur_func, ["opencv-python", "numpy", "requests"])
217227

218228

219229
def image_blur_to_bytes_func(
220-
src_obj_ref_rt: str, ksize_x: int, ksize_y: int, ext: str
230+
src_obj_ref_rt: str, ksize_x: int, ksize_y: int, ext: str, verbose: bool
221231
) -> str:
222232
import base64
223233
import json
@@ -251,7 +261,11 @@ def image_blur_to_bytes_func(
251261
status = str(e)
252262

253263
encoded_content = base64.b64encode(content).decode("utf-8")
254-
return json.dumps({"status": status, "content": encoded_content})
264+
result_dict = {"status": status, "content": encoded_content}
265+
if verbose:
266+
return json.dumps(result_dict)
267+
else:
268+
return result_dict["content"]
255269

256270

257271
image_blur_to_bytes_def = FunctionDef(
@@ -267,6 +281,7 @@ def image_resize_func(
267281
fx: float,
268282
fy: float,
269283
ext: str,
284+
verbose: bool,
270285
) -> str:
271286
import json
272287

@@ -315,7 +330,10 @@ def image_resize_func(
315330
except Exception as e:
316331
result_dict["status"] = str(e)
317332

318-
return json.dumps(result_dict)
333+
if verbose:
334+
return json.dumps(result_dict)
335+
else:
336+
return result_dict["content"]
319337

320338

321339
image_resize_def = FunctionDef(
@@ -330,6 +348,7 @@ def image_resize_to_bytes_func(
330348
fx: float,
331349
fy: float,
332350
ext: str,
351+
verbose: bool,
333352
) -> str:
334353
import base64
335354
import json
@@ -363,7 +382,11 @@ def image_resize_to_bytes_func(
363382
status = str(e)
364383

365384
encoded_content = base64.b64encode(content).decode("utf-8")
366-
return json.dumps({"status": status, "content": encoded_content})
385+
result_dict = {"status": status, "content": encoded_content}
386+
if verbose:
387+
return json.dumps(result_dict)
388+
else:
389+
return result_dict["content"]
367390

368391

369392
image_resize_to_bytes_def = FunctionDef(
@@ -378,6 +401,7 @@ def image_normalize_func(
378401
beta: float,
379402
norm_type: str,
380403
ext: str,
404+
verbose: bool,
381405
) -> str:
382406
import json
383407

@@ -435,7 +459,10 @@ def image_normalize_func(
435459
except Exception as e:
436460
result_dict["status"] = str(e)
437461

438-
return json.dumps(result_dict)
462+
if verbose:
463+
return json.dumps(result_dict)
464+
else:
465+
return result_dict["content"]
439466

440467

441468
image_normalize_def = FunctionDef(
@@ -449,6 +476,7 @@ def image_normalize_to_bytes_func(
449476
beta: float,
450477
norm_type: str,
451478
ext: str,
479+
verbose: bool,
452480
) -> str:
453481
import base64
454482
import json
@@ -492,7 +520,10 @@ def image_normalize_to_bytes_func(
492520
except Exception as e:
493521
result_dict["status"] = str(e)
494522

495-
return json.dumps(result_dict)
523+
if verbose:
524+
return json.dumps(result_dict)
525+
else:
526+
return result_dict["content"]
496527

497528

498529
image_normalize_to_bytes_def = FunctionDef(

bigframes/operations/blob.py

Lines changed: 43 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -345,11 +345,10 @@ def exif(
345345

346346
res = self._df_apply_udf(df, exif_udf)
347347

348-
exif_content_series = bbq.parse_json(
349-
res._apply_unary_op(ops.JSONValue(json_path="$.content"))
350-
).rename("exif_content")
351-
352348
if verbose:
349+
exif_content_series = bbq.parse_json(
350+
res._apply_unary_op(ops.JSONValue(json_path="$.content"))
351+
).rename("exif_content")
353352
exif_status_series = res._apply_unary_op(
354353
ops.JSONValue(json_path="$.status")
355354
)
@@ -359,7 +358,7 @@ def exif(
359358
results_struct = bbq.struct(results_df).rename("exif_results")
360359
return results_struct
361360
else:
362-
return exif_content_series
361+
return bbq.parse_json(res)
363362

364363
def image_blur(
365364
self,
@@ -416,12 +415,9 @@ def image_blur(
416415

417416
df["ksize_x"], df["ksize_y"] = ksize
418417
df["ext"] = ext # type: ignore
418+
df["verbose"] = verbose
419419
res = self._df_apply_udf(df, image_blur_udf)
420420

421-
blurred_content_series = res._apply_unary_op(
422-
ops.JSONValue(json_path="$.content")
423-
).rename("blurred_content")
424-
425421
if verbose:
426422
blurred_content_b64_series = res._apply_unary_op(
427423
ops.JSONValue(json_path="$.content")
@@ -467,6 +463,7 @@ def image_blur(
467463
df = df.join(dst_rt, how="outer")
468464
df["ksize_x"], df["ksize_y"] = ksize
469465
df["ext"] = ext # type: ignore
466+
df["verbose"] = verbose
470467

471468
res = self._df_apply_udf(df, image_blur_udf)
472469
res.cache() # to execute the udf
@@ -475,13 +472,15 @@ def image_blur(
475472
blurred_status_series = res._apply_unary_op(
476473
ops.JSONValue(json_path="$.status")
477474
)
475+
content_series = res._apply_unary_op(ops.JSONValue(json_path="$.content"))
476+
dst_blobs = content_series.str.to_blob(connection=connection)
478477
results_df = bpd.DataFrame(
479-
{"status": blurred_status_series, "content": dst}
478+
{"status": blurred_status_series, "content": dst_blobs}
480479
)
481480
results_struct = bbq.struct(results_df).rename("blurred_results")
482481
return results_struct
483482
else:
484-
return dst
483+
return res.str.to_blob(connection=connection)
485484

486485
def image_resize(
487486
self,
@@ -550,12 +549,9 @@ def image_resize(
550549
df["dsize_x"], df["dsizye_y"] = dsize
551550
df["fx"], df["fy"] = fx, fy
552551
df["ext"] = ext # type: ignore
552+
df["verbose"] = verbose
553553
res = self._df_apply_udf(df, image_resize_udf)
554554

555-
resized_content_series = res._apply_unary_op(
556-
ops.JSONValue(json_path="$.content")
557-
).rename("resized_content")
558-
559555
if verbose:
560556
resized_content_b64_series = res._apply_unary_op(
561557
ops.JSONValue(json_path="$.content")
@@ -603,6 +599,7 @@ def image_resize(
603599
df["dsize_x"], df["dsizye_y"] = dsize
604600
df["fx"], df["fy"] = fx, fy
605601
df["ext"] = ext # type: ignore
602+
df["verbose"] = verbose
606603

607604
res = self._df_apply_udf(df, image_resize_udf)
608605
res.cache() # to execute the udf
@@ -611,13 +608,15 @@ def image_resize(
611608
resized_status_series = res._apply_unary_op(
612609
ops.JSONValue(json_path="$.status")
613610
)
611+
content_series = res._apply_unary_op(ops.JSONValue(json_path="$.content"))
612+
dst_blobs = content_series.str.to_blob(connection=connection)
614613
results_df = bpd.DataFrame(
615-
{"status": resized_status_series, "content": dst}
614+
{"status": resized_status_series, "content": dst_blobs}
616615
)
617616
results_struct = bbq.struct(results_df).rename("resized_results")
618617
return results_struct
619618
else:
620-
return dst
619+
return res.str.to_blob(connection=connection)
621620

622621
def image_normalize(
623622
self,
@@ -680,15 +679,16 @@ def image_normalize(
680679
df["beta"] = beta
681680
df["norm_type"] = norm_type
682681
df["ext"] = ext # type: ignore
682+
df["verbose"] = verbose
683683
res = self._df_apply_udf(df, image_normalize_udf)
684684

685-
normalized_content_b64_series = res._apply_unary_op(
686-
ops.JSONValue(json_path="$.content")
687-
)
688-
normalized_bytes = bbq.sql_scalar(
689-
"FROM_BASE64({0})", columns=[normalized_content_b64_series]
690-
)
691685
if verbose:
686+
normalized_content_b64_series = res._apply_unary_op(
687+
ops.JSONValue(json_path="$.content")
688+
)
689+
normalized_bytes = bbq.sql_scalar(
690+
"FROM_BASE64({0})", columns=[normalized_content_b64_series]
691+
)
692692
normalized_status_series = res._apply_unary_op(
693693
ops.JSONValue(json_path="$.status")
694694
)
@@ -698,7 +698,10 @@ def image_normalize(
698698
results_struct = bbq.struct(results_df).rename("normalized_results")
699699
return results_struct
700700
else:
701-
return normalized_bytes.rename("normalized_bytes")
701+
normalized_bytes = bbq.sql_scalar(
702+
"FROM_BASE64({0})", columns=[res]
703+
).rename("normalized_bytes")
704+
return normalized_bytes
702705

703706
if isinstance(dst, str):
704707
dst = os.path.join(dst, "")
@@ -726,31 +729,27 @@ def image_normalize(
726729
df["beta"] = beta
727730
df["norm_type"] = norm_type
728731
df["ext"] = ext # type: ignore
732+
df["verbose"] = verbose
729733

730734
res = self._df_apply_udf(df, image_normalize_udf)
731735
res.cache() # to execute the udf
732736

733-
normalized_content_series = res._apply_unary_op(
734-
ops.JSONValue(json_path="$.content")
735-
)
736-
normalized_content_blobs = normalized_content_series.str.to_blob(
737-
connection=connection
738-
)
739-
740737
if verbose:
741738
normalized_status_series = res._apply_unary_op(
742739
ops.JSONValue(json_path="$.status")
743740
)
741+
content_series = res._apply_unary_op(ops.JSONValue(json_path="$.content"))
742+
dst_blobs = content_series.str.to_blob(connection=connection)
744743
results_df = bpd.DataFrame(
745744
{
746745
"status": normalized_status_series,
747-
"content": normalized_content_blobs,
746+
"content": dst_blobs,
748747
}
749748
)
750749
results_struct = bbq.struct(results_df).rename("normalized_results")
751750
return results_struct
752751
else:
753-
return normalized_content_blobs.rename("normalized_content")
752+
return res.str.to_blob(connection=connection)
754753

755754
def pdf_extract(
756755
self,
@@ -802,23 +801,22 @@ def pdf_extract(
802801
container_memory=container_memory,
803802
).udf()
804803

805-
src_rt = self.get_runtime_json_str(mode="R")
806-
807-
res = src_rt.apply(pdf_extract_udf)
808-
809-
extracted_content_series = res._apply_unary_op(
810-
ops.JSONValue(json_path="$.content")
811-
)
804+
df = self.get_runtime_json_str(mode="R").to_frame()
805+
df["verbose"] = verbose
806+
res = self._df_apply_udf(df, pdf_extract_udf)
812807

813808
if verbose:
809+
extracted_content_series = res._apply_unary_op(
810+
ops.JSONValue(json_path="$.content")
811+
)
814812
status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status"))
815813
results_df = bpd.DataFrame(
816814
{"status": status_series, "content": extracted_content_series}
817815
)
818816
results_struct = bbq.struct(results_df).rename("extracted_results")
819817
return results_struct
820818
else:
821-
return extracted_content_series.rename("extracted_content")
819+
return res.rename("extracted_content")
822820

823821
def pdf_chunk(
824822
self,
@@ -885,24 +883,23 @@ def pdf_chunk(
885883
container_memory=container_memory,
886884
).udf()
887885

888-
src_rt = self.get_runtime_json_str(mode="R")
889-
df = src_rt.to_frame()
886+
df = self.get_runtime_json_str(mode="R").to_frame()
890887
df["chunk_size"] = chunk_size
891888
df["overlap_size"] = overlap_size
889+
df["verbose"] = verbose
892890

893891
res = self._df_apply_udf(df, pdf_chunk_udf)
894892

895-
chunked_content_series = bbq.json_extract_string_array(res, "$.content")
896-
897893
if verbose:
894+
chunked_content_series = bbq.json_extract_string_array(res, "$.content")
898895
status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status"))
899896
results_df = bpd.DataFrame(
900897
{"status": status_series, "content": chunked_content_series}
901898
)
902899
resultes_struct = bbq.struct(results_df).rename("chunked_results")
903900
return resultes_struct
904901
else:
905-
return chunked_content_series.rename("chunked_content")
902+
return bbq.json_extract_string_array(res, "$").rename("chunked_content")
906903

907904
def audio_transcribe(
908905
self,

0 commit comments

Comments
 (0)