Skip to content

Commit 4e2535e

Browse files
committed
test fix
1 parent 9ab240f commit 4e2535e

File tree

2 files changed

+83
-55
lines changed

2 files changed

+83
-55
lines changed

bigframes/blob/_functions.py

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ def udf(self):
119119
return self._session.read_gbq_function(udf_name)
120120

121121

122-
def exif_func(src_obj_ref_rt: str) -> str:
122+
def exif_func(src_obj_ref_rt: str, verbose: bool) -> str:
123123
import io
124124
import json
125125

@@ -145,12 +145,18 @@ def exif_func(src_obj_ref_rt: str) -> str:
145145
if exif_data:
146146
for tag, value in exif_data.items():
147147
tag_name = ExifTags.TAGS.get(tag, tag)
148+
# Pillow may return raw bytes for some EXIF values; bytes are not JSON-serializable, so decode them to text first.
149+
if isinstance(value, bytes):
150+
value = value.decode("utf-8", "replace")
148151
exif_dict[tag_name] = value
149152
result_dict["content"] = json.dumps(exif_dict)
150153
except Exception as e:
151154
result_dict["status"] = str(e)
152155

153-
return json.dumps(result_dict)
156+
if verbose:
157+
return json.dumps(result_dict)
158+
else:
159+
return result_dict["content"]
154160

155161

156162
exif_func_def = FunctionDef(exif_func, ["pillow", "requests"])
@@ -163,6 +169,7 @@ def image_blur_func(
163169
ksize_x: int,
164170
ksize_y: int,
165171
ext: str,
172+
verbose: bool,
166173
) -> str:
167174
import json
168175

@@ -211,14 +218,17 @@ def image_blur_func(
211218
except Exception as e:
212219
result_dict["status"] = str(e)
213220

214-
return json.dumps(result_dict)
221+
if verbose:
222+
return json.dumps(result_dict)
223+
else:
224+
return result_dict["content"]
215225

216226

217227
image_blur_def = FunctionDef(image_blur_func, ["opencv-python", "numpy", "requests"])
218228

219229

220230
def image_blur_to_bytes_func(
221-
src_obj_ref_rt: str, ksize_x: int, ksize_y: int, ext: str
231+
src_obj_ref_rt: str, ksize_x: int, ksize_y: int, ext: str, verbose: bool
222232
) -> str:
223233
import base64
224234
import json
@@ -252,7 +262,11 @@ def image_blur_to_bytes_func(
252262
status = str(e)
253263

254264
encoded_content = base64.b64encode(content).decode("utf-8")
255-
return json.dumps({"status": status, "content": encoded_content})
265+
result_dict = {"status": status, "content": encoded_content}
266+
if verbose:
267+
return json.dumps(result_dict)
268+
else:
269+
return result_dict["content"]
256270

257271

258272
image_blur_to_bytes_def = FunctionDef(
@@ -268,6 +282,7 @@ def image_resize_func(
268282
fx: float,
269283
fy: float,
270284
ext: str,
285+
verbose: bool,
271286
) -> str:
272287
import json
273288

@@ -316,7 +331,10 @@ def image_resize_func(
316331
except Exception as e:
317332
result_dict["status"] = str(e)
318333

319-
return json.dumps(result_dict)
334+
if verbose:
335+
return json.dumps(result_dict)
336+
else:
337+
return result_dict["content"]
320338

321339

322340
image_resize_def = FunctionDef(
@@ -331,6 +349,7 @@ def image_resize_to_bytes_func(
331349
fx: float,
332350
fy: float,
333351
ext: str,
352+
verbose: bool,
334353
) -> str:
335354
import base64
336355
import json
@@ -364,7 +383,11 @@ def image_resize_to_bytes_func(
364383
status = str(e)
365384

366385
encoded_content = base64.b64encode(content).decode("utf-8")
367-
return json.dumps({"status": status, "content": encoded_content})
386+
result_dict = {"status": status, "content": encoded_content}
387+
if verbose:
388+
return json.dumps(result_dict)
389+
else:
390+
return result_dict["content"]
368391

369392

370393
image_resize_to_bytes_def = FunctionDef(
@@ -379,6 +402,7 @@ def image_normalize_func(
379402
beta: float,
380403
norm_type: str,
381404
ext: str,
405+
verbose: bool,
382406
) -> str:
383407
import json
384408

@@ -436,7 +460,10 @@ def image_normalize_func(
436460
except Exception as e:
437461
result_dict["status"] = str(e)
438462

439-
return json.dumps(result_dict)
463+
if verbose:
464+
return json.dumps(result_dict)
465+
else:
466+
return result_dict["content"]
440467

441468

442469
image_normalize_def = FunctionDef(
@@ -450,6 +477,7 @@ def image_normalize_to_bytes_func(
450477
beta: float,
451478
norm_type: str,
452479
ext: str,
480+
verbose: bool,
453481
) -> str:
454482
import base64
455483
import json
@@ -493,7 +521,10 @@ def image_normalize_to_bytes_func(
493521
except Exception as e:
494522
result_dict["status"] = str(e)
495523

496-
return json.dumps(result_dict)
524+
if verbose:
525+
return json.dumps(result_dict)
526+
else:
527+
return result_dict["content"]
497528

498529

499530
image_normalize_to_bytes_def = FunctionDef(

bigframes/operations/blob.py

Lines changed: 43 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -345,11 +345,10 @@ def exif(
345345

346346
res = self._df_apply_udf(df, exif_udf)
347347

348-
exif_content_series = bbq.parse_json(
349-
res._apply_unary_op(ops.JSONValue(json_path="$.content"))
350-
).rename("exif_content")
351-
352348
if verbose:
349+
exif_content_series = bbq.parse_json(
350+
res._apply_unary_op(ops.JSONValue(json_path="$.content"))
351+
).rename("exif_content")
353352
exif_status_series = res._apply_unary_op(
354353
ops.JSONValue(json_path="$.status")
355354
)
@@ -359,7 +358,7 @@ def exif(
359358
results_struct = bbq.struct(results_df).rename("exif_results")
360359
return results_struct
361360
else:
362-
return exif_content_series
361+
return bbq.parse_json(res)
363362

364363
def image_blur(
365364
self,
@@ -416,12 +415,9 @@ def image_blur(
416415

417416
df["ksize_x"], df["ksize_y"] = ksize
418417
df["ext"] = ext # type: ignore
418+
df["verbose"] = verbose
419419
res = self._df_apply_udf(df, image_blur_udf)
420420

421-
blurred_content_series = res._apply_unary_op(
422-
ops.JSONValue(json_path="$.content")
423-
).rename("blurred_content")
424-
425421
if verbose:
426422
blurred_content_b64_series = res._apply_unary_op(
427423
ops.JSONValue(json_path="$.content")
@@ -467,6 +463,7 @@ def image_blur(
467463
df = df.join(dst_rt, how="outer")
468464
df["ksize_x"], df["ksize_y"] = ksize
469465
df["ext"] = ext # type: ignore
466+
df["verbose"] = verbose
470467

471468
res = self._df_apply_udf(df, image_blur_udf)
472469
res.cache() # to execute the udf
@@ -475,13 +472,15 @@ def image_blur(
475472
blurred_status_series = res._apply_unary_op(
476473
ops.JSONValue(json_path="$.status")
477474
)
475+
content_series = res._apply_unary_op(ops.JSONValue(json_path="$.content"))
476+
dst_blobs = content_series.str.to_blob(connection=connection)
478477
results_df = bpd.DataFrame(
479-
{"status": blurred_status_series, "content": dst}
478+
{"status": blurred_status_series, "content": dst_blobs}
480479
)
481480
results_struct = bbq.struct(results_df).rename("blurred_results")
482481
return results_struct
483482
else:
484-
return dst
483+
return res.str.to_blob(connection=connection)
485484

486485
def image_resize(
487486
self,
@@ -550,12 +549,9 @@ def image_resize(
550549
df["dsize_x"], df["dsizye_y"] = dsize
551550
df["fx"], df["fy"] = fx, fy
552551
df["ext"] = ext # type: ignore
552+
df["verbose"] = verbose
553553
res = self._df_apply_udf(df, image_resize_udf)
554554

555-
resized_content_series = res._apply_unary_op(
556-
ops.JSONValue(json_path="$.content")
557-
).rename("resized_content")
558-
559555
if verbose:
560556
resized_content_b64_series = res._apply_unary_op(
561557
ops.JSONValue(json_path="$.content")
@@ -603,6 +599,7 @@ def image_resize(
603599
df["dsize_x"], df["dsizye_y"] = dsize
604600
df["fx"], df["fy"] = fx, fy
605601
df["ext"] = ext # type: ignore
602+
df["verbose"] = verbose
606603

607604
res = self._df_apply_udf(df, image_resize_udf)
608605
res.cache() # to execute the udf
@@ -611,13 +608,15 @@ def image_resize(
611608
resized_status_series = res._apply_unary_op(
612609
ops.JSONValue(json_path="$.status")
613610
)
611+
content_series = res._apply_unary_op(ops.JSONValue(json_path="$.content"))
612+
dst_blobs = content_series.str.to_blob(connection=connection)
614613
results_df = bpd.DataFrame(
615-
{"status": resized_status_series, "content": dst}
614+
{"status": resized_status_series, "content": dst_blobs}
616615
)
617616
results_struct = bbq.struct(results_df).rename("resized_results")
618617
return results_struct
619618
else:
620-
return dst
619+
return res.str.to_blob(connection=connection)
621620

622621
def image_normalize(
623622
self,
@@ -680,15 +679,16 @@ def image_normalize(
680679
df["beta"] = beta
681680
df["norm_type"] = norm_type
682681
df["ext"] = ext # type: ignore
682+
df["verbose"] = verbose
683683
res = self._df_apply_udf(df, image_normalize_udf)
684684

685-
normalized_content_b64_series = res._apply_unary_op(
686-
ops.JSONValue(json_path="$.content")
687-
)
688-
normalized_bytes = bbq.sql_scalar(
689-
"FROM_BASE64({0})", columns=[normalized_content_b64_series]
690-
)
691685
if verbose:
686+
normalized_content_b64_series = res._apply_unary_op(
687+
ops.JSONValue(json_path="$.content")
688+
)
689+
normalized_bytes = bbq.sql_scalar(
690+
"FROM_BASE64({0})", columns=[normalized_content_b64_series]
691+
)
692692
normalized_status_series = res._apply_unary_op(
693693
ops.JSONValue(json_path="$.status")
694694
)
@@ -698,7 +698,10 @@ def image_normalize(
698698
results_struct = bbq.struct(results_df).rename("normalized_results")
699699
return results_struct
700700
else:
701-
return normalized_bytes.rename("normalized_bytes")
701+
normalized_bytes = bbq.sql_scalar(
702+
"FROM_BASE64({0})", columns=[res]
703+
).rename("normalized_bytes")
704+
return normalized_bytes
702705

703706
if isinstance(dst, str):
704707
dst = os.path.join(dst, "")
@@ -726,31 +729,27 @@ def image_normalize(
726729
df["beta"] = beta
727730
df["norm_type"] = norm_type
728731
df["ext"] = ext # type: ignore
732+
df["verbose"] = verbose
729733

730734
res = self._df_apply_udf(df, image_normalize_udf)
731735
res.cache() # to execute the udf
732736

733-
normalized_content_series = res._apply_unary_op(
734-
ops.JSONValue(json_path="$.content")
735-
)
736-
normalized_content_blobs = normalized_content_series.str.to_blob(
737-
connection=connection
738-
)
739-
740737
if verbose:
741738
normalized_status_series = res._apply_unary_op(
742739
ops.JSONValue(json_path="$.status")
743740
)
741+
content_series = res._apply_unary_op(ops.JSONValue(json_path="$.content"))
742+
dst_blobs = content_series.str.to_blob(connection=connection)
744743
results_df = bpd.DataFrame(
745744
{
746745
"status": normalized_status_series,
747-
"content": normalized_content_blobs,
746+
"content": dst_blobs,
748747
}
749748
)
750749
results_struct = bbq.struct(results_df).rename("normalized_results")
751750
return results_struct
752751
else:
753-
return normalized_content_blobs.rename("normalized_content")
752+
return res.str.to_blob(connection=connection)
754753

755754
def pdf_extract(
756755
self,
@@ -802,23 +801,22 @@ def pdf_extract(
802801
container_memory=container_memory,
803802
).udf()
804803

805-
src_rt = self.get_runtime_json_str(mode="R")
806-
807-
res = src_rt.apply(pdf_extract_udf)
808-
809-
extracted_content_series = res._apply_unary_op(
810-
ops.JSONValue(json_path="$.content")
811-
)
804+
df = self.get_runtime_json_str(mode="R").to_frame()
805+
df["verbose"] = verbose
806+
res = self._df_apply_udf(df, pdf_extract_udf)
812807

813808
if verbose:
809+
extracted_content_series = res._apply_unary_op(
810+
ops.JSONValue(json_path="$.content")
811+
)
814812
status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status"))
815813
results_df = bpd.DataFrame(
816814
{"status": status_series, "content": extracted_content_series}
817815
)
818816
results_struct = bbq.struct(results_df).rename("extracted_results")
819817
return results_struct
820818
else:
821-
return extracted_content_series.rename("extracted_content")
819+
return res.rename("extracted_content")
822820

823821
def pdf_chunk(
824822
self,
@@ -885,24 +883,23 @@ def pdf_chunk(
885883
container_memory=container_memory,
886884
).udf()
887885

888-
src_rt = self.get_runtime_json_str(mode="R")
889-
df = src_rt.to_frame()
886+
df = self.get_runtime_json_str(mode="R").to_frame()
890887
df["chunk_size"] = chunk_size
891888
df["overlap_size"] = overlap_size
889+
df["verbose"] = verbose
892890

893891
res = self._df_apply_udf(df, pdf_chunk_udf)
894892

895-
chunked_content_series = bbq.json_extract_string_array(res, "$.content")
896-
897893
if verbose:
894+
chunked_content_series = bbq.json_extract_string_array(res, "$.content")
898895
status_series = res._apply_unary_op(ops.JSONValue(json_path="$.status"))
899896
results_df = bpd.DataFrame(
900897
{"status": status_series, "content": chunked_content_series}
901898
)
902899
resultes_struct = bbq.struct(results_df).rename("chunked_results")
903900
return resultes_struct
904901
else:
905-
return chunked_content_series.rename("chunked_content")
902+
return bbq.json_extract_string_array(res, "$").rename("chunked_content")
906903

907904
def audio_transcribe(
908905
self,

0 commit comments

Comments
 (0)