Skip to content

Commit 48b5c0d

Browse files
committed
add error handling, mypy not clean
1 parent 59eefb8 commit 48b5c0d

File tree

3 files changed

+64
-28
lines changed

3 files changed

+64
-28
lines changed

bigframes/blob/_functions.py

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -262,7 +262,7 @@ def image_blur_to_bytes_func(
262262
)
263263
return json.dumps(result_dict)
264264
else:
265-
return result_dict["content"]
265+
return base64.b64encode(result_dict["content"]).decode("utf-8")
266266

267267

268268
image_blur_to_bytes_def = FunctionDef(
@@ -384,7 +384,7 @@ def image_resize_to_bytes_func(
384384
)
385385
return json.dumps(result_dict)
386386
else:
387-
return result_dict["content"]
387+
return base64.b64encode(result_dict["content"]).decode("utf-8")
388388

389389

390390
image_resize_to_bytes_def = FunctionDef(
@@ -530,7 +530,7 @@ def image_normalize_to_bytes_func(
530530

531531

532532
# Extracts all text from a PDF url
533-
def pdf_extract_func(src_obj_ref_rt: str) -> str:
533+
def pdf_extract_func(src_obj_ref_rt: str, verbose: bool) -> str:
534534
try:
535535
import io
536536
import json
@@ -563,8 +563,10 @@ def pdf_extract_func(src_obj_ref_rt: str) -> str:
563563
except Exception as e:
564564
result_dict = {"status": str(e), "content": ""}
565565

566-
result_json = json.dumps(result_dict)
567-
return result_json
566+
if verbose:
567+
return json.dumps(result_dict)
568+
else:
569+
return result_dict["content"]
568570

569571

570572
pdf_extract_def = FunctionDef(
@@ -573,7 +575,9 @@ def pdf_extract_func(src_obj_ref_rt: str) -> str:
573575

574576

575577
# Extracts text from a PDF url and chunks it simultaneously
576-
def pdf_chunk_func(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> str:
578+
def pdf_chunk_func(
579+
src_obj_ref_rt: str, chunk_size: int, overlap_size: int, verbose: bool
580+
) -> str:
577581
try:
578582
import io
579583
import json
@@ -619,8 +623,10 @@ def pdf_chunk_func(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> s
619623
except Exception as e:
620624
result_dict = {"status": str(e), "content": []}
621625

622-
result_json = json.dumps(result_dict)
623-
return result_json
626+
if verbose:
627+
return json.dumps(result_dict)
628+
else:
629+
return json.dumps(result_dict["content"])
624630

625631

626632
pdf_chunk_def = FunctionDef(

bigframes/operations/blob.py

Lines changed: 22 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -422,6 +422,12 @@ def image_blur(
422422
).rename("blurred_content")
423423

424424
if verbose:
425+
blurred_content_b64_series = res._apply_unary_op(
426+
ops.JSONValue(json_path="$.content")
427+
)
428+
blurred_content_series = bbq.sql_scalar(
429+
"FROM_BASE64({0})", columns=[blurred_content_b64_series]
430+
)
425431
blurred_status_series = res._apply_unary_op(
426432
ops.JSONValue(json_path="$.status")
427433
)
@@ -431,7 +437,10 @@ def image_blur(
431437
results_struct = bbq.struct(results_df).rename("blurred_results")
432438
return results_struct
433439
else:
434-
return blurred_content_series
440+
blurred_bytes = bbq.sql_scalar(
441+
"FROM_BASE64({0})", columns=[res]
442+
).rename("blurred_bytes")
443+
return blurred_bytes
435444

436445
if isinstance(dst, str):
437446
dst = os.path.join(dst, "")
@@ -547,16 +556,26 @@ def image_resize(
547556
).rename("resized_content")
548557

549558
if verbose:
559+
resized_content_b64_series = res._apply_unary_op(
560+
ops.JSONValue(json_path="$.content")
561+
)
562+
resized_content_series = bbq.sql_scalar(
563+
"FROM_BASE64({0})", columns=[resized_content_b64_series]
564+
)
565+
550566
resized_status_series = res._apply_unary_op(
551567
ops.JSONValue(json_path="$.status")
552568
)
553569
results_df = bpd.DataFrame(
554570
{"status": resized_status_series, "content": resized_content_series}
555571
)
556-
results_struct = bbq.strcut(results_df).rename("resized_results")
572+
results_struct = bbq.struct(results_df).rename("resized_results")
557573
return results_struct
558574
else:
559-
return resized_content_series
575+
resized_bytes = bbq.sql_scalar(
576+
"FROM_BASE64({0})", columns=[res]
577+
).rename("resized_bytes")
578+
return resized_bytes
560579

561580
if isinstance(dst, str):
562581
dst = os.path.join(dst, "")

tests/system/large/blob/test_function.py

Lines changed: 28 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -52,39 +52,50 @@ def images_output_uris(images_output_folder: str) -> list[str]:
5252

5353

5454
@pytest.mark.parametrize(
55-
"verbose, expected_type",
55+
"verbose",
5656
[
57-
(True, "struct"),
58-
(False, "json"),
57+
(True),
58+
(False),
5959
],
6060
)
6161
def test_blob_exif(
6262
bq_connection: str,
6363
session: bigframes.Session,
6464
verbose: bool,
65-
expected_type: str,
6665
):
6766
exif_image_df = session.from_glob_path(
6867
"gs://bigframes_blob_test/images_exif/*",
6968
name="blob_col",
7069
connection=bq_connection,
71-
verbose=verbose,
7270
)
7371

7472
actual = exif_image_df["blob_col"].blob.exif(
75-
engine="pillow", connection=bq_connection
76-
)
77-
expected = bpd.Series(
78-
['{"ExifOffset": 47, "Make": "MyCamera"}'],
79-
session=session,
80-
dtype=dtypes.JSON_DTYPE,
81-
)
82-
pd.testing.assert_series_equal(
83-
actual.to_pandas(),
84-
expected.to_pandas(),
85-
check_dtype=False,
86-
check_index_type=False,
73+
engine="pillow", connection=bq_connection, verbose=verbose
8774
)
75+
if verbose:
76+
assert hasattr(actual, "struct")
77+
actual_exploded = actual.struct.explode()
78+
assert "status" in actual_exploded.columns
79+
assert "content" in actual_exploded.columns
80+
81+
status_series = actual_exploded["status"]
82+
assert status_series.dtype == dtypes.STRING_DTYPE
83+
84+
content_series = actual_exploded["content"]
85+
assert content_series.dtype == dtypes.JSON_DTYPE
86+
87+
else:
88+
expected = bpd.Series(
89+
['{"ExifOffset": 47, "Make": "MyCamera"}'],
90+
session=session,
91+
dtype=dtypes.JSON_DTYPE,
92+
)
93+
pd.testing.assert_series_equal(
94+
actual.to_pandas(),
95+
expected.to_pandas(),
96+
check_dtype=False,
97+
check_index_type=False,
98+
)
8899

89100

90101
@pytest.mark.parametrize("verbose", [True, False])

0 commit comments

Comments
 (0)