Skip to content

Commit da9a681

Browse files
committed
add error handling, mypy not clean
1 parent e2d5aeb commit da9a681

File tree

3 files changed

+64
-28
lines changed

3 files changed

+64
-28
lines changed

bigframes/blob/_functions.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ def image_blur_to_bytes_func(
263263
)
264264
return json.dumps(result_dict)
265265
else:
266-
return result_dict["content"]
266+
return base64.b64encode(result_dict["content"]).decode("utf-8")
267267

268268

269269
image_blur_to_bytes_def = FunctionDef(
@@ -385,7 +385,7 @@ def image_resize_to_bytes_func(
385385
)
386386
return json.dumps(result_dict)
387387
else:
388-
return result_dict["content"]
388+
return base64.b64encode(result_dict["content"]).decode("utf-8")
389389

390390

391391
image_resize_to_bytes_def = FunctionDef(
@@ -531,7 +531,7 @@ def image_normalize_to_bytes_func(
531531

532532

533533
# Extracts all text from a PDF url
534-
def pdf_extract_func(src_obj_ref_rt: str) -> str:
534+
def pdf_extract_func(src_obj_ref_rt: str, verbose: bool) -> str:
535535
try:
536536
import io
537537
import json
@@ -564,8 +564,10 @@ def pdf_extract_func(src_obj_ref_rt: str) -> str:
564564
except Exception as e:
565565
result_dict = {"status": str(e), "content": ""}
566566

567-
result_json = json.dumps(result_dict)
568-
return result_json
567+
if verbose:
568+
return json.dumps(result_dict)
569+
else:
570+
return result_dict["content"]
569571

570572

571573
pdf_extract_def = FunctionDef(
@@ -574,7 +576,9 @@ def pdf_extract_func(src_obj_ref_rt: str) -> str:
574576

575577

576578
# Extracts text from a PDF url and chunks it simultaneously
577-
def pdf_chunk_func(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> str:
579+
def pdf_chunk_func(
580+
src_obj_ref_rt: str, chunk_size: int, overlap_size: int, verbose: bool
581+
) -> str:
578582
try:
579583
import io
580584
import json
@@ -620,8 +624,10 @@ def pdf_chunk_func(src_obj_ref_rt: str, chunk_size: int, overlap_size: int) -> s
620624
except Exception as e:
621625
result_dict = {"status": str(e), "content": []}
622626

623-
result_json = json.dumps(result_dict)
624-
return result_json
627+
if verbose:
628+
return json.dumps(result_dict)
629+
else:
630+
return json.dumps(result_dict["content"])
625631

626632

627633
pdf_chunk_def = FunctionDef(

bigframes/operations/blob.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,12 @@ def image_blur(
422422
).rename("blurred_content")
423423

424424
if verbose:
425+
blurred_content_b64_series = res._apply_unary_op(
426+
ops.JSONValue(json_path="$.content")
427+
)
428+
blurred_content_series = bbq.sql_scalar(
429+
"FROM_BASE64({0})", columns=[blurred_content_b64_series]
430+
)
425431
blurred_status_series = res._apply_unary_op(
426432
ops.JSONValue(json_path="$.status")
427433
)
@@ -431,7 +437,10 @@ def image_blur(
431437
results_struct = bbq.struct(results_df).rename("blurred_results")
432438
return results_struct
433439
else:
434-
return blurred_content_series
440+
blurred_bytes = bbq.sql_scalar(
441+
"FROM_BASE64({0})", columns=[res]
442+
).rename("blurred_bytes")
443+
return blurred_bytes
435444

436445
if isinstance(dst, str):
437446
dst = os.path.join(dst, "")
@@ -547,16 +556,26 @@ def image_resize(
547556
).rename("resized_content")
548557

549558
if verbose:
559+
resized_content_b64_series = res._apply_unary_op(
560+
ops.JSONValue(json_path="$.content")
561+
)
562+
resized_content_series = bbq.sql_scalar(
563+
"FROM_BASE64({0})", columns=[resized_content_b64_series]
564+
)
565+
550566
resized_status_series = res._apply_unary_op(
551567
ops.JSONValue(json_path="$.status")
552568
)
553569
results_df = bpd.DataFrame(
554570
{"status": resized_status_series, "content": resized_content_series}
555571
)
556-
results_struct = bbq.strcut(results_df).rename("resized_results")
572+
results_struct = bbq.struct(results_df).rename("resized_results")
557573
return results_struct
558574
else:
559-
return resized_content_series
575+
resized_bytes = bbq.sql_scalar(
576+
"FROM_BASE64({0})", columns=[res]
577+
).rename("resized_bytes")
578+
return resized_bytes
560579

561580
if isinstance(dst, str):
562581
dst = os.path.join(dst, "")

tests/system/large/blob/test_function.py

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -52,39 +52,50 @@ def images_output_uris(images_output_folder: str) -> list[str]:
5252

5353

5454
@pytest.mark.parametrize(
55-
"verbose, expected_type",
55+
"verbose",
5656
[
57-
(True, "struct"),
58-
(False, "json"),
57+
(True),
58+
(False),
5959
],
6060
)
6161
def test_blob_exif(
6262
bq_connection: str,
6363
session: bigframes.Session,
6464
verbose: bool,
65-
expected_type: str,
6665
):
6766
exif_image_df = session.from_glob_path(
6867
"gs://bigframes_blob_test/images_exif/*",
6968
name="blob_col",
7069
connection=bq_connection,
71-
verbose=verbose,
7270
)
7371

7472
actual = exif_image_df["blob_col"].blob.exif(
75-
engine="pillow", connection=bq_connection
76-
)
77-
expected = bpd.Series(
78-
['{"ExifOffset": 47, "Make": "MyCamera"}'],
79-
session=session,
80-
dtype=dtypes.JSON_DTYPE,
81-
)
82-
pd.testing.assert_series_equal(
83-
actual.to_pandas(),
84-
expected.to_pandas(),
85-
check_dtype=False,
86-
check_index_type=False,
73+
engine="pillow", connection=bq_connection, verbose=verbose
8774
)
75+
if verbose:
76+
assert hasattr(actual, "struct")
77+
actual_exploded = actual.struct.explode()
78+
assert "status" in actual_exploded.columns
79+
assert "content" in actual_exploded.columns
80+
81+
status_series = actual_exploded["status"]
82+
assert status_series.dtype == dtypes.STRING_DTYPE
83+
84+
content_series = actual_exploded["content"]
85+
assert content_series.dtype == dtypes.JSON_DTYPE
86+
87+
else:
88+
expected = bpd.Series(
89+
['{"ExifOffset": 47, "Make": "MyCamera"}'],
90+
session=session,
91+
dtype=dtypes.JSON_DTYPE,
92+
)
93+
pd.testing.assert_series_equal(
94+
actual.to_pandas(),
95+
expected.to_pandas(),
96+
check_dtype=False,
97+
check_index_type=False,
98+
)
8899

89100

90101
@pytest.mark.parametrize("verbose", [True, False])

0 commit comments

Comments
 (0)