Skip to content

Commit de5a12c

Browse files
committed
Merge remote-tracking branch 'origin/main' into refactor-isnull-op
2 parents 57c1c98 + c67ac28 commit de5a12c

File tree

13 files changed

+692
-44
lines changed

13 files changed

+692
-44
lines changed

CHANGELOG.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,31 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [2.14.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.13.0...v2.14.0) (2025-08-05)
8+
9+
10+
### Features
11+
12+
* Dynamic table width for better display across devices ([#1948](https://github.com/googleapis/python-bigquery-dataframes/issues/1948)) ([a6d30ae](https://github.com/googleapis/python-bigquery-dataframes/commit/a6d30ae3f4358925c999c53b558c1ecd3ee03e6c))
13+
* Retry AI/ML jobs that fail more often ([#1965](https://github.com/googleapis/python-bigquery-dataframes/issues/1965)) ([25bde9f](https://github.com/googleapis/python-bigquery-dataframes/commit/25bde9f9b89112db0efcc119bf29b6d1f3896c33))
14+
* Support series input in managed function ([#1920](https://github.com/googleapis/python-bigquery-dataframes/issues/1920)) ([62a189f](https://github.com/googleapis/python-bigquery-dataframes/commit/62a189f4d69f6c05fe348a1acd1fbac364fa60b9))
15+
16+
17+
### Bug Fixes
18+
19+
* Enhance type error messages for bigframes functions ([#1958](https://github.com/googleapis/python-bigquery-dataframes/issues/1958)) ([770918e](https://github.com/googleapis/python-bigquery-dataframes/commit/770918e998bf1fde7a656e8f8a0ff0a8c68509f2))
20+
21+
22+
### Performance Improvements
23+
24+
* Use promote_offsets for consistent row number generation for index.get_loc ([#1957](https://github.com/googleapis/python-bigquery-dataframes/issues/1957)) ([c67a25a](https://github.com/googleapis/python-bigquery-dataframes/commit/c67a25a879ab2a35ca9053a81c9c85b5660206ae))
25+
26+
27+
### Documentation
28+
29+
* Add code snippet for storing dataframes to a CSV file ([#1943](https://github.com/googleapis/python-bigquery-dataframes/issues/1943)) ([a511e09](https://github.com/googleapis/python-bigquery-dataframes/commit/a511e09e6924d2e8302af2eb4a602c6b9e5d2d72))
30+
* Add code snippet for storing dataframes to a CSV file ([#1953](https://github.com/googleapis/python-bigquery-dataframes/issues/1953)) ([a298a02](https://github.com/googleapis/python-bigquery-dataframes/commit/a298a02b451f03ca200fe0756b9a7b57e3d1bf0e))
31+
732
## [2.13.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.12.0...v2.13.0) (2025-07-25)
833

934

bigframes/exceptions.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,13 @@ class FunctionAxisOnePreviewWarning(PreviewWarning):
103103
"""Remote Function and Managed UDF with axis=1 preview."""
104104

105105

106+
class FunctionPackageVersionWarning(PreviewWarning):
107+
"""
108+
Managed UDF package versions for Numpy, Pandas, and Pyarrow may not
109+
precisely match users' local environment or the exact versions specified.
110+
"""
111+
112+
106113
def format_message(message: str, fill: bool = True):
107114
"""Formats a warning message with ANSI color codes for the warning color.
108115

bigframes/functions/_function_client.py

Lines changed: 9 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import logging
2020
import os
2121
import random
22-
import re
2322
import shutil
2423
import string
2524
import tempfile
@@ -247,7 +246,7 @@ def provision_bq_managed_function(
247246
# Augment user package requirements with any internal package
248247
# requirements.
249248
packages = _utils._get_updated_package_requirements(
250-
packages, is_row_processor, capture_references
249+
packages, is_row_processor, capture_references, ignore_package_version=True
251250
)
252251
if packages:
253252
managed_function_options["packages"] = packages
@@ -270,28 +269,6 @@ def provision_bq_managed_function(
270269
)
271270

272271
udf_name = func.__name__
273-
if capture_references:
274-
# This code path ensures that if the udf body contains any
275-
# references to variables and/or imports outside the body, they are
276-
# captured as well.
277-
import cloudpickle
278-
279-
pickled = cloudpickle.dumps(func)
280-
udf_code = textwrap.dedent(
281-
f"""
282-
import cloudpickle
283-
{udf_name} = cloudpickle.loads({pickled})
284-
"""
285-
)
286-
else:
287-
# This code path ensures that if the udf body is self contained,
288-
# i.e. there are no references to variables or imports outside the
289-
# body.
290-
udf_code = textwrap.dedent(inspect.getsource(func))
291-
match = re.search(r"^def ", udf_code, flags=re.MULTILINE)
292-
if match is None:
293-
raise ValueError("The UDF is not defined correctly.")
294-
udf_code = udf_code[match.start() :]
295272

296273
with_connection_clause = (
297274
(
@@ -301,6 +278,13 @@ def provision_bq_managed_function(
301278
else ""
302279
)
303280

281+
# Generate the complete Python code block for the managed Python UDF,
282+
# including the user's function, necessary imports, and the BigQuery
283+
# handler wrapper.
284+
python_code_block = bff_template.generate_managed_function_code(
285+
func, udf_name, is_row_processor, capture_references
286+
)
287+
304288
create_function_ddl = (
305289
textwrap.dedent(
306290
f"""
@@ -311,13 +295,11 @@ def provision_bq_managed_function(
311295
OPTIONS ({managed_function_options_str})
312296
AS r'''
313297
__UDF_PLACE_HOLDER__
314-
def bigframes_handler(*args):
315-
return {udf_name}(*args)
316298
'''
317299
"""
318300
)
319301
.strip()
320-
.replace("__UDF_PLACE_HOLDER__", udf_code)
302+
.replace("__UDF_PLACE_HOLDER__", python_code_block)
321303
)
322304

323305
self._ensure_dataset_exists()

bigframes/functions/_function_session.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -847,15 +847,15 @@ def wrapper(func):
847847
if output_type:
848848
py_sig = py_sig.replace(return_annotation=output_type)
849849

850-
udf_sig = udf_def.UdfSignature.from_py_signature(py_sig)
851-
852850
# The function will actually be receiving a pandas Series, but allow
853851
# both BigQuery DataFrames and pandas object types for compatibility.
854852
is_row_processor = False
855853
if new_sig := _convert_row_processor_sig(py_sig):
856854
py_sig = new_sig
857855
is_row_processor = True
858856

857+
udf_sig = udf_def.UdfSignature.from_py_signature(py_sig)
858+
859859
managed_function_client = _function_client.FunctionClient(
860860
dataset_ref.project,
861861
bq_location,

bigframes/functions/_utils.py

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import sys
1919
import typing
2020
from typing import cast, Optional, Set
21+
import warnings
2122

2223
import cloudpickle
2324
import google.api_core.exceptions
@@ -26,6 +27,7 @@
2627
import pandas
2728
import pyarrow
2829

30+
import bigframes.exceptions as bfe
2931
import bigframes.formatting_helpers as bf_formatting
3032
from bigframes.functions import function_typing
3133

@@ -61,21 +63,40 @@ def get_remote_function_locations(bq_location):
6163

6264

6365
def _get_updated_package_requirements(
64-
package_requirements=None, is_row_processor=False, capture_references=True
66+
package_requirements=None,
67+
is_row_processor=False,
68+
capture_references=True,
69+
ignore_package_version=False,
6570
):
6671
requirements = []
6772
if capture_references:
6873
requirements.append(f"cloudpickle=={cloudpickle.__version__}")
6974

7075
if is_row_processor:
71-
# bigframes function will send an entire row of data as json, which
72-
# would be converted to a pandas series and processed. Ensure numpy
73-
# versions match to avoid unpickling problems. See internal issue
74-
# b/347934471.
75-
requirements.append(f"numpy=={numpy.__version__}")
76-
requirements.append(f"pandas=={pandas.__version__}")
77-
requirements.append(f"pyarrow=={pyarrow.__version__}")
78-
76+
if ignore_package_version:
77+
# TODO(jialuo): Add back the version after b/410924784 is resolved.
78+
# Due to current limitations on the packages version in Python UDFs,
79+
# we use `ignore_package_version` to optionally omit the version for
80+
# managed functions only.
81+
msg = bfe.format_message(
82+
"numpy, pandas, and pyarrow versions in the function execution"
83+
" environment may not precisely match your local environment."
84+
)
85+
warnings.warn(msg, category=bfe.FunctionPackageVersionWarning)
86+
requirements.append("pandas")
87+
requirements.append("pyarrow")
88+
requirements.append("numpy")
89+
else:
90+
# bigframes function will send an entire row of data as json, which
91+
# would be converted to a pandas series and processed Ensure numpy
92+
# versions match to avoid unpickling problems. See internal issue
93+
# b/347934471.
94+
requirements.append(f"pandas=={pandas.__version__}")
95+
requirements.append(f"pyarrow=={pyarrow.__version__}")
96+
requirements.append(f"numpy=={numpy.__version__}")
97+
98+
# TODO(b/435023957): Fix the issue of potential duplicate package versions
99+
# when `package_requirements` also contains `pandas/pyarrow/numpy`.
79100
if package_requirements:
80101
requirements.extend(package_requirements)
81102

bigframes/functions/function_template.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import inspect
1818
import logging
1919
import os
20+
import re
2021
import textwrap
2122
from typing import Tuple
2223

@@ -291,3 +292,55 @@ def generate_cloud_function_main_code(
291292
logger.debug(f"Wrote {os.path.abspath(main_py)}:\n{open(main_py).read()}")
292293

293294
return handler_func_name
295+
296+
297+
def generate_managed_function_code(
298+
def_,
299+
udf_name: str,
300+
is_row_processor: bool,
301+
capture_references: bool,
302+
) -> str:
303+
"""Generates the Python code block for managed Python UDF."""
304+
305+
if capture_references:
306+
# This code path ensures that if the udf body contains any
307+
# references to variables and/or imports outside the body, they are
308+
# captured as well.
309+
import cloudpickle
310+
311+
pickled = cloudpickle.dumps(def_)
312+
func_code = textwrap.dedent(
313+
f"""
314+
import cloudpickle
315+
{udf_name} = cloudpickle.loads({pickled})
316+
"""
317+
)
318+
else:
319+
# This code path ensures that if the udf body is self contained,
320+
# i.e. there are no references to variables or imports outside the
321+
# body.
322+
func_code = textwrap.dedent(inspect.getsource(def_))
323+
match = re.search(r"^def ", func_code, flags=re.MULTILINE)
324+
if match is None:
325+
raise ValueError("The UDF is not defined correctly.")
326+
func_code = func_code[match.start() :]
327+
328+
if is_row_processor:
329+
udf_code = textwrap.dedent(inspect.getsource(get_pd_series))
330+
udf_code = udf_code[udf_code.index("def") :]
331+
bigframes_handler_code = textwrap.dedent(
332+
f"""def bigframes_handler(str_arg):
333+
return {udf_name}({get_pd_series.__name__}(str_arg))"""
334+
)
335+
else:
336+
udf_code = ""
337+
bigframes_handler_code = textwrap.dedent(
338+
f"""def bigframes_handler(*args):
339+
return {udf_name}(*args)"""
340+
)
341+
342+
udf_code_block = textwrap.dedent(
343+
f"{udf_code}\n{func_code}\n{bigframes_handler_code}"
344+
)
345+
346+
return udf_code_block

bigframes/session/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import weakref
4141

4242
import bigframes_vendored.constants as constants
43+
import bigframes_vendored.google_cloud_bigquery.retry as third_party_gcb_retry
4344
import bigframes_vendored.ibis.backends.bigquery as ibis_bigquery # noqa
4445
import bigframes_vendored.pandas.io.gbq as third_party_pandas_gbq
4546
import bigframes_vendored.pandas.io.parquet as third_party_pandas_parquet
@@ -2051,6 +2052,7 @@ def _start_query_ml_ddl(
20512052
project=None,
20522053
timeout=None,
20532054
query_with_job=True,
2055+
job_retry=third_party_gcb_retry.DEFAULT_ML_JOB_RETRY,
20542056
)
20552057
return iterator, query_job
20562058

bigframes/session/_io/bigquery/__init__.py

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,10 @@
2424
import typing
2525
from typing import Dict, Iterable, Literal, Mapping, Optional, overload, Tuple, Union
2626

27+
import bigframes_vendored.google_cloud_bigquery.retry as third_party_gcb_retry
2728
import bigframes_vendored.pandas.io.gbq as third_party_pandas_gbq
2829
import google.api_core.exceptions
30+
import google.api_core.retry
2931
import google.cloud.bigquery as bigquery
3032

3133
from bigframes.core import log_adapter
@@ -245,7 +247,7 @@ def start_query_with_client(
245247
location: Optional[str],
246248
project: Optional[str],
247249
timeout: Optional[float],
248-
metrics: Optional[bigframes.session.metrics.ExecutionMetrics] = None,
250+
metrics: Optional[bigframes.session.metrics.ExecutionMetrics],
249251
query_with_job: Literal[True],
250252
) -> Tuple[bigquery.table.RowIterator, bigquery.QueryJob]:
251253
...
@@ -260,8 +262,40 @@ def start_query_with_client(
260262
location: Optional[str],
261263
project: Optional[str],
262264
timeout: Optional[float],
263-
metrics: Optional[bigframes.session.metrics.ExecutionMetrics] = None,
265+
metrics: Optional[bigframes.session.metrics.ExecutionMetrics],
266+
query_with_job: Literal[False],
267+
) -> Tuple[bigquery.table.RowIterator, Optional[bigquery.QueryJob]]:
268+
...
269+
270+
271+
@overload
272+
def start_query_with_client(
273+
bq_client: bigquery.Client,
274+
sql: str,
275+
*,
276+
job_config: bigquery.QueryJobConfig,
277+
location: Optional[str],
278+
project: Optional[str],
279+
timeout: Optional[float],
280+
metrics: Optional[bigframes.session.metrics.ExecutionMetrics],
281+
query_with_job: Literal[True],
282+
job_retry: google.api_core.retry.Retry,
283+
) -> Tuple[bigquery.table.RowIterator, bigquery.QueryJob]:
284+
...
285+
286+
287+
@overload
288+
def start_query_with_client(
289+
bq_client: bigquery.Client,
290+
sql: str,
291+
*,
292+
job_config: bigquery.QueryJobConfig,
293+
location: Optional[str],
294+
project: Optional[str],
295+
timeout: Optional[float],
296+
metrics: Optional[bigframes.session.metrics.ExecutionMetrics],
264297
query_with_job: Literal[False],
298+
job_retry: google.api_core.retry.Retry,
265299
) -> Tuple[bigquery.table.RowIterator, Optional[bigquery.QueryJob]]:
266300
...
267301

@@ -276,6 +310,11 @@ def start_query_with_client(
276310
timeout: Optional[float] = None,
277311
metrics: Optional[bigframes.session.metrics.ExecutionMetrics] = None,
278312
query_with_job: bool = True,
313+
# TODO(tswast): We can stop providing our own default once we use a
314+
# google-cloud-bigquery version with
315+
# https://github.com/googleapis/python-bigquery/pull/2256 merged, likely
316+
# version 3.36.0 or later.
317+
job_retry: google.api_core.retry.Retry = third_party_gcb_retry.DEFAULT_JOB_RETRY,
279318
) -> Tuple[bigquery.table.RowIterator, Optional[bigquery.QueryJob]]:
280319
"""
281320
Starts query job and waits for results.
@@ -292,6 +331,7 @@ def start_query_with_client(
292331
location=location,
293332
project=project,
294333
api_timeout=timeout,
334+
job_retry=job_retry,
295335
)
296336
if metrics is not None:
297337
metrics.count_job_stats(row_iterator=results_iterator)
@@ -303,6 +343,7 @@ def start_query_with_client(
303343
location=location,
304344
project=project,
305345
timeout=timeout,
346+
job_retry=job_retry,
306347
)
307348
except google.api_core.exceptions.Forbidden as ex:
308349
if "Drive credentials" in ex.message:

bigframes/version.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
__version__ = "2.13.0"
15+
__version__ = "2.14.0"
1616

1717
# {x-release-please-start-date}
18-
__release_date__ = "2025-07-25"
18+
__release_date__ = "2025-08-05"
1919
# {x-release-please-end}

0 commit comments

Comments
 (0)