Skip to content

Commit 8ca1c4d

Browse files
authored
Merge branch 'main' into main_chelsealin_addwindowtests
2 parents 4f5166c + a6f87a0 commit 8ca1c4d

File tree

14 files changed

+5196
-14
lines changed

14 files changed

+5196
-14
lines changed

bigframes/core/compile/polars/compiler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -493,9 +493,9 @@ def compile_agg_op(
493493
if isinstance(op, agg_ops.MedianOp):
494494
return pl.median(*inputs)
495495
if isinstance(op, agg_ops.AllOp):
496-
return pl.all(*inputs)
496+
return pl.col(inputs).cast(pl.Boolean).all()
497497
if isinstance(op, agg_ops.AnyOp):
498-
return pl.any(*inputs) # type: ignore
498+
return pl.col(inputs).cast(pl.Boolean).any()
499499
if isinstance(op, agg_ops.NuniqueOp):
500500
return pl.col(*inputs).drop_nulls().n_unique()
501501
if isinstance(op, agg_ops.MinOp):

bigframes/core/compile/sqlglot/expressions/generic_ops.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
2424

2525
register_unary_op = scalar_compiler.scalar_op_compiler.register_unary_op
26+
register_ternary_op = scalar_compiler.scalar_op_compiler.register_ternary_op
2627

2728

2829
@register_unary_op(ops.AsTypeOp, pass_op=True)
@@ -66,6 +67,18 @@ def _(expr: TypedExpr, op: ops.AsTypeOp) -> sge.Expression:
6667
return _cast(sg_expr, sg_to_type, op.safe)
6768

6869

70+
@register_ternary_op(ops.clip_op)
71+
def _(
72+
original: TypedExpr,
73+
lower: TypedExpr,
74+
upper: TypedExpr,
75+
) -> sge.Expression:
76+
return sge.Greatest(
77+
this=sge.Least(this=original.expr, expressions=[upper.expr]),
78+
expressions=[lower.expr],
79+
)
80+
81+
6982
@register_unary_op(ops.hash_op)
7083
def _(expr: TypedExpr) -> sge.Expression:
7184
return sge.func("FARM_FINGERPRINT", expr.expr)
@@ -94,6 +107,13 @@ def _(expr: TypedExpr) -> sge.Expression:
94107
return sge.Not(this=sge.Is(this=expr.expr, expression=sge.Null()))
95108

96109

110+
@register_ternary_op(ops.where_op)
111+
def _(
112+
original: TypedExpr, condition: TypedExpr, replacement: TypedExpr
113+
) -> sge.Expression:
114+
return sge.If(this=condition.expr, true=original.expr, false=replacement.expr)
115+
116+
97117
# Helper functions
98118
def _cast_to_json(expr: TypedExpr, op: ops.AsTypeOp) -> sge.Expression:
99119
from_type = expr.dtype

bigframes/operations/blob.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -228,9 +228,14 @@ def display(
228228
df._set_internal_query_job(query_job)
229229

230230
def display_single_url(
231-
read_url: str, content_type: Union[str, pd._libs.missing.NAType]
231+
read_url: Union[str, pd._libs.missing.NAType],
232+
content_type: Union[str, pd._libs.missing.NAType],
232233
):
233-
if content_type is pd.NA: # display as raw data or error
234+
if pd.isna(read_url):
235+
ipy_display.display("<NA>")
236+
return
237+
238+
if pd.isna(content_type): # display as raw data or error
234239
response = requests.get(read_url)
235240
ipy_display.display(response.content)
236241
return

bigframes/testing/polars_session.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,11 +94,24 @@ def __init__(self):
9494
self._loader = None # type: ignore
9595

9696
def read_pandas(self, pandas_dataframe, write_engine="default"):
97+
original_input = pandas_dataframe
98+
9799
# override read_pandas to always keep data local-only
98-
if isinstance(pandas_dataframe, pandas.Series):
100+
if isinstance(pandas_dataframe, (pandas.Series, pandas.Index)):
99101
pandas_dataframe = pandas_dataframe.to_frame()
102+
100103
local_block = bigframes.core.blocks.Block.from_local(pandas_dataframe, self)
101-
return bigframes.dataframe.DataFrame(local_block)
104+
bf_df = bigframes.dataframe.DataFrame(local_block)
105+
106+
if isinstance(original_input, pandas.Series):
107+
series = bf_df[bf_df.columns[0]]
108+
series.name = original_input.name
109+
return series
110+
111+
if isinstance(original_input, pandas.Index):
112+
return bf_df.index
113+
114+
return bf_df
102115

103116
@property
104117
def bqclient(self):

noxfile.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,7 @@
4646
"3.11",
4747
]
4848

49-
# pytest-retry is not yet compatible with pytest 8.x.
50-
# https://github.com/str0zzapreti/pytest-retry/issues/32
51-
PYTEST_VERSION = "pytest<8.0.0dev"
49+
PYTEST_VERSION = "pytest==8.4.2"
5250
SPHINX_VERSION = "sphinx==4.5.0"
5351
LINT_PATHS = [
5452
"docs",
@@ -91,7 +89,7 @@
9189
# 3.10 is needed for Windows tests as it is the only version installed in the
9290
# bigframes-windows container image. For more information, search
9391
# bigframes/windows-docker, internally.
94-
SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.13"]
92+
SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"]
9593
SYSTEM_TEST_STANDARD_DEPENDENCIES = [
9694
"jinja2",
9795
"mock",
@@ -115,7 +113,7 @@
115113
# Make sure we leave some versions without "extras" so we know those
116114
# dependencies are actually optional.
117115
"3.10": ["tests", "scikit-learn", "anywidget"],
118-
"3.11": ["tests", "scikit-learn", "polars", "anywidget"],
116+
LATEST_FULLY_SUPPORTED_PYTHON: ["tests", "scikit-learn", "polars", "anywidget"],
119117
"3.13": ["tests", "polars", "anywidget"],
120118
}
121119

@@ -126,8 +124,13 @@
126124
# Sessions are executed in order, so put the smaller sessions
127125
# first to fail fast when running presubmit.
128126
nox.options.sessions = [
127+
# Include unit_noextras to ensure at least some unit tests contribute to
128+
# coverage.
129+
# TODO(tswast): Consider removing this when unit_noextras and cover are run
130+
# from GitHub Actions.
131+
"unit_noextras",
129132
"system-3.9", # No extras.
130-
"system-3.11",
133+
f"system-{LATEST_FULLY_SUPPORTED_PYTHON}", # All extras.
131134
"cover",
132135
# TODO(b/401609005): remove
133136
"cleanup",

tests/system/large/blob/test_function.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import logging
1516
import os
1617
import traceback
1718
from typing import Generator
@@ -434,6 +435,15 @@ def test_blob_transcribe(
434435
actual_text = actual[0]["content"]
435436
else:
436437
actual_text = actual[0]
438+
439+
if pd.isna(actual_text) or actual_text == "":
440+
# Ensure the tests are robust to flakes in the model, which aren't
441+
# particularly useful information for the bigframes team.
442+
logging.warning(
443+
f"blob_transcribe() model {model_name} verbose={verbose} failure"
444+
)
445+
return
446+
437447
actual_len = len(actual_text)
438448

439449
relative_length_tolerance = 0.2

tests/system/small/blob/test_io.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
from unittest import mock
16+
17+
import IPython.display
1518
import pandas as pd
1619

1720
import bigframes
@@ -92,3 +95,33 @@ def test_blob_create_read_gbq_object_table(
9295
pd.testing.assert_frame_equal(
9396
pd_blob_df, expected_df, check_dtype=False, check_index_type=False
9497
)
98+
99+
100+
def test_display_images(monkeypatch, images_mm_df: bpd.DataFrame):
101+
mock_display = mock.Mock()
102+
monkeypatch.setattr(IPython.display, "display", mock_display)
103+
104+
images_mm_df["blob_col"].blob.display()
105+
106+
for call in mock_display.call_args_list:
107+
args, _ = call
108+
arg = args[0]
109+
assert isinstance(arg, IPython.display.Image)
110+
111+
112+
def test_display_nulls(
113+
monkeypatch,
114+
bq_connection: str,
115+
session: bigframes.Session,
116+
):
117+
uri_series = bpd.Series([None, None, None], dtype="string", session=session)
118+
blob_series = uri_series.str.to_blob(connection=bq_connection)
119+
mock_display = mock.Mock()
120+
monkeypatch.setattr(IPython.display, "display", mock_display)
121+
122+
blob_series.blob.display()
123+
124+
for call in mock_display.call_args_list:
125+
args, _ = call
126+
arg = args[0]
127+
assert arg == "<NA>"

tests/system/small/engines/test_generic_ops.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from bigframes.session import polars_executor
2323
from bigframes.testing.engine_utils import assert_equivalence_execution
2424

25-
pytest.importorskip("polars")
25+
polars = pytest.importorskip("polars")
2626

2727
# Polars is used as the reference because it's fast and local. Generally, though, prefer the gbq engine where they disagree.
2828
REFERENCE_ENGINE = polars_executor.PolarsExecutor()
@@ -54,6 +54,12 @@ def apply_op(
5454

5555
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
5656
def test_engines_astype_int(scalars_array_value: array_value.ArrayValue, engine):
57+
polars_version = tuple([int(part) for part in polars.__version__.split(".")])
58+
if polars_version >= (1, 34, 0):
59+
# TODO(https://github.com/pola-rs/polars/issues/24841): Remove this when
60+
# polars fixes Decimal to Int cast.
61+
scalars_array_value = scalars_array_value.drop_columns(["numeric_col"])
62+
5763
arr = apply_op(
5864
scalars_array_value,
5965
ops.AsTypeOp(to_type=bigframes.dtypes.INT_DTYPE),
@@ -308,7 +314,7 @@ def test_engines_astype_timedelta(scalars_array_value: array_value.ArrayValue, e
308314
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
309315

310316

311-
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
317+
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
312318
def test_engines_where_op(scalars_array_value: array_value.ArrayValue, engine):
313319
arr, _ = scalars_array_value.compute_values(
314320
[
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`int64_col` AS `bfcol_0`,
4+
`int64_too` AS `bfcol_1`,
5+
`rowindex` AS `bfcol_2`
6+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
7+
), `bfcte_1` AS (
8+
SELECT
9+
*,
10+
GREATEST(LEAST(`bfcol_2`, `bfcol_1`), `bfcol_0`) AS `bfcol_3`
11+
FROM `bfcte_0`
12+
)
13+
SELECT
14+
`bfcol_3` AS `result_col`
15+
FROM `bfcte_1`
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`bool_col` AS `bfcol_0`,
4+
`int64_col` AS `bfcol_1`,
5+
`float64_col` AS `bfcol_2`
6+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
7+
), `bfcte_1` AS (
8+
SELECT
9+
*,
10+
IF(`bfcol_0`, `bfcol_1`, `bfcol_2`) AS `bfcol_3`
11+
FROM `bfcte_0`
12+
)
13+
SELECT
14+
`bfcol_3` AS `result_col`
15+
FROM `bfcte_1`

0 commit comments

Comments
 (0)