Skip to content

Commit cc4d803

Browse files
authored
Merge branch 'main' into sycai_ai_doc_fix
2 parents 9667d5a + 5e1e809 commit cc4d803

File tree

21 files changed

+1486
-96
lines changed

21 files changed

+1486
-96
lines changed

bigframes/core/compile/sqlglot/aggregations/nullary_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,4 @@ def _(
5050
if window is None:
5151
# ROW_NUMBER always needs an OVER clause.
5252
return sge.Window(this=result)
53-
return apply_window_if_present(result, window)
53+
return apply_window_if_present(result, window, include_framing_clauses=False)

bigframes/core/compile/sqlglot/aggregations/unary_compiler.py

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,51 @@ def _(
104104
column: typed_expr.TypedExpr,
105105
window: typing.Optional[window_spec.WindowSpec] = None,
106106
) -> sge.Expression:
107-
return apply_window_if_present(sge.func("DENSE_RANK"), window)
107+
return apply_window_if_present(
108+
sge.func("DENSE_RANK"), window, include_framing_clauses=False
109+
)
110+
111+
112+
@UNARY_OP_REGISTRATION.register(agg_ops.FirstOp)
113+
def _(
114+
op: agg_ops.FirstOp,
115+
column: typed_expr.TypedExpr,
116+
window: typing.Optional[window_spec.WindowSpec] = None,
117+
) -> sge.Expression:
118+
# FIRST_VALUE in BQ respects nulls by default.
119+
return apply_window_if_present(sge.FirstValue(this=column.expr), window)
120+
121+
122+
@UNARY_OP_REGISTRATION.register(agg_ops.FirstNonNullOp)
123+
def _(
124+
op: agg_ops.FirstNonNullOp,
125+
column: typed_expr.TypedExpr,
126+
window: typing.Optional[window_spec.WindowSpec] = None,
127+
) -> sge.Expression:
128+
return apply_window_if_present(
129+
sge.IgnoreNulls(this=sge.FirstValue(this=column.expr)), window
130+
)
131+
132+
133+
@UNARY_OP_REGISTRATION.register(agg_ops.LastOp)
134+
def _(
135+
op: agg_ops.LastOp,
136+
column: typed_expr.TypedExpr,
137+
window: typing.Optional[window_spec.WindowSpec] = None,
138+
) -> sge.Expression:
139+
# LAST_VALUE in BQ respects nulls by default.
140+
return apply_window_if_present(sge.LastValue(this=column.expr), window)
141+
142+
143+
@UNARY_OP_REGISTRATION.register(agg_ops.LastNonNullOp)
144+
def _(
145+
op: agg_ops.LastNonNullOp,
146+
column: typed_expr.TypedExpr,
147+
window: typing.Optional[window_spec.WindowSpec] = None,
148+
) -> sge.Expression:
149+
return apply_window_if_present(
150+
sge.IgnoreNulls(this=sge.LastValue(this=column.expr)), window
151+
)
108152

109153

110154
@UNARY_OP_REGISTRATION.register(agg_ops.MaxOp)
@@ -182,7 +226,9 @@ def _(
182226
column: typed_expr.TypedExpr,
183227
window: typing.Optional[window_spec.WindowSpec] = None,
184228
) -> sge.Expression:
185-
return apply_window_if_present(sge.func("RANK"), window)
229+
return apply_window_if_present(
230+
sge.func("RANK"), window, include_framing_clauses=False
231+
)
186232

187233

188234
@UNARY_OP_REGISTRATION.register(agg_ops.SizeUnaryOp)

bigframes/core/compile/sqlglot/aggregations/windows.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
def apply_window_if_present(
2626
value: sge.Expression,
2727
window: typing.Optional[window_spec.WindowSpec] = None,
28+
include_framing_clauses: bool = True,
2829
) -> sge.Expression:
2930
if window is None:
3031
return value
@@ -64,11 +65,11 @@ def apply_window_if_present(
6465
if not window.bounds and not order:
6566
return sge.Window(this=value, partition_by=group_by)
6667

67-
if not window.bounds:
68+
if not window.bounds and not include_framing_clauses:
6869
return sge.Window(this=value, partition_by=group_by, order=order)
6970

7071
kind = (
71-
"ROWS" if isinstance(window.bounds, window_spec.RowsWindowBounds) else "RANGE"
72+
"RANGE" if isinstance(window.bounds, window_spec.RangeWindowBounds) else "ROWS"
7273
)
7374

7475
start: typing.Union[int, float, None] = None
@@ -125,7 +126,7 @@ def get_window_order_by(
125126
nulls_first=nulls_first,
126127
)
127128
)
128-
elif not nulls_first and not desc:
129+
elif (not nulls_first) and (not desc):
129130
order_by.append(
130131
sge.Ordered(
131132
this=is_null_expr,

bigframes/core/indexes/base.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -383,9 +383,16 @@ def to_series(
383383

384384
name = self.name if name is None else name
385385
if index is None:
386-
return bigframes.series.Series(data=self, index=self, name=name)
386+
return bigframes.series.Series(
387+
data=self, index=self, name=name, session=self._session
388+
)
387389
else:
388-
return bigframes.series.Series(data=self, index=Index(index), name=name)
390+
return bigframes.series.Series(
391+
data=self,
392+
index=Index(index, session=self._session),
393+
name=name,
394+
session=self._session,
395+
)
389396

390397
def get_level_values(self, level) -> Index:
391398
level_n = level if isinstance(level, int) else self.names.index(level)

bigframes/core/indexes/multi.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
from __future__ import annotations
1616

17-
from typing import cast, Hashable, Iterable, Sequence
17+
from typing import cast, Hashable, Iterable, Optional, Sequence, TYPE_CHECKING
1818

1919
import bigframes_vendored.pandas.core.indexes.multi as vendored_pandas_multindex
2020
import pandas
@@ -23,6 +23,9 @@
2323
from bigframes.core import expression as ex
2424
from bigframes.core.indexes.base import Index
2525

26+
if TYPE_CHECKING:
27+
import bigframes.session
28+
2629

2730
class MultiIndex(Index, vendored_pandas_multindex.MultiIndex):
2831
__doc__ = vendored_pandas_multindex.MultiIndex.__doc__
@@ -33,21 +36,25 @@ def from_tuples(
3336
tuples: Iterable[tuple[Hashable, ...]],
3437
sortorder: int | None = None,
3538
names: Sequence[Hashable] | Hashable | None = None,
39+
*,
40+
session: Optional[bigframes.session.Session] = None,
3641
) -> MultiIndex:
3742
pd_index = pandas.MultiIndex.from_tuples(tuples, sortorder, names)
3843
# Index.__new__ should detect multiple levels and properly create a multiindex
39-
return cast(MultiIndex, Index(pd_index))
44+
return cast(MultiIndex, Index(pd_index, session=session))
4045

4146
@classmethod
4247
def from_arrays(
4348
cls,
4449
arrays,
4550
sortorder: int | None = None,
4651
names=None,
52+
*,
53+
session: Optional[bigframes.session.Session] = None,
4754
) -> MultiIndex:
4855
pd_index = pandas.MultiIndex.from_arrays(arrays, sortorder, names)
4956
# Index.__new__ should detect multiple levels and properly create a multiindex
50-
return cast(MultiIndex, Index(pd_index))
57+
return cast(MultiIndex, Index(pd_index, session=session))
5158

5259
def __eq__(self, other) -> Index: # type: ignore
5360
import bigframes.operations as ops
@@ -71,3 +78,38 @@ def __eq__(self, other) -> Index: # type: ignore
7178
index_labels=[None],
7279
)
7380
)
81+
82+
83+
class MultiIndexAccessor:
84+
"""Proxy to MultiIndex constructors to allow a session to be passed in."""
85+
86+
def __init__(self, session: bigframes.session.Session):
87+
self._session = session
88+
89+
def __call__(self, *args, **kwargs) -> MultiIndex:
90+
"""Construct a MultiIndex using the associated Session.
91+
92+
See :class:`bigframes.pandas.MultiIndex`.
93+
"""
94+
return MultiIndex(*args, session=self._session, **kwargs)
95+
96+
def from_arrays(self, *args, **kwargs) -> MultiIndex:
97+
"""Construct a MultiIndex using the associated Session.
98+
99+
See :func:`bigframes.pandas.MultiIndex.from_arrays`.
100+
"""
101+
return MultiIndex.from_arrays(*args, session=self._session, **kwargs)
102+
103+
def from_frame(self, *args, **kwargs) -> MultiIndex:
104+
"""Construct a MultiIndex using the associated Session.
105+
106+
See :func:`bigframes.pandas.MultiIndex.from_frame`.
107+
"""
108+
return cast(MultiIndex, MultiIndex.from_frame(*args, **kwargs))
109+
110+
def from_tuples(self, *args, **kwargs) -> MultiIndex:
111+
"""Construct a MultiIndex using the associated Session.
112+
113+
See :func:`bigframes.pandas.MultiIndex.from_tuples`.
114+
"""
115+
return MultiIndex.from_tuples(*args, session=self._session, **kwargs)

bigframes/core/log_adapter.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,9 @@ def method_logger(method=None, /, *, custom_base_name: Optional[str] = None):
155155
def outer_wrapper(method):
156156
@functools.wraps(method)
157157
def wrapper(*args, **kwargs):
158-
api_method_name = getattr(method, LOG_OVERRIDE_NAME, method.__name__)
158+
api_method_name = getattr(
159+
method, LOG_OVERRIDE_NAME, method.__name__
160+
).lower()
159161
if custom_base_name is None:
160162
qualname_parts = getattr(method, "__qualname__", method.__name__).split(
161163
"."

bigframes/core/reshape/tile.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from __future__ import annotations
1616

1717
import typing
18+
from typing import Optional, TYPE_CHECKING
1819

1920
import bigframes_vendored.constants as constants
2021
import bigframes_vendored.pandas.core.reshape.tile as vendored_pandas_tile
@@ -31,6 +32,9 @@
3132
import bigframes.operations.aggregations as agg_ops
3233
import bigframes.series
3334

35+
if TYPE_CHECKING:
36+
import bigframes.session
37+
3438

3539
def cut(
3640
x,
@@ -42,6 +46,7 @@ def cut(
4246
*,
4347
right: typing.Optional[bool] = True,
4448
labels: typing.Union[typing.Iterable[str], bool, None] = None,
49+
session: Optional[bigframes.session.Session] = None,
4550
) -> bigframes.series.Series:
4651
if (
4752
labels is not None
@@ -65,7 +70,7 @@ def cut(
6570
raise ValueError("Cannot cut empty array.")
6671

6772
if not isinstance(x, bigframes.series.Series):
68-
x = bigframes.series.Series(x)
73+
x = bigframes.series.Series(x, session=session)
6974

7075
if isinstance(bins, int):
7176
if bins <= 0:

bigframes/core/tools/datetimes.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
from __future__ import annotations
16+
1517
from collections.abc import Mapping
1618
from datetime import date, datetime
17-
from typing import Optional, Union
19+
from typing import Optional, TYPE_CHECKING, Union
1820

1921
import bigframes_vendored.constants as constants
2022
import bigframes_vendored.pandas.core.tools.datetimes as vendored_pandas_datetimes
@@ -25,6 +27,9 @@
2527
import bigframes.operations as ops
2628
import bigframes.series
2729

30+
if TYPE_CHECKING:
31+
import bigframes.session
32+
2833

2934
def to_datetime(
3035
arg: Union[
@@ -37,6 +42,7 @@ def to_datetime(
3742
utc: bool = False,
3843
format: Optional[str] = None,
3944
unit: Optional[str] = None,
45+
session: Optional[bigframes.session.Session] = None,
4046
) -> Union[pd.Timestamp, datetime, bigframes.series.Series]:
4147
if isinstance(arg, (int, float, str, datetime, date)):
4248
return pd.to_datetime(
@@ -52,7 +58,7 @@ def to_datetime(
5258
f"to datetime is not implemented. {constants.FEEDBACK_LINK}"
5359
)
5460

55-
arg = bigframes.series.Series(arg)
61+
arg = bigframes.series.Series(arg, session=session)
5662

5763
if format and unit and arg.dtype in (bigframes.dtypes.INT_DTYPE, bigframes.dtypes.FLOAT_DTYPE): # type: ignore
5864
raise ValueError("cannot specify both format and unit")

bigframes/formatting_helpers.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,14 @@ def progress_callback(
105105
"""Displays a progress bar while the query is running"""
106106
global current_display, current_display_id, previous_display_html
107107

108-
import bigframes._config
109-
import bigframes.core.events
108+
try:
109+
import bigframes._config
110+
import bigframes.core.events
111+
except ImportError:
112+
# Since this gets called from __del__, skip if the import fails to avoid
113+
# ImportError: sys.meta_path is None, Python is likely shutting down.
114+
# This will allow cleanup to continue.
115+
return
110116

111117
progress_bar = bigframes._config.options.display.progress_bar
112118

bigframes/operations/blob.py

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -804,35 +804,29 @@ def audio_transcribe(
804804
raise ValueError("Must specify the engine, supported value is 'bigquery'.")
805805

806806
import bigframes.bigquery as bbq
807-
import bigframes.ml.llm as llm
808807
import bigframes.pandas as bpd
809808

810809
# col name doesn't matter here. Rename to avoid column name conflicts
811810
audio_series = bigframes.series.Series(self._block)
812811

813812
prompt_text = "**Task:** Transcribe the provided audio. **Instructions:** - Your response must contain only the verbatim transcription of the audio. - Do not include any introductory text, summaries, or conversational filler in your response. The output should begin directly with the first word of the audio."
814813

815-
llm_model = llm.GeminiTextGenerator(
816-
model_name=model_name,
817-
session=self._block.session,
818-
connection_name=connection,
819-
)
814+
# Convert the audio series to the runtime representation required by the model.
815+
audio_runtime = audio_series.blob._get_runtime("R", with_metadata=True)
820816

821-
# transcribe audio using ML.GENERATE_TEXT
822-
transcribed_results = llm_model.predict(
823-
X=audio_series,
824-
prompt=[prompt_text, audio_series],
825-
temperature=0.0,
817+
transcribed_results = bbq.ai.generate(
818+
prompt=(prompt_text, audio_runtime),
819+
connection_id=connection,
820+
endpoint=model_name,
821+
model_params={"generationConfig": {"temperature": 0.0}},
826822
)
827823

828-
transcribed_content_series = cast(
829-
bpd.Series, transcribed_results["ml_generate_text_llm_result"]
830-
).rename("transcribed_content")
824+
transcribed_content_series = transcribed_results.struct.field("result").rename(
825+
"transcribed_content"
826+
)
831827

832828
if verbose:
833-
transcribed_status_series = cast(
834-
bpd.Series, transcribed_results["ml_generate_text_status"]
835-
)
829+
transcribed_status_series = transcribed_results.struct.field("status")
836830
results_df = bpd.DataFrame(
837831
{
838832
"status": transcribed_status_series,

0 commit comments

Comments
 (0)