Skip to content

Commit 42e0580

Browse files
committed
merge change
1 parent a66f7f2 commit 42e0580

File tree

4 files changed

+46
-82
lines changed

4 files changed

+46
-82
lines changed

bigframes/core/blocks.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -124,9 +124,6 @@ def total_bytes_processed(self) -> Optional[int]:
124124
def __next__(self) -> pd.DataFrame:
125125
return next(self._dataframes)
126126

127-
def __iter__(self) -> Iterator[pd.DataFrame]:
128-
return self
129-
130127

131128
@dataclasses.dataclass()
132129
class MaterializationOptions:

bigframes/dataframe.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1882,7 +1882,7 @@ def to_pandas_batches(
18821882
max_results: Optional[int] = None,
18831883
*,
18841884
allow_large_results: Optional[bool] = None,
1885-
) -> blocks.PandasBatches:
1885+
) -> Iterable[pandas.DataFrame]:
18861886
"""Stream DataFrame results to an iterable of pandas DataFrame.
18871887
18881888
page_size and max_results determine the size and number of batches,
@@ -1925,7 +1925,7 @@ def to_pandas_batches(
19251925
over the default size limit of 10 GB.
19261926
19271927
Returns:
1928-
blocks.PandasBatches:
1928+
Iterable[pandas.DataFrame]:
19291929
An iterable of smaller dataframes which combine to
19301930
form the original dataframe. Results stream from bigquery,
19311931
see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.table.RowIterator#google_cloud_bigquery_table_RowIterator_to_arrow_iterable

bigframes/display/anywidget.py

Lines changed: 18 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,8 @@
2323
import pandas as pd
2424

2525
import bigframes
26+
from bigframes.core.blocks import PandasBatches
27+
import bigframes.dataframe
2628
import bigframes.display.html
2729

2830
# anywidget and traitlets are optional dependencies. We don't want the import of
@@ -56,6 +58,7 @@ class TableWidget(WIDGET_BASE):
5658
row_count = traitlets.Int(0).tag(sync=True)
5759
table_html = traitlets.Unicode().tag(sync=True)
5860
_initial_load_complete = traitlets.Bool(False).tag(sync=True)
61+
_batches: PandasBatches
5962

6063
def __init__(self, dataframe: bigframes.dataframe.DataFrame):
6164
"""Initialize the TableWidget.
@@ -65,8 +68,7 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
6568
"""
6669
if not ANYWIDGET_INSTALLED:
6770
raise ImportError(
68-
"Please `pip install anywidget traitlets` or "
69-
"`pip install 'bigframes[anywidget]'` to use TableWidget."
71+
"Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
7072
)
7173

7274
self._dataframe = dataframe
@@ -85,16 +87,22 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
8587
self._batch_iter: Optional[Iterator[pd.DataFrame]] = None
8688
self._cached_batches: List[pd.DataFrame] = []
8789

88-
# Respect display options for initial page size
89-
self.page_size = bigframes.options.display.max_rows
90+
# respect display options for initial page size
91+
initial_page_size = bigframes.options.display.max_rows
9092

91-
# The query issued by `to_pandas_batches()` already contains
92-
# metadata about how many results there were. Use that to avoid
93-
# doing an extra COUNT(*) query that `len(...)` would do.
94-
self._batches = self._dataframe.to_pandas_batches(page_size=self.page_size)
95-
# TODO (shuowei): total_rows=None Incorrectly Defaults to 0. b/452747934
93+
# set traitlets properties that trigger observers
94+
self.page_size = initial_page_size
95+
96+
# len(dataframe) is expensive, since it will trigger a
97+
# SELECT COUNT(*) query. It is a must have however.
98+
# TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
99+
# before we get here so that the count might already be cached.
100+
# TODO(b/452747934): Allow row_count to be None and check to see if
101+
# there are multiple pages and show "page 1 of many" in this case
102+
self._reset_batches_for_new_page_size()
96103
self.row_count = self._batches.total_rows or 0
97104

105+
# get the initial page
98106
self._set_table_html()
99107
self._initial_load_complete = True
100108
self._initializing = False
@@ -187,7 +195,7 @@ def _cached_data(self) -> pd.DataFrame:
187195

188196
def _reset_batches_for_new_page_size(self) -> None:
189197
"""Reset the batch iterator when page size changes."""
190-
self._batches = self._dataframe.to_pandas_batches(page_size=self.page_size)
198+
self._batches = self._dataframe._to_pandas_batches(page_size=self.page_size)
191199

192200
self._cached_batches = []
193201
self._batch_iter = None

notebooks/dataframes/anywidget_mode.ipynb

Lines changed: 26 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -128,34 +128,24 @@
128128
"metadata": {},
129129
"outputs": [
130130
{
131-
"data": {
132-
"text/html": [
133-
"Query job 6d85c081-49c7-408a-ab96-e0e9e5102419 is DONE. 171.4 MB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:6d85c081-49c7-408a-ab96-e0e9e5102419&page=queryresults\">Open Job</a>"
134-
],
135-
"text/plain": [
136-
"<IPython.core.display.HTML object>"
137-
]
138-
},
139-
"metadata": {},
140-
"output_type": "display_data"
141-
},
142-
{
143-
"data": {
144-
"application/vnd.jupyter.widget-view+json": {
145-
"model_id": "31ba8e41e4ca4579b85409237cb7a566",
146-
"version_major": 2,
147-
"version_minor": 0
148-
},
149-
"text/plain": [
150-
"TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
151-
]
152-
},
153-
"metadata": {},
154-
"output_type": "display_data"
131+
"name": "stderr",
132+
"output_type": "stream",
133+
"text": [
134+
"/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dataframe.py:868: UserWarning: Anywidget mode is not available. Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. Falling back to deferred mode. Error: Traceback (most recent call last):\n",
135+
" File \"/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dataframe.py\", line 861, in _repr_html_\n",
136+
" widget = display.TableWidget(df.copy())\n",
137+
" File \"/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/display/anywidget.py\", line 100, in __init__\n",
138+
" self.row_count = self._batches.total_rows or 0\n",
139+
"AttributeError: 'TableWidget' object has no attribute '_batches'\n",
140+
"\n",
141+
" warnings.warn(\n"
142+
]
155143
},
156144
{
157145
"data": {
158-
"text/html": [],
146+
"text/html": [
147+
"Computation deferred. Computation will process 171.4 MB"
148+
],
159149
"text/plain": [
160150
"Computation deferred. Computation will process 171.4 MB"
161151
]
@@ -184,37 +174,16 @@
184174
"metadata": {},
185175
"outputs": [
186176
{
187-
"data": {
188-
"text/html": [
189-
"Query job 48cb4908-a59a-420f-8fcb-200d0d9187ef is DONE. 171.4 MB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:48cb4908-a59a-420f-8fcb-200d0d9187ef&page=queryresults\">Open Job</a>"
190-
],
191-
"text/plain": [
192-
"<IPython.core.display.HTML object>"
193-
]
194-
},
195-
"metadata": {},
196-
"output_type": "display_data"
197-
},
198-
{
199-
"name": "stdout",
200-
"output_type": "stream",
201-
"text": [
202-
"Total pages: 555246\n"
177+
"ename": "AttributeError",
178+
"evalue": "'TableWidget' object has no attribute '_batches'",
179+
"output_type": "error",
180+
"traceback": [
181+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
182+
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
183+
"\u001b[0;32m<ipython-input-7-49e9807f0359>\u001b[0m in \u001b[0;36m<cell line: 5>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m# Create widget programmatically\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mwidget\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTableWidget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Total pages: {math.ceil(widget.row_count / widget.page_size)}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
184+
"\u001b[0;32m~/src/github.com/googleapis/python-bigquery-dataframes/bigframes/display/anywidget.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, dataframe)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[0;31m# TODO(b/452747934): Allow row_count to be None and check to see if\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0;31m# there are multiple pages and show \"page 1 of many\" in this case\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 100\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrow_count\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_batches\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtotal_rows\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 101\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[0;31m# get the initial page\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
185+
"\u001b[0;31mAttributeError\u001b[0m: 'TableWidget' object has no attribute '_batches'"
203186
]
204-
},
205-
{
206-
"data": {
207-
"application/vnd.jupyter.widget-view+json": {
208-
"model_id": "5d22f3f19e4140b0ba51869e97c3f690",
209-
"version_major": 2,
210-
"version_minor": 0
211-
},
212-
"text/plain": [
213-
"TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
214-
]
215-
},
216-
"metadata": {},
217-
"output_type": "display_data"
218187
}
219188
],
220189
"source": [
@@ -239,20 +208,10 @@
239208
},
240209
{
241210
"cell_type": "code",
242-
"execution_count": 8,
211+
"execution_count": null,
243212
"id": "12b68f15",
244213
"metadata": {},
245-
"outputs": [
246-
{
247-
"name": "stdout",
248-
"output_type": "stream",
249-
"text": [
250-
"Current page: 0\n",
251-
"After next: 1\n",
252-
"After prev: 0\n"
253-
]
254-
}
255-
],
214+
"outputs": [],
256215
"source": [
257216
"# Simulate button clicks programmatically\n",
258217
"print(\"Current page:\", widget.page)\n",

0 commit comments

Comments
 (0)