1414
1515"""Unit tests for read_gbq_colab helper functions."""
1616
17- import textwrap
18- from unittest import mock
19-
20- from google .cloud import bigquery
21- import numpy
22- import pandas
23- import pytest
24-
2517from bigframes .testing import mocks
2618
2719
2820def test_read_gbq_colab_includes_label ():
2921 """Make sure we can tell direct colab usage apart from regular read_gbq usage."""
30- import bigframes .core .log_adapter as log_adapter
31- import bigframes .session ._io .bigquery as bq_io
32-
33- # Store the original add_and_trim_labels
34- original_add_and_trim = bq_io .add_and_trim_labels
35-
36- # Track API methods
37- tracked_methods = []
38-
39- def debug_add_api_method (name ):
40- tracked_methods .append (name )
41-
42- log_adapter .add_api_method = debug_add_api_method
43-
44- def intercept_add_and_trim_labels (job_config ):
45- # Ensure tracked methods are available before creating labels
46- if tracked_methods and "session-read_gbq_colab" in tracked_methods :
47- # Temporarily restore the methods for label creation
48- original_methods = list (log_adapter ._api_methods )
49- log_adapter ._api_methods .clear ()
50- log_adapter ._api_methods .extend (tracked_methods )
51-
52- # Call the original function
53- original_add_and_trim (job_config )
54-
55- # Restore original state
56- log_adapter ._api_methods .clear ()
57- log_adapter ._api_methods .extend (original_methods )
58- else :
59- original_add_and_trim (job_config )
60-
61- # Monkey patch add_and_trim_labels
62- bq_io .add_and_trim_labels = intercept_add_and_trim_labels
63-
64- # Clear any existing call stack and API methods
65- log_adapter ._call_stack .clear ()
66- log_adapter .get_and_reset_api_methods ()
67-
6822 session = mocks .create_bigquery_session ()
6923
70- # Ensure call stack is empty before calling the method
71- log_adapter ._call_stack .clear ()
72-
7324 _ = session ._read_gbq_colab ("SELECT 'read-gbq-colab-test'" )
7425
7526 configs = session ._job_configs # type: ignore
76-
7727 label_values = []
7828 for config in configs :
7929 if config is None :
@@ -83,71 +33,6 @@ def intercept_add_and_trim_labels(job_config):
8333 assert "session-read_gbq_colab" in label_values
8434
8535
@pytest.mark.parametrize("dry_run", [True, False])
def test_read_gbq_colab_includes_formatted_values_in_dry_run(monkeypatch, dry_run):
    """Check that ``pyformat_args`` are substituted into the submitted SQL.

    The query template references an integer, a string, a BigQuery DataFrames
    object and a pandas DataFrame, plus an escaped ``{{...}}`` placeholder.
    The most recently recorded query must contain the substituted values, and
    the most recently recorded job config must reflect ``dry_run``.

    Args:
        monkeypatch: pytest fixture, forwarded to ``mocks.create_dataframe``.
        dry_run: Parametrized flag passed through to ``_read_gbq_colab``.
    """
    bqclient = mock.create_autospec(bigquery.Client, instance=True)
    bqclient.project = "proj"
    session = mocks.create_bigquery_session(bqclient=bqclient)
    bf_df = mocks.create_dataframe(monkeypatch, session=session)

    # Pin the names of any temporary resources so the expected SQL is stable.
    session._create_temp_table = mock.Mock(  # type: ignore
        return_value=bigquery.TableReference.from_string("proj.dset.temp_table")
    )
    session._create_temp_view = mock.Mock(  # type: ignore
        return_value=bigquery.TableReference.from_string("proj.dset.temp_view")
    )

    # To avoid trouble with get_table() calls getting out of sync with mock
    # "uploaded" data, make sure this is small enough to inline in the SQL as a
    # view.
    pd_df = pandas.DataFrame({"rowindex": numpy.arange(3), "value": numpy.arange(3)})

    pyformat_args = {
        "some_integer": 123,
        "some_string": "some_column",
        "bf_df": bf_df,
        "pd_df": pd_df,
        # This is not a supported type, but ignored if not referenced.
        "some_object": object(),
    }

    _ = session._read_gbq_colab(
        textwrap.dedent(
            """
            SELECT {some_integer} as some_integer,
            {some_string} as some_string,
            '{{escaped}}' as escaped
            FROM {bf_df} AS bf_df
            FULL OUTER JOIN {pd_df} AS pd_df
            ON bf_df.rowindex = pd_df.rowindex
            """
        ),
        pyformat_args=pyformat_args,
        dry_run=dry_run,
    )

    # The expected SQL references the mocked temp table for dry runs and the
    # mocked temp view otherwise. The suffix is computed once so that the
    # backtick-quoted identifier contains no stray whitespace.
    temp_kind = "table" if dry_run else "view"
    expected = textwrap.dedent(
        f"""
        SELECT 123 as some_integer,
        some_column as some_string,
        '{{escaped}}' as escaped
        FROM `proj`.`dset`.`temp_{temp_kind}` AS bf_df
        FULL OUTER JOIN `proj`.`dset`.`temp_{temp_kind}` AS pd_df
        ON bf_df.rowindex = pd_df.rowindex
        """
    )

    # This should be the most recent query.
    query = session._queries[-1]  # type: ignore
    config = session._job_configs[-1]  # type: ignore

    if dry_run:
        assert config.dry_run
    else:
        # Allow for any "False-y" value.
        assert not config.dry_run

    assert query.strip() == expected.strip()
149-
150-
15136def test_read_gbq_colab_doesnt_set_destination_table ():
15237 """For best performance, we don't try to workaround the 10 GB query results limitation."""
15338 session = mocks .create_bigquery_session ()
0 commit comments