From b15f1f734c30e66286ce96197bb0e83b35f80331 Mon Sep 17 00:00:00 2001 From: Arwa Date: Wed, 15 Jan 2025 16:33:06 -0600 Subject: [PATCH 1/4] chore: raise an error if the engine does not support a particular feature --- bigquery_magics/config.py | 42 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/bigquery_magics/config.py b/bigquery_magics/config.py index 4fe9c40..b877df3 100644 --- a/bigquery_magics/config.py +++ b/bigquery_magics/config.py @@ -25,8 +25,48 @@ def _get_default_credentials_with_project(): return pydata_google_auth.default(scopes=_SCOPES, use_local_webserver=False) +class MissingReason(enum.Enum): + """Provides a way to disambiguate why an option is missing. + This is used instead of a None value to allow for custom validation and + docs generation. + """ + + # These missing reasons are because the engine doesn't support the feature, + # for example, use_rest_api on the bigframes engine. In this case, raise + # if the user has set it, as the magics woult otherwise act in a way + # contrary to that in which the user explicitly requested. + NOT_SUPPORTED_BY_ENGINE_INFEASIBLE = enum.auto() + + # This is like the above, but is theoretically possible. Include a call to + # action to reach out to the bigframes team if this is a feature they + # would like to use. + NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE = enum.auto() + + # This missing reason is for options that are magics-only and apply to all + # engines. For example, the destination variable is a magics-only setting + # and doesn't affect how queries are handled by the engine. + ENGINE_UNIVERSAL = enum.auto() + + +@dataclasses.dataclass(frozen=True) +class MagicsSetting: + """Encapsulates information about settings and how to set them. + This is used to generate documentation, merge settings across the various + ways they are duplicated, and to validate settings provided by a user. 
+ """ + + description: str + cell_arg: Union[MissingReason, str] + magics_context: Union[MissingReason, str] + bigframes_option: Union[MissingReason, str] + + +magics_settings = [ + # TODO: copy table from sheets to here +] + -@dataclass +@dataclasses.dataclass class Context(object): """Storage for objects to be used throughout an IPython notebook session. From fe4b53fc186576e9379b0ffbc35db3565b778746 Mon Sep 17 00:00:00 2001 From: Arwa Date: Fri, 17 Jan 2025 12:30:46 -0600 Subject: [PATCH 2/4] add validations --- bigquery_magics/bigquery.py | 7 +-- bigquery_magics/config.py | 113 ++++++++++++++++++++++++++++++++++-- tests/unit/test_bigquery.py | 4 +- 3 files changed, 115 insertions(+), 9 deletions(-) diff --git a/bigquery_magics/bigquery.py b/bigquery_magics/bigquery.py index 5e99883..4d254ca 100644 --- a/bigquery_magics/bigquery.py +++ b/bigquery_magics/bigquery.py @@ -467,10 +467,9 @@ def _query_with_bigframes(query: str, params: List[Any], args: Any): raise ValueError("Dry run is not supported by bigframes engine.") if bpd is None: - raise ValueError("Bigframes package is not installed.") - - bpd.options.bigquery.project = context.project - bpd.options.bigquery.credentials = context.credentials + raise ValueError( + "Please install the 'bigframes' package (pip install bigframes) to use the bigframes engine." + ) max_results = int(args.max_results) if args.max_results else None diff --git a/bigquery_magics/config.py b/bigquery_magics/config.py index b877df3..3bb7369 100644 --- a/bigquery_magics/config.py +++ b/bigquery_magics/config.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from dataclasses import dataclass -from typing import Optional +import dataclasses +import enum +from typing import Optional, Union import google.api_core.client_options as client_options import google.cloud.bigquery as bigquery @@ -25,15 +26,17 @@ def _get_default_credentials_with_project(): return pydata_google_auth.default(scopes=_SCOPES, use_local_webserver=False) + class MissingReason(enum.Enum): """Provides a way to disambiguate why an option is missing. + This is used instead of a None value to allow for custom validation and docs generation. """ # These missing reasons are because the engine doesn't support the feature, # for example, use_rest_api on the bigframes engine. In this case, raise - # if the user has set it, as the magics woult otherwise act in a way + # if the user has set it, as the magics would otherwise act in a way # contrary to that in which the user explicitly requested. NOT_SUPPORTED_BY_ENGINE_INFEASIBLE = enum.auto() @@ -51,6 +54,7 @@ class MissingReason(enum.Enum): @dataclasses.dataclass(frozen=True) class MagicsSetting: """Encapsulates information about settings and how to set them. + This is used to generate documentation, merge settings across the various ways they are duplicated, and to validate settings provided by a user. 
""" @@ -62,7 +66,108 @@ class MagicsSetting: magics_settings = [ - # TODO: copy table from sheets to here + MagicsSetting( + description="Limits the number of rows in the returned DataFrame.", + cell_arg="--max_results", + magics_context=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, + bigframes_option="read_gbq_query.max_results" + ), + + MagicsSetting( + description="Max bytes billed to use for executing a query.", + cell_arg="--maximum_bytes_billed", + magics_context="default_query_job_config", + bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE + ), + + MagicsSetting( + description="Set it to a query to estimate costs.", + cell_arg="--dry_run", + magics_context=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, + bigframes_option="read_gbq_query" + ), + + MagicsSetting( + description="Set it to use instead of Standard SQL.", + cell_arg="--use_legacy_sql", + magics_context=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, + bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE + ), + + MagicsSetting( + description="BigQuery REST API endpoint.", + cell_arg="--bigquery_api_endpoint", + magics_context="bqstorage_client_options", + bigframes_option="bigquery.client_endpoints_override['bqclient']" + ), + + MagicsSetting( + description="BigQery Storage API endpoint.", + cell_arg="--bqstorage_api_endpoint", + magics_context="bqstorage_client_options", + bigframes_option="bigquery.client_endpoints_override['bqclient']" + ), + + MagicsSetting( + description="Do not use cached query results.", + cell_arg="--no_query_cache", + magics_context=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, + bigframes_option="read_gbq_query.use_cache" + ), + + MagicsSetting( + description="[Deprecated] Default is BigQuery Storage API.", + cell_arg="--use_bqstorage_api", + magics_context=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, + bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_INFEASIBLE + ), + + MagicsSetting( + description="Use 
the BigQuery REST API to download results instead of " + "the BigQuery Storage Read API.", + cell_arg="--use_rest_api", + magics_context=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, + bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_INFEASIBLE + ), + + MagicsSetting( + description="Print verbose output, including the query job ID and the" + "amount of time for the query to finish.", + cell_arg="--verbose", + magics_context=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, + bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE + ), + + MagicsSetting( + description= + "To format the query string. Should be followed by a string representation " + "of a dictionary or a reference to a dictionary in the same format by " + "including $ before the variable.", + cell_arg="--params", + magics_context="default_query_job_config", + bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE + ), + + MagicsSetting( + description="To display a progress bar while executing the query.", + cell_arg="--progress_bar_type", + magics_context="progress_bar_type", + bigframes_option="progress_bar" + ), + + MagicsSetting( + description="Location to execute query.", + cell_arg="--location", + magics_context=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, + bigframes_option="location" + ), + + MagicsSetting( + description="Set execution engine, either 'pandas' or 'bigframes'.", + cell_arg="--engine", + magics_context="engine", + bigframes_option=MissingReason.ENGINE_UNIVERSAL + ) ] diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py index 237f151..1de01ff 100644 --- a/tests/unit/test_bigquery.py +++ b/tests/unit/test_bigquery.py @@ -2015,7 +2015,9 @@ def test_bigquery_magic_bigframes__bigframes_is_not_installed__should_raise_erro ip.extension_manager.load_extension("bigquery_magics") sql = "SELECT 0 AS something" - with pytest.raises(ValueError, match="Bigframes package is not installed."): + with pytest.raises( + ValueError, 
match=re.escape("Please install the 'bigframes' package") + ): ip.run_cell_magic("bigquery", "", sql) From c5a069c5642ac0f4e7c33943750975e30446c3ad Mon Sep 17 00:00:00 2001 From: Arwa Date: Fri, 17 Jan 2025 16:47:25 -0600 Subject: [PATCH 3/4] edit validations --- bigquery_magics/config.py | 210 +++++++++++++++++++------------------- 1 file changed, 107 insertions(+), 103 deletions(-) diff --git a/bigquery_magics/config.py b/bigquery_magics/config.py index 3bb7369..f11bc04 100644 --- a/bigquery_magics/config.py +++ b/bigquery_magics/config.py @@ -50,6 +50,12 @@ class MissingReason(enum.Enum): # and doesn't affect how queries are handled by the engine. ENGINE_UNIVERSAL = enum.auto() + # Similar to ENGINE_UNIVERSAL, these options do work, but there + # isn't a way to globally override them. Use this instead of ENGINE_UNIVERSAL + # for things that really do require some explicit support in the engine + # (e.g. query parameters) but don't have a way to set them for all queries. + SUPPORTED_BY_ENGINE_BUT_NO_OPTION = enum.auto() + @dataclasses.dataclass(frozen=True) class MagicsSetting: @@ -66,108 +72,106 @@ class MagicsSetting: magics_settings = [ - MagicsSetting( - description="Limits the number of rows in the returned DataFrame.", - cell_arg="--max_results", - magics_context=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, - bigframes_option="read_gbq_query.max_results" - ), - - MagicsSetting( - description="Max bytes billed to use for executing a query.", - cell_arg="--maximum_bytes_billed", - magics_context="default_query_job_config", - bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE - ), - - MagicsSetting( - description="Set it to a query to estimate costs.", - cell_arg="--dry_run", - magics_context=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, - bigframes_option="read_gbq_query" - ), - - MagicsSetting( - description="Set it to use instead of Standard SQL.", - cell_arg="--use_legacy_sql", - 
magics_context=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, - bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE - ), - - MagicsSetting( - description="BigQuery REST API endpoint.", - cell_arg="--bigquery_api_endpoint", - magics_context="bqstorage_client_options", - bigframes_option="bigquery.client_endpoints_override['bqclient']" - ), - - MagicsSetting( - description="BigQery Storage API endpoint.", - cell_arg="--bqstorage_api_endpoint", - magics_context="bqstorage_client_options", - bigframes_option="bigquery.client_endpoints_override['bqclient']" - ), - - MagicsSetting( - description="Do not use cached query results.", - cell_arg="--no_query_cache", - magics_context=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, - bigframes_option="read_gbq_query.use_cache" - ), - - MagicsSetting( - description="[Deprecated] Default is BigQuery Storage API.", - cell_arg="--use_bqstorage_api", - magics_context=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, - bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_INFEASIBLE - ), - - MagicsSetting( - description="Use the BigQuery REST API to download results instead of " - "the BigQuery Storage Read API.", - cell_arg="--use_rest_api", - magics_context=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, - bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_INFEASIBLE - ), - - MagicsSetting( - description="Print verbose output, including the query job ID and the" - "amount of time for the query to finish.", - cell_arg="--verbose", - magics_context=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, - bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE - ), - - MagicsSetting( - description= - "To format the query string. 
Should be followed by a string representation " - "of a dictionary or a reference to a dictionary in the same format by " - "including $ before the variable.", - cell_arg="--params", - magics_context="default_query_job_config", - bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE - ), - - MagicsSetting( - description="To display a progress bar while executing the query.", - cell_arg="--progress_bar_type", - magics_context="progress_bar_type", - bigframes_option="progress_bar" - ), - - MagicsSetting( - description="Location to execute query.", - cell_arg="--location", - magics_context=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, - bigframes_option="location" - ), - - MagicsSetting( - description="Set execution engine, either 'pandas' or 'bigframes'.", - cell_arg="--engine", - magics_context="engine", - bigframes_option=MissingReason.ENGINE_UNIVERSAL - ) + MagicsSetting( + description="Limits the number of rows in the returned DataFrame.", + cell_arg="--max_results", + magics_context=MissingReason.SUPPORTED_BY_ENGINE_BUT_NO_OPTION, + bigframes_option=MissingReason.SUPPORTED_BY_ENGINE_BUT_NO_OPTION, + ), + MagicsSetting( + description="Max bytes billed to use for executing a query.", + cell_arg="--maximum_bytes_billed", + magics_context="default_query_job_config", + bigframes_option="bigframes.options.compute.maximum_bytes_billed", + ), + MagicsSetting( + description="Set it to a query to estimate costs.", + cell_arg="--dry_run", + magics_context=MissingReason.SUPPORTED_BY_ENGINE_BUT_NO_OPTION, + bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, + ), + MagicsSetting( + description="Set it to use instead of Standard SQL.", + cell_arg="--use_legacy_sql", + magics_context=MissingReason.SUPPORTED_BY_ENGINE_BUT_NO_OPTION, + bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, + ), + MagicsSetting( + description="BigQuery REST API endpoint.", + cell_arg="--bigquery_api_endpoint", + 
magics_context="bqstorage_client_options", + bigframes_option="bigquery.client_endpoints_override['bqclient']", # BROKEN: cell arg override doesn't work with bigframes + ), + MagicsSetting( + description="BigQuery Storage API endpoint.", + cell_arg="--bqstorage_api_endpoint", + magics_context="bqstorage_client_options", + bigframes_option="bigquery.client_endpoints_override['bqclient']", # BROKEN: cell arg override doesn't work with bigframes + ), + MagicsSetting( + description="Do not use cached query results.", + cell_arg="--no_query_cache", + magics_context=MissingReason.SUPPORTED_BY_ENGINE_BUT_NO_OPTION, + bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, + ), + MagicsSetting( + description="[Deprecated] Default is BigQuery Storage API.", + cell_arg="--use_bqstorage_api", + magics_context=MissingReason.SUPPORTED_BY_ENGINE_BUT_NO_OPTION, + bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_INFEASIBLE, + ), + MagicsSetting( + description="Use the BigQuery REST API to download results instead of " + "the BigQuery Storage Read API.", + cell_arg="--use_rest_api", + magics_context=MissingReason.SUPPORTED_BY_ENGINE_BUT_NO_OPTION, + bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_INFEASIBLE, + ), + MagicsSetting( + description="Print verbose output, including the query job ID and the" + " amount of time for the query to finish.", + cell_arg="--verbose", + magics_context=MissingReason.SUPPORTED_BY_ENGINE_BUT_NO_OPTION, + bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, + ), + MagicsSetting( + description="To format the query string. 
Should be followed by a string representation" + " of a dictionary or a reference to a dictionary in the same format by" + " including $ before the variable.", + cell_arg="--params", + magics_context="default_query_job_config", + bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, + ), + MagicsSetting( + description="To display a progress bar while executing the query.", + cell_arg="--progress_bar_type", + magics_context="progress_bar_type", + bigframes_option="progress_bar", # BROKEN: cell arg override doesn't work with bigframes + ), + MagicsSetting( + description="Location to execute query.", + cell_arg="--location", + magics_context=MissingReason.SUPPORTED_BY_ENGINE_BUT_NO_OPTION, + bigframes_option="location", # BROKEN: cell arg override doesn't work with bigframes + ), + MagicsSetting( + description="Set the connection to use for executing the query.", + cell_arg="--connection", + magics_context="_connection", + bigframes_option="bq_connection", + ), + MagicsSetting( + description="Set execution engine, either 'pandas' or 'bigframes'.", + cell_arg="--engine", + magics_context="engine", + bigframes_option=MissingReason.ENGINE_UNIVERSAL, + ), + MagicsSetting( + description="Set the credentials to use for executing the query.", + cell_arg="--credentials", + magics_context=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, + bigframes_option="credentials", # BROKEN: cell arg override doesn't work with bigframes + ), ] @@ -206,7 +210,7 @@ class Context(object): special network connections are required. Normally you would be using the https://bigquery.googleapis.com/ end point. 
- Example: + Example: Manually setting the endpoint: >>> from google.cloud.bigquery import magics From 458eda09d93aa5153d1149adc704b8dd5a346039 Mon Sep 17 00:00:00 2001 From: Arwa Date: Tue, 21 Jan 2025 11:51:07 -0600 Subject: [PATCH 4/4] add missing validations --- bigquery_magics/config.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/bigquery_magics/config.py b/bigquery_magics/config.py index f11bc04..573327f 100644 --- a/bigquery_magics/config.py +++ b/bigquery_magics/config.py @@ -72,6 +72,25 @@ class MagicsSetting: magics_settings = [ + MagicsSetting( + description="Destination variable name.", + cell_arg="destination_var", + magics_context="default_variable", + bigframes_option=MissingReason.ENGINE_UNIVERSAL, + ), + MagicsSetting( + description="To save the output of the query to a new BigQuery table.", + cell_arg="--destination_table", + magics_context=MissingReason.SUPPORTED_BY_ENGINE_BUT_NO_OPTION, + bigframes_option=MissingReason.NOT_SUPPORTED_BY_ENGINE_BUT_POSSIBLE, + ), + MagicsSetting( + description="Project to use for executing this query. Defaults to the" + " context project.", + cell_arg="--project", + magics_context="project", + bigframes_option="bigquery.project", + ), MagicsSetting( description="Limits the number of rows in the returned DataFrame.", cell_arg="--max_results", @@ -338,7 +357,7 @@ def engine(self) -> str: If using "bigframes", the query result will be stored in a bigframes dataframe instead. Example: - Manully setting the content engine: + Manually setting the content engine: >>> from google.cloud.bigquery import magics >>> bigquery_magics.context.engine = 'bigframes'