diff --git a/bigframes/_config/__init__.py b/bigframes/_config/__init__.py index 52b47e3e9a..1302f6cc03 100644 --- a/bigframes/_config/__init__.py +++ b/bigframes/_config/__init__.py @@ -17,175 +17,24 @@ DataFrames from this package. """ -from __future__ import annotations - -import copy -from dataclasses import dataclass, field -import threading -from typing import Optional - -import bigframes_vendored.pandas._config.config as pandas_config - -import bigframes._config.bigquery_options as bigquery_options -import bigframes._config.compute_options as compute_options -import bigframes._config.display_options as display_options -import bigframes._config.experiment_options as experiment_options -import bigframes._config.sampling_options as sampling_options - - -@dataclass -class ThreadLocalConfig(threading.local): - # If unset, global settings will be used - bigquery_options: Optional[bigquery_options.BigQueryOptions] = None - # Note: use default factory instead of default instance so each thread initializes to default values - display_options: display_options.DisplayOptions = field( - default_factory=display_options.DisplayOptions - ) - sampling_options: sampling_options.SamplingOptions = field( - default_factory=sampling_options.SamplingOptions - ) - compute_options: compute_options.ComputeOptions = field( - default_factory=compute_options.ComputeOptions - ) - experiment_options: experiment_options.ExperimentOptions = field( - default_factory=experiment_options.ExperimentOptions - ) - - -class Options: - """Global options affecting BigQuery DataFrames behavior.""" - - def __init__(self): - self.reset() - - def reset(self) -> Options: - """Reset the option settings to defaults. - - Returns: - bigframes._config.Options: Options object with default values. 
- """ - self._local = ThreadLocalConfig() - - # BigQuery options are special because they can only be set once per - # session, so we need an indicator as to whether we are using the - # thread-local session or the global session. - self._bigquery_options = bigquery_options.BigQueryOptions() - return self - - def _init_bigquery_thread_local(self): - """Initialize thread-local options, based on current global options.""" - - # Already thread-local, so don't reset any options that have been set - # already. No locks needed since this only modifies thread-local - # variables. - if self._local.bigquery_options is not None: - return - - self._local.bigquery_options = copy.deepcopy(self._bigquery_options) - self._local.bigquery_options._session_started = False - - @property - def bigquery(self) -> bigquery_options.BigQueryOptions: - """Options to use with the BigQuery engine. - - Returns: - bigframes._config.bigquery_options.BigQueryOptions: - Options for BigQuery engine. - """ - if self._local.bigquery_options is not None: - # The only way we can get here is if someone called - # _init_bigquery_thread_local. - return self._local.bigquery_options - - return self._bigquery_options - - @property - def display(self) -> display_options.DisplayOptions: - """Options controlling object representation. - - Returns: - bigframes._config.display_options.DisplayOptions: - Options for controlling object representation. - """ - return self._local.display_options - - @property - def sampling(self) -> sampling_options.SamplingOptions: - """Options controlling downsampling when downloading data - to memory. - - The data can be downloaded into memory explicitly - (e.g., to_pandas, to_numpy, values) or implicitly (e.g., - matplotlib plotting). This option can be overridden by - parameters in specific functions. - - Returns: - bigframes._config.sampling_options.SamplingOptions: - Options for controlling downsampling. 
- """ - return self._local.sampling_options - - @property - def compute(self) -> compute_options.ComputeOptions: - """Thread-local options controlling object computation. - - Returns: - bigframes._config.compute_options.ComputeOptions: - Thread-local options for controlling object computation - """ - return self._local.compute_options - - @property - def experiments(self) -> experiment_options.ExperimentOptions: - """Options controlling experiments - - Returns: - bigframes._config.experiment_options.ExperimentOptions: - Thread-local options for controlling experiments - """ - return self._local.experiment_options - - @property - def is_bigquery_thread_local(self) -> bool: - """Indicator that we're using a thread-local session. - - A thread-local session can be started by using - `with bigframes.option_context("bigquery.some_option", "some-value"):`. - - Returns: - bool: - A boolean value, where a value is True if a thread-local session - is in use; otherwise False. - """ - return self._local.bigquery_options is not None - - @property - def _allow_large_results(self) -> bool: - """The effective 'allow_large_results' setting. - - This value is `self.compute.allow_large_results` if set (not `None`), - otherwise it defaults to `self.bigquery.allow_large_results`. - - Returns: - bool: - Whether large query results are permitted. - - `True`: The BigQuery result size limit (e.g., 10 GB) is removed. - - `False`: Results are restricted to this limit (potentially faster). - BigQuery will raise an error if this limit is exceeded. 
- """ - if self.compute.allow_large_results is None: - return self.bigquery.allow_large_results - return self.compute.allow_large_results - - -options = Options() -"""Global options for default session.""" - -option_context = pandas_config.option_context +from bigframes._config.bigquery_options import BigQueryOptions +from bigframes._config.compute_options import ComputeOptions +from bigframes._config.display_options import DisplayOptions +from bigframes._config.experiment_options import ExperimentOptions +from bigframes._config.global_options import option_context, Options +import bigframes._config.global_options as global_options +from bigframes._config.sampling_options import SamplingOptions +options = global_options.options +"""Global options for the default session.""" __all__ = ( "Options", "options", "option_context", + "BigQueryOptions", + "ComputeOptions", + "DisplayOptions", + "ExperimentOptions", + "SamplingOptions", ) diff --git a/bigframes/_config/compute_options.py b/bigframes/_config/compute_options.py index 97cd6e99af..7810ee897f 100644 --- a/bigframes/_config/compute_options.py +++ b/bigframes/_config/compute_options.py @@ -29,7 +29,7 @@ class ComputeOptions: >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") >>> bpd.options.compute.maximum_bytes_billed = 500 - >>> # df.to_pandas() # this should fail + >>> df.to_pandas() # this should fail # doctest: +SKIP google.api_core.exceptions.InternalServerError: 500 Query exceeded limit for bytes billed: 500. 10485760 or higher required. >>> bpd.options.compute.maximum_bytes_billed = None # reset option @@ -53,68 +53,112 @@ class ComputeOptions: >>> del bpd.options.compute.extra_query_labels["test1"] >>> bpd.options.compute.extra_query_labels {'test2': 'abc', 'test3': False} - - Attributes: - ai_ops_confirmation_threshold (int | None): - Guards against unexpected processing of large amount of rows by semantic operators. 
- If the number of rows exceeds the threshold, the user will be asked to confirm - their operations to resume. The default value is 0. Set the value to None - to turn off the guard. - - ai_ops_threshold_autofail (bool): - Guards against unexpected processing of large amount of rows by semantic operators. - When set to True, the operation automatically fails without asking for user inputs. - - allow_large_results (bool | None): - Specifies whether query results can exceed 10 GB. Defaults to False. Setting this - to False (the default) restricts results to 10 GB for potentially faster execution; - BigQuery will raise an error if this limit is exceeded. Setting to True removes - this result size limit. - - enable_multi_query_execution (bool | None): - If enabled, large queries may be factored into multiple smaller queries - in order to avoid generating queries that are too complex for the query - engine to handle. However this comes at the cost of increase cost and latency. - - extra_query_labels (Dict[str, Any] | None): - Stores additional custom labels for query configuration. - - maximum_bytes_billed (int | None): - Limits the bytes billed for query jobs. Queries that will have - bytes billed beyond this limit will fail (without incurring a - charge). If unspecified, this will be set to your project default. - See `maximum_bytes_billed`: https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJobConfig#google_cloud_bigquery_job_QueryJobConfig_maximum_bytes_billed. - - maximum_result_rows (int | None): - Limits the number of rows in an execution result. When converting - a BigQuery DataFrames object to a pandas DataFrame or Series (e.g., - using ``.to_pandas()``, ``.peek()``, ``.__repr__()``, direct - iteration), the data is downloaded from BigQuery to the client - machine. This option restricts the number of rows that can be - downloaded. 
If the number of rows to be downloaded exceeds this - limit, a ``bigframes.exceptions.MaximumResultRowsExceeded`` - exception is raised. - - semantic_ops_confirmation_threshold (int | None): - .. deprecated:: 1.42.0 - Semantic operators are deprecated. Please use AI operators instead - - semantic_ops_threshold_autofail (bool): - .. deprecated:: 1.42.0 - Semantic operators are deprecated. Please use AI operators instead """ ai_ops_confirmation_threshold: Optional[int] = 0 + """ + Guards against unexpected processing of large amount of rows by semantic operators. + + If the number of rows exceeds the threshold, the user will be asked to confirm + their operations to resume. The default value is 0. Set the value to None + to turn off the guard. + + Returns: + Optional[int]: Number of rows. + """ + ai_ops_threshold_autofail: bool = False + """ + Guards against unexpected processing of large amount of rows by semantic operators. + + When set to True, the operation automatically fails without asking for user inputs. + + Returns: + bool: True if the guard is enabled. + """ + allow_large_results: Optional[bool] = None + """ + Specifies whether query results can exceed 10 GB. + + Defaults to False. Setting this to False (the default) restricts results to + 10 GB for potentially faster execution; BigQuery will raise an error if this + limit is exceeded. Setting to True removes this result size limit. + + + Returns: + bool | None: True if results > 10 GB are enabled. + """ enable_multi_query_execution: bool = False + """ + If enabled, large queries may be factored into multiple smaller queries. + + This is in order to avoid generating queries that are too complex for the + query engine to handle. However this comes at the cost of increase cost and + latency. + + + Returns: + bool | None: True if enabled. + """ + extra_query_labels: Dict[str, Any] = dataclasses.field( default_factory=dict, init=False ) + """ + Stores additional custom labels for query configuration. 
+ + Returns: + Dict[str, Any] | None: Additional labels. + """ + maximum_bytes_billed: Optional[int] = None + """ + Limits the bytes billed for query jobs. + + Queries that will have bytes billed beyond this limit will fail (without + incurring a charge). If unspecified, this will be set to your project + default. See `maximum_bytes_billed`: + https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJobConfig#google_cloud_bigquery_job_QueryJobConfig_maximum_bytes_billed. + + Returns: + int | None: Number of bytes, if set. + """ + maximum_result_rows: Optional[int] = None + """ + Limits the number of rows in an execution result. + + When converting a BigQuery DataFrames object to a pandas DataFrame or Series + (e.g., using ``.to_pandas()``, ``.peek()``, ``.__repr__()``, direct + iteration), the data is downloaded from BigQuery to the client machine. This + option restricts the number of rows that can be downloaded. If the number + of rows to be downloaded exceeds this limit, a + ``bigframes.exceptions.MaximumResultRowsExceeded`` exception is raised. + + Returns: + int | None: Number of rows, if set. + """ + semantic_ops_confirmation_threshold: Optional[int] = 0 + """ + Deprecated. + + .. deprecated:: 1.42.0 + Semantic operators are deprecated. Please use the functions in + :mod:`bigframes.bigquery.ai` instead. + + """ + semantic_ops_threshold_autofail = False + """ + Deprecated. + + .. deprecated:: 1.42.0 + Semantic operators are deprecated. Please use the functions in + :mod:`bigframes.bigquery.ai` instead. 
+ + """ def assign_extra_query_labels(self, **kwargs: Any) -> None: """ diff --git a/bigframes/_config/display_options.py b/bigframes/_config/display_options.py index b7ce29e47e..34c5c77d57 100644 --- a/bigframes/_config/display_options.py +++ b/bigframes/_config/display_options.py @@ -15,38 +15,15 @@ """Options for displaying objects.""" import contextlib -import dataclasses -from typing import Literal, Optional import bigframes_vendored.pandas.core.config_init as vendored_pandas_config import pandas as pd - -@dataclasses.dataclass -class DisplayOptions: - __doc__ = vendored_pandas_config.display_options_doc - - # Options borrowed from pandas. - max_columns: int = 20 - max_rows: int = 10 - precision: int = 6 - - # Options unique to BigQuery DataFrames. - progress_bar: Optional[str] = "auto" - repr_mode: Literal["head", "deferred", "anywidget"] = "head" - - max_colwidth: Optional[int] = 50 - max_info_columns: int = 100 - max_info_rows: Optional[int] = 200000 - memory_usage: bool = True - - blob_display: bool = True - blob_display_width: Optional[int] = None - blob_display_height: Optional[int] = None +DisplayOptions = vendored_pandas_config.DisplayOptions @contextlib.contextmanager -def pandas_repr(display_options: DisplayOptions): +def pandas_repr(display_options: vendored_pandas_config.DisplayOptions): """Use this when visualizing with pandas. This context manager makes sure we reset the pandas options when we're done diff --git a/bigframes/_config/global_options.py b/bigframes/_config/global_options.py new file mode 100644 index 0000000000..4a3da6d380 --- /dev/null +++ b/bigframes/_config/global_options.py @@ -0,0 +1,186 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Configuration for BigQuery DataFrames. Do not depend on other parts of BigQuery +DataFrames from this package. +""" + +from __future__ import annotations + +import copy +from dataclasses import dataclass, field +import threading +from typing import Optional + +import bigframes_vendored.pandas._config.config as pandas_config + +import bigframes._config.bigquery_options as bigquery_options +import bigframes._config.compute_options as compute_options +import bigframes._config.display_options as display_options +import bigframes._config.experiment_options as experiment_options +import bigframes._config.sampling_options as sampling_options + + +@dataclass +class ThreadLocalConfig(threading.local): + # If unset, global settings will be used + bigquery_options: Optional[bigquery_options.BigQueryOptions] = None + # Note: use default factory instead of default instance so each thread initializes to default values + display_options: display_options.DisplayOptions = field( + default_factory=display_options.DisplayOptions + ) + sampling_options: sampling_options.SamplingOptions = field( + default_factory=sampling_options.SamplingOptions + ) + compute_options: compute_options.ComputeOptions = field( + default_factory=compute_options.ComputeOptions + ) + experiment_options: experiment_options.ExperimentOptions = field( + default_factory=experiment_options.ExperimentOptions + ) + + +class Options: + """Global options affecting BigQuery DataFrames behavior. + + Do not construct directly. Instead, refer to + :attr:`bigframes.pandas.options`. 
+ """ + + def __init__(self): + self.reset() + + def reset(self) -> Options: + """Reset the option settings to defaults. + + Returns: + bigframes._config.Options: Options object with default values. + """ + self._local = ThreadLocalConfig() + + # BigQuery options are special because they can only be set once per + # session, so we need an indicator as to whether we are using the + # thread-local session or the global session. + self._bigquery_options = bigquery_options.BigQueryOptions() + return self + + def _init_bigquery_thread_local(self): + """Initialize thread-local options, based on current global options.""" + + # Already thread-local, so don't reset any options that have been set + # already. No locks needed since this only modifies thread-local + # variables. + if self._local.bigquery_options is not None: + return + + self._local.bigquery_options = copy.deepcopy(self._bigquery_options) + self._local.bigquery_options._session_started = False + + @property + def bigquery(self) -> bigquery_options.BigQueryOptions: + """Options to use with the BigQuery engine. + + Returns: + bigframes._config.bigquery_options.BigQueryOptions: + Options for BigQuery engine. + """ + if self._local.bigquery_options is not None: + # The only way we can get here is if someone called + # _init_bigquery_thread_local. + return self._local.bigquery_options + + return self._bigquery_options + + @property + def display(self) -> display_options.DisplayOptions: + """Options controlling object representation. + + Returns: + bigframes._config.display_options.DisplayOptions: + Options for controlling object representation. + """ + return self._local.display_options + + @property + def sampling(self) -> sampling_options.SamplingOptions: + """Options controlling downsampling when downloading data + to memory. + + The data can be downloaded into memory explicitly + (e.g., to_pandas, to_numpy, values) or implicitly (e.g., + matplotlib plotting). 
This option can be overridden by + parameters in specific functions. + + Returns: + bigframes._config.sampling_options.SamplingOptions: + Options for controlling downsampling. + """ + return self._local.sampling_options + + @property + def compute(self) -> compute_options.ComputeOptions: + """Thread-local options controlling object computation. + + Returns: + bigframes._config.compute_options.ComputeOptions: + Thread-local options for controlling object computation + """ + return self._local.compute_options + + @property + def experiments(self) -> experiment_options.ExperimentOptions: + """Options controlling experiments + + Returns: + bigframes._config.experiment_options.ExperimentOptions: + Thread-local options for controlling experiments + """ + return self._local.experiment_options + + @property + def is_bigquery_thread_local(self) -> bool: + """Indicator that we're using a thread-local session. + + A thread-local session can be started by using + `with bigframes.option_context("bigquery.some_option", "some-value"):`. + + Returns: + bool: + A boolean value, where a value is True if a thread-local session + is in use; otherwise False. + """ + return self._local.bigquery_options is not None + + @property + def _allow_large_results(self) -> bool: + """The effective 'allow_large_results' setting. + + This value is `self.compute.allow_large_results` if set (not `None`), + otherwise it defaults to `self.bigquery.allow_large_results`. + + Returns: + bool: + Whether large query results are permitted. + - `True`: The BigQuery result size limit (e.g., 10 GB) is removed. + - `False`: Results are restricted to this limit (potentially faster). + BigQuery will raise an error if this limit is exceeded. 
+ """ + if self.compute.allow_large_results is None: + return self.bigquery.allow_large_results + return self.compute.allow_large_results + + +options = Options() +option_context = pandas_config.option_context diff --git a/bigframes/_config/sampling_options.py b/bigframes/_config/sampling_options.py index ddb2a49713..107142c3ba 100644 --- a/bigframes/_config/sampling_options.py +++ b/bigframes/_config/sampling_options.py @@ -19,18 +19,46 @@ import dataclasses from typing import Literal, Optional -import bigframes_vendored.pandas.core.config_init as vendored_pandas_config - @dataclasses.dataclass class SamplingOptions: - __doc__ = vendored_pandas_config.sampling_options_doc + """ + Encapsulates the configuration for data sampling. + """ max_download_size: Optional[int] = 500 - # Enable downsampling + """ + Download size threshold in MB. Default 500. + + If value set to None, the download size won't be checked. + """ + enable_downsampling: bool = False + """ + Whether to enable downsampling. Default False. + + If max_download_size is exceeded when downloading data (e.g., to_pandas()), + the data will be downsampled if enable_downsampling is True, otherwise, an + error will be raised. + """ + sampling_method: Literal["head", "uniform"] = "uniform" + """ + Downsampling algorithms to be chosen from. Default "uniform". + + The choices are: "head": This algorithm returns a portion of the data from + the beginning. It is fast and requires minimal computations to perform the + downsampling.; "uniform": This algorithm returns uniform random samples of + the data. + """ + random_state: Optional[int] = None + """ + The seed for the uniform downsampling algorithm. Default None. + + If provided, the uniform method may take longer to execute and require more + computation. 
+ """ def with_max_download_size(self, max_rows: Optional[int]) -> SamplingOptions: """Configures the maximum download size for data sampling in MB diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index 0650953fc7..2edd3d71e9 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -18,7 +18,7 @@ import sys -from bigframes.bigquery._operations import ai +from bigframes.bigquery import ai from bigframes.bigquery._operations.approx_agg import approx_top_count from bigframes.bigquery._operations.array import ( array_agg, @@ -105,9 +105,54 @@ struct, ] -__all__ = [f.__name__ for f in _functions] + ["ai"] - _module = sys.modules[__name__] for f in _functions: _decorated_object = log_adapter.method_logger(f, custom_base_name="bigquery") setattr(_module, f.__name__, _decorated_object) + del f + +__all__ = [ + # approximate aggregate ops + "approx_top_count", + # array ops + "array_agg", + "array_length", + "array_to_string", + # datetime ops + "unix_micros", + "unix_millis", + "unix_seconds", + # geo ops + "st_area", + "st_buffer", + "st_centroid", + "st_convexhull", + "st_difference", + "st_distance", + "st_intersection", + "st_isclosed", + "st_length", + "st_regionstats", + "st_simplify", + # json ops + "json_extract", + "json_extract_array", + "json_extract_string_array", + "json_query", + "json_query_array", + "json_set", + "json_value", + "json_value_array", + "parse_json", + "to_json", + "to_json_string", + # search ops + "create_vector_index", + "vector_search", + # sql ops + "sql_scalar", + # struct ops + "struct", + # Modules / SQL namespaces + "ai", +] diff --git a/bigframes/bigquery/ai.py b/bigframes/bigquery/ai.py new file mode 100644 index 0000000000..3af52205a6 --- /dev/null +++ b/bigframes/bigquery/ai.py @@ -0,0 +1,39 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module integrates BigQuery built-in AI functions for use with Series/DataFrame objects, +such as AI.GENERATE_BOOL: +https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-ai-generate-bool""" + +from bigframes.bigquery._operations.ai import ( + classify, + forecast, + generate, + generate_bool, + generate_double, + generate_int, + if_, + score, +) + +__all__ = [ + "classify", + "forecast", + "generate", + "generate_bool", + "generate_double", + "generate_int", + "if_", + "score", +] diff --git a/bigframes/enums.py b/bigframes/enums.py index fd7b5545bb..aa5e1c830f 100644 --- a/bigframes/enums.py +++ b/bigframes/enums.py @@ -21,7 +21,7 @@ class OrderingMode(enum.Enum): - """[Preview] Values used to determine the ordering mode. + """Values used to determine the ordering mode. Default is 'strict'. """ @@ -37,5 +37,6 @@ class DefaultIndexKind(enum.Enum): #: ``n - 3``, ``n - 2``, ``n - 1``, where ``n`` is the number of items in #: the index. SEQUENTIAL_INT64 = enum.auto() + # A completely null index incapable of indexing or alignment. NULL = enum.auto() diff --git a/bigframes/exceptions.py b/bigframes/exceptions.py index 1fb86d7bd6..9facb40e8e 100644 --- a/bigframes/exceptions.py +++ b/bigframes/exceptions.py @@ -127,7 +127,9 @@ class FunctionPackageVersionWarning(PreviewWarning): def format_message(message: str, fill: bool = True): - """Formats a warning message with ANSI color codes for the warning color. + """[Private] Formats a warning message. + + :meta private: Args: message: The warning message string. 
diff --git a/bigframes/ml/base.py b/bigframes/ml/base.py index c36457d0b5..fe468cb28f 100644 --- a/bigframes/ml/base.py +++ b/bigframes/ml/base.py @@ -15,10 +15,12 @@ """ Wraps primitives for machine learning with BQML -This library is an evolving attempt to -- implement BigQuery DataFrames API for BQML -- follow as close as possible the API design of SKLearn +This library is an evolving attempt to: + +* implement BigQuery DataFrames API for BQML +* follow as close as possible the API design of SKLearn https://arxiv.org/pdf/1309.0238.pdf + """ import abc @@ -46,12 +48,16 @@ class BaseEstimator(bigframes_vendored.sklearn.base.BaseEstimator, abc.ABC): assumed to be the list of hyperparameters. All descendents of this class should implement: + + .. code-block:: python + def __init__(self, hyperparameter_1=default_1, hyperparameter_2=default_2, hyperparameter3, ...): '''Set hyperparameters''' self.hyperparameter_1 = hyperparameter_1 self.hyperparameter_2 = hyperparameter_2 self.hyperparameter3 = hyperparameter3 ... + Note: the object variable names must be exactly the same with parameter names. In order to utilize __repr__. fit(X, y) method is optional. 
diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 7b633f6dc8..e4d82b8884 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -306,11 +306,22 @@ def clean_up_by_session_id( # pandas dtype attributes NA = pandas.NA +"""Alias for :class:`pandas.NA`.""" + BooleanDtype = pandas.BooleanDtype +"""Alias for :class:`pandas.BooleanDtype`.""" + Float64Dtype = pandas.Float64Dtype +"""Alias for :class:`pandas.Float64Dtype`.""" + Int64Dtype = pandas.Int64Dtype +"""Alias for :class:`pandas.Int64Dtype`.""" + StringDtype = pandas.StringDtype +"""Alias for :class:`pandas.StringDtype`.""" + ArrowDtype = pandas.ArrowDtype +"""Alias for :class:`pandas.ArrowDtype`.""" # Class aliases # TODO(swast): Make these real classes so we can refer to these in type diff --git a/bigframes/streaming/__init__.py b/bigframes/streaming/__init__.py index d439d622a2..477c7a99e0 100644 --- a/bigframes/streaming/__init__.py +++ b/bigframes/streaming/__init__.py @@ -12,8 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from __future__ import annotations + import inspect +import sys +from bigframes.core import log_adapter import bigframes.core.global_session as global_session from bigframes.pandas.io.api import _set_default_session_location_if_possible import bigframes.session @@ -32,3 +36,12 @@ def read_gbq_table(table: str) -> streaming_dataframe.StreamingDataFrame: ) StreamingDataFrame = streaming_dataframe.StreamingDataFrame + +_module = sys.modules[__name__] +_functions = [read_gbq_table] + +for _function in _functions: + _decorated_object = log_adapter.method_logger(_function, custom_base_name="pandas") + setattr(_module, _function.__name__, _decorated_object) + +__all__ = ["read_gbq_table", "StreamingDataFrame"] diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst new file mode 120000 index 0000000000..bd84850996 --- /dev/null +++ b/docs/_templates/autosummary/class.rst @@ -0,0 +1 @@ +../../../third_party/sphinx/ext/autosummary/templates/autosummary/class.rst \ No newline at end of file diff --git a/docs/_templates/autosummary/module.rst b/docs/_templates/autosummary/module.rst new file mode 120000 index 0000000000..f330261ac5 --- /dev/null +++ b/docs/_templates/autosummary/module.rst @@ -0,0 +1 @@ +../../../third_party/sphinx/ext/autosummary/templates/autosummary/module.rst \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 9d9e9ebd79..2fc97bc1d0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -65,7 +65,8 @@ autoclass_content = "both" autodoc_default_options = {"members": True} autosummary_generate = True - +autosummary_imported_members = True +autosummary_ignore_module_all = True # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] @@ -369,7 +370,7 @@ # Example configuration for intersphinx: refer to the Python standard library. 
intersphinx_mapping = { - "python": ("https://python.readthedocs.org/en/latest/", None), + "python": ("https://docs.python.org/3/", None), "google-auth": ("https://googleapis.dev/python/google-auth/latest/", None), "google.api_core": ( "https://googleapis.dev/python/google-api-core/latest/", diff --git a/docs/reference/.gitignore b/docs/reference/.gitignore new file mode 100644 index 0000000000..3f12795483 --- /dev/null +++ b/docs/reference/.gitignore @@ -0,0 +1 @@ +api/* diff --git a/docs/reference/bigframes.bigquery/ai.rst b/docs/reference/bigframes.bigquery/ai.rst deleted file mode 100644 index 2134125d6f..0000000000 --- a/docs/reference/bigframes.bigquery/ai.rst +++ /dev/null @@ -1,7 +0,0 @@ -bigframes.bigquery.ai -============================= - -.. automodule:: bigframes.bigquery._operations.ai - :members: - :inherited-members: - :undoc-members: \ No newline at end of file diff --git a/docs/reference/bigframes.bigquery/index.rst b/docs/reference/bigframes.bigquery/index.rst deleted file mode 100644 index f9d34f379d..0000000000 --- a/docs/reference/bigframes.bigquery/index.rst +++ /dev/null @@ -1,13 +0,0 @@ - -=========================== -BigQuery Built-in Functions -=========================== - -.. automodule:: bigframes.bigquery - :members: - :undoc-members: - -.. toctree:: - :maxdepth: 2 - - ai diff --git a/docs/reference/bigframes.geopandas/geoseries.rst b/docs/reference/bigframes.geopandas/geoseries.rst deleted file mode 100644 index 481eb73b9d..0000000000 --- a/docs/reference/bigframes.geopandas/geoseries.rst +++ /dev/null @@ -1,17 +0,0 @@ - -========= -GeoSeries -========= - -.. contents:: Table of Contents - :depth: 2 - :local: - :backlinks: none - -GeoSeries ---------- - -.. 
autoclass:: bigframes.geopandas.GeoSeries - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.geopandas/index.rst b/docs/reference/bigframes.geopandas/index.rst deleted file mode 100644 index e33946461c..0000000000 --- a/docs/reference/bigframes.geopandas/index.rst +++ /dev/null @@ -1,9 +0,0 @@ - -=============================== -BigQuery DataFrames (geopandas) -=============================== - -.. toctree:: - :maxdepth: 2 - - geoseries diff --git a/docs/reference/bigframes.ml/README.rst b/docs/reference/bigframes.ml/README.rst deleted file mode 100644 index 80a1fe97b7..0000000000 --- a/docs/reference/bigframes.ml/README.rst +++ /dev/null @@ -1,125 +0,0 @@ -BigQuery DataFrames ML -====================== - -As BigQuery DataFrames implements the Pandas API over top of BigQuery, BigQuery -DataFrame ML implements the SKLearn API over top of BigQuery Machine Learning. - -Tutorial --------- - -Start a session and initialize a dataframe for a BigQuery table - -.. code-block:: python - - import bigframes.pandas - - df = bigframes.pandas.read_gbq("bigquery-public-data.ml_datasets.penguins") - df - -Clean and prepare the data - -.. code-block:: python - - # filter down to the data we want to analyze - adelie_data = df[df.species == "Adelie Penguin (Pygoscelis adeliae)"] - - # drop the columns we don't care about - adelie_data = adelie_data.drop(columns=["species"]) - - # drop rows with nulls to get our training data - training_data = adelie_data.dropna() - - # take a peek at the training data - training_data - -.. code-block:: python - - # pick feature columns and label column - X = training_data[['island', 'culmen_length_mm', 'culmen_depth_mm', 'flipper_length_mm', 'sex']] - y = training_data[['body_mass_g']] - -Use train_test_split to create train and test datasets - -.. 
code-block:: python - - from bigframes.ml.model_selection import train_test_split - - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2) - -Define the model training pipeline - -.. code-block:: python - - from bigframes.ml.linear_model import LinearRegression - from bigframes.ml.pipeline import Pipeline - from bigframes.ml.compose import ColumnTransformer - from bigframes.ml.preprocessing import StandardScaler, OneHotEncoder - - preprocessing = ColumnTransformer([ - ("onehot", OneHotEncoder(), ["island", "species", "sex"]), - ("scaler", StandardScaler(), ["culmen_depth_mm", "culmen_length_mm", "flipper_length_mm"]), - ]) - - model = LinearRegression(fit_intercept=False) - - pipeline = Pipeline([ - ('preproc', preprocessing), - ('linreg', model) - ]) - - # view the pipeline - pipeline - -Train the pipeline - -.. code-block:: python - - pipeline.fit(X_train, y_train) - -Evaluate the model's performance on the test data - -.. code-block:: python - - from bigframes.ml.metrics import r2_score - - y_pred = pipeline.predict(X_test) - - r2_score(y_test, y_pred) - -Make predictions on new data - -.. code-block:: python - - import pandas - - new_penguins = bigframes.pandas.read_pandas( - pandas.DataFrame( - { - "tag_number": [1633, 1672, 1690], - "species": [ - "Adelie Penguin (Pygoscelis adeliae)", - "Adelie Penguin (Pygoscelis adeliae)", - "Adelie Penguin (Pygoscelis adeliae)", - ], - "island": ["Torgersen", "Torgersen", "Dream"], - "culmen_length_mm": [39.5, 38.5, 37.9], - "culmen_depth_mm": [18.8, 17.2, 18.1], - "flipper_length_mm": [196.0, 181.0, 188.0], - "sex": ["MALE", "FEMALE", "FEMALE"], - } - ).set_index("tag_number") - ) - - # view the new data - new_penguins - -.. code-block:: python - - pipeline.predict(new_penguins) - -Save the trained model to BigQuery, so we can load it later - -.. 
code-block:: python - - pipeline.to_gbq("bqml_tutorial.penguins_model", replace=True) diff --git a/docs/reference/bigframes.ml/cluster.rst b/docs/reference/bigframes.ml/cluster.rst deleted file mode 100644 index e91a28c051..0000000000 --- a/docs/reference/bigframes.ml/cluster.rst +++ /dev/null @@ -1,7 +0,0 @@ -bigframes.ml.cluster -==================== - -.. automodule:: bigframes.ml.cluster - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.ml/compose.rst b/docs/reference/bigframes.ml/compose.rst deleted file mode 100644 index 9992728362..0000000000 --- a/docs/reference/bigframes.ml/compose.rst +++ /dev/null @@ -1,7 +0,0 @@ -bigframes.ml.compose -==================== - -.. automodule:: bigframes.ml.compose - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.ml/decomposition.rst b/docs/reference/bigframes.ml/decomposition.rst deleted file mode 100644 index ec804ac8cd..0000000000 --- a/docs/reference/bigframes.ml/decomposition.rst +++ /dev/null @@ -1,7 +0,0 @@ -bigframes.ml.decomposition -========================== - -.. automodule:: bigframes.ml.decomposition - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.ml/ensemble.rst b/docs/reference/bigframes.ml/ensemble.rst deleted file mode 100644 index 2652ab5aa4..0000000000 --- a/docs/reference/bigframes.ml/ensemble.rst +++ /dev/null @@ -1,7 +0,0 @@ -bigframes.ml.ensemble -===================== - -.. automodule:: bigframes.ml.ensemble - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.ml/forecasting.rst b/docs/reference/bigframes.ml/forecasting.rst deleted file mode 100644 index 04015c9911..0000000000 --- a/docs/reference/bigframes.ml/forecasting.rst +++ /dev/null @@ -1,7 +0,0 @@ -bigframes.ml.forecasting -======================== - -.. 
automodule:: bigframes.ml.forecasting - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.ml/imported.rst b/docs/reference/bigframes.ml/imported.rst deleted file mode 100644 index c151cbda6f..0000000000 --- a/docs/reference/bigframes.ml/imported.rst +++ /dev/null @@ -1,7 +0,0 @@ -bigframes.ml.imported -===================== - -.. automodule:: bigframes.ml.imported - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.ml/impute.rst b/docs/reference/bigframes.ml/impute.rst deleted file mode 100644 index 3796e287ef..0000000000 --- a/docs/reference/bigframes.ml/impute.rst +++ /dev/null @@ -1,7 +0,0 @@ -bigframes.ml.impute -========================== - -.. automodule:: bigframes.ml.impute - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.ml/index.rst b/docs/reference/bigframes.ml/index.rst deleted file mode 100644 index c14efaede6..0000000000 --- a/docs/reference/bigframes.ml/index.rst +++ /dev/null @@ -1,38 +0,0 @@ -.. _bigframes_ml: -.. include:: README.rst - -API Reference -------------- - -.. toctree:: - :maxdepth: 3 - - cluster - - compose - - decomposition - - ensemble - - forecasting - - imported - - impute - - linear_model - - llm - - metrics - - metrics.pairwise - - model_selection - - pipeline - - preprocessing - - remote diff --git a/docs/reference/bigframes.ml/linear_model.rst b/docs/reference/bigframes.ml/linear_model.rst deleted file mode 100644 index 8c6c2765b1..0000000000 --- a/docs/reference/bigframes.ml/linear_model.rst +++ /dev/null @@ -1,7 +0,0 @@ -bigframes.ml.linear_model -========================= - -.. 
automodule:: bigframes.ml.linear_model - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.ml/llm.rst b/docs/reference/bigframes.ml/llm.rst deleted file mode 100644 index 20ae7793e7..0000000000 --- a/docs/reference/bigframes.ml/llm.rst +++ /dev/null @@ -1,7 +0,0 @@ -bigframes.ml.llm -================ - -.. automodule:: bigframes.ml.llm - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.ml/metrics.pairwise.rst b/docs/reference/bigframes.ml/metrics.pairwise.rst deleted file mode 100644 index c20772ef07..0000000000 --- a/docs/reference/bigframes.ml/metrics.pairwise.rst +++ /dev/null @@ -1,7 +0,0 @@ -bigframes.ml.metrics.pairwise -============================= - -.. automodule:: bigframes.ml.metrics.pairwise - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.ml/metrics.rst b/docs/reference/bigframes.ml/metrics.rst deleted file mode 100644 index aca11f7e9f..0000000000 --- a/docs/reference/bigframes.ml/metrics.rst +++ /dev/null @@ -1,7 +0,0 @@ -bigframes.ml.metrics -==================== - -.. automodule:: bigframes.ml.metrics - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.ml/model_selection.rst b/docs/reference/bigframes.ml/model_selection.rst deleted file mode 100644 index d662285f99..0000000000 --- a/docs/reference/bigframes.ml/model_selection.rst +++ /dev/null @@ -1,7 +0,0 @@ -bigframes.ml.model_selection -============================ - -.. automodule:: bigframes.ml.model_selection - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.ml/pipeline.rst b/docs/reference/bigframes.ml/pipeline.rst deleted file mode 100644 index 22e877dc5b..0000000000 --- a/docs/reference/bigframes.ml/pipeline.rst +++ /dev/null @@ -1,7 +0,0 @@ -bigframes.ml.pipeline -===================== - -.. 
automodule:: bigframes.ml.pipeline - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.ml/preprocessing.rst b/docs/reference/bigframes.ml/preprocessing.rst deleted file mode 100644 index eac72da173..0000000000 --- a/docs/reference/bigframes.ml/preprocessing.rst +++ /dev/null @@ -1,7 +0,0 @@ -bigframes.ml.preprocessing -========================== - -.. automodule:: bigframes.ml.preprocessing - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.ml/remote.rst b/docs/reference/bigframes.ml/remote.rst deleted file mode 100644 index 7827acfe92..0000000000 --- a/docs/reference/bigframes.ml/remote.rst +++ /dev/null @@ -1,7 +0,0 @@ -bigframes.ml.remote -=================== - -.. automodule:: bigframes.ml.remote - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.pandas/frame.rst b/docs/reference/bigframes.pandas/frame.rst deleted file mode 100644 index ea4c6dec1c..0000000000 --- a/docs/reference/bigframes.pandas/frame.rst +++ /dev/null @@ -1,44 +0,0 @@ - -========= -DataFrame -========= - -.. contents:: Table of Contents - :depth: 2 - :local: - :backlinks: none - -DataFrame ---------- - -.. autoclass:: bigframes.dataframe.DataFrame - :members: - :inherited-members: - :undoc-members: - -Accessors ---------- - -Plotting handling -^^^^^^^^^^^^^^^^^ - -.. autoclass:: bigframes.operations.plotting.PlotAccessor - :members: - :inherited-members: - :undoc-members: - -Struct handling -^^^^^^^^^^^^^^^ - -.. autoclass:: bigframes.operations.structs.StructFrameAccessor - :members: - :inherited-members: - :undoc-members: - -AI operators -^^^^^^^^^^^^ - -.. 
autoclass:: bigframes.operations.ai.AIAccessor - :members: - :inherited-members: - :undoc-members: \ No newline at end of file diff --git a/docs/reference/bigframes.pandas/general_functions.rst b/docs/reference/bigframes.pandas/general_functions.rst deleted file mode 100644 index fff1a9ef59..0000000000 --- a/docs/reference/bigframes.pandas/general_functions.rst +++ /dev/null @@ -1,9 +0,0 @@ - -================= -General functions -================= - -.. automodule:: bigframes.pandas - :members: - :undoc-members: - :noindex: diff --git a/docs/reference/bigframes.pandas/groupby.rst b/docs/reference/bigframes.pandas/groupby.rst deleted file mode 100644 index 483340f348..0000000000 --- a/docs/reference/bigframes.pandas/groupby.rst +++ /dev/null @@ -1,20 +0,0 @@ - -======= -GroupBy -======= - -DataFrameGroupBy ----------------- - -.. autoclass:: bigframes.core.groupby.DataFrameGroupBy - :members: - :inherited-members: - :undoc-members: - -SeriesGroupBy -------------- - -.. autoclass:: bigframes.core.groupby.SeriesGroupBy - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.pandas/index.rst b/docs/reference/bigframes.pandas/index.rst deleted file mode 100644 index 3492f236ee..0000000000 --- a/docs/reference/bigframes.pandas/index.rst +++ /dev/null @@ -1,16 +0,0 @@ - -============================ -BigQuery DataFrames (pandas) -============================ - -.. toctree:: - :maxdepth: 2 - - general_functions - series - frame - indexers - indexing - window - groupby - options diff --git a/docs/reference/bigframes.pandas/indexers.rst b/docs/reference/bigframes.pandas/indexers.rst deleted file mode 100644 index 602b6de837..0000000000 --- a/docs/reference/bigframes.pandas/indexers.rst +++ /dev/null @@ -1,60 +0,0 @@ - -========= -Indexers -========= - -AtDataFrameIndexer --------------------- -.. 
autoclass:: bigframes.core.indexers.AtDataFrameIndexer - :members: - :inherited-members: - :undoc-members: - -AtSeriesIndexer --------------------- -.. autoclass:: bigframes.core.indexers.AtSeriesIndexer - :members: - :inherited-members: - :undoc-members: - -IatDataFrameIndexer --------------------- -.. autoclass:: bigframes.core.indexers.IatDataFrameIndexer - :members: - :inherited-members: - :undoc-members: - -IatSeriesIndexer --------------------- -.. autoclass:: bigframes.core.indexers.IatSeriesIndexer - :members: - :inherited-members: - :undoc-members: - -ILocDataFrameIndexer --------------------- -.. autoclass:: bigframes.core.indexers.ILocDataFrameIndexer - :members: - :inherited-members: - :undoc-members: - -IlocSeriesIndexer ------------------ -.. autoclass:: bigframes.core.indexers.IlocSeriesIndexer - :members: - :inherited-members: - :undoc-members: - -LocDataFrameIndexer -------------------- -.. autoclass:: bigframes.core.indexers.LocDataFrameIndexer - :members: - :inherited-members: - :undoc-members: - -LocSeriesIndexer ----------------- -.. autoclass:: bigframes.core.indexers.LocSeriesIndexer - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.pandas/indexing.rst b/docs/reference/bigframes.pandas/indexing.rst deleted file mode 100644 index e25e8652ec..0000000000 --- a/docs/reference/bigframes.pandas/indexing.rst +++ /dev/null @@ -1,21 +0,0 @@ - -============= -Index objects -============= - -.. autoclass:: bigframes.core.indexes.base.Index - :members: - :inherited-members: - :undoc-members: - - -.. autoclass:: bigframes.core.indexes.multi.MultiIndex - :members: - :inherited-members: - :undoc-members: - - -.. 
autoclass:: bigframes.core.indexes.datetimes.DatetimeIndex - :members: - :inherited-members: - :undoc-members: \ No newline at end of file diff --git a/docs/reference/bigframes.pandas/options.rst b/docs/reference/bigframes.pandas/options.rst deleted file mode 100644 index 60af8c826a..0000000000 --- a/docs/reference/bigframes.pandas/options.rst +++ /dev/null @@ -1,6 +0,0 @@ - -==================== -Options and settings -==================== - -``bigframes.pandas.options`` is an alias for :data:`bigframes.options`. diff --git a/docs/reference/bigframes.pandas/series.rst b/docs/reference/bigframes.pandas/series.rst deleted file mode 100644 index 41b1529b0c..0000000000 --- a/docs/reference/bigframes.pandas/series.rst +++ /dev/null @@ -1,69 +0,0 @@ - -====== -Series -====== - -.. contents:: Table of Contents - :depth: 2 - :local: - :backlinks: none - -Series ------- - -.. autoclass:: bigframes.series.Series - :members: - :inherited-members: - :undoc-members: - -Accessors ---------- - -Datetime properties -^^^^^^^^^^^^^^^^^^^ - -.. autoclass:: bigframes.operations.datetimes.DatetimeMethods - :members: - :inherited-members: - :undoc-members: - -String handling -^^^^^^^^^^^^^^^ - -.. autoclass:: bigframes.operations.strings.StringMethods - :members: - :inherited-members: - :undoc-members: - -List handling -^^^^^^^^^^^^^ - -.. autoclass:: bigframes.operations.lists.ListAccessor - :members: - :inherited-members: - :undoc-members: - -Struct handling -^^^^^^^^^^^^^^^ - -.. autoclass:: bigframes.operations.structs.StructAccessor - :members: - :inherited-members: - :undoc-members: - -Blob handling -^^^^^^^^^^^^^ - -.. autoclass:: bigframes.operations.blob.BlobAccessor - :members: - :inherited-members: - :undoc-members: - -Plotting handling -^^^^^^^^^^^^^^^^^ - -.. 
autoclass:: bigframes.operations.plotting.PlotAccessor - :members: - :inherited-members: - :undoc-members: - :noindex: diff --git a/docs/reference/bigframes.pandas/window.rst b/docs/reference/bigframes.pandas/window.rst deleted file mode 100644 index 55d911ecf4..0000000000 --- a/docs/reference/bigframes.pandas/window.rst +++ /dev/null @@ -1,9 +0,0 @@ - -====== -Window -====== - -.. autoclass:: bigframes.core.window.Window - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes.streaming/dataframe.rst b/docs/reference/bigframes.streaming/dataframe.rst deleted file mode 100644 index 79ec64961c..0000000000 --- a/docs/reference/bigframes.streaming/dataframe.rst +++ /dev/null @@ -1,6 +0,0 @@ -bigframes.streaming.dataframe -============================= - -.. autoclass:: bigframes.streaming.dataframe.StreamingDataFrame - :members: - :inherited-members: diff --git a/docs/reference/bigframes.streaming/index.rst b/docs/reference/bigframes.streaming/index.rst deleted file mode 100644 index 20a22072e5..0000000000 --- a/docs/reference/bigframes.streaming/index.rst +++ /dev/null @@ -1,13 +0,0 @@ - -============================ -BigQuery DataFrame Streaming -============================ - -.. automodule:: bigframes.streaming - :members: - :undoc-members: - -.. toctree:: - :maxdepth: 2 - - dataframe diff --git a/docs/reference/bigframes/enums.rst b/docs/reference/bigframes/enums.rst deleted file mode 100644 index b0a198e184..0000000000 --- a/docs/reference/bigframes/enums.rst +++ /dev/null @@ -1,8 +0,0 @@ - -===== -Enums -===== - -.. automodule:: bigframes.enums - :members: - :undoc-members: diff --git a/docs/reference/bigframes/exceptions.rst b/docs/reference/bigframes/exceptions.rst deleted file mode 100644 index c471aecdf7..0000000000 --- a/docs/reference/bigframes/exceptions.rst +++ /dev/null @@ -1,8 +0,0 @@ - -======================= -Exceptions and Warnings -======================= - -.. 
automodule:: bigframes.exceptions - :members: - :undoc-members: diff --git a/docs/reference/bigframes/index.rst b/docs/reference/bigframes/index.rst deleted file mode 100644 index f56883dc8e..0000000000 --- a/docs/reference/bigframes/index.rst +++ /dev/null @@ -1,22 +0,0 @@ - -============ -Core objects -============ - -.. toctree:: - :maxdepth: 2 - - enums - exceptions - options - - -Session -------- - -.. autofunction:: bigframes.connect - -.. autoclass:: bigframes.session.Session - :members: - :inherited-members: - :undoc-members: diff --git a/docs/reference/bigframes/options.rst b/docs/reference/bigframes/options.rst deleted file mode 100644 index 991399eb88..0000000000 --- a/docs/reference/bigframes/options.rst +++ /dev/null @@ -1,16 +0,0 @@ -Options and settings -==================== - -.. currentmodule:: bigframes - -.. autodata:: options - -.. autoclass:: bigframes._config.Options - -.. autoclass:: bigframes._config.bigquery_options.BigQueryOptions - -.. autoclass:: bigframes._config.display_options.DisplayOptions - -.. autoclass:: bigframes._config.sampling_options.SamplingOptions - -.. autoclass:: bigframes._config.compute_options.ComputeOptions diff --git a/docs/reference/index.rst b/docs/reference/index.rst index a0f96f751a..7e94784a67 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -4,12 +4,38 @@ API Reference Refer to these pages for details about the public objects in the ``bigframes`` packages. -.. toctree:: - :maxdepth: 2 - - bigframes/index - bigframes.bigquery/index - bigframes.geopandas/index - bigframes.ml/index - bigframes.pandas/index - bigframes.streaming/index +.. autosummary:: + :toctree: api + + bigframes._config + bigframes.bigquery + bigframes.bigquery.ai + bigframes.enums + bigframes.exceptions + bigframes.geopandas + bigframes.pandas + bigframes.streaming + +ML APIs +~~~~~~~ + +BigQuery DataFrames provides many machine learning modules, inspired by +scikit-learn. + + +.. 
autosummary:: + :toctree: api + + bigframes.ml.cluster + bigframes.ml.compose + bigframes.ml.decomposition + bigframes.ml.ensemble + bigframes.ml.forecasting + bigframes.ml.imported + bigframes.ml.impute + bigframes.ml.linear_model + bigframes.ml.llm + bigframes.ml.model_selection + bigframes.ml.pipeline + bigframes.ml.preprocessing + bigframes.ml.remote diff --git a/third_party/bigframes_vendored/pandas/core/config_init.py b/third_party/bigframes_vendored/pandas/core/config_init.py index dc2b11ab94..194ec4a8a7 100644 --- a/third_party/bigframes_vendored/pandas/core/config_init.py +++ b/third_party/bigframes_vendored/pandas/core/config_init.py @@ -10,109 +10,147 @@ module is imported, register them here rather than in the module. """ + from __future__ import annotations -display_options_doc = """ -Encapsulates the configuration for displaying objects. +import dataclasses +from typing import Literal, Optional -**Examples:** -Define Repr mode to "deferred" will prevent job execution in repr. +@dataclasses.dataclass +class DisplayOptions: + """ + Encapsulates the configuration for displaying objects. - >>> import bigframes.pandas as bpd - >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") + **Examples:** - >>> bpd.options.display.repr_mode = "deferred" - >>> df.head(20) # will no longer run the job - Computation deferred. Computation will process 28.9 kB + Define Repr mode to "deferred" will prevent job execution in repr. -Users can also get a dry run of the job by accessing the query_job property before they've run the job. This will return a dry run instance of the job they can inspect. + >>> import bigframes.pandas as bpd + >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") - >>> df.query_job.total_bytes_processed - 28947 + >>> bpd.options.display.repr_mode = "deferred" + >>> df.head(20) # will no longer run the job + Computation deferred. 
Computation will process 28.9 kB -User can execute the job by calling .to_pandas() + Users can also get a dry run of the job by accessing the query_job property before they've run the job. This will return a dry run instance of the job they can inspect. - >>> # df.to_pandas() + >>> df.query_job.total_bytes_processed + 28947 -Reset repr_mode option + User can execute the job by calling .to_pandas() - >>> bpd.options.display.repr_mode = "head" + >>> # df.to_pandas() -Can also set the progress_bar option to see the progress bar in terminal, + Reset repr_mode option - >>> bpd.options.display.progress_bar = "terminal" + >>> bpd.options.display.repr_mode = "head" -notebook, + Can also set the progress_bar option to see the progress bar in terminal, - >>> bpd.options.display.progress_bar = "notebook" + >>> bpd.options.display.progress_bar = "terminal" -or just remove it. + notebook, + >>> bpd.options.display.progress_bar = "notebook" -Setting to default value "auto" will detect and show progress bar automatically. + or just remove it. - >>> bpd.options.display.progress_bar = "auto" + Setting to default value "auto" will detect and show progress bar automatically. -Attributes: - max_columns (int, default 20): - If `max_columns` is exceeded, switch to truncate view. - max_rows (int, default 25): - If `max_rows` is exceeded, switch to truncate view. - progress_bar (Optional(str), default "auto"): - Determines if progress bars are shown during job runs. - Valid values are `auto`, `notebook`, and `terminal`. Set - to `None` to remove progress bars. - repr_mode (Literal[`head`, `deferred`]): - `head`: - Execute, download, and display results (limited to head) from - Dataframe and Series objects during repr. - `deferred`: - Prevent executions from repr statements in DataFrame and Series objects. - Instead, estimated bytes processed will be shown. DataFrame and Series - objects can still be computed with methods that explicitly execute and - download results. 
- max_info_columns (int): - max_info_columns is used in DataFrame.info method to decide if - information in each column will be printed. - max_info_rows (int or None): - df.info() will usually show null-counts for each column. - For large frames, this can be quite slow. max_info_rows and max_info_cols - limit this null check only to frames with smaller dimensions than - specified. - memory_usage (bool): - This specifies if the memory usage of a DataFrame should be displayed when - df.info() is called. Valid values True,False, - precision (int): - Controls the floating point output precision, similar to - `pandas.options.display.precision`. - blob_display (bool): - Whether to display the blob content in notebook DataFrame preview. Default True. - blob_display_width (int or None): - Width in pixels that the blob constrained to. - blob_display_height (int or None): - Height in pixels that the blob constrained to. -""" + >>> bpd.options.display.progress_bar = "auto" + """ -sampling_options_doc = """ -Encapsulates the configuration for data sampling. - -Attributes: - max_download_size (int, default 500): - Download size threshold in MB. If value set to None, the download size - won't be checked. - enable_downsampling (bool, default False): - Whether to enable downsampling, If max_download_size is exceeded when - downloading data (e.g., to_pandas()), the data will be downsampled - if enable_downsampling is True, otherwise, an error will be raised. - sampling_method (str, default "uniform"): - Downsampling algorithms to be chosen from, the choices are: - "head": This algorithm returns a portion of the data from - the beginning. It is fast and requires minimal computations - to perform the downsampling.; "uniform": This algorithm returns - uniform random samples of the data. - random_state (int, default None): - The seed for the uniform downsampling algorithm. If provided, - the uniform method may take longer to execute and require more - computation. 
-""" + # Options borrowed from pandas. + max_columns: int = 20 + """ + Maximum number of columns to display. Default 20. + + If `max_columns` is exceeded, switch to truncate view. + """ + + max_rows: int = 10 + """ + Maximum number of rows to display. Default 10. + + If `max_rows` is exceeded, switch to truncate view. + """ + + precision: int = 6 + """ + Controls the floating point output precision. Defaults to 6. + + See :attr:`pandas.options.display.precision`. + """ + + # Options unique to BigQuery DataFrames. + progress_bar: Optional[str] = "auto" + """ + Determines if progress bars are shown during job runs. Default "auto". + + Valid values are `auto`, `notebook`, and `terminal`. Set + to `None` to remove progress bars. + """ + + repr_mode: Literal["head", "deferred", "anywidget"] = "head" + """ + Determines how to display a DataFrame or Series. Default "head". + + `head` + Execute, download, and display results (limited to head) from + Dataframe and Series objects during repr. + + `deferred` + Prevent executions from repr statements in DataFrame and Series objects. + Instead, estimated bytes processed will be shown. DataFrame and Series + objects can still be computed with methods that explicitly execute and + download results. + """ + + max_colwidth: Optional[int] = 50 + """ + The maximum width in characters of a column in the repr. Default 50. + + When the column overflows, a "..." placeholder is embedded in the output. A + 'None' value means unlimited. + """ + + max_info_columns: int = 100 + """ + Used in DataFrame.info method to decide if information in each column will + be printed. Default 100. + """ + + max_info_rows: Optional[int] = 200_000 + """ + Limit null check in ``df.info()`` only to frames with smaller dimensions than + max_info_rows. Default 200,000. + + df.info() will usually show null-counts for each column. + For large frames, this can be quite slow. 
max_info_rows and max_info_columns + limit this null check only to frames with smaller dimensions than + specified. + """ + + memory_usage: bool = True + """ + If True, memory usage of a DataFrame should be displayed when + df.info() is called. Default True. + + Valid values True, False. + """ + + blob_display: bool = True + """ + If True, display the blob content in notebook DataFrame preview. Default + True. + """ + + blob_display_width: Optional[int] = None + """ + Width in pixels that the blob is constrained to. Default None. + """ + blob_display_height: Optional[int] = None + """ + Height in pixels that the blob is constrained to. Default None. + """ diff --git a/third_party/sphinx/LICENSE.rst b/third_party/sphinx/LICENSE.rst new file mode 100644 index 0000000000..de3688cd2c --- /dev/null +++ b/third_party/sphinx/LICENSE.rst @@ -0,0 +1,31 @@ +License for Sphinx +================== + +Unless otherwise indicated, all code in the Sphinx project is licenced under the +two clause BSD licence below. + +Copyright (c) 2007-2025 by the Sphinx team (see AUTHORS file). +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/third_party/sphinx/ext/autosummary/templates/autosummary/class.rst b/third_party/sphinx/ext/autosummary/templates/autosummary/class.rst new file mode 100644 index 0000000000..89550cb386 --- /dev/null +++ b/third_party/sphinx/ext/autosummary/templates/autosummary/class.rst @@ -0,0 +1,31 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + :no-members: + + {% block methods %} + + {% block attributes %} + {% if attributes %} + .. rubric:: {{ _('Attributes') }} + + .. autosummary:: + :toctree: + {% for item in attributes %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% if methods %} + .. rubric:: {{ _('Methods') }} + + .. autosummary:: + :toctree: + {% for item in methods %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} diff --git a/third_party/sphinx/ext/autosummary/templates/autosummary/module.rst b/third_party/sphinx/ext/autosummary/templates/autosummary/module.rst new file mode 100644 index 0000000000..98d86d1523 --- /dev/null +++ b/third_party/sphinx/ext/autosummary/templates/autosummary/module.rst @@ -0,0 +1,57 @@ +{{ fullname | escape | underline}} + +.. + Originally at + https://github.com/sphinx-doc/sphinx/blob/master/sphinx/ext/autosummary/templates/autosummary/module.rst + with modifications to support recursive generation from + https://github.com/sphinx-doc/sphinx/issues/7912 + +.. 
automodule:: {{ fullname }} + :no-members: + + {% block functions %} + {%- if functions %} + .. rubric:: {{ _('Functions') }} + + .. autosummary:: + :toctree: + {% for item in functions %} + {{ item }} + {%- endfor %} + {% endif %} + {%- endblock %} + + {%- block classes %} + {%- if classes %} + .. rubric:: {{ _('Classes') }} + + .. autosummary:: + :toctree: + {% for item in classes %}{% if item not in attributes %} + {{ item }} + {% endif %}{%- endfor %} + {% endif %} + {%- endblock %} + + {%- block exceptions %} + {%- if exceptions %} + .. rubric:: {{ _('Exceptions') }} + + .. autosummary:: + :toctree: + {% for item in exceptions %} + {{ item }} + {%- endfor %} + {% endif %} + {%- endblock %} + +{%- block attributes %} +{%- if attributes %} +.. rubric:: {{ _('Module Attributes') }} + +{% for item in attributes %} +.. autoattribute:: {{ fullname }}.{{ item }} + :no-index: +{% endfor %} +{% endif %} +{%- endblock %}