docs: use autosummary

tswast · tswast · commit 4ad474bbca36 · 2025-11-19T19:55:56.000Z
diff --git a/bigframes/_config/compute_options.py b/bigframes/_config/compute_options.py
@@ -29,7 +29,7 @@ class ComputeOptions:
         >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")
 
         >>> bpd.options.compute.maximum_bytes_billed = 500
-        >>> # df.to_pandas() # this should fail
+        >>> df.to_pandas() # this should fail  # doctest: +SKIP
         google.api_core.exceptions.InternalServerError: 500 Query exceeded limit for bytes billed: 500. 10485760 or higher required.
 
         >>> bpd.options.compute.maximum_bytes_billed = None  # reset option
diff --git a/bigframes/_config/display_options.py b/bigframes/_config/display_options.py
@@ -15,38 +15,15 @@
 """Options for displaying objects."""
 
 import contextlib
-import dataclasses
-from typing import Literal, Optional
 
 import bigframes_vendored.pandas.core.config_init as vendored_pandas_config
 import pandas as pd
 
-
-@dataclasses.dataclass
-class DisplayOptions:
-    __doc__ = vendored_pandas_config.display_options_doc
-
-    # Options borrowed from pandas.
-    max_columns: int = 20
-    max_rows: int = 10
-    precision: int = 6
-
-    # Options unique to BigQuery DataFrames.
-    progress_bar: Optional[str] = "auto"
-    repr_mode: Literal["head", "deferred", "anywidget"] = "head"
-
-    max_colwidth: Optional[int] = 50
-    max_info_columns: int = 100
-    max_info_rows: Optional[int] = 200000
-    memory_usage: bool = True
-
-    blob_display: bool = True
-    blob_display_width: Optional[int] = None
-    blob_display_height: Optional[int] = None
+DisplayOptions = vendored_pandas_config.DisplayOptions
 
 
 @contextlib.contextmanager
-def pandas_repr(display_options: DisplayOptions):
+def pandas_repr(display_options: vendored_pandas_config.DisplayOptions):
     """Use this when visualizing with pandas.
 
     This context manager makes sure we reset the pandas options when we're done
diff --git a/bigframes/_config/sampling_options.py b/bigframes/_config/sampling_options.py
@@ -19,18 +19,46 @@
 import dataclasses
 from typing import Literal, Optional
 
-import bigframes_vendored.pandas.core.config_init as vendored_pandas_config
-
 
 @dataclasses.dataclass
 class SamplingOptions:
-    __doc__ = vendored_pandas_config.sampling_options_doc
+    """
+    Encapsulates the configuration for data sampling.
+    """
 
     max_download_size: Optional[int] = 500
-    # Enable downsampling
+    """
+    Download size threshold in MB. Default 500.
+
+    If value set to None, the download size won't be checked.
+    """
+
     enable_downsampling: bool = False
+    """
+    Whether to enable downsampling. Default False.
+
+    If max_download_size is exceeded when downloading data (e.g., to_pandas()),
+    the data will be downsampled if enable_downsampling is True, otherwise, an
+    error will be raised.
+    """
+
     sampling_method: Literal["head", "uniform"] = "uniform"
+    """
+    Downsampling algorithms to be chosen from. Default "uniform".
+
+    The choices are "head", which returns a portion of the data from the
+    beginning and is fast, requiring minimal computation to perform the
+    downsampling; and "uniform", which returns uniform random samples of
+    the data.
+    """
+
     random_state: Optional[int] = None
+    """
+    The seed for the uniform downsampling algorithm. Default None.
+
+    If provided, the uniform method may take longer to execute and require more
+    computation.
+    """
 
     def with_max_download_size(self, max_rows: Optional[int]) -> SamplingOptions:
         """Configures the maximum download size for data sampling in MB
diff --git a/docs/conf.py b/docs/conf.py
@@ -370,7 +370,7 @@
 
 # Example configuration for intersphinx: refer to the Python standard library.
 intersphinx_mapping = {
-    "python": ("https://docs.python.org/3/objects.inv", None),
+    "python": ("https://docs.python.org/3/", None),
     "google-auth": ("https://googleapis.dev/python/google-auth/latest/", None),
     "google.api_core": (
         "https://googleapis.dev/python/google-api-core/latest/",
diff --git a/docs/reference/index.rst b/docs/reference/index.rst
@@ -8,22 +8,24 @@ packages.
     :toctree: api
 
     bigframes._config
+    bigframes.bigquery
+    bigframes.bigquery.ai
+    bigframes.enums
+    bigframes.exceptions
+    bigframes.geopandas
+    bigframes.pandas
+    bigframes.streaming
 
 ML APIs
 ~~~~~~~
 
 BigQuery DataFrames provides many machine learning modules, inspired by
 scikit-learn.
 
-.. code-block::
 
-    bigframes.bigquery
-    bigframes.bigquery.ai
-    bigframes.enums
-    bigframes.exceptions
-    bigframes.geopandas
-    bigframes.pandas
-    bigframes.streaming
+.. autosummary::
+    :toctree: api
+
     bigframes.ml.cluster
     bigframes.ml.compose
     bigframes.ml.decomposition
diff --git a/noxfile.py b/noxfile.py
@@ -534,7 +534,7 @@ def docs(session):
     )
     session.run(
         "sphinx-build",
-        # "-W",  # warnings as errors
+        "-W",  # warnings as errors
         "-T",  # show full traceback on exception
         "-N",  # no colors
         "-b",
diff --git a/third_party/bigframes_vendored/pandas/core/config_init.py b/third_party/bigframes_vendored/pandas/core/config_init.py
@@ -10,109 +10,147 @@
 module is imported, register them here rather than in the module.
 
 """
+
 from __future__ import annotations
 
-display_options_doc = """
-Encapsulates the configuration for displaying objects.
+import dataclasses
+from typing import Literal, Optional
 
-**Examples:**
 
-Define Repr mode to "deferred" will prevent job execution in repr.
+@dataclasses.dataclass
+class DisplayOptions:
+    """
+    Encapsulates the configuration for displaying objects.
 
-    >>> import bigframes.pandas as bpd
-    >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")
+    **Examples:**
 
-    >>> bpd.options.display.repr_mode = "deferred"
-    >>> df.head(20) # will no longer run the job
-    Computation deferred. Computation will process 28.9 kB
+    Setting repr_mode to "deferred" will prevent job execution in repr.
 
-Users can also get a dry run of the job by accessing the query_job property before they've run the job. This will return a dry run instance of the job they can inspect.
+        >>> import bigframes.pandas as bpd
+        >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")
 
-    >>> df.query_job.total_bytes_processed
-    28947
+        >>> bpd.options.display.repr_mode = "deferred"
+        >>> df.head(20) # will no longer run the job
+        Computation deferred. Computation will process 28.9 kB
 
-User can execute the job by calling .to_pandas()
+    Users can also get a dry run of the job by accessing the query_job property before they've run the job. This will return a dry run instance of the job they can inspect.
 
-    >>> # df.to_pandas()
+        >>> df.query_job.total_bytes_processed
+        28947
 
-Reset repr_mode option
+    Users can execute the job by calling .to_pandas().
 
-    >>> bpd.options.display.repr_mode = "head"
+        >>> # df.to_pandas()
 
-Can also set the progress_bar option to see the progress bar in terminal,
+    Reset repr_mode option
 
-    >>> bpd.options.display.progress_bar = "terminal"
+        >>> bpd.options.display.repr_mode = "head"
 
-notebook,
+    Can also set the progress_bar option to see the progress bar in terminal,
 
-    >>> bpd.options.display.progress_bar = "notebook"
+        >>> bpd.options.display.progress_bar = "terminal"
 
-or just remove it.
+    notebook,
 
+        >>> bpd.options.display.progress_bar = "notebook"
 
-Setting to default value "auto" will detect and show progress bar automatically.
+    or just remove it by setting ``progress_bar`` to ``None``.
 
-    >>> bpd.options.display.progress_bar = "auto"
+    Setting to default value "auto" will detect and show progress bar automatically.
 
-Attributes:
-    max_columns (int, default 20):
-        If `max_columns` is exceeded, switch to truncate view.
-    max_rows (int, default 25):
-        If `max_rows` is exceeded, switch to truncate view.
-    progress_bar (Optional(str), default "auto"):
-        Determines if progress bars are shown during job runs.
-        Valid values are `auto`, `notebook`, and `terminal`. Set
-        to `None` to remove progress bars.
-    repr_mode (Literal[`head`, `deferred`]):
-        `head`:
-            Execute, download, and display results (limited to head) from
-            Dataframe and Series objects during repr.
-        `deferred`:
-            Prevent executions from repr statements in DataFrame and Series objects.
-            Instead, estimated bytes processed will be shown. DataFrame and Series
-            objects can still be computed with methods that explicitly execute and
-            download results.
-    max_info_columns (int):
-        max_info_columns is used in DataFrame.info method to decide if
-        information in each column will be printed.
-    max_info_rows (int or None):
-        df.info() will usually show null-counts for each column.
-        For large frames, this can be quite slow. max_info_rows and max_info_cols
-        limit this null check only to frames with smaller dimensions than
-        specified.
-    memory_usage (bool):
-        This specifies if the memory usage of a DataFrame should be displayed when
-        df.info() is called. Valid values True,False,
-    precision (int):
-        Controls the floating point output precision, similar to
-        `pandas.options.display.precision`.
-    blob_display (bool):
-        Whether to display the blob content in notebook DataFrame preview. Default True.
-    blob_display_width (int or None):
-        Width in pixels that the blob constrained to.
-    blob_display_height (int or None):
-        Height in pixels that the blob constrained to.
-"""
+        >>> bpd.options.display.progress_bar = "auto"
+    """
 
-sampling_options_doc = """
-Encapsulates the configuration for data sampling.
-
-Attributes:
-    max_download_size (int, default 500):
-        Download size threshold in MB. If value set to None, the download size
-        won't be checked.
-    enable_downsampling (bool, default False):
-        Whether to enable downsampling, If max_download_size is exceeded when
-        downloading data (e.g., to_pandas()), the data will be downsampled
-        if enable_downsampling is True, otherwise, an error will be raised.
-    sampling_method (str, default "uniform"):
-        Downsampling algorithms to be chosen from, the choices are:
-        "head": This algorithm returns a portion of the data from
-        the beginning. It is fast and requires minimal computations
-        to perform the downsampling.; "uniform": This algorithm returns
-        uniform random samples of the data.
-    random_state (int, default None):
-        The seed for the uniform downsampling algorithm. If provided,
-        the uniform method may take longer to execute and require more
-        computation.
-"""
+    # Options borrowed from pandas.
+    max_columns: int = 20
+    """
+    Maximum number of columns to display. Default 20.
+
+    If `max_columns` is exceeded, switch to truncate view.
+    """
+
+    max_rows: int = 10
+    """
+    Maximum number of rows to display. Default 10.
+
+    If `max_rows` is exceeded, switch to truncate view.
+    """
+
+    precision: int = 6
+    """
+    Controls the floating point output precision. Default 6.
+
+    See :attr:`pandas.options.display.precision`.
+    """
+
+    # Options unique to BigQuery DataFrames.
+    progress_bar: Optional[str] = "auto"
+    """
+    Determines if progress bars are shown during job runs. Default "auto".
+
+    Valid values are `auto`, `notebook`, and `terminal`. Set
+    to `None` to remove progress bars.
+    """
+
+    repr_mode: Literal["head", "deferred", "anywidget"] = "head"
+    """
+    Determines how to display a DataFrame or Series. Default "head".
+
+    `head`
+        Execute, download, and display results (limited to head) from
+        Dataframe and Series objects during repr.
+
+    `deferred`
+        Prevent executions from repr statements in DataFrame and Series objects.
+        Instead, estimated bytes processed will be shown. DataFrame and Series
+        objects can still be computed with methods that explicitly execute and
+        download results.
+    """
+
+    max_colwidth: Optional[int] = 50
+    """
+    The maximum width in characters of a column in the repr. Default 50.
+
+    When the column overflows, a "..." placeholder is embedded in the output. A
+    'None' value means unlimited.
+    """
+
+    max_info_columns: int = 100
+    """
+    Used in DataFrame.info method to decide if information in each column will
+    be printed. Default 100.
+    """
+
+    max_info_rows: Optional[int] = 200_000
+    """
+    Limit null check in ``df.info()`` only to frames with smaller dimensions than
+    max_info_rows. Default 200,000.
+
+    df.info() will usually show null-counts for each column.
+    For large frames, this can be quite slow. max_info_rows and
+    max_info_columns limit this null check only to frames with smaller
+    dimensions than specified.
+    """
+
+    memory_usage: bool = True
+    """
+    If True, memory usage of a DataFrame should be displayed when
+    df.info() is called. Default True.
+
+    Valid values True, False.
+    """
+
+    blob_display: bool = True
+    """
+    If True, display the blob content in notebook DataFrame preview. Default
+    True.
+    """
+
+    blob_display_width: Optional[int] = None
+    """
+    Width in pixels that the blob is constrained to. Default None.
+    """
+    blob_display_height: Optional[int] = None
+    """
+    Height in pixels that the blob is constrained to. Default None.
+    """

Original file line number	Diff line number	Diff line change
`@@ -534,7 +534,7 @@ def docs(session):`
`534`	`534`	`)`
`535`	`535`	`session.run(`
`536`	`536`	`"sphinx-build",`
`537`		`- # "-W", # warnings as errors`
	`537`	`+ "-W", # warnings as errors`
`538`	`538`	`"-T", # show full traceback on exception`
`539`	`539`	`"-N", # no colors`
`540`	`540`	`"-b",`