|
10 | 10 | module is imported, register them here rather than in the module. |
11 | 11 |
|
12 | 12 | """ |
| 13 | + |
13 | 14 | from __future__ import annotations |
14 | 15 |
|
15 | | -display_options_doc = """ |
16 | | -Encapsulates the configuration for displaying objects. |
| 16 | +import dataclasses |
| 17 | +from typing import Literal, Optional |
17 | 18 |
|
18 | | -**Examples:** |
19 | 19 |
|
20 | | -Define Repr mode to "deferred" will prevent job execution in repr. |
| 20 | +@dataclasses.dataclass |
| 21 | +class DisplayOptions: |
| 22 | + """ |
| 23 | + Encapsulates the configuration for displaying objects. |
21 | 24 |
|
22 | | - >>> import bigframes.pandas as bpd |
23 | | - >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") |
| 25 | + **Examples:** |
24 | 26 |
|
25 | | - >>> bpd.options.display.repr_mode = "deferred" |
26 | | - >>> df.head(20) # will no longer run the job |
27 | | - Computation deferred. Computation will process 28.9 kB |
| 27 | + Setting repr_mode to "deferred" prevents job execution in repr. |
28 | 28 |
|
29 | | -Users can also get a dry run of the job by accessing the query_job property before they've run the job. This will return a dry run instance of the job they can inspect. |
| 29 | + >>> import bigframes.pandas as bpd |
| 30 | + >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") |
30 | 31 |
|
31 | | - >>> df.query_job.total_bytes_processed |
32 | | - 28947 |
| 32 | + >>> bpd.options.display.repr_mode = "deferred" |
| 33 | + >>> df.head(20) # will no longer run the job |
| 34 | + Computation deferred. Computation will process 28.9 kB |
33 | 35 |
|
34 | | -User can execute the job by calling .to_pandas() |
| 36 | + Users can also get a dry run of the job by accessing the query_job property before they've run the job. This will return a dry run instance of the job they can inspect. |
35 | 37 |
|
36 | | - >>> # df.to_pandas() |
| 38 | + >>> df.query_job.total_bytes_processed |
| 39 | + 28947 |
37 | 40 |
|
38 | | -Reset repr_mode option |
| 41 | + Users can execute the job by calling .to_pandas() |
39 | 42 |
|
40 | | - >>> bpd.options.display.repr_mode = "head" |
| 43 | + >>> # df.to_pandas() |
41 | 44 |
|
42 | | -Can also set the progress_bar option to see the progress bar in terminal, |
| 45 | + Reset repr_mode option |
43 | 46 |
|
44 | | - >>> bpd.options.display.progress_bar = "terminal" |
| 47 | + >>> bpd.options.display.repr_mode = "head" |
45 | 48 |
|
46 | | -notebook, |
| 49 | + You can also set the progress_bar option to show the progress bar in the terminal, |
47 | 50 |
|
48 | | - >>> bpd.options.display.progress_bar = "notebook" |
| 51 | + >>> bpd.options.display.progress_bar = "terminal" |
49 | 52 |
|
50 | | -or just remove it. |
| 53 | + notebook, |
51 | 54 |
|
| 55 | + >>> bpd.options.display.progress_bar = "notebook" |
52 | 56 |
|
53 | | -Setting to default value "auto" will detect and show progress bar automatically. |
| 57 | + or just remove it. |
54 | 58 |
|
55 | | - >>> bpd.options.display.progress_bar = "auto" |
| 59 | + Setting to default value "auto" will detect and show progress bar automatically. |
56 | 60 |
|
57 | | -Attributes: |
58 | | - max_columns (int, default 20): |
59 | | - If `max_columns` is exceeded, switch to truncate view. |
60 | | - max_rows (int, default 25): |
61 | | - If `max_rows` is exceeded, switch to truncate view. |
62 | | - progress_bar (Optional(str), default "auto"): |
63 | | - Determines if progress bars are shown during job runs. |
64 | | - Valid values are `auto`, `notebook`, and `terminal`. Set |
65 | | - to `None` to remove progress bars. |
66 | | - repr_mode (Literal[`head`, `deferred`]): |
67 | | - `head`: |
68 | | - Execute, download, and display results (limited to head) from |
69 | | - Dataframe and Series objects during repr. |
70 | | - `deferred`: |
71 | | - Prevent executions from repr statements in DataFrame and Series objects. |
72 | | - Instead, estimated bytes processed will be shown. DataFrame and Series |
73 | | - objects can still be computed with methods that explicitly execute and |
74 | | - download results. |
75 | | - max_info_columns (int): |
76 | | - max_info_columns is used in DataFrame.info method to decide if |
77 | | - information in each column will be printed. |
78 | | - max_info_rows (int or None): |
79 | | - df.info() will usually show null-counts for each column. |
80 | | - For large frames, this can be quite slow. max_info_rows and max_info_cols |
81 | | - limit this null check only to frames with smaller dimensions than |
82 | | - specified. |
83 | | - memory_usage (bool): |
84 | | - This specifies if the memory usage of a DataFrame should be displayed when |
85 | | - df.info() is called. Valid values True,False, |
86 | | - precision (int): |
87 | | - Controls the floating point output precision, similar to |
88 | | - `pandas.options.display.precision`. |
89 | | - blob_display (bool): |
90 | | - Whether to display the blob content in notebook DataFrame preview. Default True. |
91 | | - blob_display_width (int or None): |
92 | | - Width in pixels that the blob constrained to. |
93 | | - blob_display_height (int or None): |
94 | | - Height in pixels that the blob constrained to. |
95 | | -""" |
| 61 | + >>> bpd.options.display.progress_bar = "auto" |
| 62 | + """ |
96 | 63 |
|
97 | | -sampling_options_doc = """ |
98 | | -Encapsulates the configuration for data sampling. |
99 | | -
|
100 | | -Attributes: |
101 | | - max_download_size (int, default 500): |
102 | | - Download size threshold in MB. If value set to None, the download size |
103 | | - won't be checked. |
104 | | - enable_downsampling (bool, default False): |
105 | | - Whether to enable downsampling, If max_download_size is exceeded when |
106 | | - downloading data (e.g., to_pandas()), the data will be downsampled |
107 | | - if enable_downsampling is True, otherwise, an error will be raised. |
108 | | - sampling_method (str, default "uniform"): |
109 | | - Downsampling algorithms to be chosen from, the choices are: |
110 | | - "head": This algorithm returns a portion of the data from |
111 | | - the beginning. It is fast and requires minimal computations |
112 | | - to perform the downsampling.; "uniform": This algorithm returns |
113 | | - uniform random samples of the data. |
114 | | - random_state (int, default None): |
115 | | - The seed for the uniform downsampling algorithm. If provided, |
116 | | - the uniform method may take longer to execute and require more |
117 | | - computation. |
118 | | -""" |
| 64 | + # Options borrowed from pandas. |
| 65 | + max_columns: int = 20 |
| 66 | + """ |
| 67 | + Maximum number of columns to display. Default 20. |
| 68 | +
|
| 69 | + If `max_columns` is exceeded, switch to truncate view. |
| 70 | + """ |
| 71 | + |
| 72 | + max_rows: int = 10 |
| 73 | + """ |
| 74 | + Maximum number of rows to display. Default 10. |
| 75 | +
|
| 76 | + If `max_rows` is exceeded, switch to truncate view. |
| 77 | + """ |
| 78 | + |
| 79 | + precision: int = 6 |
| 80 | + """ |
| 81 | + Controls the floating point output precision. Defaults to 6. |
| 82 | +
|
| 83 | + See :attr:`pandas.options.display.precision`. |
| 84 | + """ |
| 85 | + |
| 86 | + # Options unique to BigQuery DataFrames. |
| 87 | + progress_bar: Optional[str] = "auto" |
| 88 | + """ |
| 89 | + Determines if progress bars are shown during job runs. Default "auto". |
| 90 | +
|
| 91 | + Valid values are `auto`, `notebook`, and `terminal`. Set |
| 92 | + to `None` to remove progress bars. |
| 93 | + """ |
| 94 | + |
| 95 | + repr_mode: Literal["head", "deferred", "anywidget"] = "head" |
| 96 | + """ |
| 97 | + Determines how to display a DataFrame or Series. Default "head". |
| 98 | +
|
| 99 | + `head` |
| 100 | + Execute, download, and display results (limited to head) from |
| 101 | + DataFrame and Series objects during repr. |
| 102 | +
|
| 103 | + `deferred` |
| 104 | + Prevent executions from repr statements in DataFrame and Series objects. |
| 105 | + Instead, estimated bytes processed will be shown. DataFrame and Series |
| 106 | + objects can still be computed with methods that explicitly execute and |
| 107 | + download results. |
| 108 | + """ |
| 109 | + |
| 110 | + max_colwidth: Optional[int] = 50 |
| 111 | + """ |
| 112 | + The maximum width in characters of a column in the repr. Default 50. |
| 113 | +
|
| 114 | + When the column overflows, a "..." placeholder is embedded in the output. A |
| 115 | + 'None' value means unlimited. |
| 116 | + """ |
| 117 | + |
| 118 | + max_info_columns: int = 100 |
| 119 | + """ |
| 120 | + Used in DataFrame.info method to decide if information in each column will |
| 121 | + be printed. Default 100. |
| 122 | + """ |
| 123 | + |
| 124 | + max_info_rows: Optional[int] = 200_000 |
| 125 | + """ |
| 126 | + Limit null check in ``df.info()`` only to frames with smaller dimensions than |
| 127 | + max_info_rows. Default 200,000. |
| 128 | +
|
| 129 | + df.info() will usually show null-counts for each column. |
| 130 | + For large frames, this can be quite slow. max_info_rows and max_info_columns |
| 131 | + limit this null check only to frames with smaller dimensions than |
| 132 | + specified. |
| 133 | + """ |
| 134 | + |
| 135 | + memory_usage: bool = True |
| 136 | + """ |
| 137 | + If True, the memory usage of a DataFrame is displayed when |
| 138 | + df.info() is called. Default True. |
| 139 | +
|
| 140 | + Valid values True, False. |
| 141 | + """ |
| 142 | + |
| 143 | + blob_display: bool = True |
| 144 | + """ |
| 145 | + If True, display the blob content in notebook DataFrame preview. Default |
| 146 | + True. |
| 147 | + """ |
| 148 | + |
| 149 | + blob_display_width: Optional[int] = None |
| 150 | + """ |
| 151 | + Width in pixels that the blob is constrained to. Default None. |
| 152 | + """ |
| 153 | + blob_display_height: Optional[int] = None |
| 154 | + """ |
| 155 | + Height in pixels that the blob is constrained to. Default None. |
| 156 | + """ |
0 commit comments