Skip to content

Commit 49f32b8

Browse files
committed
Merge remote-tracking branch 'origin/main' into tswast-doctest-boilerplate
2 parents 9db2c19 + 8f27e73 commit 49f32b8

File tree

6 files changed

+108
-70
lines changed

6 files changed

+108
-70
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,4 @@ system_tests/local_test_setup
6262
# Make sure a generated file isn't accidentally committed.
6363
pylintrc
6464
pylintrc.test
65+
dummy.pkl

tests/system/small/bigquery/test_ai.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,6 @@ def test_ai_if_multi_model(session):
285285

286286
def test_ai_classify(session):
287287
s = bpd.Series(["cat", "orchid"], session=session)
288-
bpd.options.display.repr_mode = "deferred"
289288

290289
result = bbq.ai.classify(s, ["animal", "plant"])
291290

third_party/bigframes_vendored/pandas/core/frame.py

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -6743,7 +6743,7 @@ def query(self, expr: str) -> DataFrame | None:
67436743

67446744
def interpolate(self, method: str = "linear"):
67456745
"""
6746-
Fill NaN values using an interpolation method.
6746+
Fill NA (NULL in BigQuery) values using an interpolation method.
67476747
67486748
**Examples:**
67496749
@@ -6791,33 +6791,39 @@ def interpolate(self, method: str = "linear"):
67916791

67926792
def fillna(self, value):
67936793
"""
6794-
Fill NA/NaN values using the specified method.
6794+
Fill NA (NULL in BigQuery) values using the specified method.
67956795
6796-
**Examples:**
6796+
Note that empty strings ``''``, :attr:`numpy.inf`, and
6797+
:attr:`numpy.nan` are **not** considered NA values. This NA/NULL
6798+
logic differs from numpy, but it is the same as BigQuery and the
6799+
:class:`pandas.ArrowDtype`.
67976800
6801+
**Examples:**
67986802
6799-
>>> df = bpd.DataFrame([[np.nan, 2, np.nan, 0],
6800-
... [3, 4, np.nan, 1],
6801-
... [np.nan, np.nan, np.nan, np.nan],
6802-
... [np.nan, 3, np.nan, 4]],
6803-
... columns=list("ABCD")).astype("Float64")
6803+
>>> df = bpd.DataFrame(
6804+
... [
6805+
... pa.array([np.nan, 2, None, 0], type=pa.float64()),
6806+
... pa.array([3, np.nan, None, 1], type=pa.float64()),
6807+
... pa.array([None, None, np.nan, None], type=pa.float64()),
6808+
... pa.array([4, 5, None, np.nan], type=pa.float64()),
6809+
... ], columns=list("ABCD"), dtype=pd.ArrowDtype(pa.float64()))
68046810
>>> df
6805-
A B C D
6806-
0 <NA> 2.0 <NA> 0.0
6807-
1 3.0 4.0 <NA> 1.0
6808-
2 <NA> <NA> <NA> <NA>
6809-
3 <NA> 3.0 <NA> 4.0
6811+
A B C D
6812+
0 NaN 2.0 <NA> 0.0
6813+
1 3.0 NaN <NA> 1.0
6814+
2 <NA> <NA> NaN <NA>
6815+
3 4.0 5.0 <NA> NaN
68106816
<BLANKLINE>
68116817
[4 rows x 4 columns]
68126818
6813-
Replace all NA elements with 0s.
6819+
Replace all NA (NULL) elements with 0s.
68146820
68156821
>>> df.fillna(0)
68166822
A B C D
6817-
0 0.0 2.0 0.0 0.0
6818-
1 3.0 4.0 0.0 1.0
6819-
2 0.0 0.0 0.0 0.0
6820-
3 0.0 3.0 0.0 4.0
6823+
0 NaN 2.0 0.0 0.0
6824+
1 3.0 NaN 0.0 1.0
6825+
2 0.0 0.0 NaN 0.0
6826+
3 4.0 5.0 0.0 NaN
68216827
<BLANKLINE>
68226828
[4 rows x 4 columns]
68236829
@@ -6833,11 +6839,11 @@ def fillna(self, value):
68336839
<BLANKLINE>
68346840
[3 rows x 4 columns]
68356841
>>> df.fillna(df_fill)
6836-
A B C D
6837-
0 0.0 2.0 2.0 0.0
6838-
1 3.0 4.0 6.0 1.0
6839-
2 8.0 9.0 10.0 11.0
6840-
3 <NA> 3.0 <NA> 4.0
6842+
A B C D
6843+
0 NaN 2.0 2.0 0.0
6844+
1 3.0 NaN 6.0 1.0
6845+
2 8.0 9.0 NaN 11.0
6846+
3 4.0 5.0 <NA> NaN
68416847
<BLANKLINE>
68426848
[4 rows x 4 columns]
68436849

third_party/bigframes_vendored/pandas/core/generic.py

Lines changed: 50 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -796,72 +796,88 @@ def bfill(self, *, limit: Optional[int] = None):
796796
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
797797

798798
def isna(self) -> NDFrame:
799-
"""Detect missing values.
799+
"""Detect missing (NULL) values.
800800
801-
Return a boolean same-sized object indicating if the values are NA.
802-
NA values get mapped to True values. Everything else gets mapped to
803-
False values. Characters such as empty strings ``''`` or
804-
:attr:`numpy.inf` are not considered NA values.
801+
Return a boolean same-sized object indicating if the values are NA
802+
(NULL in BigQuery). NA/NULL values get mapped to True values.
803+
Everything else gets mapped to False values.
805804
806-
**Examples:**
805+
Note that empty strings ``''``, :attr:`numpy.inf`, and
806+
:attr:`numpy.nan` are **not** considered NA values. This NA/NULL
807+
logic differs from numpy, but it is the same as BigQuery and the
808+
:class:`pandas.ArrowDtype`.
807809
810+
**Examples:**
808811
809812
>>> df = bpd.DataFrame(dict(
810-
... age=[5, 6, np.nan],
811-
... born=[pd.NA, "1940-04-25", "1940-04-25"],
812-
... name=['Alfred', 'Batman', ''],
813-
... toy=[None, 'Batmobile', 'Joker'],
813+
... age=pd.Series(pa.array(
814+
... [5, 6, None, 4],
815+
... type=pa.int64(),
816+
... ), dtype=pd.ArrowDtype(pa.int64())),
817+
... born=pd.to_datetime([pd.NA, "1940-04-25", "1940-04-25", "1941-08-25"]),
818+
... name=['Alfred', 'Batman', '', 'Plastic Man'],
819+
... toy=[None, 'Batmobile', 'Joker', 'Play dough'],
820+
... height=pd.Series(pa.array(
821+
... [6.1, 5.9, None, np.nan],
822+
... type=pa.float64(),
823+
... ), dtype=pd.ArrowDtype(pa.float64())),
814824
... ))
815825
>>> df
816-
age born name toy
817-
0 5.0 <NA> Alfred <NA>
818-
1 6.0 1940-04-25 Batman Batmobile
819-
2 <NA> 1940-04-25 Joker
826+
age born name toy height
827+
0 5 <NA> Alfred <NA> 6.1
828+
1 6 1940-04-25 00:00:00 Batman Batmobile 5.9
829+
2 <NA> 1940-04-25 00:00:00 Joker <NA>
830+
3 4 1941-08-25 00:00:00 Plastic Man Play dough NaN
820831
<BLANKLINE>
821-
[3 rows x 4 columns]
832+
[4 rows x 5 columns]
822833
823-
Show which entries in a DataFrame are NA:
834+
Show which entries in a DataFrame are NA (NULL in BigQuery):
824835
825836
>>> df.isna()
826-
age born name toy
827-
0 False True False True
828-
1 False False False False
829-
2 True False False False
837+
age born name toy height
838+
0 False True False True False
839+
1 False False False False False
840+
2 True False False False True
841+
3 False False False False False
830842
<BLANKLINE>
831-
[3 rows x 4 columns]
843+
[4 rows x 5 columns]
832844
833845
>>> df.isnull()
834-
age born name toy
835-
0 False True False True
836-
1 False False False False
837-
2 True False False False
846+
age born name toy height
847+
0 False True False True False
848+
1 False False False False False
849+
2 True False False False True
850+
3 False False False False False
838851
<BLANKLINE>
839-
[3 rows x 4 columns]
852+
[4 rows x 5 columns]
840853
841-
Show which entries in a Series are NA:
854+
Show which entries in a Series are NA (NULL in BigQuery):
842855
843-
>>> ser = bpd.Series([5, None, 6, np.nan, pd.NA])
856+
>>> ser = bpd.Series(pa.array(
857+
... [5, None, 6, np.nan, None],
858+
... type=pa.float64(),
859+
... ), dtype=pd.ArrowDtype(pa.float64()))
844860
>>> ser
845-
0 5
861+
0 5.0
846862
1 <NA>
847-
2 6
848-
3 <NA>
863+
2 6.0
864+
3 NaN
849865
4 <NA>
850-
dtype: Int64
866+
dtype: Float64
851867
852868
>>> ser.isna()
853869
0 False
854870
1 True
855871
2 False
856-
3 True
872+
3 False
857873
4 True
858874
dtype: boolean
859875
860876
>>> ser.isnull()
861877
0 False
862878
1 True
863879
2 False
864-
3 True
880+
3 False
865881
4 True
866882
dtype: boolean
867883

third_party/bigframes_vendored/pandas/core/indexes/base.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -900,14 +900,23 @@ def value_counts(
900900

901901
def fillna(self, value) -> Index:
902902
"""
903-
Fill NA/NaN values with the specified value.
903+
Fill NA (NULL in BigQuery) values with the specified value.
904904
905-
**Examples:**
905+
Note that empty strings ``''``, :attr:`numpy.inf`, and
906+
:attr:`numpy.nan` are **not** considered NA values. This NA/NULL
907+
logic differs from numpy, but it is the same as BigQuery and the
908+
:class:`pandas.ArrowDtype`.
906909
910+
**Examples:**
907911
908-
>>> idx = bpd.Index([np.nan, np.nan, 3])
912+
>>> idx = bpd.Index(
913+
... pa.array([None, np.nan, 3, None], type=pa.float64()),
914+
... dtype=pd.ArrowDtype(pa.float64()),
915+
... )
916+
>>> idx
917+
Index([<NA>, nan, 3.0, <NA>], dtype='Float64')
909918
>>> idx.fillna(0)
910-
Index([0.0, 0.0, 3.0], dtype='Float64')
919+
Index([0.0, nan, 3.0, 0.0], dtype='Float64')
911920
912921
Args:
913922
value (scalar):

third_party/bigframes_vendored/pandas/core/series.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2320,23 +2320,30 @@ def fillna(
23202320
value=None,
23212321
) -> Series | None:
23222322
"""
2323-
Fill NA/NaN values using the specified method.
2323+
Fill NA (NULL in BigQuery) values using the specified method.
23242324
2325-
**Examples:**
2325+
Note that empty strings ``''``, :attr:`numpy.inf`, and
2326+
:attr:`numpy.nan` are **not** considered NA values. This NA/NULL
2327+
logic differs from numpy, but it is the same as BigQuery and the
2328+
:class:`pandas.ArrowDtype`.
23262329
2330+
**Examples:**
23272331
2328-
>>> s = bpd.Series([np.nan, 2, np.nan, -1])
2332+
>>> s = bpd.Series(
2333+
... pa.array([np.nan, 2, None, -1], type=pa.float64()),
2334+
... dtype=pd.ArrowDtype(pa.float64()),
2335+
... )
23292336
>>> s
2330-
0 <NA>
2337+
0 NaN
23312338
1 2.0
23322339
2 <NA>
23332340
3 -1.0
23342341
dtype: Float64
23352342
2336-
Replace all NA elements with 0s.
2343+
Replace all NA (NULL) elements with 0s.
23372344
23382345
>>> s.fillna(0)
2339-
0 0.0
2346+
0 NaN
23402347
1 2.0
23412348
2 0.0
23422349
3 -1.0
@@ -2346,7 +2353,7 @@ def fillna(
23462353
23472354
>>> s_fill = bpd.Series([11, 22, 33])
23482355
>>> s.fillna(s_fill)
2349-
0 11.0
2356+
0 NaN
23502357
1 2.0
23512358
2 33.0
23522359
3 -1.0
@@ -4281,7 +4288,7 @@ def update(self, other) -> None:
42814288
2 6
42824289
dtype: Int64
42834290
4284-
If ``other`` contains NaNs the corresponding values are not updated
4291+
If ``other`` contains NA (NULL) values, the corresponding values are not updated
42854292
in the original Series.
42864293
42874294
>>> s = bpd.Series([1, 2, 3])

0 commit comments

Comments
 (0)