Skip to content

Commit 9389250

Browse files
committed
Merge remote-tracking branch 'refs/remotes/github/main' into shobs-series-dot-df
2 parents 47bfeb9 + b62a07a commit 9389250

File tree

5 files changed

+196
-5
lines changed

5 files changed

+196
-5
lines changed

bigframes/dataframe.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2797,7 +2797,8 @@ def get_right_id(id):
27972797
result = result[other_frame.columns]
27982798

27992799
if isinstance(other, bf_series.Series):
2800-
result = result[other.name].rename()
2800+
# There should be exactly one column in the result
2801+
result = result[result.columns[0]].rename()
28012802

28022803
return result
28032804

bigframes/operations/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def _apply_binary_op(
141141
if isinstance(other, pd.Series):
142142
# TODO: Convert to BigQuery DataFrames series
143143
raise NotImplementedError(
144-
f"Pandas series not supported supported as operand. {constants.FEEDBACK_LINK}"
144+
f"Pandas series not supported as operand. {constants.FEEDBACK_LINK}"
145145
)
146146
if isinstance(other, series.Series):
147147
(left, right, block) = self._align(other, how=alignment)

tests/system/small/test_dataframe.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3493,6 +3493,29 @@ def test_df_dot_operator(
34933493
)
34943494

34953495

3496+
def test_df_dot_series_inline():
3497+
left = [[1, 2, 3], [2, 5, 7]]
3498+
right = [2, 1, 3]
3499+
3500+
bf1 = dataframe.DataFrame(left)
3501+
bf2 = series.Series(right)
3502+
bf_result = bf1.dot(bf2).to_pandas()
3503+
3504+
df1 = pd.DataFrame(left)
3505+
df2 = pd.Series(right)
3506+
pd_result = df1.dot(df2)
3507+
3508+
# Patch pandas dtypes for testing parity
3509+
# Pandas result is int64 instead of Int64 (nullable) dtype.
3510+
pd_result = pd_result.astype(pd.Int64Dtype())
3511+
pd_result.index = pd_result.index.astype(pd.Int64Dtype())
3512+
3513+
pd.testing.assert_series_equal(
3514+
bf_result,
3515+
pd_result,
3516+
)
3517+
3518+
34963519
def test_df_dot_series(
34973520
matrix_2by3_df, matrix_2by3_pandas_df, matrix_3by4_df, matrix_3by4_pandas_df
34983521
):

third_party/bigframes_vendored/pandas/core/frame.py

Lines changed: 155 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2597,7 +2597,7 @@ def any(self, *, axis=0, bool_only: bool = False):
25972597
<BLANKLINE>
25982598
[2 rows x 2 columns]
25992599
2600-
Checking if each column contains at least one True element(the default behavior without an explicit axis parameter).
2600+
Checking if each column contains at least one True element (the default behavior without an explicit axis parameter).
26012601
26022602
>>> df.any()
26032603
A True
@@ -2644,7 +2644,7 @@ def all(self, axis=0, *, bool_only: bool = False):
26442644
<BLANKLINE>
26452645
[2 rows x 2 columns]
26462646
2647-
Checking if all values in each column are True(the default behavior without an explicit axis parameter).
2647+
Checking if all values in each column are True (the default behavior without an explicit axis parameter).
26482648
26492649
>>> df.all()
26502650
A True
@@ -2688,7 +2688,7 @@ def prod(self, axis=0, *, numeric_only: bool = False):
26882688
<BLANKLINE>
26892689
[3 rows x 2 columns]
26902690
2691-
Calculating the product of each column(the default behavior without an explicit axis parameter).
2691+
Calculating the product of each column (the default behavior without an explicit axis parameter).
26922692
26932693
>>> df.prod()
26942694
A 6.0
@@ -2721,6 +2721,33 @@ def min(self, axis=0, *, numeric_only: bool = False):
27212721
If you want the *index* of the minimum, use ``idxmin``. This is the
27222722
equivalent of the ``numpy.ndarray`` method ``argmin``.
27232723
2724+
**Examples:**
2725+
2726+
>>> import bigframes.pandas as bpd
2727+
>>> bpd.options.display.progress_bar = None
2728+
2729+
>>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]})
2730+
>>> df
2731+
A B
2732+
0 1 2
2733+
1 3 4
2734+
<BLANKLINE>
2735+
[2 rows x 2 columns]
2736+
2737+
Finding the minimum value in each column (the default behavior without an explicit axis parameter).
2738+
2739+
>>> df.min()
2740+
A 1.0
2741+
B 2.0
2742+
dtype: Float64
2743+
2744+
Finding the minimum value in each row.
2745+
2746+
>>> df.min(axis=1)
2747+
0 1.0
2748+
1 3.0
2749+
dtype: Float64
2750+
27242751
Args:
27252752
axis ({index (0), columns (1)}):
27262753
Axis for the function to be applied on.
@@ -2739,6 +2766,33 @@ def max(self, axis=0, *, numeric_only: bool = False):
27392766
If you want the *index* of the maximum, use ``idxmax``. This is
27402767
the equivalent of the ``numpy.ndarray`` method ``argmax``.
27412768
2769+
**Examples:**
2770+
2771+
>>> import bigframes.pandas as bpd
2772+
>>> bpd.options.display.progress_bar = None
2773+
2774+
>>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]})
2775+
>>> df
2776+
A B
2777+
0 1 2
2778+
1 3 4
2779+
<BLANKLINE>
2780+
[2 rows x 2 columns]
2781+
2782+
Finding the maximum value in each column (the default behavior without an explicit axis parameter).
2783+
2784+
>>> df.max()
2785+
A 3.0
2786+
B 4.0
2787+
dtype: Float64
2788+
2789+
Finding the maximum value in each row.
2790+
2791+
>>> df.max(axis=1)
2792+
0 2.0
2793+
1 4.0
2794+
dtype: Float64
2795+
27422796
Args:
27432797
axis ({index (0), columns (1)}):
27442798
Axis for the function to be applied on.
@@ -2756,6 +2810,33 @@ def sum(self, axis=0, *, numeric_only: bool = False):
27562810
27572811
This is equivalent to the method ``numpy.sum``.
27582812
2813+
**Examples:**
2814+
2815+
>>> import bigframes.pandas as bpd
2816+
>>> bpd.options.display.progress_bar = None
2817+
2818+
>>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]})
2819+
>>> df
2820+
A B
2821+
0 1 2
2822+
1 3 4
2823+
<BLANKLINE>
2824+
[2 rows x 2 columns]
2825+
2826+
Calculating the sum of each column (the default behavior without an explicit axis parameter).
2827+
2828+
>>> df.sum()
2829+
A 4.0
2830+
B 6.0
2831+
dtype: Float64
2832+
2833+
Calculating the sum of each row.
2834+
2835+
>>> df.sum(axis=1)
2836+
0 3.0
2837+
1 7.0
2838+
dtype: Float64
2839+
27592840
Args:
27602841
axis ({index (0), columns (1)}):
27612842
Axis for the function to be applied on.
@@ -3404,6 +3485,77 @@ def dot(self, other):
34043485
The dot method for Series computes the inner product, instead of the
34053486
matrix product here.
34063487
3488+
**Examples:**
3489+
3490+
>>> import bigframes.pandas as bpd
3491+
>>> bpd.options.display.progress_bar = None
3492+
3493+
>>> left = bpd.DataFrame([[0, 1, -2, -1], [1, 1, 1, 1]])
3494+
>>> left
3495+
0 1 2 3
3496+
0 0 1 -2 -1
3497+
1 1 1 1 1
3498+
<BLANKLINE>
3499+
[2 rows x 4 columns]
3500+
>>> right = bpd.DataFrame([[0, 1], [1, 2], [-1, -1], [2, 0]])
3501+
>>> right
3502+
0 1
3503+
0 0 1
3504+
1 1 2
3505+
2 -1 -1
3506+
3 2 0
3507+
<BLANKLINE>
3508+
[4 rows x 2 columns]
3509+
>>> left.dot(right)
3510+
0 1
3511+
0 1 4
3512+
1 2 2
3513+
<BLANKLINE>
3514+
[2 rows x 2 columns]
3515+
3516+
You can also use the operator ``@`` for the dot product:
3517+
3518+
>>> left @ right
3519+
0 1
3520+
0 1 4
3521+
1 2 2
3522+
<BLANKLINE>
3523+
[2 rows x 2 columns]
3524+
3525+
The right input can be a Series, in which case the result will also be a
3526+
Series:
3527+
3528+
>>> right = bpd.Series([1, 2, -1, 0])
3529+
>>> left @ right
3530+
0 4
3531+
1 2
3532+
dtype: Int64
3533+
3534+
Any user-defined index of the left matrix and columns of the right
3535+
matrix will be reflected in the result.
3536+
3537+
>>> left = bpd.DataFrame([[1, 2, 3], [2, 5, 7]], index=["alpha", "beta"])
3538+
>>> left
3539+
0 1 2
3540+
alpha 1 2 3
3541+
beta 2 5 7
3542+
<BLANKLINE>
3543+
[2 rows x 3 columns]
3544+
>>> right = bpd.DataFrame([[2, 4, 8], [1, 5, 10], [3, 6, 9]], columns=["red", "green", "blue"])
3545+
>>> right
3546+
red green blue
3547+
0 2 4 8
3548+
1 1 5 10
3549+
2 3 6 9
3550+
<BLANKLINE>
3551+
[3 rows x 3 columns]
3552+
>>> left.dot(right)
3553+
red green blue
3554+
alpha 13 32 55
3555+
beta 30 75 129
3556+
<BLANKLINE>
3557+
[2 rows x 3 columns]
3558+
34073559
Args:
34083560
other (Series or DataFrame):
34093561
The other object to compute the matrix product with.

third_party/bigframes_vendored/pandas/core/series.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,21 @@ def dot(self, other) -> Series | np.ndarray:
631631
BigQuery DataFrames does not validate this property and will produce
632632
incorrect results if indices are not equal.
633633
634+
**Examples:**
635+
636+
>>> import bigframes.pandas as bpd
637+
>>> bpd.options.display.progress_bar = None
638+
639+
>>> s = bpd.Series([0, 1, 2, 3])
640+
>>> other = bpd.Series([-1, 2, -3, 4])
641+
>>> s.dot(other)
642+
8
643+
644+
You can also use the operator ``@`` for the dot product:
645+
646+
>>> s @ other
647+
8
648+
634649
Args:
635650
other (Series):
636651
The other object to compute the dot product with its columns.

0 commit comments

Comments
 (0)