googleapis · shobsi · Nov 23, 2023 · Nov 27, 2023 · Nov 27, 2023 · Nov 27, 2023
@@ -53,6 +53,7 @@
 import bigframes.core.block_transforms as block_ops
 import bigframes.core.blocks as blocks
 import bigframes.core.expression as ex
+import bigframes.core.guid as guid
 import bigframes.core.indexers
 import bigframes.core.indexes as indexes
 import bigframes.core.ordering as order
@@ -1193,7 +1194,40 @@ def rdivmod(self, other) -> Tuple[Series, Series]:  # type: ignore
         return (self.rfloordiv(other), self.rmod(other))
 
     def dot(self, other):
-        return (self * other).sum()
+        if isinstance(other, Series):
+            return (self * other).sum()
+
+        # Anything that is not a Series is treated as a DataFrame from here on
+        if len(other.columns.names) == 1:
+            # Single-level columns in other: reuse DataFrame.dot by turning
+            # self into a one-row frame (a generated name replaces a missing one)
+            self_named = self
+            if self_named.name is None:
+                self_named = self.copy()
+                self_named.name = guid.generate_guid()
+
+            self_as_row = self_named.to_frame().T
+            frame_dot_result_as_row = self_as_row.dot(other)
+            frame_dot_result_as_col = frame_dot_result_as_row.T
+            series_dot_result = frame_dot_result_as_col[self_named.name]
+
+            # mask the results for columns of other that contain any NA value
+            na_mask = other.isna().any()
+            result = series_dot_result.mask(na_mask)
+            result.name = self.name  # caller-visible name, not the generated one
+        else:
+            # TODO: Remove this special code path after DataFrame.dot supports
+            # multi-level columns; until then each column is handled on its own.
+            result = Series(
+                [
+                    pandas.NA if other[col].isna().any() else (self * other[col]).sum()
+                    for col in other.columns
+                ],
+                index=other.columns,
+                name=self.name,
+            )
+
+        return result
 
     def __matmul__(self, other):
         return self.dot(other)

@@ -1411,3 +1411,26 @@ def test_multi_index_contains(scalars_df_index, scalars_pandas_df_index, key):
     pd_result = key in scalars_pandas_df_index.set_index(col_name).index
 
     assert bf_result == pd_result
+
+
+def test_series_dot_df_column_multi_index():
+    left = [10, 11, 12, 13]  # series data (4 values)
+    right = [[0, 1, 2], [-2, 3, -4], [4, -5, 6], [6, 7, -8]]  # 4x3 dataframe data
+
+    multi_level_columns = pandas.MultiIndex.from_arrays(
+        [["col0", "col0", "col1"], ["col00", "col01", "col11"]]  # two levels
+    )
+
+    bf_left_s = bpd.Series(left)
+    bf_right_df = bpd.DataFrame(right)
+    bf_right_df.columns = multi_level_columns
+    bf_result = bf_left_s @ bf_right_df  # Series @ DataFrame with two-level columns
+
+    pd_left_s = pandas.Series(left)
+    pd_right_df = pandas.DataFrame(right)
+    pd_right_df.columns = multi_level_columns
+    pd_result = pd_left_s @ pd_right_df  # pandas reference result
+
+    pandas.testing.assert_series_equal(
+        bf_result.to_pandas(), pd_result, check_index_type=False, check_dtype=False
+    )
@@ -3170,6 +3170,54 @@ def test_dot(scalars_dfs):
     assert bf_result == pd_result
 
 
+def test_dot_df(matrix_3by4_df, matrix_3by4_pandas_df):
+    bf_result = matrix_3by4_df["w"] @ matrix_3by4_df  # Series @ DataFrame
+    pd_result = matrix_3by4_pandas_df["w"] @ matrix_3by4_pandas_df  # pandas reference
+
+    pd.testing.assert_series_equal(
+        bf_result.to_pandas(), pd_result, check_index_type=False, check_dtype=False
+    )
+
+
+def test_dot_df_with_na(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs  # fixture yields both frames
+    bf_result = scalars_df["int64_too"] @ scalars_df[["int64_col", "int64_too"]]
+    pd_result = (
+        scalars_pandas_df["int64_too"] @ scalars_pandas_df[["int64_col", "int64_too"]]
+    )
+
+    pd.testing.assert_series_equal(
+        bf_result.to_pandas(),
+        pd_result,
+        check_index_type=False,
+        check_dtype=False,
+        check_exact=False,  # compare with tolerance, not exactly
+    )
+
+
+def test_dot_df_unnamed(session):
+    ps = pd.Series([0, 1, 2, 3])
+    assert ps.name is None  # the unnamed-Series case is what this test targets
+
+    pdf = pd.DataFrame(
+        {"a": [-1, 2, -3, 4], "b": [-10, 20, -30, 40], "c": [-1, 2, -3, pd.NA]}
+    )
+
+    s = session.read_pandas(ps)  # load the same data through the session
+    df = session.read_pandas(pdf)
+
+    pd_result = ps @ pdf  # pandas reference result
+    bf_result = s @ df
+
+    pd.testing.assert_series_equal(
+        bf_result.to_pandas(),
+        pd_result,
+        check_index_type=False,
+        check_dtype=False,
+        check_exact=False,  # compare with tolerance, not exactly
+    )
+
+
 @pytest.mark.parametrize(
     ("left", "right", "inclusive"),
     [

@@ -1449,8 +1449,7 @@ def dot(self, other) -> Series | np.ndarray:
         Compute the dot product between the Series and the columns of other.
 
         This method computes the dot product between the Series and another
-        one, or the Series and each columns of a DataFrame, or the Series and
-        each columns of an array.
+        one, or the Series and each column of a DataFrame.
 
         It can also be called using `self @ other` in Python >= 3.5.
 
@@ -1475,8 +1474,19 @@ def dot(self, other) -> Series | np.ndarray:
             >>> s @ other
             np.int64(8)
 
+        The other operand can be a DataFrame:
+
+            >>> other = bpd.DataFrame({"a" : [-1, 2, -3, 4],
+            ...                        "b" : [-10, 20, -30, 40],
+            ...                        "c" : [-1, 2, -3, bpd.NA]})
+            >>> s @ other
+            a       8
+            b      80
+            c    <NA>
+            dtype: Int64
+
         Args:
-            other (Series):
+            other (Series or DataFrame):
                 The other object to compute the dot product with its columns.
 
         Returns: