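Fix more doctests: add missing `bpd` imports, skip remote-function examples, and handle the polars 1.24 `join_nulls` -> `nulls_equal` rename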

tswast · tswast · commit 4334a44a3b4b · 2025-10-15T15:11:44.000Z
diff --git a/bigframes/core/compile/polars/compiler.py b/bigframes/core/compile/polars/compiler.py
@@ -717,14 +717,22 @@ def _ordered_join(
             ]
         )
         if how != "cross":
+            # Note: join_nulls renamed to nulls_equal for polars 1.24
+            polars_version = tuple(
+                int(part) for part in pl.__version__.split(".") if part.isnumeric()
+            )
+            if polars_version >= (1, 24, 0):
+                join_kwargs = {"nulls_equal": join_nulls}
+            else:
+                join_kwargs = {"join_nulls": join_nulls}
+
             joined = left.join(
                 right,
                 how=how,
                 left_on=left_on,
                 right_on=right_on,
-                # Note: join_nulls renamed to nulls_equal for polars 1.24
-                join_nulls=join_nulls,  # type: ignore
                 coalesce=False,
+                **join_kwargs,  # type: ignore
             )
         else:
             joined = left.join(right, how=how, coalesce=False)
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -405,6 +405,7 @@ def to_gbq(
 
         **Examples:**
 
+            >>> import bigframes.pandas as bpd
 
         Write a DataFrame to a BigQuery table.
 
@@ -513,7 +514,7 @@ def to_parquet(
 
         **Examples:**
 
-
+            >>> import bigframes.pandas as bpd
             >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
             >>> gcs_bucket = "gs://bigframes-dev-testing/sample_parquet*.parquet"
             >>> df.to_parquet(path=gcs_bucket)
@@ -4843,22 +4844,22 @@ def apply(self, func, *, axis=0, args=(), **kwargs):
         to select only the necessary columns before calling `apply()`. Note: This
         feature is currently in **preview**.
 
-            >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default")
+            >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default")  # doctest: +SKIP
             ... def foo(row: pd.Series) -> int:
             ...     result = 1
             ...     result += row["col1"]
             ...     result += row["col2"]*row["col2"]
             ...     return result
 
-            >>> df[["col1", "col2"]].apply(foo, axis=1)
+            >>> df[["col1", "col2"]].apply(foo, axis=1)  # doctest: +SKIP
             0    11
             1    19
             dtype: Int64
 
         You could return an array output for every input row from the remote
         function.
 
-            >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default")
+            >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default")  # doctest: +SKIP
             ... def marks_analyzer(marks: pd.Series) -> list[float]:
             ...     import statistics
             ...     average = marks.mean()
@@ -4875,8 +4876,8 @@ def apply(self, func, *, axis=0, args=(), **kwargs):
             ...     "chemistry": [88, 56, 72],
             ...     "algebra": [78, 91, 79]
             ... }, index=["Alice", "Bob", "Charlie"])
-            >>> stats = df.apply(marks_analyzer, axis=1)
-            >>> stats
+            >>> stats = df.apply(marks_analyzer, axis=1)  # doctest: +SKIP
+            >>> stats  # doctest: +SKIP
             Alice      [77.67 78.   77.19 76.71]
             Bob        [75.67 80.   74.15 72.56]
             Charlie    [75.33 75.   75.28 75.22]
@@ -4899,14 +4900,14 @@ def apply(self, func, *, axis=0, args=(), **kwargs):
             <BLANKLINE>
             [2 rows x 3 columns]
 
-            >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default")
+            >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default")  # doctest: +SKIP
             ... def foo(x: int, y: int, z: int) -> float:
             ...     result = 1
             ...     result += x
             ...     result += y/z
             ...     return result
 
-            >>> df.apply(foo, axis=1)
+            >>> df.apply(foo, axis=1)  # doctest: +SKIP
             0    2.6
             1    3.8
             dtype: Float64
diff --git a/third_party/bigframes_vendored/pandas/core/indexes/accessor.py b/third_party/bigframes_vendored/pandas/core/indexes/accessor.py
@@ -97,6 +97,7 @@ def dayofyear(self):
 
         **Examples:**
 
+            >>> import bigframes.pandas as bpd
             >>> s = bpd.Series(
             ...     pd.date_range('2016-12-28', '2017-01-03', freq='D').to_series()
             ... )
diff --git a/third_party/bigframes_vendored/pandas/core/reshape/tile.py b/third_party/bigframes_vendored/pandas/core/reshape/tile.py
@@ -33,7 +33,7 @@ def cut(
 
     **Examples:**
 
-
+        >>> import bigframes.pandas as bpd
         >>> s = bpd.Series([0, 1, 5, 10])
         >>> s
         0     0
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
@@ -37,7 +37,7 @@ def dt(self):
 
         **Examples:**
 
-
+            >>> import bigframes.pandas as bpd
             >>> seconds_series = bpd.Series(pd.date_range("2000-01-01", periods=3, freq="s"))
             >>> seconds_series
             0    2000-01-01 00:00:00
@@ -1053,6 +1053,7 @@ def duplicated(self, keep="first") -> Series:
 
         **Examples:**
 
+            >>> import bigframes.pandas as bpd
 
         By default, for each set of duplicated values, the first occurrence is
         set on False and all others on True:
@@ -1616,7 +1617,7 @@ def nlargest(
 
         **Examples:**
 
-
+            >>> import bigframes.pandas as bpd
             >>> countries_population = {"Italy": 59000000, "France": 65000000,
             ...                          "Malta": 434000, "Maldives": 434000,
             ...                          "Brunei": 434000, "Iceland": 337000,
@@ -1700,7 +1701,7 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series:
 
         **Examples:**
 
-
+            >>> import bigframes.pandas as bpd
             >>> countries_population = {"Italy": 59000000, "France": 65000000,
             ...                          "Malta": 434000, "Maldives": 434000,
             ...                          "Brunei": 434000, "Iceland": 337000,
@@ -4570,7 +4571,7 @@ def median(self, *, exact: bool = True):
 
         **Examples:**
 
-
+            >>> import bigframes.pandas as bpd
             >>> s = bpd.Series([1, 2, 3])
             >>> s.median()
             np.float64(2.0)
@@ -4870,7 +4871,6 @@ def mask(self, cond, other):
 
         **Examples:**
 
-
             >>> s = bpd.Series([10, 11, 12, 13, 14])
             >>> s
             0    10
@@ -4914,7 +4914,7 @@ def mask(self, cond, other):
         condition is evaluated based on a complicated business logic which cannot
         be expressed in form of a Series.
 
-            >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default")
+            >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default")  # doctest: +SKIP
             ... def should_mask(name: str) -> bool:
             ...     hash = 0
             ...     for char_ in name:
@@ -4927,12 +4927,12 @@ def mask(self, cond, other):
             1         Bob
             2    Caroline
             dtype: string
-            >>> s.mask(should_mask)
+            >>> s.mask(should_mask)  # doctest: +SKIP
             0        <NA>
             1         Bob
             2    Caroline
             dtype: string
-            >>> s.mask(should_mask, "REDACTED")
+            >>> s.mask(should_mask, "REDACTED")  # doctest: +SKIP
             0    REDACTED
             1         Bob
             2    Caroline
@@ -5469,7 +5469,6 @@ def map(
 
         **Examples:**
 
-
             >>> s = bpd.Series(['cat', 'dog', pd.NA, 'rabbit'])
             >>> s
             0       cat
@@ -5490,7 +5489,7 @@ def map(
 
         It also accepts a remote function:
 
-            >>> @bpd.remote_function(cloud_function_service_account="default")
+            >>> @bpd.remote_function(cloud_function_service_account="default")  # doctest: +SKIP
             ... def my_mapper(val: str) -> str:
             ...     vowels = ["a", "e", "i", "o", "u"]
             ...     if val:
@@ -5499,7 +5498,7 @@ def map(
             ...         ])
             ...     return "N/A"
 
-            >>> s.map(my_mapper)
+            >>> s.map(my_mapper)  # doctest: +SKIP
             0       cAt
             1       dOg
             2       N/A