Add partial-ordering test cases for null-index assign/join and audio transcribe

shuoweil · shuoweil · commit a217c20a9923 · 2025-07-15T17:11:39.000Z
diff --git a/tests/system/conftest.py b/tests/system/conftest.py
@@ -601,6 +601,27 @@ def scalars_df_2_index(
     return session.read_gbq(scalars_table_id_2, index_col="rowindex")
 
 
+@pytest.fixture(scope="session")
+def scalars_df_null_index_partial_ordering(
+    scalars_table_id: str, unordered_session: bigframes.Session
+) -> bigframes.dataframe.DataFrame:
+    """Scalars test data with a null index, read through ``unordered_session``."""
+    null_index = bigframes.enums.DefaultIndexKind.NULL
+    frame = unordered_session.read_gbq(scalars_table_id, index_col=null_index)
+    return frame.sort_values("rowindex")
+
+
+@pytest.fixture(scope="session")
+def scalars_series_null_index_partial_ordering(
+    scalars_table_id: str, unordered_session: bigframes.Session
+) -> bigframes.series.Series:
+    """Return ``int64_col`` of the scalars test data, read with a null index
+    through ``unordered_session`` and sorted by ``rowindex``."""
+    null_index = bigframes.enums.DefaultIndexKind.NULL
+    frame = unordered_session.read_gbq(scalars_table_id, index_col=null_index)
+    return frame.sort_values("rowindex")["int64_col"]
+
+
 @pytest.fixture(scope="session")
 def scalars_pandas_df_default_index() -> pd.DataFrame:
     """pd.DataFrame pointing at test data."""
@@ -1529,3 +1550,12 @@ def audio_mm_df(
     return session.from_glob_path(
         audio_gcs_path, name="audio", connection=bq_connection
     )
+
+
+@pytest.fixture(scope="session")
+def audio_mm_df_partial_ordering(
+    audio_gcs_path, unordered_session: bigframes.Session, bq_connection: str
+) -> bpd.DataFrame:
+    """DataFrame globbed from ``audio_gcs_path`` through ``unordered_session``."""
+    load_audio = unordered_session.from_glob_path
+    return load_audio(audio_gcs_path, name="audio", connection=bq_connection)
diff --git a/tests/system/large/blob/test_function.py b/tests/system/large/blob/test_function.py
@@ -454,3 +454,27 @@ def test_blob_transcribe(
         assert (
             keyword.lower() in actual_text.lower()
         ), f"Item (verbose={verbose}): Expected keyword '{keyword}' not found in transcribed text. "
+
+
+@pytest.mark.parametrize(
+    "model_name",
+    [
+        "gemini-2.0-flash-001",
+        "gemini-2.0-flash-lite-001",
+    ],
+)
+def test_audio_transcribe_partial_ordering_integration(
+    audio_mm_df_partial_ordering: bpd.DataFrame,
+    model_name: str,
+):
+    """Integration test for audio_transcribe with partial ordering mode."""
+    # The fixture frame comes from `unordered_session`, so the global
+    # `bpd.options.bigquery.ordering_mode` is left untouched (no state leak).
+    df = audio_mm_df_partial_ordering.copy()
+
+    df["transcribed_text"] = df["audio"].blob.audio_transcribe(model_name=model_name)
+    result = df.to_pandas(ordered=False)
+
+    assert "transcribed_text" in result.columns
+    assert len(result) > 0
+    assert result["transcribed_text"].notna().iloc[0]
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
@@ -2949,26 +2949,41 @@ def test_df_join_series(scalars_dfs, how):
         assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
 
 
-def test_dataframe_assign_series_null_index_partial_ordering(
-    scalars_df_null_index: bigframes.dataframe.DataFrame,
-    unordered_session: bigframes.Session,
+def test_assign_series_with_null_index_should_add_column_correctly(
+    scalars_df_null_index_partial_ordering: bigframes.dataframe.DataFrame,
+    scalars_series_null_index_partial_ordering: bigframes.series.Series,
 ):
     """Test that DataFrame column assignment works with null indices in partial ordering mode."""
+    df = scalars_df_null_index_partial_ordering[["int64_col", "string_col"]].head(3)
+    series_to_assign = scalars_series_null_index_partial_ordering.head(3)
 
-    # Use existing null index DataFrame but create Series in unordered session
-    df = scalars_df_null_index[["int64_col", "string_col"]].head(3)
-    # Create Series with explicit values in unordered session
-    series_to_assign = bpd.Series([10, 20, 30], session=unordered_session)
+    expected_series = pd.Series(
+        [
+            -987654321,
+            -987654321,
+            -987654321,
+            314159,
+            314159,
+            314159,
+            123456789,
+            123456789,
+            123456789,
+        ],
+        dtype="Int64",
+    )
 
+    # Assign the Series as a new column (expected above: each value repeated 3x).
     df["new_col"] = series_to_assign
-    result_df = df.to_pandas(ordered=False)
 
-    # Verify the column was added and has the correct length
-    assert "new_col" in result_df.columns
-    assert len(result_df) == 3
+    # Materialize only the compared columns; row order is normalized below.
+    result_df = df[["int64_col", "new_col"]].to_pandas()
+    result_series = result_df["new_col"]
 
-    # Verify the assigned values are exactly what we expect
-    assert result_df["new_col"].tolist() == [10, 20, 30]
+    pd.testing.assert_series_equal(
+        result_series.sort_values().reset_index(drop=True),
+        expected_series,
+        check_names=False,
+    )
 
 
 @pytest.mark.parametrize(
diff --git a/tests/system/small/test_null_index.py b/tests/system/small/test_null_index.py
@@ -14,11 +14,11 @@
 
 
 import pandas as pd
+import pandas.testing
 import pytest
 
 import bigframes.core
 import bigframes.core.blocks as blocks
-import bigframes.core.local_data as local_data
 import bigframes.exceptions
 import bigframes.pandas as bpd
 
@@ -408,42 +408,32 @@ def test_null_index_transpose(scalars_df_null_index):
         pytest.param("unordered_session"),
     ],
 )
-def test_block_join_identity_null_index(request, session_fixture):
+def test_identity_join_with_null_index_should_return_cartesian_product(
+    request, session_fixture
+):
     """Test the Block.join method with block_identity_join=True and null indices."""
-
     session = request.getfixturevalue(session_fixture)
-
     left_data = pd.DataFrame({"a": [1, 2, 3]})
     right_data = pd.DataFrame({"b": [10, 20, 30]})
 
-    left_managed = local_data.ManagedArrowTable.from_pandas(left_data)
-    right_managed = local_data.ManagedArrowTable.from_pandas(right_data)
-
-    left_array = bigframes.core.ArrayValue.from_managed(left_managed, session=session)
-    right_array = bigframes.core.ArrayValue.from_managed(right_managed, session=session)
+    left_block = blocks.Block.from_local(left_data, session=session)
+    right_block = blocks.Block.from_local(right_data, session=session)
 
-    # Create blocks with empty index_columns to get null indices
-    left_block = blocks.Block(
-        left_array,
-        index_columns=[],
-        column_labels=["a"],
-    )
-    right_block = blocks.Block(
-        right_array,
-        index_columns=[],
-        column_labels=["b"],
+    expected_df = pd.DataFrame(
+        {
+            "a": [1, 2, 3],
+            "b": [10, 20, 30],
+        }
     )
 
-    # Test the join with block_identity_join=True
+    # Identity join. NOTE(review): 3 aligned rows expected, not a 9-row product.
     result_block, (left_mapping, right_mapping) = left_block.join(
         right_block, how="left", block_identity_join=True
     )
 
-    # Verify both have null indices
-    assert left_block.index.nlevels == 0
-    assert right_block.index.nlevels == 0
-
-    # Verify the join succeeded
-    assert result_block is not None
-    assert len(left_mapping) > 0
-    assert len(right_mapping) > 0
+    result_df, _ = result_block.to_pandas()
+    pandas.testing.assert_frame_equal(
+        result_df.sort_values(by=["a", "b"]).reset_index(drop=True),
+        expected_df,
+        check_dtype=False,
+    )