From 159ddc0a8ff5388c6551a7b9af7975bbd51a8309 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Mon, 14 Jul 2025 17:46:43 +0000 Subject: [PATCH 01/10] handle corner case of null ptr --- bigframes/core/blocks.py | 5 +++ bigframes/dataframe.py | 2 +- tests/system/small/test_dataframe.py | 22 ++++++++++++ tests/system/small/test_null_index.py | 51 +++++++++++++++++++++++++-- 4 files changed, 77 insertions(+), 3 deletions(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index c8632ebc8c..bf91f709b0 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -2488,6 +2488,11 @@ def join( ) if result is not None: return result + + # For block identity joins with null indices, perform cross join + if block_identity_join and how == "left": + return join_with_single_row(self, other) + raise bigframes.exceptions.NullIndexError( "Cannot implicitly align objects. Set an explicit index using set_index." ) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 7de4bdbc91..466e93d0da 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -2289,7 +2289,7 @@ def _assign_series_join_on_index( self, label: str, series: bigframes.series.Series ) -> DataFrame: block, (get_column_left, get_column_right) = self._block.join( - series._block, how="left" + series._block, how="left", block_identity_join=True ) column_ids = [ diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index caf39bd9e9..938fb311b5 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -2949,6 +2949,28 @@ def test_df_join_series(scalars_dfs, how): assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) +def test_dataframe_assign_series_null_index_partial_ordering( + scalars_df_null_index: bigframes.dataframe.DataFrame, + unordered_session: bigframes.Session, +): + """Test that DataFrame column assignment works with null indices in partial ordering mode.""" + + # Use existing null 
index DataFrame but create Series in unordered session + df = scalars_df_null_index[["int64_col", "string_col"]].head(3) + # Create Series with explicit values in unordered session + series_to_assign = bpd.Series([10, 20, 30], session=unordered_session) + + df["new_col"] = series_to_assign + result_df = df.to_pandas(ordered=False) + + # Verify the column was added and has the correct length + assert "new_col" in result_df.columns + assert len(result_df) == 3 + + # Verify the assigned values are exactly what we expect + assert result_df["new_col"].tolist() == [10, 20, 30] + + @pytest.mark.parametrize( ("by", "ascending", "na_position"), [ diff --git a/tests/system/small/test_null_index.py b/tests/system/small/test_null_index.py index a1c7c0f1a3..cdd8f148b1 100644 --- a/tests/system/small/test_null_index.py +++ b/tests/system/small/test_null_index.py @@ -16,6 +16,9 @@ import pandas as pd import pytest +import bigframes.core +import bigframes.core.blocks as blocks +import bigframes.core.local_data as local_data import bigframes.exceptions import bigframes.pandas as bpd @@ -398,5 +401,49 @@ def test_null_index_transpose(scalars_df_null_index): _ = scalars_df_null_index.T -def test_null_index_contains(scalars_df_null_index): - assert 3 not in scalars_df_null_index +@pytest.mark.parametrize( + ("session_fixture",), + [ + pytest.param("session"), + pytest.param("unordered_session"), + ], +) +def test_block_join_identity_null_index(request, session_fixture): + """Test the Block.join method with block_identity_join=True and null indices.""" + + session = request.getfixturevalue(session_fixture) + + left_data = pd.DataFrame({"a": [1, 2, 3]}) + right_data = pd.DataFrame({"b": [10, 20, 30]}) + + left_managed = local_data.ManagedArrowTable.from_pandas(left_data) + right_managed = local_data.ManagedArrowTable.from_pandas(right_data) + + left_array = bigframes.core.ArrayValue.from_managed(left_managed, session=session) + right_array = 
bigframes.core.ArrayValue.from_managed(right_managed, session=session) + + # Create blocks with empty index_columns to get null indices + left_block = blocks.Block( + left_array, + index_columns=[], + column_labels=["a"], + ) + right_block = blocks.Block( + right_array, + index_columns=[], + column_labels=["b"], + ) + + # Test the join with block_identity_join=True + result_block, (left_mapping, right_mapping) = left_block.join( + right_block, how="left", block_identity_join=True + ) + + # Verify both have null indices + assert left_block.index.nlevels == 0 + assert right_block.index.nlevels == 0 + + # Verify the join succeeded + assert result_block is not None + assert len(left_mapping) > 0 + assert len(right_mapping) > 0 From d264db08e9ba68947e9b18ae716af21c3417c785 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 15 Jul 2025 01:57:13 +0000 Subject: [PATCH 02/10] add testcase --- tests/system/conftest.py | 30 ++++++++++++++++ tests/system/large/blob/test_function.py | 24 +++++++++++++ tests/system/small/test_dataframe.py | 41 ++++++++++++++------- tests/system/small/test_null_index.py | 46 ++++++++++-------------- 4 files changed, 100 insertions(+), 41 deletions(-) diff --git a/tests/system/conftest.py b/tests/system/conftest.py index a75918ed23..16a0b72379 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -601,6 +601,27 @@ def scalars_df_2_index( return session.read_gbq(scalars_table_id_2, index_col="rowindex") +@pytest.fixture(scope="session") +def scalars_df_null_index_partial_ordering( + scalars_table_id: str, unordered_session: bigframes.Session +) -> bigframes.dataframe.DataFrame: + """DataFrame pointing at test data with null index in partial ordering mode.""" + return unordered_session.read_gbq( + scalars_table_id, index_col=bigframes.enums.DefaultIndexKind.NULL + ).sort_values("rowindex") + + +@pytest.fixture(scope="session") +def scalars_series_null_index_partial_ordering( + scalars_table_id: str, unordered_session: 
bigframes.Session +) -> bigframes.series.Series: + """Series pointing at test data with null index in partial ordering mode.""" + df = unordered_session.read_gbq( + scalars_table_id, index_col=bigframes.enums.DefaultIndexKind.NULL + ).sort_values("rowindex") + return df["int64_col"] + + @pytest.fixture(scope="session") def scalars_pandas_df_default_index() -> pd.DataFrame: """pd.DataFrame pointing at test data.""" @@ -1529,3 +1550,12 @@ def audio_mm_df( return session.from_glob_path( audio_gcs_path, name="audio", connection=bq_connection ) + + +@pytest.fixture(scope="session") +def audio_mm_df_partial_ordering( + audio_gcs_path, unordered_session: bigframes.Session, bq_connection: str +) -> bpd.DataFrame: + return unordered_session.from_glob_path( + audio_gcs_path, name="audio", connection=bq_connection + ) diff --git a/tests/system/large/blob/test_function.py b/tests/system/large/blob/test_function.py index a594b144f5..1ced9ba718 100644 --- a/tests/system/large/blob/test_function.py +++ b/tests/system/large/blob/test_function.py @@ -454,3 +454,27 @@ def test_blob_transcribe( assert ( keyword.lower() in actual_text.lower() ), f"Item (verbose={verbose}): Expected keyword '{keyword}' not found in transcribed text. 
" + + +@pytest.mark.parametrize( + "model_name", + [ + "gemini-2.0-flash-001", + "gemini-2.0-flash-lite-001", + ], +) +def test_audio_transcribe_partial_ordering_integration( + audio_mm_df_partial_ordering: bpd.DataFrame, + model_name: str, +): + """Integration test for audio_transcribe with partial ordering mode.""" + df = audio_mm_df_partial_ordering.copy() + + bpd.options.bigquery.ordering_mode = "partial" + + df["transcribed_text"] = df["audio"].blob.audio_transcribe(model_name=model_name) + result = df.to_pandas(ordered=False) + + assert "transcribed_text" in result.columns + assert len(result) > 0 + assert result["transcribed_text"].iloc[0] is not None diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 938fb311b5..a06e02f85c 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -2949,26 +2949,41 @@ def test_df_join_series(scalars_dfs, how): assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) -def test_dataframe_assign_series_null_index_partial_ordering( - scalars_df_null_index: bigframes.dataframe.DataFrame, - unordered_session: bigframes.Session, +def test_assign_series_with_null_index_should_add_column_correctly( + scalars_df_null_index_partial_ordering: bigframes.dataframe.DataFrame, + scalars_series_null_index_partial_ordering: bigframes.series.Series, ): """Test that DataFrame column assignment works with null indices in partial ordering mode.""" + df = scalars_df_null_index_partial_ordering[["int64_col", "string_col"]].head(3) + series_to_assign = scalars_series_null_index_partial_ordering.head(3) - # Use existing null index DataFrame but create Series in unordered session - df = scalars_df_null_index[["int64_col", "string_col"]].head(3) - # Create Series with explicit values in unordered session - series_to_assign = bpd.Series([10, 20, 30], session=unordered_session) + expected_series = pd.Series( + [ + -987654321, + -987654321, + -987654321, + 314159, + 
314159, + 314159, + 123456789, + 123456789, + 123456789, + ], + dtype="Int64", + ) + # Assign the Series as a new column in the DataFrame df["new_col"] = series_to_assign - result_df = df.to_pandas(ordered=False) - # Verify the column was added and has the correct length - assert "new_col" in result_df.columns - assert len(result_df) == 3 + # Materialize the full DataFrame to a pandas object to get the computed result. + result_df = df[["int64_col", "new_col"]].to_pandas() + result_series = result_df["new_col"] - # Verify the assigned values are exactly what we expect - assert result_df["new_col"].tolist() == [10, 20, 30] + pd.testing.assert_series_equal( + result_series.sort_values().reset_index(drop=True), + expected_series, + check_names=False, + ) @pytest.mark.parametrize( diff --git a/tests/system/small/test_null_index.py b/tests/system/small/test_null_index.py index cdd8f148b1..fe0eb76366 100644 --- a/tests/system/small/test_null_index.py +++ b/tests/system/small/test_null_index.py @@ -14,11 +14,11 @@ import pandas as pd +import pandas.testing import pytest import bigframes.core import bigframes.core.blocks as blocks -import bigframes.core.local_data as local_data import bigframes.exceptions import bigframes.pandas as bpd @@ -408,42 +408,32 @@ def test_null_index_transpose(scalars_df_null_index): pytest.param("unordered_session"), ], ) -def test_block_join_identity_null_index(request, session_fixture): +def test_identity_join_with_null_index_should_return_cartesian_product( + request, session_fixture +): """Test the Block.join method with block_identity_join=True and null indices.""" - session = request.getfixturevalue(session_fixture) - left_data = pd.DataFrame({"a": [1, 2, 3]}) right_data = pd.DataFrame({"b": [10, 20, 30]}) - left_managed = local_data.ManagedArrowTable.from_pandas(left_data) - right_managed = local_data.ManagedArrowTable.from_pandas(right_data) - - left_array = bigframes.core.ArrayValue.from_managed(left_managed, session=session) - 
right_array = bigframes.core.ArrayValue.from_managed(right_managed, session=session) + left_block = blocks.Block.from_local(left_data, session=session) + right_block = blocks.Block.from_local(right_data, session=session) - # Create blocks with empty index_columns to get null indices - left_block = blocks.Block( - left_array, - index_columns=[], - column_labels=["a"], - ) - right_block = blocks.Block( - right_array, - index_columns=[], - column_labels=["b"], + expected_df = pd.DataFrame( + { + "a": [1, 2, 3], + "b": [10, 20, 30], + } ) - # Test the join with block_identity_join=True + # Perform the identity join on the two blocks result_block, (left_mapping, right_mapping) = left_block.join( right_block, how="left", block_identity_join=True ) - # Verify both have null indices - assert left_block.index.nlevels == 0 - assert right_block.index.nlevels == 0 - - # Verify the join succeeded - assert result_block is not None - assert len(left_mapping) > 0 - assert len(right_mapping) > 0 + result_df, _ = result_block.to_pandas() + pandas.testing.assert_frame_equal( + result_df.sort_values(by=["a", "b"]).reset_index(drop=True), + expected_df, + check_dtype=False, + ) From cddd7699550838c1bb32481b6330c6d5a7c409a8 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 15 Jul 2025 20:33:32 +0000 Subject: [PATCH 03/10] snapshot update --- bigframes/dataframe.py | 7 ++- .../test_add_numeric/out.sql | 44 +++++++++++++++---- 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 466e93d0da..0d6d9d60d0 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -2288,8 +2288,13 @@ def _assign_scalar(self, label: str, value: Union[int, float, str]) -> DataFrame def _assign_series_join_on_index( self, label: str, series: bigframes.series.Series ) -> DataFrame: + # Only use block_identity_join for null indices + use_block_identity_join = ( + self._block.index.nlevels == 0 and series._block.index.nlevels == 0 + ) + 
block, (get_column_left, get_column_right) = self._block.join( - series._block, how="left", block_identity_join=True + series._block, how="left", block_identity_join=use_block_identity_join ) column_ids = [ diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql index 1496f89f28..365db6d4eb 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql @@ -1,16 +1,42 @@ -WITH `bfcte_0` AS ( +WITH `bfcte_1` AS ( SELECT - `int64_col` AS `bfcol_0`, - `rowindex` AS `bfcol_1` + `rowindex` AS `bfcol_0` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` -), `bfcte_1` AS ( +), `bfcte_4` AS ( + SELECT + `bfcol_0` AS `bfcol_1` + FROM `bfcte_1` +), `bfcte_0` AS ( + SELECT + `int64_col` AS `bfcol_2`, + `rowindex` AS `bfcol_3` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_2` AS ( SELECT *, - `bfcol_1` AS `bfcol_4`, - `bfcol_0` + `bfcol_0` AS `bfcol_5` + `bfcol_3` AS `bfcol_6`, + `bfcol_2` AS `bfcol_7`, + `bfcol_2` AS `bfcol_8` FROM `bfcte_0` +), `bfcte_3` AS ( + SELECT + *, + `bfcol_7` + `bfcol_8` AS `bfcol_12` + FROM `bfcte_2` +), `bfcte_5` AS ( + SELECT + `bfcol_6` AS `bfcol_13`, + `bfcol_12` AS `bfcol_14` + FROM `bfcte_3` +), `bfcte_6` AS ( + SELECT + * + FROM `bfcte_4` + LEFT JOIN `bfcte_5` + ON COALESCE(`bfcol_1`, 0) = COALESCE(`bfcol_13`, 0) + AND COALESCE(`bfcol_1`, 1) = COALESCE(`bfcol_13`, 1) ) SELECT - `bfcol_4` AS `rowindex`, - `bfcol_5` AS `int64_col` -FROM `bfcte_1` \ No newline at end of file + `bfcol_1` AS `rowindex`, + `bfcol_14` AS `int64_col` +FROM `bfcte_6` From 8a36137c934f019793661e6de37ad98f3f1e1a86 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 15 Jul 2025 20:56:19 +0000 Subject: [PATCH 04/10] revert change to 
tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql --- .../test_add_numeric/out.sql | 44 ++++--------------- 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql index 365db6d4eb..a87f7821f7 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql @@ -1,42 +1,16 @@ -WITH `bfcte_1` AS ( +WITH `bfcte_0` AS ( SELECT - `rowindex` AS `bfcol_0` + `int64_col` AS `bfcol_0`, + `rowindex` AS `bfcol_1` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` -), `bfcte_4` AS ( - SELECT - `bfcol_0` AS `bfcol_1` - FROM `bfcte_1` -), `bfcte_0` AS ( - SELECT - `int64_col` AS `bfcol_2`, - `rowindex` AS `bfcol_3` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` -), `bfcte_2` AS ( +), `bfcte_1` AS ( SELECT *, - `bfcol_3` AS `bfcol_6`, - `bfcol_2` AS `bfcol_7`, - `bfcol_2` AS `bfcol_8` + `bfcol_1` AS `bfcol_4`, + `bfcol_0` + `bfcol_0` AS `bfcol_5` FROM `bfcte_0` -), `bfcte_3` AS ( - SELECT - *, - `bfcol_7` + `bfcol_8` AS `bfcol_12` - FROM `bfcte_2` -), `bfcte_5` AS ( - SELECT - `bfcol_6` AS `bfcol_13`, - `bfcol_12` AS `bfcol_14` - FROM `bfcte_3` -), `bfcte_6` AS ( - SELECT - * - FROM `bfcte_4` - LEFT JOIN `bfcte_5` - ON COALESCE(`bfcol_1`, 0) = COALESCE(`bfcol_13`, 0) - AND COALESCE(`bfcol_1`, 1) = COALESCE(`bfcol_13`, 1) ) SELECT - `bfcol_1` AS `rowindex`, - `bfcol_14` AS `int64_col` -FROM `bfcte_6` + `bfcol_4` AS `rowindex`, + `bfcol_5` AS `int64_col` +FROM `bfcte_1` From 7d799f6a54f4aa85ecc27b9694d8288b95b9e898 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 15 Jul 2025 21:09:01 +0000 Subject: [PATCH 05/10] Restore out.sql to match main branch --- 
.../snapshots/test_compile_join/test_compile_join/out.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join/out.sql index 04ee767f8a..c53b91f5b7 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join/out.sql @@ -29,4 +29,4 @@ WITH `bfcte_1` AS ( SELECT `bfcol_3` AS `int64_col`, `bfcol_7` AS `int64_too` -FROM `bfcte_4` \ No newline at end of file +FROM `bfcte_4` From 5492808cc2497619da907bdf79a3e8a6f690d4a7 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 15 Jul 2025 21:11:36 +0000 Subject: [PATCH 06/10] Revert "Restore out.sql to match main branch" This reverts commit 80e5298df38a1964fee98e99f6fd0d7e16f615cd. --- .../snapshots/test_compile_join/test_compile_join/out.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join/out.sql index c53b91f5b7..04ee767f8a 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join/out.sql @@ -29,4 +29,4 @@ WITH `bfcte_1` AS ( SELECT `bfcol_3` AS `int64_col`, `bfcol_7` AS `int64_too` -FROM `bfcte_4` +FROM `bfcte_4` \ No newline at end of file From 1811b47a67c4447483e61fdafb6ddf3b804e2c17 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 15 Jul 2025 21:12:01 +0000 Subject: [PATCH 07/10] Revert "revert change to tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql" This reverts commit abc6daed038bda62c8b7bc39837ac744dcad237f. 
--- .../test_add_numeric/out.sql | 44 +++++++++++++++---- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql index a87f7821f7..365db6d4eb 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql @@ -1,16 +1,42 @@ -WITH `bfcte_0` AS ( +WITH `bfcte_1` AS ( SELECT - `int64_col` AS `bfcol_0`, - `rowindex` AS `bfcol_1` + `rowindex` AS `bfcol_0` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` -), `bfcte_1` AS ( +), `bfcte_4` AS ( + SELECT + `bfcol_0` AS `bfcol_1` + FROM `bfcte_1` +), `bfcte_0` AS ( + SELECT + `int64_col` AS `bfcol_2`, + `rowindex` AS `bfcol_3` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` +), `bfcte_2` AS ( SELECT *, - `bfcol_1` AS `bfcol_4`, - `bfcol_0` + `bfcol_0` AS `bfcol_5` + `bfcol_3` AS `bfcol_6`, + `bfcol_2` AS `bfcol_7`, + `bfcol_2` AS `bfcol_8` FROM `bfcte_0` +), `bfcte_3` AS ( + SELECT + *, + `bfcol_7` + `bfcol_8` AS `bfcol_12` + FROM `bfcte_2` +), `bfcte_5` AS ( + SELECT + `bfcol_6` AS `bfcol_13`, + `bfcol_12` AS `bfcol_14` + FROM `bfcte_3` +), `bfcte_6` AS ( + SELECT + * + FROM `bfcte_4` + LEFT JOIN `bfcte_5` + ON COALESCE(`bfcol_1`, 0) = COALESCE(`bfcol_13`, 0) + AND COALESCE(`bfcol_1`, 1) = COALESCE(`bfcol_13`, 1) ) SELECT - `bfcol_4` AS `rowindex`, - `bfcol_5` AS `int64_col` -FROM `bfcte_1` + `bfcol_1` AS `rowindex`, + `bfcol_14` AS `int64_col` +FROM `bfcte_6` From 1e439afe9c10e217e9e81fd150a808e9bc1275fd Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 15 Jul 2025 21:12:12 +0000 Subject: [PATCH 08/10] Revert "snapshot update" This reverts commit 123a50e34067d0d4f0ae415ba9b5e77fa605a015. 
--- bigframes/dataframe.py | 7 +-- .../test_add_numeric/out.sql | 44 ++++--------------- 2 files changed, 10 insertions(+), 41 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 0d6d9d60d0..466e93d0da 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -2288,13 +2288,8 @@ def _assign_scalar(self, label: str, value: Union[int, float, str]) -> DataFrame def _assign_series_join_on_index( self, label: str, series: bigframes.series.Series ) -> DataFrame: - # Only use block_identity_join for null indices - use_block_identity_join = ( - self._block.index.nlevels == 0 and series._block.index.nlevels == 0 - ) - block, (get_column_left, get_column_right) = self._block.join( - series._block, how="left", block_identity_join=use_block_identity_join + series._block, how="left", block_identity_join=True ) column_ids = [ diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql index 365db6d4eb..1496f89f28 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric/out.sql @@ -1,42 +1,16 @@ -WITH `bfcte_1` AS ( +WITH `bfcte_0` AS ( SELECT - `rowindex` AS `bfcol_0` + `int64_col` AS `bfcol_0`, + `rowindex` AS `bfcol_1` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` -), `bfcte_4` AS ( - SELECT - `bfcol_0` AS `bfcol_1` - FROM `bfcte_1` -), `bfcte_0` AS ( - SELECT - `int64_col` AS `bfcol_2`, - `rowindex` AS `bfcol_3` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` -), `bfcte_2` AS ( +), `bfcte_1` AS ( SELECT *, - `bfcol_3` AS `bfcol_6`, - `bfcol_2` AS `bfcol_7`, - `bfcol_2` AS `bfcol_8` + `bfcol_1` AS `bfcol_4`, + `bfcol_0` + `bfcol_0` AS `bfcol_5` FROM `bfcte_0` -), `bfcte_3` AS ( - SELECT - *, - `bfcol_7` + `bfcol_8` AS 
`bfcol_12` - FROM `bfcte_2` -), `bfcte_5` AS ( - SELECT - `bfcol_6` AS `bfcol_13`, - `bfcol_12` AS `bfcol_14` - FROM `bfcte_3` -), `bfcte_6` AS ( - SELECT - * - FROM `bfcte_4` - LEFT JOIN `bfcte_5` - ON COALESCE(`bfcol_1`, 0) = COALESCE(`bfcol_13`, 0) - AND COALESCE(`bfcol_1`, 1) = COALESCE(`bfcol_13`, 1) ) SELECT - `bfcol_1` AS `rowindex`, - `bfcol_14` AS `int64_col` -FROM `bfcte_6` + `bfcol_4` AS `rowindex`, + `bfcol_5` AS `int64_col` +FROM `bfcte_1` \ No newline at end of file From 37a55b06485caa54e298fd516e1e745573464146 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 15 Jul 2025 21:14:35 +0000 Subject: [PATCH 09/10] check if both operands have null indices before applying the block identity join logic --- bigframes/dataframe.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 466e93d0da..0d6d9d60d0 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -2288,8 +2288,13 @@ def _assign_scalar(self, label: str, value: Union[int, float, str]) -> DataFrame def _assign_series_join_on_index( self, label: str, series: bigframes.series.Series ) -> DataFrame: + # Only use block_identity_join for null indices + use_block_identity_join = ( + self._block.index.nlevels == 0 and series._block.index.nlevels == 0 + ) + block, (get_column_left, get_column_right) = self._block.join( - series._block, how="left", block_identity_join=True + series._block, how="left", block_identity_join=use_block_identity_join ) column_ids = [ From 55609022038b18cf34016f9a9d599cd2d8cda693 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 15 Jul 2025 21:38:30 +0000 Subject: [PATCH 10/10] change the line separation --- tests/system/large/blob/test_function.py | 1 - tests/system/small/test_dataframe.py | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/system/large/blob/test_function.py b/tests/system/large/blob/test_function.py index 1ced9ba718..03a8ffb61e 100644 --- 
a/tests/system/large/blob/test_function.py +++ b/tests/system/large/blob/test_function.py @@ -469,7 +469,6 @@ def test_audio_transcribe_partial_ordering_integration( ): """Integration test for audio_transcribe with partial ordering mode.""" df = audio_mm_df_partial_ordering.copy() - bpd.options.bigquery.ordering_mode = "partial" df["transcribed_text"] = df["audio"].blob.audio_transcribe(model_name=model_name) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index a06e02f85c..456a0be9be 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -2956,7 +2956,6 @@ def test_assign_series_with_null_index_should_add_column_correctly( """Test that DataFrame column assignment works with null indices in partial ordering mode.""" df = scalars_df_null_index_partial_ordering[["int64_col", "string_col"]].head(3) series_to_assign = scalars_series_null_index_partial_ordering.head(3) - expected_series = pd.Series( [ -987654321,