diff --git a/bigframes/testing/utils.py b/bigframes/testing/utils.py
index a0bfc9e648..cf9c9fc031 100644
--- a/bigframes/testing/utils.py
+++ b/bigframes/testing/utils.py
@@ -448,12 +448,12 @@ def get_function_name(func, package_requirements=None, is_row_processor=False):
     return f"bigframes_{function_hash}"
 
 
-def _apply_unary_ops(
+def _apply_ops_to_sql(
     obj: bpd.DataFrame,
     ops_list: Sequence[ex.Expression],
     new_names: Sequence[str],
 ) -> str:
-    """Applies a list of unary ops to the given DataFrame and returns the SQL
+    """Applies a list of ops to the given DataFrame and returns the SQL
     representing the resulting DataFrame."""
     array_value = obj._block.expr
     result, old_names = array_value.compute_values(ops_list)
@@ -485,13 +485,6 @@ def _apply_nary_op(
 ) -> str:
     """Applies a nary op to the given DataFrame and return the SQL
     representing the resulting DataFrame."""
-    array_value = obj._block.expr
     op_expr = op.as_expr(*args)
-    result, col_ids = array_value.compute_values([op_expr])
-
-    # Rename columns for deterministic golden SQL results.
-    assert len(col_ids) == 1
-    result = result.rename_columns({col_ids[0]: args[0]}).select_columns([args[0]])
-
-    sql = result.session._executor.to_sql(result, enable_cache=False)
+    sql = _apply_ops_to_sql(obj, [op_expr], [args[0]])  # type: ignore
    return sql
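For reference, this is the call pattern the rename standardizes across the test files below. A minimal sketch, not a verbatim excerpt: the import lines and the scalar_types_df/snapshot fixtures are assumptions based on the test modules touched in this change.

    from bigframes import operations as ops
    from bigframes.testing import utils

    def test_lower(scalar_types_df, snapshot):
        bf_df = scalar_types_df[["string_col"]]
        # One expression per output column; the names in the second list are
        # applied to the outputs so the golden SQL stays deterministic.
        sql = utils._apply_ops_to_sql(
            bf_df, [ops.lower_op.as_expr("string_col")], ["string_col"]
        )
        snapshot.assert_match(sql, "out.sql")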
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_ai_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_ai_ops.py
index 13481d88c6..45024fc691 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_ai_ops.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_ai_ops.py
@@ -39,7 +39,7 @@ def test_ai_generate(scalar_types_df: dataframe.DataFrame, snapshot):
         output_schema=None,
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -58,7 +58,7 @@ def test_ai_generate_with_output_schema(scalar_types_df: dataframe.DataFrame, sn
         output_schema="x INT64, y FLOAT64",
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -82,7 +82,7 @@ def test_ai_generate_with_model_param(scalar_types_df: dataframe.DataFrame, snap
         output_schema=None,
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -100,7 +100,7 @@ def test_ai_generate_bool(scalar_types_df: dataframe.DataFrame, snapshot):
         model_params=None,
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -125,7 +125,7 @@ def test_ai_generate_bool_with_model_param(
         model_params=json.dumps(dict()),
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -144,7 +144,7 @@ def test_ai_generate_int(scalar_types_df: dataframe.DataFrame, snapshot):
         model_params=None,
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -170,7 +170,7 @@ def test_ai_generate_int_with_model_param(
         model_params=json.dumps(dict()),
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -189,7 +189,7 @@ def test_ai_generate_double(scalar_types_df: dataframe.DataFrame, snapshot):
         model_params=None,
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -215,7 +215,7 @@ def test_ai_generate_double_with_model_param(
         model_params=json.dumps(dict()),
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -230,7 +230,7 @@ def test_ai_if(scalar_types_df: dataframe.DataFrame, snapshot):
         connection_id=CONNECTION_ID,
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
@@ -246,7 +246,7 @@ def test_ai_classify(scalar_types_df: dataframe.DataFrame, snapshot):
         connection_id=CONNECTION_ID,
     )
 
-    sql = utils._apply_unary_ops(scalar_types_df, [op.as_expr(col_name)], ["result"])
+    sql = utils._apply_ops_to_sql(scalar_types_df, [op.as_expr(col_name)], ["result"])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -259,7 +259,7 @@ def test_ai_score(scalar_types_df: dataframe.DataFrame, snapshot):
         connection_id=CONNECTION_ID,
     )
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [op.as_expr(col_name, col_name)], ["result"]
     )
 
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_array_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_array_ops.py
index 407c7bbb3c..61b8b99479 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_array_ops.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_array_ops.py
@@ -25,7 +25,7 @@ def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot):
     col_name = "string_list_col"
     bf_df = repeated_types_df[[col_name]]
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.ArrayToStringOp(delimiter=".").as_expr(col_name)], [col_name]
     )
 
@@ -35,7 +35,7 @@ def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot):
 def test_array_index(repeated_types_df: bpd.DataFrame, snapshot):
     col_name = "string_list_col"
     bf_df = repeated_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [convert_index(1).as_expr(col_name)], [col_name]
     )
 
@@ -45,7 +45,7 @@ def test_array_index(repeated_types_df: bpd.DataFrame, snapshot):
 def test_array_slice_with_only_start(repeated_types_df: bpd.DataFrame, snapshot):
     col_name = "string_list_col"
     bf_df = repeated_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [convert_slice(slice(1, None)).as_expr(col_name)], [col_name]
     )
 
@@ -55,7 +55,7 @@ def test_array_slice_with_only_start(repeated_types_df: bpd.DataFrame, snapshot)
 def test_array_slice_with_start_and_stop(repeated_types_df: bpd.DataFrame, snapshot):
     col_name = "string_list_col"
     bf_df = repeated_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [convert_slice(slice(1, 5)).as_expr(col_name)], [col_name]
     )
 
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py
index f278a15f3c..52b57623b3 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py
@@ -40,7 +40,7 @@ def test_is_in(scalar_types_df: bpd.DataFrame, snapshot):
         "float_in_ints": ops.IsInOp(values=(1, 2, 3, None)).as_expr(float_col),
     }
 
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
     snapshot.assert_match(sql, "out.sql")
 
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py
index 3261113806..6384dc79a9 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py
@@ -25,7 +25,7 @@ def test_date(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
 
-    sql = utils._apply_unary_ops(bf_df, [ops.date_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.date_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -33,7 +33,7 @@ def test_date(scalar_types_df: bpd.DataFrame, snapshot):
 def test_day(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.day_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.day_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -43,14 +43,14 @@ def test_dayofweek(scalar_types_df: bpd.DataFrame, snapshot):
     bf_df = scalar_types_df[col_names]
     ops_map = {col_name: ops.dayofweek_op.as_expr(col_name) for col_name in col_names}
 
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_dayofyear(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.dayofyear_op.as_expr(col_name)], [col_name]
     )
 
@@ -75,7 +75,7 @@ def test_floor_dt(scalar_types_df: bpd.DataFrame, snapshot):
         "datetime_col_us": ops.FloorDtOp("us").as_expr("datetime_col"),
     }
 
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -85,7 +85,7 @@ def test_floor_dt_op_invalid_freq(scalar_types_df: bpd.DataFrame):
     with pytest.raises(
         NotImplementedError, match="Unsupported freq paramater: invalid"
     ):
-        utils._apply_unary_ops(
+        utils._apply_ops_to_sql(
             bf_df,
             [ops.FloorDtOp(freq="invalid").as_expr(col_name)],  # type:ignore
             [col_name],
         )
@@ -95,7 +95,7 @@ def test_hour(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
 
-    sql = utils._apply_unary_ops(bf_df, [ops.hour_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.hour_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -103,7 +103,7 @@ def test_hour(scalar_types_df: bpd.DataFrame, snapshot):
 def test_minute(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.minute_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.minute_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -111,7 +111,7 @@ def test_minute(scalar_types_df: bpd.DataFrame, snapshot):
 def test_month(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.month_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.month_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -119,7 +119,7 @@ def test_month(scalar_types_df: bpd.DataFrame, snapshot):
 def test_normalize(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.normalize_op.as_expr(col_name)], [col_name]
     )
 
@@ -129,7 +129,7 @@ def test_normalize(scalar_types_df: bpd.DataFrame, snapshot):
 def test_quarter(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.quarter_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.quarter_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -137,7 +137,7 @@ def test_quarter(scalar_types_df: bpd.DataFrame, snapshot):
 def test_second(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.second_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.second_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -145,7 +145,7 @@ def test_second(scalar_types_df: bpd.DataFrame, snapshot):
 def test_strftime(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.StrftimeOp("%Y-%m-%d").as_expr(col_name)], [col_name]
     )
 
@@ -155,7 +155,7 @@ def test_strftime(scalar_types_df: bpd.DataFrame, snapshot):
 def test_time(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.time_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.time_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -163,7 +163,7 @@ def test_time(scalar_types_df: bpd.DataFrame, snapshot):
 def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "int64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.ToDatetimeOp().as_expr(col_name)], [col_name]
     )
 
@@ -173,7 +173,7 @@ def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot):
 def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "int64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.ToTimestampOp().as_expr(col_name)], [col_name]
     )
 
@@ -183,7 +183,7 @@ def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot):
 def test_unix_micros(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.UnixMicros().as_expr(col_name)], [col_name]
     )
 
@@ -193,7 +193,7 @@ def test_unix_micros(scalar_types_df: bpd.DataFrame, snapshot):
 def test_unix_millis(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.UnixMillis().as_expr(col_name)], [col_name]
     )
 
@@ -203,7 +203,7 @@ def test_unix_millis(scalar_types_df: bpd.DataFrame, snapshot):
 def test_unix_seconds(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.UnixSeconds().as_expr(col_name)], [col_name]
     )
 
@@ -213,7 +213,7 @@ def test_unix_seconds(scalar_types_df: bpd.DataFrame, snapshot):
 def test_year(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.year_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.year_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -221,7 +221,7 @@ def test_year(scalar_types_df: bpd.DataFrame, snapshot):
 def test_iso_day(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.iso_day_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.iso_day_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -229,7 +229,9 @@ def test_iso_day(scalar_types_df: bpd.DataFrame, snapshot):
 def test_iso_week(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.iso_week_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(
+        bf_df, [ops.iso_week_op.as_expr(col_name)], [col_name]
+    )
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -237,7 +239,9 @@ def test_iso_week(scalar_types_df: bpd.DataFrame, snapshot):
 def test_iso_year(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.iso_year_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(
+        bf_df, [ops.iso_year_op.as_expr(col_name)], [col_name]
+    )
 
     snapshot.assert_match(sql, "out.sql")
 
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py
index fd9732bf89..aa40c21fd9 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py
@@ -43,7 +43,7 @@ def test_astype_int(scalar_types_df: bpd.DataFrame, snapshot):
         "str_const": ops.AsTypeOp(to_type=to_type).as_expr(ex.const("100")),
     }
 
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -56,7 +56,7 @@ def test_astype_float(scalar_types_df: bpd.DataFrame, snapshot):
         "str_const": ops.AsTypeOp(to_type=to_type).as_expr(ex.const("1.34235e4")),
         "bool_w_safe": ops.AsTypeOp(to_type=to_type, safe=True).as_expr("bool_col"),
     }
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -71,7 +71,7 @@ def test_astype_bool(scalar_types_df: bpd.DataFrame, snapshot):
             "float64_col"
         ),
     }
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -90,7 +90,7 @@ def test_astype_time_like(scalar_types_df: bpd.DataFrame, snapshot):
             to_type=dtypes.TIME_DTYPE, safe=True
         ).as_expr("int64_col"),
     }
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -103,7 +103,7 @@ def test_astype_string(scalar_types_df: bpd.DataFrame, snapshot):
         "bool_col": ops.AsTypeOp(to_type=to_type).as_expr("bool_col"),
         "bool_w_safe": ops.AsTypeOp(to_type=to_type, safe=True).as_expr("bool_col"),
     }
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -122,7 +122,7 @@ def test_astype_json(scalar_types_df: bpd.DataFrame, snapshot):
             "string_col"
         ),
     }
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -138,7 +138,7 @@ def test_astype_from_json(json_types_df: bpd.DataFrame, snapshot):
            "json_col"
         ),
     }
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -152,7 +152,7 @@ def test_astype_json_invalid(
             "datetime_col"
         ),
     }
-    utils._apply_unary_ops(
+    utils._apply_ops_to_sql(
         scalar_types_df, list(ops_map_to.values()), list(ops_map_to.keys())
     )
 
@@ -163,7 +163,7 @@
             "json_col"
         ),
     }
-    utils._apply_unary_ops(
+    utils._apply_ops_to_sql(
         json_types_df, list(ops_map_from.values()), list(ops_map_from.keys())
     )
 
@@ -228,7 +228,7 @@ def test_clip(scalar_types_df: bpd.DataFrame, snapshot):
 def test_hash(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.hash_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.hash_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -240,7 +240,7 @@ def test_invert(scalar_types_df: bpd.DataFrame, snapshot):
         "bytes_col": ops.invert_op.as_expr("bytes_col"),
         "bool_col": ops.invert_op.as_expr("bool_col"),
     }
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -248,7 +248,7 @@ def test_invert(scalar_types_df: bpd.DataFrame, snapshot):
 def test_isnull(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.isnull_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.isnull_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -256,14 +256,14 @@ def test_isnull(scalar_types_df: bpd.DataFrame, snapshot):
 def test_notnull(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.notnull_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.notnull_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_row_key(scalar_types_df: bpd.DataFrame, snapshot):
     column_ids = (col for col in scalar_types_df._block.expr.column_ids)
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         scalar_types_df, [ops.RowKey().as_expr(*column_ids)], ["row_key"]
     )
     snapshot.assert_match(sql, "out.sql")
@@ -283,7 +283,7 @@ def test_sql_scalar_op(scalar_types_df: bpd.DataFrame, snapshot):
 def test_map(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df,
         [ops.MapOp(mappings=(("value1", "mapped1"),)).as_expr(col_name)],
         [col_name],
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py
index e136d172f6..9b99b37fb6 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_geo_ops.py
@@ -24,7 +24,9 @@ def test_geo_area(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "geography_col"
     bf_df = scalar_types_df[[col_name]]
 
-    sql = utils._apply_unary_ops(bf_df, [ops.geo_area_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(
+        bf_df, [ops.geo_area_op.as_expr(col_name)], [col_name]
+    )
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -32,7 +34,7 @@ def test_geo_area(scalar_types_df: bpd.DataFrame, snapshot):
 def test_geo_st_astext(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "geography_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.geo_st_astext_op.as_expr(col_name)], [col_name]
     )
 
@@ -42,7 +44,7 @@ def test_geo_st_astext(scalar_types_df: bpd.DataFrame, snapshot):
 def test_geo_st_boundary(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "geography_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.geo_st_boundary_op.as_expr(col_name)], [col_name]
     )
 
@@ -52,7 +54,7 @@ def test_geo_st_boundary(scalar_types_df: bpd.DataFrame, snapshot):
 def test_geo_st_buffer(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "geography_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.GeoStBufferOp(1.0, 8.0, False).as_expr(col_name)], [col_name]
     )
 
@@ -62,7 +64,7 @@ def test_geo_st_buffer(scalar_types_df: bpd.DataFrame, snapshot):
 def test_geo_st_centroid(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "geography_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.geo_st_centroid_op.as_expr(col_name)], [col_name]
     )
 
@@ -72,7 +74,7 @@ def test_geo_st_centroid(scalar_types_df: bpd.DataFrame, snapshot):
 def test_geo_st_convexhull(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "geography_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.geo_st_convexhull_op.as_expr(col_name)], [col_name]
     )
 
@@ -82,7 +84,7 @@ def test_geo_st_convexhull(scalar_types_df: bpd.DataFrame, snapshot):
 def test_geo_st_geogfromtext(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.geo_st_geogfromtext_op.as_expr(col_name)], [col_name]
     )
 
@@ -92,7 +94,7 @@ def test_geo_st_geogfromtext(scalar_types_df: bpd.DataFrame, snapshot):
 def test_geo_st_isclosed(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "geography_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.geo_st_isclosed_op.as_expr(col_name)], [col_name]
     )
 
@@ -102,7 +104,7 @@ def test_geo_st_isclosed(scalar_types_df: bpd.DataFrame, snapshot):
 def test_geo_st_length(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "geography_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.GeoStLengthOp(True).as_expr(col_name)], [col_name]
     )
 
@@ -112,7 +114,7 @@ def test_geo_st_length(scalar_types_df: bpd.DataFrame, snapshot):
 def test_geo_x(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "geography_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.geo_x_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.geo_x_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -120,6 +122,6 @@ def test_geo_x(scalar_types_df: bpd.DataFrame, snapshot):
 def test_geo_y(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "geography_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.geo_y_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.geo_y_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py
index 75206091e0..ca0896bd03 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_json_ops.py
@@ -25,7 +25,7 @@ def test_json_extract(json_types_df: bpd.DataFrame, snapshot):
     col_name = "json_col"
     bf_df = json_types_df[[col_name]]
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.JSONExtract(json_path="$").as_expr(col_name)], [col_name]
     )
 
@@ -35,7 +35,7 @@ def test_json_extract(json_types_df: bpd.DataFrame, snapshot):
 def test_json_extract_array(json_types_df: bpd.DataFrame, snapshot):
     col_name = "json_col"
     bf_df = json_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.JSONExtractArray(json_path="$").as_expr(col_name)], [col_name]
     )
 
@@ -45,7 +45,7 @@ def test_json_extract_array(json_types_df: bpd.DataFrame, snapshot):
 def test_json_extract_string_array(json_types_df: bpd.DataFrame, snapshot):
     col_name = "json_col"
     bf_df = json_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.JSONExtractStringArray(json_path="$").as_expr(col_name)], [col_name]
     )
 
@@ -55,7 +55,7 @@ def test_json_extract_string_array(json_types_df: bpd.DataFrame, snapshot):
 def test_json_query(json_types_df: bpd.DataFrame, snapshot):
     col_name = "json_col"
     bf_df = json_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.JSONQuery(json_path="$").as_expr(col_name)], [col_name]
     )
 
@@ -65,7 +65,7 @@ def test_json_query(json_types_df: bpd.DataFrame, snapshot):
 def test_json_query_array(json_types_df: bpd.DataFrame, snapshot):
     col_name = "json_col"
     bf_df = json_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.JSONQueryArray(json_path="$").as_expr(col_name)], [col_name]
     )
 
@@ -75,7 +75,7 @@ def test_json_query_array(json_types_df: bpd.DataFrame, snapshot):
 def test_json_value(json_types_df: bpd.DataFrame, snapshot):
     col_name = "json_col"
     bf_df = json_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.JSONValue(json_path="$").as_expr(col_name)], [col_name]
     )
 
@@ -85,7 +85,9 @@ def test_json_value(json_types_df: bpd.DataFrame, snapshot):
 def test_parse_json(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.ParseJSON().as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(
+        bf_df, [ops.ParseJSON().as_expr(col_name)], [col_name]
+    )
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -93,7 +95,7 @@ def test_parse_json(scalar_types_df: bpd.DataFrame, snapshot):
 def test_to_json_string(json_types_df: bpd.DataFrame, snapshot):
     col_name = "json_col"
     bf_df = json_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.ToJSONString().as_expr(col_name)], [col_name]
     )
 
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py
index ab9fe53092..c66fe15c16 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_numeric_ops.py
@@ -26,7 +26,7 @@ def test_arccosh(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
 
-    sql = utils._apply_unary_ops(bf_df, [ops.arccosh_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.arccosh_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -34,7 +34,7 @@ def test_arccosh(scalar_types_df: bpd.DataFrame, snapshot):
 def test_arccos(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.arccos_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.arccos_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -42,7 +42,7 @@ def test_arccos(scalar_types_df: bpd.DataFrame, snapshot):
 def test_arcsin(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.arcsin_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.arcsin_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -50,7 +50,7 @@ def test_arcsin(scalar_types_df: bpd.DataFrame, snapshot):
 def test_arcsinh(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.arcsinh_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.arcsinh_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -58,7 +58,7 @@ def test_arcsinh(scalar_types_df: bpd.DataFrame, snapshot):
 def test_arctan(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.arctan_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.arctan_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -66,7 +66,7 @@ def test_arctan(scalar_types_df: bpd.DataFrame, snapshot):
 def test_arctanh(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.arctanh_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.arctanh_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -74,7 +74,7 @@ def test_arctanh(scalar_types_df: bpd.DataFrame, snapshot):
 def test_abs(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.abs_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.abs_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -82,7 +82,7 @@ def test_abs(scalar_types_df: bpd.DataFrame, snapshot):
 def test_ceil(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.ceil_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.ceil_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -90,7 +90,7 @@ def test_ceil(scalar_types_df: bpd.DataFrame, snapshot):
 def test_cos(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.cos_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.cos_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -98,7 +98,7 @@ def test_cos(scalar_types_df: bpd.DataFrame, snapshot):
 def test_cosh(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.cosh_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.cosh_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -106,7 +106,7 @@ def test_cosh(scalar_types_df: bpd.DataFrame, snapshot):
 def test_exp(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.exp_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.exp_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -114,7 +114,7 @@ def test_exp(scalar_types_df: bpd.DataFrame, snapshot):
 def test_expm1(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.expm1_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.expm1_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -122,7 +122,7 @@ def test_expm1(scalar_types_df: bpd.DataFrame, snapshot):
 def test_floor(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.floor_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.floor_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -130,7 +130,7 @@ def test_floor(scalar_types_df: bpd.DataFrame, snapshot):
 def test_ln(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.ln_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.ln_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -138,7 +138,7 @@ def test_ln(scalar_types_df: bpd.DataFrame, snapshot):
 def test_log10(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.log10_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.log10_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -146,7 +146,7 @@ def test_log10(scalar_types_df: bpd.DataFrame, snapshot):
 def test_log1p(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.log1p_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.log1p_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -154,7 +154,7 @@ def test_log1p(scalar_types_df: bpd.DataFrame, snapshot):
 def test_neg(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.neg_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.neg_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -162,7 +162,7 @@ def test_neg(scalar_types_df: bpd.DataFrame, snapshot):
 def test_pos(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.pos_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.pos_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -184,7 +184,7 @@ def test_round(scalar_types_df: bpd.DataFrame, snapshot):
 def test_sqrt(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.sqrt_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.sqrt_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -192,7 +192,7 @@ def test_sqrt(scalar_types_df: bpd.DataFrame, snapshot):
 def test_sin(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.sin_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.sin_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -200,7 +200,7 @@ def test_sin(scalar_types_df: bpd.DataFrame, snapshot):
 def test_sinh(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.sinh_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.sinh_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -208,7 +208,7 @@ def test_sinh(scalar_types_df: bpd.DataFrame, snapshot):
 def test_tan(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.tan_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.tan_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -216,7 +216,7 @@ def test_tan(scalar_types_df: bpd.DataFrame, snapshot):
 def test_tanh(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.tanh_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.tanh_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py
index 99dbce9410..b20c038ed0 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_string_ops.py
@@ -25,7 +25,7 @@ def test_capitalize(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
 
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.capitalize_op.as_expr(col_name)], [col_name]
     )
 
@@ -40,14 +40,14 @@ def test_endswith(scalar_types_df: bpd.DataFrame, snapshot):
         "double": ops.EndsWithOp(pat=("ab", "cd")).as_expr(col_name),
         "empty": ops.EndsWithOp(pat=()).as_expr(col_name),
     }
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_isalnum(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.isalnum_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.isalnum_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -55,7 +55,7 @@ def test_isalnum(scalar_types_df: bpd.DataFrame, snapshot):
 def test_isalpha(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.isalpha_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.isalpha_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -63,7 +63,7 @@ def test_isalpha(scalar_types_df: bpd.DataFrame, snapshot):
 def test_isdecimal(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.isdecimal_op.as_expr(col_name)], [col_name]
     )
 
@@ -73,7 +73,7 @@ def test_isdecimal(scalar_types_df: bpd.DataFrame, snapshot):
 def test_isdigit(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.isdigit_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.isdigit_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -81,7 +81,7 @@ def test_isdigit(scalar_types_df: bpd.DataFrame, snapshot):
 def test_islower(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.islower_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.islower_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -89,7 +89,7 @@ def test_islower(scalar_types_df: bpd.DataFrame, snapshot):
 def test_isnumeric(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.isnumeric_op.as_expr(col_name)], [col_name]
     )
 
@@ -99,7 +99,7 @@ def test_isnumeric(scalar_types_df: bpd.DataFrame, snapshot):
 def test_isspace(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.isspace_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.isspace_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -107,7 +107,7 @@ def test_isspace(scalar_types_df: bpd.DataFrame, snapshot):
 def test_isupper(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.isupper_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.isupper_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -115,7 +115,7 @@ def test_isupper(scalar_types_df: bpd.DataFrame, snapshot):
 def test_len(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.len_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.len_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -123,7 +123,7 @@ def test_len(scalar_types_df: bpd.DataFrame, snapshot):
 def test_lower(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.lower_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.lower_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -131,7 +131,7 @@ def test_lower(scalar_types_df: bpd.DataFrame, snapshot):
 def test_lstrip(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.StrLstripOp(" ").as_expr(col_name)], [col_name]
     )
 
@@ -141,7 +141,7 @@ def test_lstrip(scalar_types_df: bpd.DataFrame, snapshot):
 def test_replace_str(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.ReplaceStrOp("e", "a").as_expr(col_name)], [col_name]
     )
     snapshot.assert_match(sql, "out.sql")
 
@@ -150,7 +150,7 @@ def test_replace_str(scalar_types_df: bpd.DataFrame, snapshot):
 def test_regex_replace_str(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.RegexReplaceStrOp(r"e", "a").as_expr(col_name)], [col_name]
     )
     snapshot.assert_match(sql, "out.sql")
 
@@ -159,7 +159,7 @@ def test_regex_replace_str(scalar_types_df: bpd.DataFrame, snapshot):
 def test_reverse(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.reverse_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.reverse_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -167,7 +167,7 @@ def test_reverse(scalar_types_df: bpd.DataFrame, snapshot):
 def test_rstrip(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.StrRstripOp(" ").as_expr(col_name)], [col_name]
     )
 
@@ -183,14 +183,16 @@ def test_startswith(scalar_types_df: bpd.DataFrame, snapshot):
         "double": ops.StartsWithOp(pat=("ab", "cd")).as_expr(col_name),
         "empty": ops.StartsWithOp(pat=()).as_expr(col_name),
     }
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_str_get(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.StrGetOp(1).as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(
+        bf_df, [ops.StrGetOp(1).as_expr(col_name)], [col_name]
+    )
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -203,14 +205,14 @@ def test_str_pad(scalar_types_df: bpd.DataFrame, snapshot):
         "right": ops.StrPadOp(length=10, fillchar="-", side="right").as_expr(col_name),
         "both": ops.StrPadOp(length=10, fillchar="-", side="both").as_expr(col_name),
     }
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_str_slice(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.StrSliceOp(1, 3).as_expr(col_name)], [col_name]
     )
 
@@ -220,7 +222,7 @@ def test_str_slice(scalar_types_df: bpd.DataFrame, snapshot):
 def test_strip(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.StrStripOp(" ").as_expr(col_name)], [col_name]
     )
 
@@ -230,7 +232,7 @@ def test_strip(scalar_types_df: bpd.DataFrame, snapshot):
 def test_str_contains(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.StrContainsOp("e").as_expr(col_name)], [col_name]
     )
 
@@ -240,7 +242,7 @@ def test_str_contains(scalar_types_df: bpd.DataFrame, snapshot):
 def test_str_contains_regex(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.StrContainsRegexOp("e").as_expr(col_name)], [col_name]
     )
 
@@ -250,7 +252,7 @@ def test_str_contains_regex(scalar_types_df: bpd.DataFrame, snapshot):
 def test_str_extract(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.StrExtractOp(r"([a-z]*)", 1).as_expr(col_name)], [col_name]
     )
 
@@ -260,7 +262,7 @@ def test_str_extract(scalar_types_df: bpd.DataFrame, snapshot):
 def test_str_repeat(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.StrRepeatOp(2).as_expr(col_name)], [col_name]
     )
     snapshot.assert_match(sql, "out.sql")
 
@@ -275,7 +277,7 @@ def test_str_find(scalar_types_df: bpd.DataFrame, snapshot):
         "none_end": ops.StrFindOp("e", start=None, end=5).as_expr(col_name),
         "start_end": ops.StrFindOp("e", start=2, end=5).as_expr(col_name),
     }
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -283,7 +285,7 @@ def test_str_find(scalar_types_df: bpd.DataFrame, snapshot):
 def test_string_split(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.StringSplitOp(pat=",").as_expr(col_name)], [col_name]
     )
     snapshot.assert_match(sql, "out.sql")
 
@@ -292,7 +294,7 @@ def test_string_split(scalar_types_df: bpd.DataFrame, snapshot):
 def test_upper(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(bf_df, [ops.upper_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_ops_to_sql(bf_df, [ops.upper_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
@@ -300,7 +302,7 @@ def test_upper(scalar_types_df: bpd.DataFrame, snapshot):
 def test_zfill(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.ZfillOp(width=10).as_expr(col_name)], [col_name]
     )
     snapshot.assert_match(sql, "out.sql")
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py
index 7e67e44cd3..0e24426fe8 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_struct_ops.py
@@ -55,7 +55,7 @@ def test_struct_field(nested_structs_types_df: bpd.DataFrame, snapshot):
         # When an index integer is provided.
         "int": ops.StructFieldOp(0).as_expr(col_name),
     }
-    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
 
     snapshot.assert_match(sql, "out.sql")
 
diff --git a/tests/unit/core/compile/sqlglot/expressions/test_timedelta_ops.py b/tests/unit/core/compile/sqlglot/expressions/test_timedelta_ops.py
index 1f01047ba9..8675b42bec 100644
--- a/tests/unit/core/compile/sqlglot/expressions/test_timedelta_ops.py
+++ b/tests/unit/core/compile/sqlglot/expressions/test_timedelta_ops.py
@@ -33,7 +33,7 @@ def test_to_timedelta(scalar_types_df: bpd.DataFrame, snapshot):
 def test_timedelta_floor(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "int64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = utils._apply_unary_ops(
+    sql = utils._apply_ops_to_sql(
         bf_df, [ops.timedelta_floor_op.as_expr(col_name)], [col_name]
     )
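The new name also reflects the multi-expression form already used by tests like test_is_in, test_dayofweek, and test_endswith above, which the old "unary" name undersold: any list of expressions plus matching output names compiles to one SQL string, and _apply_nary_op is now a thin wrapper over the same helper. A sketch of that pattern, reusing the EndsWithOp expressions from this diff (fixtures and imports assumed as in the earlier sketch):

    col_name = "string_col"
    bf_df = scalar_types_df[[col_name]]
    ops_map = {
        "double": ops.EndsWithOp(pat=("ab", "cd")).as_expr(col_name),
        "empty": ops.EndsWithOp(pat=()).as_expr(col_name),
    }
    # Dict keys become the output column names, dict values the expressions.
    sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))
    snapshot.assert_match(sql, "out.sql")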