Commit 6a3a1c3 (parent 9f3a050)

Move the test util into bigframes.testing to avoid an import error caused by the sqlglot module name matching a folder name.
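For context, the failure mode this move avoids is Python's import resolution: when a directory whose name matches an installed package (here, sqlglot) is importable from sys.path, imports can bind to the local folder instead of the real library. A minimal, hypothetical sketch of that hazard (the inserted path is illustrative only, not this repo's actual configuration):

import sys

# Hypothetical setup: this directory contains a local "sqlglot/" test package.
sys.path.insert(0, "tests/unit/core/compile")

import sqlglot

# The import above now binds to the local test folder rather than the
# installed sqlglot dependency, so lookups against the real library fail.
print(sqlglot.__file__)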

File tree

12 files changed: +179 -152 lines

bigframes/testing/utils.py

Lines changed: 21 additions & 1 deletion
@@ -14,7 +14,7 @@
 
 import base64
 import decimal
-from typing import Iterable, Optional, Set, Union
+from typing import Iterable, Optional, Sequence, Set, Union
 
 import geopandas as gpd  # type: ignore
 import google.api_core.operation
@@ -25,6 +25,7 @@
 import pyarrow as pa  # type: ignore
 import pytest
 
+from bigframes.core import expression as expr
 import bigframes.functions._utils as bff_utils
 import bigframes.pandas
 
@@ -448,3 +449,22 @@ def get_function_name(func, package_requirements=None, is_row_processor=False):
     function_hash = bff_utils.get_hash(func, package_requirements)
 
     return f"bigframes_{function_hash}"
+
+
+def _apply_unary_ops(
+    obj: bigframes.pandas.DataFrame,
+    ops_list: Sequence[expr.Expression],
+    new_names: Sequence[str],
+) -> str:
+    """Applies a list of unary ops to the given DataFrame and returns the SQL
+    representing the resulting DataFrame."""
+    array_value = obj._block.expr
+    result, old_names = array_value.compute_values(ops_list)
+
+    # Rename columns for deterministic golden SQL results.
+    assert len(old_names) == len(new_names)
+    col_ids = {old_name: new_name for old_name, new_name in zip(old_names, new_names)}
+    result = result.rename_columns(col_ids).select_columns(new_names)
+
+    sql = result.session._executor.to_sql(result, enable_cache=False)
+    return sql
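As a usage sketch, the relocated helper compiles one or more unary ops over a DataFrame's columns into SQL with stable output names; the snippet below mirrors the call pattern of the tests that follow (the output names "day" and "month" are illustrative, and scalar_types_df / snapshot are fixtures from this test suite):

from bigframes import operations as ops
from bigframes.testing import utils

# Apply two unary ops to the same column and give the outputs stable names,
# so the compiled SQL stays deterministic for golden-file comparison.
bf_df = scalar_types_df[["timestamp_col"]]
exprs = [ops.day_op.as_expr("timestamp_col"), ops.month_op.as_expr("timestamp_col")]
sql = utils._apply_unary_ops(bf_df, exprs, ["day", "month"])
snapshot.assert_match(sql, "out.sql")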

tests/unit/core/compile/sqlglot/expressions/test_array_ops.py

Lines changed: 7 additions & 5 deletions
@@ -17,15 +17,15 @@
 from bigframes import operations as ops
 from bigframes.operations._op_converters import convert_index, convert_slice
 import bigframes.pandas as bpd
-from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops
+from bigframes.testing import utils
 
 pytest.importorskip("pytest_snapshot")
 
 
 def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot):
     col_name = "string_list_col"
     bf_df = repeated_types_df[[col_name]]
-    sql = _apply_unary_ops(
+    sql = utils._apply_unary_ops(
         bf_df, [ops.ArrayToStringOp(delimiter=".").as_expr(col_name)], [col_name]
     )
@@ -35,15 +35,17 @@ def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot):
 def test_array_index(repeated_types_df: bpd.DataFrame, snapshot):
     col_name = "string_list_col"
     bf_df = repeated_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [convert_index(1).as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(
+        bf_df, [convert_index(1).as_expr(col_name)], [col_name]
+    )
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_array_slice_with_only_start(repeated_types_df: bpd.DataFrame, snapshot):
     col_name = "string_list_col"
     bf_df = repeated_types_df[[col_name]]
-    sql = _apply_unary_ops(
+    sql = utils._apply_unary_ops(
         bf_df, [convert_slice(slice(1, None)).as_expr(col_name)], [col_name]
     )
@@ -53,7 +55,7 @@ def test_array_slice_with_only_start(repeated_types_df: bpd.DataFrame, snapshot)
 def test_array_slice_with_start_and_stop(repeated_types_df: bpd.DataFrame, snapshot):
     col_name = "string_list_col"
     bf_df = repeated_types_df[[col_name]]
-    sql = _apply_unary_ops(
+    sql = utils._apply_unary_ops(
         bf_df, [convert_slice(slice(1, 5)).as_expr(col_name)], [col_name]
     )
tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py

Lines changed: 2 additions & 2 deletions
@@ -16,7 +16,7 @@
 
 from bigframes import operations as ops
 import bigframes.pandas as bpd
-from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops
+from bigframes.testing import utils
 
 pytest.importorskip("pytest_snapshot")
 
@@ -40,5 +40,5 @@ def test_is_in(scalar_types_df: bpd.DataFrame, snapshot):
         "float_in_ints": ops.IsInOp(values=(1, 2, 3, None)).as_expr(float_col),
     }
 
-    sql = _apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
+    sql = utils._apply_unary_ops(bf_df, list(ops_map.values()), list(ops_map.keys()))
     snapshot.assert_match(sql, "out.sql")

tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py

Lines changed: 41 additions & 23 deletions
@@ -16,103 +16,111 @@
 
 from bigframes import operations as ops
 import bigframes.pandas as bpd
-from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops
+from bigframes.testing import utils
 
 pytest.importorskip("pytest_snapshot")
 
 
 def test_date(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.date_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(bf_df, [ops.date_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_day(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.day_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(bf_df, [ops.day_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_dayofweek(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.dayofweek_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(
+        bf_df, [ops.dayofweek_op.as_expr(col_name)], [col_name]
+    )
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_dayofyear(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.dayofyear_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(
+        bf_df, [ops.dayofyear_op.as_expr(col_name)], [col_name]
+    )
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_floor_dt(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.FloorDtOp("D").as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(
+        bf_df, [ops.FloorDtOp("D").as_expr(col_name)], [col_name]
+    )
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_hour(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.hour_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(bf_df, [ops.hour_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_minute(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.minute_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(bf_df, [ops.minute_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_month(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.month_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(bf_df, [ops.month_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_normalize(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.normalize_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(
+        bf_df, [ops.normalize_op.as_expr(col_name)], [col_name]
+    )
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_quarter(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.quarter_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(bf_df, [ops.quarter_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_second(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.second_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(bf_df, [ops.second_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_strftime(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(
+    sql = utils._apply_unary_ops(
         bf_df, [ops.StrftimeOp("%Y-%m-%d").as_expr(col_name)], [col_name]
     )
@@ -122,78 +130,88 @@ def test_strftime(scalar_types_df: bpd.DataFrame, snapshot):
 def test_time(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.time_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(bf_df, [ops.time_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "int64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.ToDatetimeOp().as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(
+        bf_df, [ops.ToDatetimeOp().as_expr(col_name)], [col_name]
+    )
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "int64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.ToTimestampOp().as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(
+        bf_df, [ops.ToTimestampOp().as_expr(col_name)], [col_name]
+    )
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_unix_micros(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.UnixMicros().as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(
+        bf_df, [ops.UnixMicros().as_expr(col_name)], [col_name]
+    )
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_unix_millis(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.UnixMillis().as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(
+        bf_df, [ops.UnixMillis().as_expr(col_name)], [col_name]
+    )
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_unix_seconds(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.UnixSeconds().as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(
+        bf_df, [ops.UnixSeconds().as_expr(col_name)], [col_name]
+    )
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_year(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.year_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(bf_df, [ops.year_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_iso_day(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.iso_day_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(bf_df, [ops.iso_day_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_iso_week(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.iso_week_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(bf_df, [ops.iso_week_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_iso_year(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "timestamp_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.iso_year_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(bf_df, [ops.iso_year_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")

tests/unit/core/compile/sqlglot/expressions/test_generic_ops.py

Lines changed: 5 additions & 5 deletions
@@ -16,39 +16,39 @@
 
 from bigframes import operations as ops
 import bigframes.pandas as bpd
-from tests.unit.core.compile.sqlglot.expressions.utils import _apply_unary_ops
+from bigframes.testing import utils
 
 pytest.importorskip("pytest_snapshot")
 
 
 def test_hash(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.hash_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(bf_df, [ops.hash_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_isnull(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.isnull_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(bf_df, [ops.isnull_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_notnull(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "float64_col"
    bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(bf_df, [ops.notnull_op.as_expr(col_name)], [col_name])
+    sql = utils._apply_unary_ops(bf_df, [ops.notnull_op.as_expr(col_name)], [col_name])
 
     snapshot.assert_match(sql, "out.sql")
 
 
 def test_map(scalar_types_df: bpd.DataFrame, snapshot):
     col_name = "string_col"
     bf_df = scalar_types_df[[col_name]]
-    sql = _apply_unary_ops(
+    sql = utils._apply_unary_ops(
         bf_df,
         [ops.MapOp(mappings=(("value1", "mapped1"),)).as_expr(col_name)],
         [col_name],