|
| 1 | +# Contains code from https://github.com/pola-rs/tpch/blob/main/queries/polars/q18.py |
| 2 | + |
| 3 | +import typing |
| 4 | + |
| 5 | +import bigframes |
| 6 | +import bigframes.pandas as bpd |
| 7 | + |
| 8 | + |
def q(
    dataset_id: str,
    session: bigframes.Session,
    project_id: str = "bigframes-dev-perf",
):
    """Build and execute the q18 query over CUSTOMER, LINEITEM and ORDERS.

    Orders whose line items sum to more than 300 units of ``L_QUANTITY`` are
    joined back to their line items and to their customers. ``L_QUANTITY`` is
    then totalled per (``C_NAME``, ``C_CUSTKEY``, ``O_ORDERKEY``,
    ``O_ORDERDATE``, ``O_TOTALPRICE``) group, the rows are ordered by
    ``O_TOTALPRICE`` descending and order date ascending, and the first 100
    rows are passed to ``to_gbq()``.

    Args:
        dataset_id: Dataset that holds the CUSTOMER, LINEITEM and ORDERS
            tables.
        session: BigQuery DataFrames session used to read the tables.
        project_id: Project that owns ``dataset_id``. Defaults to
            ``"bigframes-dev-perf"``, so callers that pass only
            ``dataset_id`` and ``session`` read the same tables as before.

    Returns:
        None. The value returned by ``to_gbq()`` is discarded.
    """

    def read_table(table_name: str):
        # All three inputs are read the same way: a fully qualified table
        # path and the NULL default index kind.
        return session.read_gbq(
            f"{project_id}.{dataset_id}.{table_name}",
            index_col=bigframes.enums.DefaultIndexKind.NULL,
        )

    customer = read_table("CUSTOMER")
    lineitem = read_table("LINEITEM")
    orders = read_table("ORDERS")

    # An order qualifies only if its summed line-item quantity exceeds this.
    quantity_threshold = 300

    # Total quantity per order, keeping only the orders above the threshold.
    order_quantities = lineitem.groupby("L_ORDERKEY", as_index=False).agg(
        SUM_QUANTITY=bpd.NamedAgg(column="L_QUANTITY", aggfunc="sum")
    )
    large_orders = order_quantities[
        order_quantities["SUM_QUANTITY"] > quantity_threshold
    ]

    # Restrict ORDERS to the qualifying order keys.
    filtered_orders = orders.merge(
        large_orders, left_on="O_ORDERKEY", right_on="L_ORDERKEY", how="inner"
    )

    # Re-attach the line items and the owning customer of each kept order.
    result = filtered_orders.merge(
        lineitem, left_on="O_ORDERKEY", right_on="L_ORDERKEY"
    )
    result = result.merge(customer, left_on="O_CUSTKEY", right_on="C_CUSTKEY")

    final_result = result.groupby(
        ["C_NAME", "C_CUSTKEY", "O_ORDERKEY", "O_ORDERDATE", "O_TOTALPRICE"],
        as_index=False,
    ).agg(COL6=bpd.NamedAgg(column="L_QUANTITY", aggfunc="sum"))

    # The output column is deliberately named "O_ORDERDAT" (no trailing "E");
    # presumably this mirrors the upstream query's column name - confirm.
    final_result = final_result.rename(columns={"O_ORDERDATE": "O_ORDERDAT"})

    # The cast only narrows the static type for type checkers.
    final_result = typing.cast(bpd.DataFrame, final_result).sort_values(
        ["O_TOTALPRICE", "O_ORDERDAT"], ascending=[False, True]
    )

    q_final = final_result.head(100)
    # NOTE(review): to_gbq() is called without a destination; presumably this
    # is what forces the query to execute - confirm against bigframes docs.
    q_final.to_gbq()
0 commit comments