Skip to content

Commit 5e3dd06

Browse files
committed
Merge branch 'main' into formatter-options
2 parents 9a1f59f + 91b6635 commit 5e3dd06

File tree

11 files changed

+459
-279
lines changed

11 files changed

+459
-279
lines changed

Cargo.lock

Lines changed: 371 additions & 229 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -34,25 +34,25 @@ protoc = [ "datafusion-substrait/protoc" ]
3434
substrait = ["dep:datafusion-substrait"]
3535

3636
[dependencies]
37-
tokio = { version = "1.43", features = ["macros", "rt", "rt-multi-thread", "sync"] }
38-
pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py39"] }
39-
pyo3-async-runtimes = { version = "0.23", features = ["tokio-runtime"]}
40-
arrow = { version = "54.2.1", features = ["pyarrow"] }
41-
datafusion = { version = "46.0.1", features = ["avro", "unicode_expressions"] }
42-
datafusion-substrait = { version = "46.0.1", optional = true }
43-
datafusion-proto = { version = "46.0.1" }
44-
datafusion-ffi = { version = "46.0.1" }
37+
tokio = { version = "1.44", features = ["macros", "rt", "rt-multi-thread", "sync"] }
38+
pyo3 = { version = "0.24", features = ["extension-module", "abi3", "abi3-py39"] }
39+
pyo3-async-runtimes = { version = "0.24", features = ["tokio-runtime"]}
40+
arrow = { version = "55.0.0", features = ["pyarrow"] }
41+
datafusion = { version = "47.0.0", features = ["avro", "unicode_expressions"] }
42+
datafusion-substrait = { version = "47.0.0", optional = true }
43+
datafusion-proto = { version = "47.0.0" }
44+
datafusion-ffi = { version = "47.0.0" }
4545
prost = "0.13.1" # keep in line with `datafusion-substrait`
46-
uuid = { version = "1.12", features = ["v4"] }
46+
uuid = { version = "1.16", features = ["v4"] }
4747
mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] }
48-
async-trait = "0.1.73"
48+
async-trait = "0.1.88"
4949
futures = "0.3"
50-
object_store = { version = "0.11.0", features = ["aws", "gcp", "azure", "http"] }
50+
object_store = { version = "0.12.0", features = ["aws", "gcp", "azure", "http"] }
5151
url = "2"
5252

5353
[build-dependencies]
5454
prost-types = "0.13.1" # keep in line with `datafusion-substrait`
55-
pyo3-build-config = "0.23"
55+
pyo3-build-config = "0.24"
5656

5757
[lib]
5858
name = "datafusion_python"

python/datafusion/expr.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -406,9 +406,17 @@ def column(value: str) -> Expr:
406406
"""Creates a new expression representing a column."""
407407
return Expr(expr_internal.RawExpr.column(value))
408408

409-
def alias(self, name: str) -> Expr:
410-
"""Assign a name to the expression."""
411-
return Expr(self.expr.alias(name))
409+
def alias(self, name: str, metadata: Optional[dict[str, str]] = None) -> Expr:
410+
"""Assign a name to the expression.
411+
412+
Args:
413+
name: The name to assign to the expression.
414+
metadata: Optional metadata to attach to the expression.
415+
416+
Returns:
417+
A new expression with the assigned name.
418+
"""
419+
return Expr(self.expr.alias(name, metadata))
412420

413421
def sort(self, ascending: bool = True, nulls_first: bool = True) -> SortExpr:
414422
"""Creates a sort :py:class:`Expr` from an existing :py:class:`Expr`.

python/datafusion/functions.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -372,9 +372,18 @@ def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> So
372372
return SortExpr(expr, ascending=ascending, nulls_first=nulls_first)
373373

374374

375-
def alias(expr: Expr, name: str) -> Expr:
376-
"""Creates an alias expression."""
377-
return Expr(f.alias(expr.expr, name))
375+
def alias(expr: Expr, name: str, metadata: Optional[dict[str, str]] = None) -> Expr:
376+
"""Creates an alias expression with an optional metadata dictionary.
377+
378+
Args:
379+
expr: The expression to alias
380+
name: The alias name
381+
metadata: Optional metadata to attach to the column
382+
383+
Returns:
384+
An expression with the given alias
385+
"""
386+
return Expr(f.alias(expr.expr, name, metadata))
378387

379388

380389
def col(name: str) -> Expr:

python/tests/test_aggregation.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -338,7 +338,6 @@ def test_bit_and_bool_fns(df, name, expr, result):
338338
),
339339
[7, 9],
340340
),
341-
("last_value", f.last_value(column("a")), [3, 6]),
342341
(
343342
"last_value_ordered",
344343
f.last_value(column("a"), order_by=[column("a").sort(ascending=False)]),

python/tests/test_expr.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -247,3 +247,8 @@ def test_fill_null(df):
247247
assert result.column(0) == pa.array([1, 2, 100])
248248
assert result.column(1) == pa.array([4, 25, 6])
249249
assert result.column(2) == pa.array([1234, 1234, 8])
250+
251+
252+
def test_alias_with_metadata(df):
253+
df = df.select(col("a").alias("b", {"key": "value"}))
254+
assert df.schema().field("b").metadata == {b"key": b"value"}

python/tests/test_functions.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1231,3 +1231,8 @@ def test_between_default(df):
12311231

12321232
actual = df.collect()[0].to_pydict()
12331233
assert actual == expected
1234+
1235+
1236+
def test_alias_with_metadata(df):
1237+
df = df.select(f.alias(f.col("a"), "b", {"key": "value"}))
1238+
assert df.schema().field("b").metadata == {b"key": b"value"}

src/dataframe.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -307,7 +307,7 @@ impl PyDataFrame {
307307

308308
#[pyo3(signature = (*args))]
309309
fn select(&self, args: Vec<PyExpr>) -> PyDataFusionResult<Self> {
310-
let expr = args.into_iter().map(|e| e.into()).collect();
310+
let expr: Vec<Expr> = args.into_iter().map(|e| e.into()).collect();
311311
let df = self.df.as_ref().clone().select(expr)?;
312312
Ok(Self::new(df))
313313
}

src/dataset_exec.rs

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -275,7 +275,9 @@ impl DisplayAs for DatasetExec {
275275
Python::with_gil(|py| {
276276
let number_of_fragments = self.fragments.bind(py).len();
277277
match t {
278-
DisplayFormatType::Default | DisplayFormatType::Verbose => {
278+
DisplayFormatType::Default
279+
| DisplayFormatType::Verbose
280+
| DisplayFormatType::TreeRender => {
279281
let projected_columns: Vec<String> = self
280282
.schema
281283
.fields()

src/expr.rs

Lines changed: 17 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ use datafusion::logical_expr::{
2222
};
2323
use pyo3::IntoPyObjectExt;
2424
use pyo3::{basic::CompareOp, prelude::*};
25+
use std::collections::HashMap;
2526
use std::convert::{From, Into};
2627
use std::sync::Arc;
2728
use window::PyWindowFrame;
@@ -275,8 +276,9 @@ impl PyExpr {
275276
}
276277

277278
/// assign a name to the PyExpr
278-
pub fn alias(&self, name: &str) -> PyExpr {
279-
self.expr.clone().alias(name).into()
279+
#[pyo3(signature = (name, metadata=None))]
280+
pub fn alias(&self, name: &str, metadata: Option<HashMap<String, String>>) -> PyExpr {
281+
self.expr.clone().alias_with_metadata(name, metadata).into()
280282
}
281283

282284
/// Create a sort PyExpr from an existing PyExpr.
@@ -714,9 +716,19 @@ impl PyExpr {
714716
| Operator::BitwiseXor
715717
| Operator::BitwiseAnd
716718
| Operator::BitwiseOr => DataTypeMap::map_from_arrow_type(&DataType::Binary),
717-
Operator::AtArrow | Operator::ArrowAt => {
718-
Err(py_type_err(format!("Unsupported expr: ${op}")))
719-
}
719+
Operator::AtArrow
720+
| Operator::ArrowAt
721+
| Operator::Arrow
722+
| Operator::LongArrow
723+
| Operator::HashArrow
724+
| Operator::HashLongArrow
725+
| Operator::AtAt
726+
| Operator::IntegerDivide
727+
| Operator::HashMinus
728+
| Operator::AtQuestion
729+
| Operator::Question
730+
| Operator::QuestionAnd
731+
| Operator::QuestionPipe => Err(py_type_err(format!("Unsupported expr: ${op}"))),
720732
},
721733
Expr::Cast(Cast { expr: _, data_type }) => DataTypeMap::map_from_arrow_type(data_type),
722734
Expr::Literal(scalar_value) => DataTypeMap::map_from_scalar_value(scalar_value),

0 commit comments

Comments
 (0)