-
Notifications
You must be signed in to change notification settings - Fork 1.9k
feat: plan-time SQL expression simplifying #19311
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
d9b39a3
3421187
690e167
5b5a838
b2bc6cf
a46e040
1cb87c1
e5a95d4
93ba06e
90f7a4b
964a326
df70b24
78e2dc1
7aa15c8
ff8f3cc
16d93a6
d519934
803c333
8e0fecc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,232 @@ | ||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
| // regarding copyright ownership. The ASF licenses this file | ||
| // to you under the Apache License, Version 2.0 (the | ||
| // "License"); you may not use this file except in compliance | ||
| // with the License. You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| //! Simplifies a SQL expression (already planned as an `Expr`) to a literal of a given type. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here too |
||
| //! | ||
| //! This module provides functionality to parse and simplify static SQL expressions | ||
| //! used in SQL constructs like `FROM TABLE SAMPLE (10 + 50 * 2)`. When their values are required | ||
| //! during the planning phase (rather than the execution phase), they must be reduced to literals of a given type. | ||
| use crate::simplify_expressions::ExprSimplifier; | ||
| use arrow::datatypes::ArrowPrimitiveType; | ||
| use datafusion_common::{ | ||
| DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, plan_datafusion_err, | ||
| plan_err, | ||
| }; | ||
| use datafusion_expr::Expr; | ||
| use datafusion_expr::execution_props::ExecutionProps; | ||
| use datafusion_expr::simplify::SimplifyContext; | ||
| use std::sync::Arc; | ||
|
|
||
| /// Parse and simplifies a SQL expression to a numeric literal, | ||
| /// corresponding to an arrow primitive type `T` (for example, Float64Type). | ||
| /// | ||
| /// This function simplifies and coerces the expression, then extracts the underlying | ||
| /// native type using `TryFrom<ScalarValue>`. | ||
| /// | ||
| /// # Example | ||
| /// ```ignore | ||
| /// let value: f64 = parse_sql_literal::<Float64Type>(expr)?; | ||
| /// ``` | ||
| pub fn parse_sql_literal<T>(expr: &Expr) -> Result<T::Native> | ||
|
Comment on lines
+35
to
+45
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We just need to fix the documentation and rename this to omit mention of SQL now, since it only operates at Expr level |
||
| where | ||
| T: ArrowPrimitiveType, | ||
| T::Native: TryFrom<ScalarValue, Error = DataFusionError>, | ||
| { | ||
| // Empty schema is sufficient because it parses only literal expressions | ||
| let schema = DFSchemaRef::new(DFSchema::empty()); | ||
|
|
||
| log::debug!("Parsing expr {:?} to type {}", expr, T::DATA_TYPE); | ||
|
|
||
| let execution_props = ExecutionProps::new(); | ||
| let simplifier = ExprSimplifier::new( | ||
| SimplifyContext::new(&execution_props).with_schema(Arc::clone(&schema)), | ||
| ); | ||
|
|
||
| // Simplify and coerce expression in case of constant arithmetic operations (e.g., 10 + 5) | ||
| let simplified_expr: Expr = simplifier | ||
| .simplify(expr.clone()) | ||
| .map_err(|err| plan_datafusion_err!("Cannot simplify {expr:?}: {err}"))?; | ||
| let coerced_expr: Expr = simplifier.coerce(simplified_expr, schema.as_ref())?; | ||
| log::debug!("Coerced expression: {:?}", &coerced_expr); | ||
|
|
||
| match coerced_expr { | ||
| Expr::Literal(scalar_value, _) => { | ||
| // It is a literal - proceed to the underlying value | ||
| // Cast to the target type if needed | ||
| let casted_scalar = scalar_value.cast_to(&T::DATA_TYPE)?; | ||
|
|
||
| // Extract the native type | ||
| T::Native::try_from(casted_scalar).map_err(|err| { | ||
| plan_datafusion_err!( | ||
| "Cannot extract {} from scalar value: {err}", | ||
| std::any::type_name::<T>() | ||
| ) | ||
| }) | ||
| } | ||
| actual => { | ||
| plan_err!( | ||
| "Cannot extract literal from coerced {actual:?} expression given {expr:?} expression" | ||
| ) | ||
| } | ||
| } | ||
| } | ||
|
|
||
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::datatypes::{DataType, Float64Type, Int64Type};
    use datafusion_common::config::ConfigOptions;
    use datafusion_common::{TableReference, not_impl_err};
    use datafusion_expr::planner::{ContextProvider, RelationPlannerContext};
    use datafusion_expr::sqlparser::parser::Parser;
    use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF};
    use datafusion_sql::planner::{PlannerContext, SqlToRel};
    use datafusion_sql::relation::SqlToRelRelationContext;
    use datafusion_sql::sqlparser::dialect::GenericDialect;
    use std::sync::Arc;

    // NOTE(review): a reviewer suggested these tests can probably be simplified
    // to remove the SQL parsing related code and only supply `Expr` to the test
    // cases. The SQL-driven inputs are kept for now so the covered cases do not
    // change.

    /// Simple mock context provider for testing: it exposes default options and
    /// no tables, functions, or variables.
    struct MockContextProvider {
        options: ConfigOptions,
    }

    impl ContextProvider for MockContextProvider {
        fn get_table_source(&self, _: TableReference) -> Result<Arc<dyn TableSource>> {
            not_impl_err!("mock")
        }

        fn get_function_meta(&self, _name: &str) -> Option<Arc<ScalarUDF>> {
            None
        }

        fn get_aggregate_meta(&self, _name: &str) -> Option<Arc<AggregateUDF>> {
            None
        }

        fn get_variable_type(&self, _variable_names: &[String]) -> Option<DataType> {
            None
        }

        fn get_window_meta(&self, _name: &str) -> Option<Arc<WindowUDF>> {
            None
        }

        fn options(&self) -> &ConfigOptions {
            &self.options
        }

        fn udf_names(&self) -> Vec<String> {
            vec![]
        }

        fn udaf_names(&self) -> Vec<String> {
            vec![]
        }

        fn udwf_names(&self) -> Vec<String> {
            vec![]
        }
    }

    /// Parses `sql` as a single SQL expression and plans it into a logical
    /// [`Expr`] against an empty schema, panicking if parsing or planning fails.
    ///
    /// Shared by all tests so the planner setup is written only once.
    fn plan_sql_expr(sql: &str) -> Expr {
        let context = MockContextProvider {
            options: ConfigOptions::default(),
        };
        let sql_to_rel = SqlToRel::new(&context);
        let mut planner_context = PlannerContext::new();
        let mut sql_context =
            SqlToRelRelationContext::new(&sql_to_rel, &mut planner_context);
        let dialect = GenericDialect {};
        let schema = DFSchemaRef::new(DFSchema::empty());

        let ast_expr = Parser::new(&dialect)
            .try_with_sql(sql)
            .unwrap()
            .parse_expr()
            .unwrap();
        sql_context
            .sql_to_expr(ast_expr, &schema)
            .expect("sql_to_expr")
    }

    #[test]
    fn test_parse_sql_float_literal() {
        // (SQL expression, expected f64 value)
        let test_cases = [
            ("0.0", 0.0),
            ("1.0", 1.0),
            ("0", 0.0),
            ("1", 1.0),
            ("0.5", 0.5),
            ("100.0", 100.0),
            ("0.001", 0.001),
            ("999.999", 999.999),
            ("1.0 + 2.0", 3.0),
            ("10.0 * 0.5", 5.0),
            ("100.0 / 4.0", 25.0),
            ("(80.0 + 2.0*10.0) / 4.0", 25.0),
            ("50.0 - 10.0", 40.0),
            ("1e2", 100.0),
            ("1.5e1", 15.0),
            ("2.5e-1", 0.25),
        ];

        for (sql_expr, expected) in test_cases {
            let expr = plan_sql_expr(sql_expr);

            let value: f64 = parse_sql_literal::<Float64Type>(&expr).unwrap_or_else(
                |e| panic!("Failed to parse expression '{sql_expr}': {e}"),
            );

            // Compare with a tolerance because the values are floating point
            assert!(
                (value - expected).abs() < 1e-10,
                "For expression '{sql_expr}': expected {expected}, got {value}",
            );
        }
    }

    #[test]
    fn test_parse_sql_integer_literal() {
        // Integer
        let expr = plan_sql_expr("2 + 4");

        let value: i64 = parse_sql_literal::<Int64Type>(&expr)
            .unwrap_or_else(|e| panic!("Failed to parse expression: {e}"));

        assert_eq!(6, value);
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@geoffreyclaude I wonder what your thoughts are on this PR and the approach of `parse_sql_literal`?