From c43241b17f9c722586b749ddf62583488a6f3d39 Mon Sep 17 00:00:00 2001 From: TCeason Date: Tue, 9 Dec 2025 09:30:21 +0800 Subject: [PATCH] feat(query): tighten procedure overload resolution --- .../management/src/procedure/procedure_mgr.rs | 21 ++ .../sql/src/planner/binder/ddl/procedure.rs | 178 +++++++++- .../base/15_procedure/15_0002_procedure.test | 336 +++++++++++++++++- 3 files changed, 503 insertions(+), 32 deletions(-) diff --git a/src/query/management/src/procedure/procedure_mgr.rs b/src/query/management/src/procedure/procedure_mgr.rs index dfe46500f316c..c6ecab0021257 100644 --- a/src/query/management/src/procedure/procedure_mgr.rs +++ b/src/query/management/src/procedure/procedure_mgr.rs @@ -201,4 +201,25 @@ impl ProcedureMgr { Ok(procedure_infos) } + + #[fastrace::trace] + pub async fn list_procedures_by_name( + &self, + name: &str, + ) -> Result, MetaError> { + debug!(name = (name); "SchemaApi: {}", func_name!()); + let ident = ProcedureNameIdent::new(&self.tenant, ProcedureIdentity::new(name, "")); + let dir = DirName::new_with_level(ident, 1); + + let name_id_metas = self.kv_api.list_id_value(&dir).await?; + let procedure_infos = name_id_metas + .map(|(k, id, seq_meta)| ProcedureInfo { + ident: ProcedureIdIdent::new(&self.tenant, *id), + name_ident: k, + meta: seq_meta.data, + }) + .collect::>(); + + Ok(procedure_infos) + } } diff --git a/src/query/sql/src/planner/binder/ddl/procedure.rs b/src/query/sql/src/planner/binder/ddl/procedure.rs index 78da7d0955321..fbb901420cb20 100644 --- a/src/query/sql/src/planner/binder/ddl/procedure.rs +++ b/src/query/sql/src/planner/binder/ddl/procedure.rs @@ -18,19 +18,27 @@ use databend_common_ast::ast::CreateProcedureStmt; use databend_common_ast::ast::DescProcedureStmt; use databend_common_ast::ast::DropProcedureStmt; use databend_common_ast::ast::ExecuteImmediateStmt; +use databend_common_ast::ast::Expr; use databend_common_ast::ast::ProcedureIdentity as AstProcedureIdentity; use databend_common_ast::ast::ProcedureLanguage; use databend_common_ast::ast::ProcedureType; use databend_common_ast::ast::ShowOptions; +use databend_common_ast::ast::TypeName; +use databend_common_ast::parser::expr::type_name as parse_type_name_ast; use databend_common_ast::parser::run_parser; use databend_common_ast::parser::script::script_block_or_stmt; use databend_common_ast::parser::script::ScriptBlockOrStmt; use databend_common_ast::parser::tokenize_sql; +use databend_common_ast::parser::Dialect; use databend_common_ast::parser::ParseMode; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::type_check::common_super_type; use databend_common_expression::types::DataType; use databend_common_expression::Scalar; +use databend_common_functions::BUILTIN_FUNCTIONS; +use databend_common_meta_app::principal::procedure::ProcedureInfo; +use databend_common_meta_app::principal::GetProcedureReply; use databend_common_meta_app::principal::GetProcedureReq; use databend_common_meta_app::principal::ProcedureIdentity; use databend_common_meta_app::principal::ProcedureMeta; @@ -180,7 +188,7 @@ impl Binder { &[], true, )?; - let mut arg_types = vec![]; + let mut arg_types = Vec::with_capacity(arguments.len()); for argument in arguments { let box (arg, mut arg_type) = type_checker.resolve(argument)?; if let ScalarExpr::SubqueryExpr(subquery) = &arg { @@ -188,31 +196,76 @@ impl Binder { arg_type = arg_type.wrap_nullable(); } } - arg_types.push(arg_type.to_string()); + arg_types.push(arg_type); } - let name = name.to_string(); - let procedure_ident = ProcedureIdentity::new(name, arg_types.join(",")); + + let name_str = name.to_string(); + let procedure_api = UserApiProvider::instance().procedure_api(&tenant); + + // Try exact match first + let arg_type_strings: Vec = arg_types.iter().map(|t| t.to_string()).collect(); + let procedure_ident = ProcedureIdentity::new(name_str.clone(), arg_type_strings.join(",")); let req = GetProcedureReq { inner: ProcedureNameIdent::new(tenant.clone(), procedure_ident.clone()), }; - - let procedure = UserApiProvider::instance() - .procedure_api(&tenant) - .get_procedure(&req) - .await?; - if let Some(procedure) = procedure { - Ok(Plan::CallProcedure(Box::new(CallProcedurePlan { + if let Some(procedure) = procedure_api.get_procedure(&req).await? { + return Ok(Plan::CallProcedure(Box::new(CallProcedurePlan { procedure_id: procedure.id, script: procedure.procedure_meta.script, arg_names: procedure.procedure_meta.arg_names, args: arguments.clone(), - }))) - } else { - Err(ErrorCode::UnknownProcedure(format!( + }))); + } + + // Exact match failed, try implicit cast resolution. + let candidates = procedure_api.list_procedures_by_name(&name_str).await?; + let mut same_arity_candidates = Vec::new(); + for candidate in candidates { + let arg_defs = parse_procedure_signature(&candidate.name_ident.procedure_name().args)?; + if arg_defs.len() == arg_types.len() { + same_arity_candidates.push(candidate); + } + } + + let has_explicit_cast = arguments + .iter() + .any(|expr| matches!(expr, Expr::Cast { .. } | Expr::TryCast { .. })); + + // Multiple overloads plus lack of explicit casts means we cannot disambiguate. + if same_arity_candidates.len() > 1 && !has_explicit_cast { + return Err(ErrorCode::UnknownProcedure(format!( "Unknown procedure {}", procedure_ident - ))) + ))); } + + let allow_implicit_cast = same_arity_candidates.len() == 1 && !has_explicit_cast; + let (procedure, casts_to_apply) = resolve_procedure_candidate( + &procedure_ident, + &arg_types, + same_arity_candidates, + allow_implicit_cast, + )?; + + let args = arguments + .iter() + .zip(casts_to_apply.into_iter()) + .map(|(expr, cast)| match cast { + Some(target_type) => Expr::Cast { + span: expr.span(), + expr: Box::new(expr.clone()), + target_type, + pg_style: false, + }, + None => expr.clone(), + }) + .collect(); + Ok(Plan::CallProcedure(Box::new(CallProcedurePlan { + procedure_id: procedure.id, + script: procedure.procedure_meta.script, + arg_names: procedure.procedure_meta.arg_names, + args, + }))) } fn procedure_meta( @@ -271,7 +324,6 @@ fn generate_procedure_name_ident( .map(|type_name| resolve_type_name(type_name, true).map(|t| DataType::from(&t))) .collect::, _>>()?; - // Convert normalized DataType back to string for storage let args_type_str = args_data_type .iter() .map(|dt| dt.to_string()) @@ -283,3 +335,97 @@ fn generate_procedure_name_ident( ProcedureIdentity::new(name.name.clone(), args_type_str), )) } + +/// Find the first procedure overload whose signature is compatible with the +/// actual argument types, optionally allowing implicit casts. Returns the +/// procedure metadata plus the exact casts we need to inject. +fn resolve_procedure_candidate( + procedure_ident: &ProcedureIdentity, + arg_types: &[DataType], + candidates: Vec, + allow_implicit_cast: bool, +) -> Result<(GetProcedureReply, Vec>)> { + for candidate in candidates { + let arg_defs = parse_procedure_signature(&candidate.name_ident.procedure_name().args)?; + if arg_defs.len() != arg_types.len() { + continue; + } + + let mut casts = Vec::with_capacity(arg_types.len()); + let mut compatible = true; + for (actual, target_ast) in arg_types.iter().zip(arg_defs.iter()) { + let target = DataType::from(&resolve_type_name(target_ast, true)?); + if actual == &target { + casts.push(None); + continue; + } + + if allow_implicit_cast + && common_super_type( + actual.clone(), + target.clone(), + &BUILTIN_FUNCTIONS.default_cast_rules, + ) + .is_some_and(|common| common == target) + { + casts.push(Some(target_ast.clone())); + } else { + compatible = false; + break; + } + } + + if compatible { + return Ok(( + GetProcedureReply { + id: *candidate.ident.procedure_id(), + procedure_meta: candidate.meta.clone(), + }, + casts, + )); + } + } + + Err(ErrorCode::UnknownProcedure(format!( + "Unknown procedure {}", + procedure_ident + ))) +} + +fn parse_procedure_signature(arg_str: &str) -> Result> { + if arg_str.is_empty() { + return Ok(vec![]); + } + + let mut segments = Vec::new(); + let mut depth = 0i32; + let mut start = 0usize; + for (idx, ch) in arg_str.char_indices() { + match ch { + '(' => depth += 1, + ')' => depth -= 1, + ',' if depth == 0 => { + // Only split on commas at depth 0 so nested args (like DECIMAL) stay intact. + segments.push(arg_str[start..idx].trim()); + start = idx + 1; + } + _ => {} + } + } + segments.push(arg_str[start..].trim()); + + segments + .into_iter() + .map(|segment| { + let tokens = tokenize_sql(segment)?; + run_parser( + &tokens, + Dialect::default(), + ParseMode::Default, + false, + parse_type_name_ast, + ) + .map_err(|e| ErrorCode::SyntaxException(e.to_string())) + }) + .collect() +} diff --git a/tests/sqllogictests/suites/base/15_procedure/15_0002_procedure.test b/tests/sqllogictests/suites/base/15_procedure/15_0002_procedure.test index 093f840ae0e11..cab15b06cc625 100644 --- a/tests/sqllogictests/suites/base/15_procedure/15_0002_procedure.test +++ b/tests/sqllogictests/suites/base/15_procedure/15_0002_procedure.test @@ -170,8 +170,10 @@ BEGIN END; $$; -statement error 3130 +query T call procedure sum_even_numbers(1, 2) +---- +2 query T call procedure sum_even_numbers(1::INT, 2::INT) @@ -181,6 +183,30 @@ call procedure sum_even_numbers(1::INT, 2::INT) statement ok drop procedure sum_even_numbers(Int, Int); +statement ok +CREATE OR REPLACE PROCEDURE p_throw_expr(email STRING) RETURNS STRING NOT NULL LANGUAGE SQL COMMENT='throw expr test' AS $$ +BEGIN + IF email = 'dup' THEN + THROW 'Duplicate email: dup'; + ELSEIF email='deny_user' THEN + THROW concat('Denied prefix: ', email); + ELSE + RETURN email; + END IF; +END; +$$; + +statement error 3129 +call procedure p_throw_expr('dup'); + +statement error 3129 +call procedure p_throw_expr('deny_user'); + +query T +call procedure p_throw_expr('new_user'); +---- +new_user + statement ok CREATE PROCEDURE if not exists p2(x STRING) RETURNS Int32 NOT NULL LANGUAGE SQL COMMENT='test' AS $$ BEGIN @@ -217,29 +243,307 @@ call procedure p3('x'); statement ok drop procedure p3(string); +## ========================================== +## Implicit cast resolution tests +## ========================================== + +## Test 1: Auto cast succeeds - INT8 literal matches INT64 parameter statement ok -CREATE OR REPLACE PROCEDURE p_throw_expr(email STRING) RETURNS STRING NOT NULL LANGUAGE SQL COMMENT='throw expr test' AS $$ +CREATE OR REPLACE PROCEDURE test_cast_int64(x INT64) RETURNS INT64 NOT NULL LANGUAGE SQL AS $$ BEGIN - IF email = 'dup' THEN - THROW 'Duplicate email: dup'; - ELSEIF email='deny_user' THEN - THROW concat('Denied prefix: ', email); - ELSE - RETURN email; - END IF; + RETURN x + 100; END; $$; -statement error 3129 -call procedure p_throw_expr('dup'); +query T +call procedure test_cast_int64(1); +---- +101 -statement error 3129 -call procedure p_throw_expr('deny_user'); +statement ok +drop procedure test_cast_int64(INT64); + +## Test 2: Explicit cast blocks auto resolution +statement ok +CREATE OR REPLACE PROCEDURE test_explicit_cast(x INT32) RETURNS STRING NOT NULL LANGUAGE SQL AS $$ +BEGIN + RETURN 'INT32_VERSION'; +END; +$$; +## This should fail because user has explicit cast to INT16, but no INT16 version exists +statement error 3130 +call procedure test_explicit_cast(1::INT16); + +statement ok +drop procedure test_explicit_cast(INT32); + +## Test 3: Ambiguous overload - same cast cost +statement ok +CREATE OR REPLACE PROCEDURE test_ambiguous(x INT64) RETURNS STRING NOT NULL LANGUAGE SQL AS $$ +BEGIN + RETURN 'INT64_VERSION'; +END; +$$; + +statement ok +CREATE OR REPLACE PROCEDURE test_ambiguous(x FLOAT64) RETURNS STRING NOT NULL LANGUAGE SQL AS $$ +BEGIN + RETURN 'FLOAT64_VERSION'; +END; +$$; + +## INT8 can cast to both INT64 and FLOAT64 with same cost, should be ambiguous +statement error 3130 +call procedure test_ambiguous(1); + +statement ok +drop procedure test_ambiguous(INT64); + +statement ok +drop procedure test_ambiguous(FLOAT64); + +## Test 4: Multiple overloads - exact match with explicit cast +statement ok +CREATE OR REPLACE PROCEDURE test_best_match(x INT32) RETURNS STRING NOT NULL LANGUAGE SQL AS $$ +BEGIN + RETURN 'INT32_EXACT'; +END; +$$; + +statement ok +CREATE OR REPLACE PROCEDURE test_best_match(x INT64) RETURNS STRING NOT NULL LANGUAGE SQL AS $$ +BEGIN + RETURN 'INT64_CAST'; +END; +$$; + +## Multiple overloads without explicit cast should fail +statement error 3130 +call procedure test_best_match(1); + +## INT32 literal with explicit cast should match INT32 version exactly query T -call procedure p_throw_expr('new_user'); +call procedure test_best_match(1::INT32); ---- -new_user +INT32_EXACT + +## INT64 with explicit cast should match INT64 version +query T +call procedure test_best_match(1::INT64); +---- +INT64_CAST + +statement ok +drop procedure test_best_match(INT32); + +statement ok +drop procedure test_best_match(INT64); + +## Test 5: Multiple overloads require explicit cast +statement ok +CREATE OR REPLACE PROCEDURE test_cost(x INT64, y INT64) RETURNS STRING NOT NULL LANGUAGE SQL AS $$ +BEGIN + RETURN 'BOTH_INT64'; +END; +$$; + +statement ok +CREATE OR REPLACE PROCEDURE test_cost(x INT64, y STRING) RETURNS STRING NOT NULL LANGUAGE SQL AS $$ +BEGIN + RETURN 'INT64_STRING'; +END; +$$; + +## Multiple overloads without explicit cast should fail +statement error 3130 +call procedure test_cost(1, 2); + +## With explicit cast, matches the right overload +query T +call procedure test_cost(1::INT64, 2::INT64); +---- +BOTH_INT64 + +statement ok +drop procedure test_cost(INT64, INT64); + +statement ok +drop procedure test_cost(INT64, STRING); + +## Test 6: Timestamp implicit cast from string +statement ok +CREATE OR REPLACE PROCEDURE test_timestamp(ts TIMESTAMP) RETURNS STRING NOT NULL LANGUAGE SQL AS $$ +BEGIN + RETURN 'GOT_TIMESTAMP'; +END; +$$; + +query T +call procedure test_timestamp('2024-01-15 10:30:00'); +---- +GOT_TIMESTAMP + +query T +call procedure test_timestamp(to_timestamp('2024-01-15 10:30:00')); +---- +GOT_TIMESTAMP + +statement ok +drop procedure test_timestamp(TIMESTAMP); + +## Test 7: Date implicit cast from string +statement ok +CREATE OR REPLACE PROCEDURE test_date(d DATE) RETURNS STRING NOT NULL LANGUAGE SQL AS $$ +BEGIN + RETURN 'GOT_DATE'; +END; +$$; + +query T +call procedure test_date(to_date('2024-01-15')); +---- +GOT_DATE + +## Single candidate allows String -> Date implicit cast +query T +call procedure test_date('2024-01-15'); +---- +GOT_DATE + +query T +call procedure test_date('2024-01-15'::DATE); +---- +GOT_DATE + +statement ok +drop procedure test_date(DATE); + +## Test 8: Decimal implicit cast +statement ok +CREATE OR REPLACE PROCEDURE test_decimal(x DECIMAL(10, 2)) RETURNS DECIMAL(10, 2) NOT NULL LANGUAGE SQL AS $$ +BEGIN + RETURN x + 1.5; +END; +$$; + +query T +call procedure test_decimal(10); +---- +11.50 + +query T +call procedure test_decimal(10.25); +---- +11.75 + +statement ok +drop procedure test_decimal(DECIMAL(10, 2)); + +## Test 9: Decimal precision mismatch - should auto cast +statement ok +CREATE OR REPLACE PROCEDURE test_decimal_precision(x DECIMAL(18, 6)) RETURNS STRING NOT NULL LANGUAGE SQL AS $$ +BEGIN + RETURN 'DECIMAL_18_6'; +END; +$$; + +query T +call procedure test_decimal_precision(123.45); +---- +DECIMAL_18_6 + +statement ok +drop procedure test_decimal_precision(DECIMAL(18, 6)); + +## Test 10: Timestamp vs Date overload resolution +statement ok +CREATE OR REPLACE PROCEDURE test_time_overload(ts TIMESTAMP) RETURNS STRING NOT NULL LANGUAGE SQL AS $$ +BEGIN + RETURN 'TIMESTAMP_VERSION'; +END; +$$; + +statement ok +CREATE OR REPLACE PROCEDURE test_time_overload(d DATE) RETURNS STRING NOT NULL LANGUAGE SQL AS $$ +BEGIN + RETURN 'DATE_VERSION'; +END; +$$; + +query T +call procedure test_time_overload(to_timestamp('2024-01-15 10:30:00')); +---- +TIMESTAMP_VERSION + +query T +call procedure test_time_overload(to_date('2024-01-15')); +---- +DATE_VERSION + +statement ok +drop procedure test_time_overload(TIMESTAMP); + +statement ok +drop procedure test_time_overload(DATE); + +## Test 11: Multiple overloads require explicit casts +statement ok +CREATE OR REPLACE PROCEDURE test_overload_cast(d DATE) RETURNS STRING NOT NULL LANGUAGE SQL AS $$ +BEGIN + RETURN 'DATE_VERSION'; +END; +$$; + +statement ok +CREATE OR REPLACE PROCEDURE test_overload_cast(ts TIMESTAMP) RETURNS STRING NOT NULL LANGUAGE SQL AS $$ +BEGIN + RETURN 'TIMESTAMP_VERSION'; +END; +$$; + +## Without explicit cast it should fail because multiple overloads exist +statement error 3130 +call procedure test_overload_cast('2024-03-01'); + +query T +call procedure test_overload_cast('2024-03-01'::DATE); +---- +DATE_VERSION + +statement ok +drop procedure test_overload_cast(DATE); + +statement ok +drop procedure test_overload_cast(TIMESTAMP); + +## Test 12: Different arities should not conflict +statement ok +CREATE OR REPLACE PROCEDURE test_overload_arity() RETURNS STRING NOT NULL LANGUAGE SQL AS $$ +BEGIN + RETURN 'NO_ARGS'; +END; +$$; + +statement ok +CREATE OR REPLACE PROCEDURE test_overload_arity(x INT64) RETURNS STRING NOT NULL LANGUAGE SQL AS $$ +BEGIN + RETURN 'ONE_ARG'; +END; +$$; + +query T +call procedure test_overload_arity(); +---- +NO_ARGS + +query T +call procedure test_overload_arity(1); +---- +ONE_ARG + +statement ok +drop procedure test_overload_arity(); statement ok -drop procedure p_throw_expr(string); +drop procedure test_overload_arity(INT64);