diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 6d0a4a9..9720e43 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -12,18 +12,19 @@ concurrency: cancel-in-progress: true jobs: - duckdb-next-build: - name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main - with: - duckdb_version: main - ci_tools_version: main - extension_name: parser_tools + # TODO: Re-enable once compatible with DuckDB main (SetOperationNode API change) + # duckdb-next-build: + # name: Build extension binaries + # uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main + # with: + # duckdb_version: main + # ci_tools_version: main + # extension_name: parser_tools duckdb-stable-build: name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.0 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.4 with: - duckdb_version: v1.4.0 - ci_tools_version: v1.4.0 + duckdb_version: v1.4.4 + ci_tools_version: v1.4.4 extension_name: parser_tools diff --git a/duckdb b/duckdb index b8a06e4..6ddac80 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit b8a06e4a22672e254cd0baa68a3dbed2eb51c56e +Subproject commit 6ddac802ffa9bcfbcc3f5f0d71de5dff9b0bc250 diff --git a/src/parse_tables.cpp b/src/parse_tables.cpp index d902364..2e29aaf 100644 --- a/src/parse_tables.cpp +++ b/src/parse_tables.cpp @@ -7,9 +7,13 @@ #include "duckdb/parser/statement/select_statement.hpp" #include "duckdb/parser/query_node/select_node.hpp" #include "duckdb/parser/query_node/cte_node.hpp" +#include "duckdb/parser/query_node/set_operation_node.hpp" +#include "duckdb/parser/result_modifier.hpp" #include "duckdb/parser/tableref/basetableref.hpp" #include "duckdb/parser/tableref/joinref.hpp" #include "duckdb/parser/tableref/subqueryref.hpp" +#include "duckdb/parser/expression/subquery_expression.hpp" +#include "duckdb/parser/parsed_expression_iterator.hpp" #include "duckdb/function/scalar/nested_functions.hpp" namespace duckdb { @@ -73,6 +77,13 @@ static unique_ptr ParseTablesInit(ClientContext &conte return make_uniq(); } +// Forward declaration for mutual recursion +static void ExtractTablesFromExpression( + const duckdb::ParsedExpression &expr, + std::vector &results, + const duckdb::CommonTableExpressionMap *cte_map +); + static void ExtractTablesFromRef( const duckdb::TableRef &ref, std::vector &results, @@ -104,6 +115,10 @@ static void ExtractTablesFromRef( auto &join = (JoinRef &)ref; ExtractTablesFromRef(*join.left, results, TableContext::JoinLeft, is_top_level, cte_map); ExtractTablesFromRef(*join.right, results, TableContext::JoinRight, false, cte_map); + // Process JOIN condition for subqueries + if (join.condition) { + ExtractTablesFromExpression(*join.condition, results, cte_map); + } break; } case TableReferenceType::SUBQUERY: { @@ -118,6 +133,33 @@ static void ExtractTablesFromRef( } } +// Extract tables from expressions that may contain subqueries (WHERE, HAVING, SELECT list, etc.) +static void ExtractTablesFromExpression( + const duckdb::ParsedExpression &expr, + std::vector &results, + const duckdb::CommonTableExpressionMap *cte_map +) { + using namespace duckdb; + + // Check if this is a subquery expression + if (expr.GetExpressionClass() == ExpressionClass::SUBQUERY) { + auto &subquery_expr = (const SubqueryExpression &)expr; + if (subquery_expr.subquery && subquery_expr.subquery->node) { + ExtractTablesFromQueryNode(*subquery_expr.subquery->node, results, TableContext::Subquery, cte_map); + } + // Also process the child expression (e.g., the left side of IN) + if (subquery_expr.child) { + ExtractTablesFromExpression(*subquery_expr.child, results, cte_map); + } + return; + } + + // Recursively process child expressions + ParsedExpressionIterator::EnumerateChildren(expr, + [&](const ParsedExpression &child) { + ExtractTablesFromExpression(child, results, cte_map); + }); +} static void ExtractTablesFromQueryNode( const duckdb::QueryNode &node, @@ -144,7 +186,36 @@ static void ExtractTablesFromQueryNode( if (select_node.from_table) { ExtractTablesFromRef(*select_node.from_table, results, context, true, &select_node.cte_map); } - } + + // Extract tables from WHERE clause subqueries + if (select_node.where_clause) { + ExtractTablesFromExpression(*select_node.where_clause, results, &select_node.cte_map); + } + + // Extract tables from SELECT list subqueries + for (const auto &expr : select_node.select_list) { + if (expr) { + ExtractTablesFromExpression(*expr, results, &select_node.cte_map); + } + } + + // Extract tables from HAVING clause subqueries + if (select_node.having) { + ExtractTablesFromExpression(*select_node.having, results, &select_node.cte_map); + } + + // Extract tables from QUALIFY clause subqueries + if (select_node.qualify) { + ExtractTablesFromExpression(*select_node.qualify, results, &select_node.cte_map); + } + + // Extract tables from GROUP BY expressions + for (const auto &expr : select_node.groups.group_expressions) { + if (expr) { + ExtractTablesFromExpression(*expr, results, &select_node.cte_map); + } + } + } // additional step necessary for duckdb v1.4.0: unwrap CTE node else if (node.type == QueryNodeType::CTE_NODE) { auto &cte_node = (CTENode &)node; @@ -153,6 +224,37 @@ static void ExtractTablesFromQueryNode( ExtractTablesFromQueryNode(*cte_node.child, results, context, cte_map); } } + // Handle UNION/INTERSECT/EXCEPT (set operations) + else if (node.type == QueryNodeType::SET_OPERATION_NODE) { + auto &set_node = (SetOperationNode &)node; + + if (set_node.left) { + ExtractTablesFromQueryNode(*set_node.left, results, context, cte_map); + } + if (set_node.right) { + ExtractTablesFromQueryNode(*set_node.right, results, context, cte_map); + } + } + + // Process result modifiers (ORDER BY, LIMIT) for all node types + for (const auto &modifier : node.modifiers) { + if (modifier->type == ResultModifierType::ORDER_MODIFIER) { + auto &order_modifier = (OrderModifier &)*modifier; + for (const auto &order : order_modifier.orders) { + if (order.expression) { + ExtractTablesFromExpression(*order.expression, results, cte_map); + } + } + } else if (modifier->type == ResultModifierType::LIMIT_MODIFIER) { + auto &limit_modifier = (LimitModifier &)*modifier; + if (limit_modifier.limit) { + ExtractTablesFromExpression(*limit_modifier.limit, results, cte_map); + } + if (limit_modifier.offset) { + ExtractTablesFromExpression(*limit_modifier.offset, results, cte_map); + } + } + } } static void ExtractTablesFromSQL(const std::string &sql, std::vector &results) { diff --git a/test/sql/parse_tools/scalar_functions/parse_tables.test b/test/sql/parse_tools/scalar_functions/parse_tables.test index e825611..a723e25 100644 --- a/test/sql/parse_tools/scalar_functions/parse_tables.test +++ b/test/sql/parse_tools/scalar_functions/parse_tables.test @@ -29,6 +29,41 @@ SELECT list_filter(parse_tables('select * from MyTable t inner join Other o on o ---- [{'schema': main, 'table': MyTable, 'context': from}] +# subquery in WHERE clause (IN) +query I +SELECT parse_tables('SELECT * FROM schema1.users WHERE id IN (SELECT user_id FROM schema2.orders)'); +---- +[{'schema': schema1, 'table': users, 'context': from}, {'schema': schema2, 'table': orders, 'context': from}] + +# subquery in WHERE clause (EXISTS) +query I +SELECT parse_tables('SELECT * FROM users WHERE EXISTS (SELECT 1 FROM orders WHERE orders.user_id = users.id)'); +---- +[{'schema': main, 'table': users, 'context': from}, {'schema': main, 'table': orders, 'context': from}] + +# deeply nested expression subqueries +query I +SELECT parse_tables('SELECT * FROM t1 WHERE a IN (SELECT x FROM t2 WHERE b IN (SELECT y FROM t3))'); +---- +[{'schema': main, 'table': t1, 'context': from}, {'schema': main, 'table': t2, 'context': from}, {'schema': main, 'table': t3, 'context': from}] + +# NOT IN subquery +query I +SELECT parse_tables('SELECT * FROM t1 WHERE id NOT IN (SELECT id FROM t2)'); +---- +[{'schema': main, 'table': t1, 'context': from}, {'schema': main, 'table': t2, 'context': from}] + +# subquery in HAVING clause +query I +SELECT parse_tables('SELECT user_id, COUNT(*) FROM orders GROUP BY user_id HAVING COUNT(*) > (SELECT AVG(order_count) FROM stats)'); +---- +[{'schema': main, 'table': orders, 'context': from}, {'schema': main, 'table': stats, 'context': from}] + +# subquery in QUALIFY clause +query I +SELECT parse_tables('SELECT * FROM t1 QUALIFY row_number() OVER() > (SELECT COUNT(*) FROM t2)'); +---- +[{'schema': main, 'table': t1, 'context': from}, {'schema': main, 'table': t2, 'context': from}] # Unsupported # ----------- diff --git a/test/sql/parse_tools/table_functions/parse_tables.test b/test/sql/parse_tools/table_functions/parse_tables.test index 338520b..d1cdf26 100644 --- a/test/sql/parse_tools/table_functions/parse_tables.test +++ b/test/sql/parse_tools/table_functions/parse_tables.test @@ -80,10 +80,129 @@ $$); main k from main l from -# INSERT INTO ... SELECT +# subquery in WHERE clause (IN) +query III +SELECT * FROM parse_tables('SELECT * FROM schema1.users WHERE id IN (SELECT user_id FROM schema2.orders);'); +---- +schema1 users from +schema2 orders from + +# subquery in WHERE clause (EXISTS) +query III +SELECT * FROM parse_tables('SELECT * FROM users WHERE EXISTS (SELECT 1 FROM orders WHERE orders.user_id = users.id);'); +---- +main users from +main orders from + +# subquery in SELECT list (scalar subquery) +query III +SELECT * FROM parse_tables('SELECT id, (SELECT COUNT(*) FROM orders WHERE orders.user_id = users.id) AS order_count FROM users;'); +---- +main users from +main orders from + +# multiple subqueries in WHERE +query III +SELECT * FROM parse_tables('SELECT * FROM t1 WHERE a IN (SELECT x FROM t2) AND b IN (SELECT y FROM t3);'); +---- +main t1 from +main t2 from +main t3 from + +# deeply nested expression subqueries query III -SELECT * FROM parse_tables('INSERT INTO m SELECT * FROM n;'); +SELECT * FROM parse_tables('SELECT * FROM t1 WHERE a IN (SELECT x FROM t2 WHERE b IN (SELECT y FROM t3));'); ---- +main t1 from +main t2 from +main t3 from + +# NOT IN subquery +query III +SELECT * FROM parse_tables('SELECT * FROM t1 WHERE id NOT IN (SELECT id FROM t2);'); +---- +main t1 from +main t2 from + +# NOT EXISTS subquery +query III +SELECT * FROM parse_tables('SELECT * FROM users WHERE NOT EXISTS (SELECT 1 FROM banned WHERE banned.user_id = users.id);'); +---- +main users from +main banned from + +# subquery in HAVING clause +query III +SELECT * FROM parse_tables('SELECT user_id, COUNT(*) FROM orders GROUP BY user_id HAVING COUNT(*) > (SELECT AVG(order_count) FROM stats);'); +---- +main orders from +main stats from + +# subquery in QUALIFY clause +query III +SELECT * FROM parse_tables('SELECT * FROM t1 QUALIFY row_number() OVER() > (SELECT COUNT(*) FROM t2);'); +---- +main t1 from +main t2 from + +# CASE WHEN with subquery +query III +SELECT * FROM parse_tables('SELECT CASE WHEN (SELECT COUNT(*) FROM t2) > 0 THEN 1 ELSE 0 END FROM t1;'); +---- +main t1 from +main t2 from + +# UNION query +query III +SELECT * FROM parse_tables('SELECT * FROM t1 UNION SELECT * FROM t2;'); +---- +main t1 from +main t2 from + +# UNION ALL query +query III +SELECT * FROM parse_tables('SELECT * FROM t1 UNION ALL SELECT * FROM t2;'); +---- +main t1 from +main t2 from + +# INTERSECT query +query III +SELECT * FROM parse_tables('SELECT * FROM t1 INTERSECT SELECT * FROM t2;'); +---- +main t1 from +main t2 from + +# EXCEPT query +query III +SELECT * FROM parse_tables('SELECT * FROM t1 EXCEPT SELECT * FROM t2;'); +---- +main t1 from +main t2 from + +# subquery in JOIN condition +query III +SELECT * FROM parse_tables('SELECT * FROM t1 JOIN t2 ON t1.id = (SELECT MAX(id) FROM t3);'); +---- +main t1 from +main t2 join_right +main t3 from + +# subquery in GROUP BY (rare but valid) +query III +SELECT * FROM parse_tables('SELECT COUNT(*) FROM t1 GROUP BY (SELECT 1 FROM t2 LIMIT 1);'); +---- +main t1 from +main t2 from + +# subquery in ORDER BY +query III +SELECT * FROM parse_tables('SELECT * FROM t1 ORDER BY (SELECT COUNT(*) FROM t2);'); +---- +main t1 from +main t2 from + +# INSERT INTO ... SELECT # UPDATE with FROM query III