Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions .github/workflows/MainDistributionPipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,19 @@ concurrency:
cancel-in-progress: true

jobs:
duckdb-next-build:
name: Build extension binaries
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
with:
duckdb_version: main
ci_tools_version: main
extension_name: parser_tools
# TODO: Re-enable once compatible with DuckDB main (SetOperationNode API change)
# duckdb-next-build:
# name: Build extension binaries
# uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
# with:
# duckdb_version: main
# ci_tools_version: main
# extension_name: parser_tools

duckdb-stable-build:
name: Build extension binaries
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.0
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.4.4
with:
duckdb_version: v1.4.0
ci_tools_version: v1.4.0
duckdb_version: v1.4.4
ci_tools_version: v1.4.4
extension_name: parser_tools
2 changes: 1 addition & 1 deletion duckdb
Submodule duckdb updated 1197 files
104 changes: 103 additions & 1 deletion src/parse_tables.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@
#include "duckdb/parser/statement/select_statement.hpp"
#include "duckdb/parser/query_node/select_node.hpp"
#include "duckdb/parser/query_node/cte_node.hpp"
#include "duckdb/parser/query_node/set_operation_node.hpp"
#include "duckdb/parser/result_modifier.hpp"
#include "duckdb/parser/tableref/basetableref.hpp"
#include "duckdb/parser/tableref/joinref.hpp"
#include "duckdb/parser/tableref/subqueryref.hpp"
#include "duckdb/parser/expression/subquery_expression.hpp"
#include "duckdb/parser/parsed_expression_iterator.hpp"
#include "duckdb/function/scalar/nested_functions.hpp"

namespace duckdb {
Expand Down Expand Up @@ -73,6 +77,13 @@ static unique_ptr<GlobalTableFunctionState> ParseTablesInit(ClientContext &conte
return make_uniq<ParseTablesState>();
}

// Forward declaration for mutual recursion: ExtractTablesFromRef calls this
// for JOIN conditions, while this function descends into subquery query nodes
// via ExtractTablesFromQueryNode, which in turn walks table refs again.
//   expr    - expression to search for nested subqueries
//   results - output list that discovered table references are added to
//   cte_map - CTE map forwarded unchanged to the recursive calls
static void ExtractTablesFromExpression(
const duckdb::ParsedExpression &expr,
std::vector<TableRefResult> &results,
const duckdb::CommonTableExpressionMap *cte_map
);

static void ExtractTablesFromRef(
const duckdb::TableRef &ref,
std::vector<TableRefResult> &results,
Expand Down Expand Up @@ -104,6 +115,10 @@ static void ExtractTablesFromRef(
auto &join = (JoinRef &)ref;
ExtractTablesFromRef(*join.left, results, TableContext::JoinLeft, is_top_level, cte_map);
ExtractTablesFromRef(*join.right, results, TableContext::JoinRight, false, cte_map);
// Process JOIN condition for subqueries
if (join.condition) {
ExtractTablesFromExpression(*join.condition, results, cte_map);
}
break;
}
case TableReferenceType::SUBQUERY: {
Expand All @@ -118,6 +133,33 @@ static void ExtractTablesFromRef(
}
}

// Collects table references from subqueries nested anywhere inside an
// expression (WHERE, HAVING, QUALIFY, SELECT list, GROUP BY, JOIN conditions, ...).
//   expr    - expression tree to search
//   results - output list; entries are added in traversal order
//   cte_map - CTE map forwarded unchanged to every recursive call
static void ExtractTablesFromExpression(
	const duckdb::ParsedExpression &expr,
	std::vector<TableRefResult> &results,
	const duckdb::CommonTableExpressionMap *cte_map
) {
	using namespace duckdb;

	// Non-subquery expressions are only searched through their children.
	if (expr.GetExpressionClass() != ExpressionClass::SUBQUERY) {
		ParsedExpressionIterator::EnumerateChildren(expr, [&](const ParsedExpression &nested) {
			ExtractTablesFromExpression(nested, results, cte_map);
		});
		return;
	}

	const auto &subquery = static_cast<const SubqueryExpression &>(expr);

	// Walk the subquery's own query node first so its tables are reported
	// before anything found in the child expression.
	const bool has_query_node = subquery.subquery && subquery.subquery->node;
	if (has_query_node) {
		ExtractTablesFromQueryNode(*subquery.subquery->node, results, TableContext::Subquery, cte_map);
	}

	// The child expression (e.g., the left side of IN) may itself hold subqueries.
	if (subquery.child) {
		ExtractTablesFromExpression(*subquery.child, results, cte_map);
	}
}

static void ExtractTablesFromQueryNode(
const duckdb::QueryNode &node,
Expand All @@ -144,7 +186,36 @@ static void ExtractTablesFromQueryNode(
if (select_node.from_table) {
ExtractTablesFromRef(*select_node.from_table, results, context, true, &select_node.cte_map);
}
}

// Extract tables from WHERE clause subqueries
if (select_node.where_clause) {
ExtractTablesFromExpression(*select_node.where_clause, results, &select_node.cte_map);
}

// Extract tables from SELECT list subqueries
for (const auto &expr : select_node.select_list) {
if (expr) {
ExtractTablesFromExpression(*expr, results, &select_node.cte_map);
}
}

// Extract tables from HAVING clause subqueries
if (select_node.having) {
ExtractTablesFromExpression(*select_node.having, results, &select_node.cte_map);
}

// Extract tables from QUALIFY clause subqueries
if (select_node.qualify) {
ExtractTablesFromExpression(*select_node.qualify, results, &select_node.cte_map);
}

// Extract tables from GROUP BY expressions
for (const auto &expr : select_node.groups.group_expressions) {
if (expr) {
ExtractTablesFromExpression(*expr, results, &select_node.cte_map);
}
}
}
// additional step necessary for duckdb v1.4.0: unwrap CTE node
else if (node.type == QueryNodeType::CTE_NODE) {
auto &cte_node = (CTENode &)node;
Expand All @@ -153,6 +224,37 @@ static void ExtractTablesFromQueryNode(
ExtractTablesFromQueryNode(*cte_node.child, results, context, cte_map);
}
}
// Handle UNION/INTERSECT/EXCEPT (set operations)
else if (node.type == QueryNodeType::SET_OPERATION_NODE) {
auto &set_node = (SetOperationNode &)node;

if (set_node.left) {
ExtractTablesFromQueryNode(*set_node.left, results, context, cte_map);
}
if (set_node.right) {
ExtractTablesFromQueryNode(*set_node.right, results, context, cte_map);
}
}

// Process result modifiers (ORDER BY, LIMIT) for all node types
for (const auto &modifier : node.modifiers) {
if (modifier->type == ResultModifierType::ORDER_MODIFIER) {
auto &order_modifier = (OrderModifier &)*modifier;
for (const auto &order : order_modifier.orders) {
if (order.expression) {
ExtractTablesFromExpression(*order.expression, results, cte_map);
}
}
} else if (modifier->type == ResultModifierType::LIMIT_MODIFIER) {
auto &limit_modifier = (LimitModifier &)*modifier;
if (limit_modifier.limit) {
ExtractTablesFromExpression(*limit_modifier.limit, results, cte_map);
}
if (limit_modifier.offset) {
ExtractTablesFromExpression(*limit_modifier.offset, results, cte_map);
}
}
}
}

static void ExtractTablesFromSQL(const std::string &sql, std::vector<TableRefResult> &results) {
Expand Down
35 changes: 35 additions & 0 deletions test/sql/parse_tools/scalar_functions/parse_tables.test
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,41 @@ SELECT list_filter(parse_tables('select * from MyTable t inner join Other o on o
----
[{'schema': main, 'table': MyTable, 'context': from}]

# subquery in WHERE clause (IN)
query I
SELECT parse_tables('SELECT * FROM schema1.users WHERE id IN (SELECT user_id FROM schema2.orders)');
----
[{'schema': schema1, 'table': users, 'context': from}, {'schema': schema2, 'table': orders, 'context': from}]

# subquery in WHERE clause (EXISTS)
query I
SELECT parse_tables('SELECT * FROM users WHERE EXISTS (SELECT 1 FROM orders WHERE orders.user_id = users.id)');
----
[{'schema': main, 'table': users, 'context': from}, {'schema': main, 'table': orders, 'context': from}]

# deeply nested expression subqueries
query I
SELECT parse_tables('SELECT * FROM t1 WHERE a IN (SELECT x FROM t2 WHERE b IN (SELECT y FROM t3))');
----
[{'schema': main, 'table': t1, 'context': from}, {'schema': main, 'table': t2, 'context': from}, {'schema': main, 'table': t3, 'context': from}]

# NOT IN subquery
query I
SELECT parse_tables('SELECT * FROM t1 WHERE id NOT IN (SELECT id FROM t2)');
----
[{'schema': main, 'table': t1, 'context': from}, {'schema': main, 'table': t2, 'context': from}]

# subquery in HAVING clause
query I
SELECT parse_tables('SELECT user_id, COUNT(*) FROM orders GROUP BY user_id HAVING COUNT(*) > (SELECT AVG(order_count) FROM stats)');
----
[{'schema': main, 'table': orders, 'context': from}, {'schema': main, 'table': stats, 'context': from}]

# subquery in QUALIFY clause
query I
SELECT parse_tables('SELECT * FROM t1 QUALIFY row_number() OVER() > (SELECT COUNT(*) FROM t2)');
----
[{'schema': main, 'table': t1, 'context': from}, {'schema': main, 'table': t2, 'context': from}]

# Unsupported
# -----------
Expand Down
123 changes: 121 additions & 2 deletions test/sql/parse_tools/table_functions/parse_tables.test
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,129 @@ $$);
main k from
main l from

# INSERT INTO ... SELECT
# subquery in WHERE clause (IN)
query III
SELECT * FROM parse_tables('SELECT * FROM schema1.users WHERE id IN (SELECT user_id FROM schema2.orders);');
----
schema1 users from
schema2 orders from

# subquery in WHERE clause (EXISTS)
query III
SELECT * FROM parse_tables('SELECT * FROM users WHERE EXISTS (SELECT 1 FROM orders WHERE orders.user_id = users.id);');
----
main users from
main orders from

# subquery in SELECT list (scalar subquery)
query III
SELECT * FROM parse_tables('SELECT id, (SELECT COUNT(*) FROM orders WHERE orders.user_id = users.id) AS order_count FROM users;');
----
main users from
main orders from

# multiple subqueries in WHERE
query III
SELECT * FROM parse_tables('SELECT * FROM t1 WHERE a IN (SELECT x FROM t2) AND b IN (SELECT y FROM t3);');
----
main t1 from
main t2 from
main t3 from

# deeply nested expression subqueries
query III
SELECT * FROM parse_tables('INSERT INTO m SELECT * FROM n;');
SELECT * FROM parse_tables('SELECT * FROM t1 WHERE a IN (SELECT x FROM t2 WHERE b IN (SELECT y FROM t3));');
----
main t1 from
main t2 from
main t3 from

# NOT IN subquery
query III
SELECT * FROM parse_tables('SELECT * FROM t1 WHERE id NOT IN (SELECT id FROM t2);');
----
main t1 from
main t2 from

# NOT EXISTS subquery
query III
SELECT * FROM parse_tables('SELECT * FROM users WHERE NOT EXISTS (SELECT 1 FROM banned WHERE banned.user_id = users.id);');
----
main users from
main banned from

# subquery in HAVING clause
query III
SELECT * FROM parse_tables('SELECT user_id, COUNT(*) FROM orders GROUP BY user_id HAVING COUNT(*) > (SELECT AVG(order_count) FROM stats);');
----
main orders from
main stats from

# subquery in QUALIFY clause
query III
SELECT * FROM parse_tables('SELECT * FROM t1 QUALIFY row_number() OVER() > (SELECT COUNT(*) FROM t2);');
----
main t1 from
main t2 from

# CASE WHEN with subquery
query III
SELECT * FROM parse_tables('SELECT CASE WHEN (SELECT COUNT(*) FROM t2) > 0 THEN 1 ELSE 0 END FROM t1;');
----
main t1 from
main t2 from

# UNION query
query III
SELECT * FROM parse_tables('SELECT * FROM t1 UNION SELECT * FROM t2;');
----
main t1 from
main t2 from

# UNION ALL query
query III
SELECT * FROM parse_tables('SELECT * FROM t1 UNION ALL SELECT * FROM t2;');
----
main t1 from
main t2 from

# INTERSECT query
query III
SELECT * FROM parse_tables('SELECT * FROM t1 INTERSECT SELECT * FROM t2;');
----
main t1 from
main t2 from

# EXCEPT query
query III
SELECT * FROM parse_tables('SELECT * FROM t1 EXCEPT SELECT * FROM t2;');
----
main t1 from
main t2 from

# subquery in JOIN condition
query III
SELECT * FROM parse_tables('SELECT * FROM t1 JOIN t2 ON t1.id = (SELECT MAX(id) FROM t3);');
----
main t1 from
main t2 join_right
main t3 from

# subquery in GROUP BY (rare but valid)
query III
SELECT * FROM parse_tables('SELECT COUNT(*) FROM t1 GROUP BY (SELECT 1 FROM t2 LIMIT 1);');
----
main t1 from
main t2 from

# subquery in ORDER BY
query III
SELECT * FROM parse_tables('SELECT * FROM t1 ORDER BY (SELECT COUNT(*) FROM t2);');
----
main t1 from
main t2 from

# INSERT INTO ... SELECT

# UPDATE with FROM
query III
Expand Down
Loading