Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 116 additions & 1 deletion regress/expected/unified_vertex_table.out
Original file line number Diff line number Diff line change
Expand Up @@ -1309,11 +1309,124 @@ $$) AS (eid agtype, props agtype, sid agtype, eid2 agtype);
11540474045136897 | {"weight": 10} | 11258999068426241 | 11821949021847553
(1 row)

--
-- Test 29: Verify join condition optimization with EXPLAIN
--
-- When vertices/edges from previous clauses are joined, the optimization
-- should replace patterns like:
-- age_id(_agtype_build_vertex(r.id, ...))::graphid
-- with direct column access:
-- r.id
--
-- This avoids expensive vertex reconstruction in join conditions.
--
-- Create test data: Users following each other
SELECT * FROM cypher('unified_test', $$
CREATE (:JoinOptUser {name: 'Alice'}),
(:JoinOptUser {name: 'Bob'}),
(:JoinOptUser {name: 'Carol'})
$$) AS (v agtype);
v
---
(0 rows)

SELECT * FROM cypher('unified_test', $$
MATCH (a:JoinOptUser {name: 'Alice'}), (b:JoinOptUser {name: 'Bob'})
CREATE (a)-[:JOPT_FOLLOWS]->(b)
$$) AS (e agtype);
e
---
(0 rows)

SELECT * FROM cypher('unified_test', $$
MATCH (b:JoinOptUser {name: 'Bob'}), (c:JoinOptUser {name: 'Carol'})
CREATE (b)-[:JOPT_FOLLOWS]->(c)
$$) AS (e agtype);
e
---
(0 rows)

-- EXPLAIN showing join conditions use direct column access
-- Look for: graphid_to_agtype(id) instead of age_id(_agtype_build_vertex(...))
-- And: direct id comparisons instead of age_id(...)::graphid
EXPLAIN (COSTS OFF)
SELECT * FROM cypher('unified_test', $$
MATCH (u:JoinOptUser)-[e:JOPT_FOLLOWS]->(v:JoinOptUser)
RETURN u.name, v.name
$$) AS (u_name agtype, v_name agtype);
QUERY PLAN
------------------------------------------------------------
Nested Loop
Join Filter: (e.start_id = u.id)
-> Nested Loop
-> Seq Scan on _ag_label_vertex u
Filter: (labels = '23814'::oid)
-> Seq Scan on _ag_label_vertex v
Filter: (labels = '23814'::oid)
-> Bitmap Heap Scan on "JOPT_FOLLOWS" e
Recheck Cond: (end_id = v.id)
-> Bitmap Index Scan on "JOPT_FOLLOWS_end_id_idx"
Index Cond: (end_id = v.id)
(11 rows)

-- Verify the query still returns correct results
SELECT * FROM cypher('unified_test', $$
MATCH (u:JoinOptUser)-[e:JOPT_FOLLOWS]->(v:JoinOptUser)
RETURN u.name, v.name
ORDER BY u.name
$$) AS (u_name agtype, v_name agtype);
u_name | v_name
---------+---------
"Alice" | "Bob"
"Bob" | "Carol"
(2 rows)

-- Multi-hop pattern showing optimization across multiple joins
EXPLAIN (COSTS OFF)
SELECT * FROM cypher('unified_test', $$
MATCH (a:JoinOptUser)-[e1:JOPT_FOLLOWS]->(b:JoinOptUser)-[e2:JOPT_FOLLOWS]->(c:JoinOptUser)
RETURN a.name, b.name, c.name
$$) AS (a_name agtype, b_name agtype, c_name agtype);
QUERY PLAN
------------------------------------------------------------------------
Nested Loop
Join Filter: (e1.start_id = a.id)
-> Nested Loop
Join Filter: _ag_enforce_edge_uniqueness2(e1.id, e2.id)
-> Nested Loop
Join Filter: (e2.start_id = b.id)
-> Nested Loop
-> Seq Scan on _ag_label_vertex b
Filter: (labels = '23814'::oid)
-> Seq Scan on _ag_label_vertex c
Filter: (labels = '23814'::oid)
-> Bitmap Heap Scan on "JOPT_FOLLOWS" e2
Recheck Cond: (end_id = c.id)
-> Bitmap Index Scan on "JOPT_FOLLOWS_end_id_idx"
Index Cond: (end_id = c.id)
-> Bitmap Heap Scan on "JOPT_FOLLOWS" e1
Recheck Cond: (end_id = b.id)
-> Bitmap Index Scan on "JOPT_FOLLOWS_end_id_idx"
Index Cond: (end_id = b.id)
-> Seq Scan on _ag_label_vertex a
Filter: (labels = '23814'::oid)
(21 rows)

-- Verify multi-hop query results
SELECT * FROM cypher('unified_test', $$
MATCH (a:JoinOptUser)-[e1:JOPT_FOLLOWS]->(b:JoinOptUser)-[e2:JOPT_FOLLOWS]->(c:JoinOptUser)
RETURN a.name, b.name, c.name
$$) AS (a_name agtype, b_name agtype, c_name agtype);
a_name | b_name | c_name
---------+--------+---------
"Alice" | "Bob" | "Carol"
(1 row)

--
-- Cleanup
--
SELECT drop_graph('unified_test', true);
NOTICE: drop cascades to 42 other objects
NOTICE: drop cascades to 44 other objects
DETAIL: drop cascades to table unified_test._ag_label_vertex
drop cascades to table unified_test._ag_label_edge
drop cascades to table unified_test."Person"
Expand Down Expand Up @@ -1356,6 +1469,8 @@ drop cascades to table unified_test."OptimizeTest"
drop cascades to table unified_test."OptStart"
drop cascades to table unified_test."OPT_EDGE"
drop cascades to table unified_test."OptEnd"
drop cascades to table unified_test."JoinOptUser"
drop cascades to table unified_test."JOPT_FOLLOWS"
NOTICE: graph "unified_test" has been dropped
drop_graph
------------
Expand Down
58 changes: 58 additions & 0 deletions regress/sql/unified_vertex_table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,64 @@ SELECT * FROM cypher('unified_test', $$
RETURN id(e), properties(e), start_id(e), end_id(e)
$$) AS (eid agtype, props agtype, sid agtype, eid2 agtype);

--
-- Test 29: Verify join condition optimization with EXPLAIN
--
-- When vertices/edges from previous clauses are joined, the optimization
-- should replace patterns like:
-- age_id(_agtype_build_vertex(r.id, ...))::graphid
-- with direct column access:
-- r.id
--
-- This avoids expensive vertex reconstruction in join conditions.
--

-- Create test data: Users following each other
SELECT * FROM cypher('unified_test', $$
CREATE (:JoinOptUser {name: 'Alice'}),
(:JoinOptUser {name: 'Bob'}),
(:JoinOptUser {name: 'Carol'})
$$) AS (v agtype);

SELECT * FROM cypher('unified_test', $$
MATCH (a:JoinOptUser {name: 'Alice'}), (b:JoinOptUser {name: 'Bob'})
CREATE (a)-[:JOPT_FOLLOWS]->(b)
$$) AS (e agtype);

SELECT * FROM cypher('unified_test', $$
MATCH (b:JoinOptUser {name: 'Bob'}), (c:JoinOptUser {name: 'Carol'})
CREATE (b)-[:JOPT_FOLLOWS]->(c)
$$) AS (e agtype);

-- EXPLAIN showing join conditions use direct column access
-- Look for: graphid_to_agtype(id) instead of age_id(_agtype_build_vertex(...))
-- And: direct id comparisons instead of age_id(...)::graphid
EXPLAIN (COSTS OFF)
SELECT * FROM cypher('unified_test', $$
MATCH (u:JoinOptUser)-[e:JOPT_FOLLOWS]->(v:JoinOptUser)
RETURN u.name, v.name
$$) AS (u_name agtype, v_name agtype);

-- Verify the query still returns correct results
SELECT * FROM cypher('unified_test', $$
MATCH (u:JoinOptUser)-[e:JOPT_FOLLOWS]->(v:JoinOptUser)
RETURN u.name, v.name
ORDER BY u.name
$$) AS (u_name agtype, v_name agtype);

-- Multi-hop pattern showing optimization across multiple joins
EXPLAIN (COSTS OFF)
SELECT * FROM cypher('unified_test', $$
MATCH (a:JoinOptUser)-[e1:JOPT_FOLLOWS]->(b:JoinOptUser)-[e2:JOPT_FOLLOWS]->(c:JoinOptUser)
RETURN a.name, b.name, c.name
$$) AS (a_name agtype, b_name agtype, c_name agtype);

-- Verify multi-hop query results
SELECT * FROM cypher('unified_test', $$
MATCH (a:JoinOptUser)-[e1:JOPT_FOLLOWS]->(b:JoinOptUser)-[e2:JOPT_FOLLOWS]->(c:JoinOptUser)
RETURN a.name, b.name, c.name
$$) AS (a_name agtype, b_name agtype, c_name agtype);

--
-- Cleanup
--
Expand Down
164 changes: 164 additions & 0 deletions src/backend/parser/cypher_clause.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "parser/parsetree.h"
#include "parser/parse_relation.h"
#include "rewrite/rewriteHandler.h"
#include "utils/lsyscache.h"

#include "catalog/ag_graph.h"
#include "catalog/ag_label.h"
Expand Down Expand Up @@ -136,6 +137,7 @@ static Node *make_edge_expr(cypher_parsestate *cpstate,
ParseNamespaceItem *pnsi);
static Node *make_qual(cypher_parsestate *cpstate,
transform_entity *entity, char *name);
static Node *optimize_qual_expr_mutator(Node *node, void *context);
static TargetEntry *
transform_match_create_path_variable(cypher_parsestate *cpstate,
cypher_path *path, List *entities);
Expand Down Expand Up @@ -3375,11 +3377,173 @@ static void transform_match_pattern(cypher_parsestate *cpstate, Query *query,
expr = (Expr *)coerce_to_boolean(pstate, (Node *)expr, "WHERE");
}

/*
* Apply optimization to the transformed expression tree. This looks for
* patterns like age_id(_agtype_build_vertex(...)) and replaces them with
* direct column references.
*/
if (expr != NULL)
{
expr = (Expr *)optimize_qual_expr_mutator((Node *)expr, NULL);
}

query->rtable = cpstate->pstate.p_rtable;
query->rteperminfos = cpstate->pstate.p_rteperminfos;
query->jointree = makeFromExpr(cpstate->pstate.p_joinlist, (Node *)expr);
}

/*
 * optimize_qual_expr_mutator - Expression tree mutator that short-circuits
 * accessor calls applied directly to a freshly built vertex or edge.
 *
 * The following shapes are rewritten (matching is done on the function name
 * and the argument count only):
 *
 *   age_id(_agtype_build_vertex(id, label, props))        -> graphid_to_agtype(id)
 *   age_properties(_agtype_build_vertex(id, label, props)) -> props
 *   age_id(_agtype_build_edge(id, s, e, label, props))       -> graphid_to_agtype(id)
 *   age_start_id(_agtype_build_edge(id, s, e, label, props)) -> graphid_to_agtype(s)
 *   age_end_id(_agtype_build_edge(id, s, e, label, props))   -> graphid_to_agtype(e)
 *   age_properties(_agtype_build_edge(id, s, e, label, props)) -> props
 *
 * This avoids building a complete vertex/edge agtype value only to pull a
 * single field back out of it, which matters most for join conditions that
 * reference a vertex/edge from a previous clause.
 *
 * Parameters:
 *   node    - expression (sub)tree to process; may be NULL.
 *   context - opaque pointer handed through unchanged to every recursive
 *             call; this function never dereferences it.
 *
 * Returns the rewritten tree, or NULL when node is NULL. Anything that does
 * not match one of the shapes above is handed to expression_tree_mutator()
 * so that its children are still visited.
 *
 * NOTE(review): functions are identified by name only, not by schema or
 * argument types. Confirm that no unrelated function carrying one of these
 * names can appear in a transformed qual.
 *
 * NOTE(review): the rewrite assumes accessor(build(...)) yields the same
 * value as the raw build argument for every input, including NULL arguments.
 * Confirm against the build functions.
 */
static Node *optimize_qual_expr_mutator(Node *node, void *context)
{
    if (node == NULL)
    {
        return NULL;
    }

    /*
     * Look for FuncExpr nodes that wrap accessor functions around
     * _agtype_build_vertex or _agtype_build_edge calls.
     */
    if (IsA(node, FuncExpr))
    {
        FuncExpr *outer_func = (FuncExpr *)node;
        FuncExpr *inner_func;
        char *outer_func_name;
        char *inner_func_name;
        Node *arg;
        List *inner_args;
        bool is_id;
        bool is_start_id;
        bool is_end_id;
        bool is_properties;
        int arg_index = -1;

        /* every accessor we handle takes exactly one argument */
        if (list_length(outer_func->args) != 1)
        {
            goto recurse;
        }

        outer_func_name = get_func_name(outer_func->funcid);
        if (outer_func_name == NULL)
        {
            goto recurse;
        }

        /* classify the accessor once instead of re-comparing the name */
        is_id = (strcmp(outer_func_name, "age_id") == 0);
        is_start_id = (strcmp(outer_func_name, "age_start_id") == 0);
        is_end_id = (strcmp(outer_func_name, "age_end_id") == 0);
        is_properties = (strcmp(outer_func_name, "age_properties") == 0);

        if (!is_id && !is_start_id && !is_end_id && !is_properties)
        {
            goto recurse;
        }

        arg = (Node *)linitial(outer_func->args);

        /*
         * The argument must be a FuncExpr (the build function). Guard
         * against a NULL list cell before inspecting the node tag.
         */
        if (arg == NULL || !IsA(arg, FuncExpr))
        {
            goto recurse;
        }

        inner_func = (FuncExpr *)arg;
        inner_func_name = get_func_name(inner_func->funcid);
        if (inner_func_name == NULL)
        {
            goto recurse;
        }

        inner_args = inner_func->args;

        /*
         * _agtype_build_vertex(id, label_name, properties)
         * Arguments: 0=id, 1=label_name, 2=properties
         *
         * age_start_id/age_end_id have no matching vertex argument, so they
         * leave arg_index at -1 and the expression is left untouched.
         */
        if (strcmp(inner_func_name, "_agtype_build_vertex") == 0 &&
            list_length(inner_args) == 3)
        {
            if (is_id)
            {
                arg_index = 0; /* id */
            }
            else if (is_properties)
            {
                arg_index = 2; /* properties */
            }
        }
        /*
         * _agtype_build_edge(id, startid, endid, label_name, properties)
         * Arguments: 0=id, 1=start_id, 2=end_id, 3=label_name, 4=properties
         */
        else if (strcmp(inner_func_name, "_agtype_build_edge") == 0 &&
                 list_length(inner_args) == 5)
        {
            if (is_id)
            {
                arg_index = 0; /* id */
            }
            else if (is_start_id)
            {
                arg_index = 1; /* start_id */
            }
            else if (is_end_id)
            {
                arg_index = 2; /* end_id */
            }
            else if (is_properties)
            {
                arg_index = 4; /* properties */
            }
        }

        /* If we found a pattern to optimize */
        if (arg_index >= 0)
        {
            Oid cast_func_oid;
            FuncExpr *cast_expr;
            Node *extracted_arg;

            /*
             * Run the mutator over the extracted argument as well. Without
             * this, any optimizable pattern nested inside the argument would
             * be skipped (the generic recursion below is bypassed on this
             * path), and the result would share a node with the input tree.
             */
            extracted_arg = optimize_qual_expr_mutator(
                (Node *)list_nth(inner_args, arg_index), context);

            /* the properties argument is used as-is, without a cast */
            if (is_properties)
            {
                return extracted_arg;
            }

            /*
             * For the id fields (id, start_id, end_id), wrap the argument
             * in graphid_to_agtype so the replacement expression is built
             * with an agtype result type, like the call it replaces.
             */
            cast_func_oid = get_ag_func_oid("graphid_to_agtype", 1,
                                            GRAPHIDOID);

            cast_expr = makeFuncExpr(cast_func_oid, AGTYPEOID,
                                     list_make1(extracted_arg),
                                     InvalidOid, InvalidOid,
                                     COERCE_EXPLICIT_CALL);
            /* keep the original call's parse location on the replacement */
            cast_expr->location = outer_func->location;

            return (Node *)cast_expr;
        }
    }

recurse:
    return expression_tree_mutator(node, optimize_qual_expr_mutator, context);
}

/*
* Creates a FuncCall node that will prevent an edge from being joined
* to twice.
Expand Down
Loading