From 60d94c7ed4dfabe67ea940b0dddaf1980380a049 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Thu, 7 Aug 2025 17:03:51 -0700 Subject: [PATCH] Add additional user variables during node and edge transform This PR adds additional variables to a node and edge during the transform node/edge phase, provided that a variable was specified for the node or edge. These additional variables are added to allow direct access to the columns of the specific node or edge. These variables can be used to improve the performance of some queries. Variables added have the following suffixes to the node or edge variable name - _idc ID Column _propertiesc PROPERTIES Column _start_idc EDGE START_ID Column _end_idc EDGE END_ID Column For example - MATCH (u) RETURN u, u_idc, u_propertiesc Regression tests added. modified: regress/expected/cypher_match.out modified: regress/sql/cypher_match.sql modified: src/backend/parser/cypher_clause.c modified: src/backend/parser/cypher_item.c modified: src/backend/utils/adt/agtype.c modified: src/include/parser/cypher_parse_node.h --- regress/expected/cypher_match.out | 96 ++++++++++++++++++++++++++ regress/sql/cypher_match.sql | 14 ++++ src/backend/parser/cypher_clause.c | 86 +++++++++++++++++++++++ src/backend/parser/cypher_item.c | 58 +++++++++++++++- src/backend/utils/adt/agtype.c | 5 +- src/include/parser/cypher_parse_node.h | 12 ++++ 6 files changed, 267 insertions(+), 4 deletions(-) diff --git a/regress/expected/cypher_match.out b/regress/expected/cypher_match.out index e83ba3b93..af6eec613 100644 --- a/regress/expected/cypher_match.out +++ b/regress/expected/cypher_match.out @@ -3535,9 +3535,105 @@ SELECT * FROM cypher('test_enable_containment', $$ EXPLAIN (costs off) MATCH (x: Filter: ((agtype_access_operator(VARIADIC ARRAY[properties, '"school"'::agtype]) = '{"name": "XYZ College", "program": {"major": "Psyc", "degree": "BSc"}}'::agtype) AND (agtype_access_operator(VARIADIC ARRAY[properties, '"phone"'::agtype]) = '[123456789, 987654321, 
456987123]'::agtype)) (2 rows) +--- +--- tests for the additional variables added during node and edge transform +--- +SELECT FROM create_graph('special_vars'); +NOTICE: graph "special_vars" has been created +-- +(1 row) + +SELECT * FROM cypher('special_vars', $$ CREATE (u:Object {id: 1}) RETURN u $$) AS (u agtype); + u +----------------------------------------------------------------------------- + {"id": 844424930131969, "label": "Object", "properties": {"id": 1}}::vertex +(1 row) + +SELECT * FROM cypher('special_vars', $$ CREATE (u:Object {id: 2}) RETURN u $$) AS (u agtype); + u +----------------------------------------------------------------------------- + {"id": 844424930131970, "label": "Object", "properties": {"id": 2}}::vertex +(1 row) + +SELECT * FROM cypher('special_vars', $$ CREATE (u:Object {id: 3}) RETURN u $$) AS (u agtype); + u +----------------------------------------------------------------------------- + {"id": 844424930131971, "label": "Object", "properties": {"id": 3}}::vertex +(1 row) + +SELECT * FROM cypher('special_vars', $$ MATCH (u) MATCH (v) CREATE(u)-[e:KNOWS {start: u_idc, end: v_idc}]->(v) RETURN e $$) AS (edge agtype); + edge +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"id": 1125899906842625, "label": "KNOWS", "end_id": 844424930131969, "start_id": 844424930131969, "properties": {"end": 844424930131969, "start": 844424930131969}}::edge + {"id": 1125899906842626, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {"end": 844424930131970, "start": 844424930131969}}::edge + {"id": 1125899906842627, "label": "KNOWS", "end_id": 844424930131971, "start_id": 844424930131969, "properties": {"end": 844424930131971, "start": 844424930131969}}::edge + {"id": 1125899906842628, "label": "KNOWS", "end_id": 844424930131969, "start_id": 844424930131970, "properties": {"end": 
844424930131969, "start": 844424930131970}}::edge + {"id": 1125899906842629, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131970, "properties": {"end": 844424930131970, "start": 844424930131970}}::edge + {"id": 1125899906842630, "label": "KNOWS", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {"end": 844424930131971, "start": 844424930131970}}::edge + {"id": 1125899906842631, "label": "KNOWS", "end_id": 844424930131969, "start_id": 844424930131971, "properties": {"end": 844424930131969, "start": 844424930131971}}::edge + {"id": 1125899906842632, "label": "KNOWS", "end_id": 844424930131970, "start_id": 844424930131971, "properties": {"end": 844424930131970, "start": 844424930131971}}::edge + {"id": 1125899906842633, "label": "KNOWS", "end_id": 844424930131971, "start_id": 844424930131971, "properties": {"end": 844424930131971, "start": 844424930131971}}::edge +(9 rows) + +SELECT * FROM cypher('special_vars', $$ MATCH (u)-[e]->(v) RETURN e_idc, e_start_idc, e_end_idc, e_propertiesc $$) AS (e_idc agtype, e_start_idc agtype, e_end_idc agtype, e_propertiesc agtype); + e_idc | e_start_idc | e_end_idc | e_propertiesc +------------------+-----------------+-----------------+---------------------------------------------------- + 1125899906842628 | 844424930131970 | 844424930131969 | {"end": 844424930131969, "start": 844424930131970} + 1125899906842625 | 844424930131969 | 844424930131969 | {"end": 844424930131969, "start": 844424930131969} + 1125899906842631 | 844424930131971 | 844424930131969 | {"end": 844424930131969, "start": 844424930131971} + 1125899906842629 | 844424930131970 | 844424930131970 | {"end": 844424930131970, "start": 844424930131970} + 1125899906842626 | 844424930131969 | 844424930131970 | {"end": 844424930131970, "start": 844424930131969} + 1125899906842632 | 844424930131971 | 844424930131970 | {"end": 844424930131970, "start": 844424930131971} + 1125899906842627 | 844424930131969 | 844424930131971 | {"end": 
844424930131971, "start": 844424930131969} + 1125899906842633 | 844424930131971 | 844424930131971 | {"end": 844424930131971, "start": 844424930131971} + 1125899906842630 | 844424930131970 | 844424930131971 | {"end": 844424930131971, "start": 844424930131970} +(9 rows) + +SELECT * FROM cypher('special_vars', $$ MATCH (u)-[e]->(v) RETURN u_idc, u_propertiesc, v_idc, v_propertiesc $$) AS (u_idc agtype, u_propertiesc agtype, v_idc agtype, v_propertiesc agtype); + u_idc | u_propertiesc | v_idc | v_propertiesc +-----------------+---------------+-----------------+--------------- + 844424930131970 | {"id": 2} | 844424930131969 | {"id": 1} + 844424930131969 | {"id": 1} | 844424930131969 | {"id": 1} + 844424930131971 | {"id": 3} | 844424930131969 | {"id": 1} + 844424930131970 | {"id": 2} | 844424930131970 | {"id": 2} + 844424930131969 | {"id": 1} | 844424930131970 | {"id": 2} + 844424930131971 | {"id": 3} | 844424930131970 | {"id": 2} + 844424930131969 | {"id": 1} | 844424930131971 | {"id": 3} + 844424930131971 | {"id": 3} | 844424930131971 | {"id": 3} + 844424930131970 | {"id": 2} | 844424930131971 | {"id": 3} +(9 rows) + +SELECT * FROM cypher('special_vars', $$ MATCH (u)-[e]->() RETURN count(*), u_idc ORDER BY count(*) DESC $$) AS (count agtype, u_idc agtype); + count | u_idc +-------+----------------- + 3 | 844424930131969 + 3 | 844424930131970 + 3 | 844424930131971 +(3 rows) + +SELECT * FROM cypher('special_vars', $$ MATCH (u)-[e]->() RETURN count(*), id(u) ORDER BY count(*) DESC $$) AS (count agtype, idu agtype); + count | idu +-------+----------------- + 3 | 844424930131969 + 3 | 844424930131970 + 3 | 844424930131971 +(3 rows) + -- -- Clean up -- +SELECT drop_graph('special_vars', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table special_vars._ag_label_vertex +drop cascades to table special_vars._ag_label_edge +drop cascades to table special_vars."Object" +drop cascades to table special_vars."KNOWS" +NOTICE: graph "special_vars" has been 
dropped + drop_graph +------------ + +(1 row) + SELECT drop_graph('cypher_match', true); NOTICE: drop cascades to 17 other objects DETAIL: drop cascades to table cypher_match._ag_label_vertex diff --git a/regress/sql/cypher_match.sql b/regress/sql/cypher_match.sql index 2817f36f6..46cf4d1e3 100644 --- a/regress/sql/cypher_match.sql +++ b/regress/sql/cypher_match.sql @@ -1437,9 +1437,23 @@ SELECT count(*) FROM cypher('test_enable_containment', $$ MATCH p=(x:Customer)-[ SELECT * FROM cypher('test_enable_containment', $$ EXPLAIN (costs off) MATCH (x:Customer)-[:bought ={store: 'Amazon', addr:{city: 'Vancouver', street: 30}}]->(y:Product) RETURN 0 $$) as (a agtype); SELECT * FROM cypher('test_enable_containment', $$ EXPLAIN (costs off) MATCH (x:Customer ={school: { name: 'XYZ College',program: { major: 'Psyc', degree: 'BSc'} },phone: [ 123456789, 987654321, 456987123 ]}) RETURN 0 $$) as (a agtype); +--- +--- tests for the additional variables added during node and edge transform +--- +SELECT FROM create_graph('special_vars'); +SELECT * FROM cypher('special_vars', $$ CREATE (u:Object {id: 1}) RETURN u $$) AS (u agtype); +SELECT * FROM cypher('special_vars', $$ CREATE (u:Object {id: 2}) RETURN u $$) AS (u agtype); +SELECT * FROM cypher('special_vars', $$ CREATE (u:Object {id: 3}) RETURN u $$) AS (u agtype); +SELECT * FROM cypher('special_vars', $$ MATCH (u) MATCH (v) CREATE(u)-[e:KNOWS {start: u_idc, end: v_idc}]->(v) RETURN e $$) AS (edge agtype); +SELECT * FROM cypher('special_vars', $$ MATCH (u)-[e]->(v) RETURN e_idc, e_start_idc, e_end_idc, e_propertiesc $$) AS (e_idc agtype, e_start_idc agtype, e_end_idc agtype, e_propertiesc agtype); +SELECT * FROM cypher('special_vars', $$ MATCH (u)-[e]->(v) RETURN u_idc, u_propertiesc, v_idc, v_propertiesc $$) AS (u_idc agtype, u_propertiesc agtype, v_idc agtype, v_propertiesc agtype); +SELECT * FROM cypher('special_vars', $$ MATCH (u)-[e]->() RETURN count(*), u_idc ORDER BY count(*) DESC $$) AS (count agtype, u_idc agtype); 
+SELECT * FROM cypher('special_vars', $$ MATCH (u)-[e]->() RETURN count(*), id(u) ORDER BY count(*) DESC $$) AS (count agtype, idu agtype); + -- -- Clean up -- +SELECT drop_graph('special_vars', true); SELECT drop_graph('cypher_match', true); SELECT drop_graph('test_retrieve_var', true); SELECT drop_graph('test_enable_containment', true); diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 172e6305a..c2082d014 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -345,6 +345,10 @@ static bool isa_special_VLE_case(cypher_path *path); static ParseNamespaceItem *find_pnsi(cypher_parsestate *cpstate, char *varname); static bool has_list_comp_or_subquery(Node *expr, void *context); +static List **add_additional_variables(ParseState *pstate, + ParseNamespaceItem *pnsi, + List **target_list, char *name, + List *colnames, List *suffixes); /* * transform a cypher_clause */ @@ -5193,6 +5197,34 @@ static Expr *transform_cypher_edge(cypher_parsestate *cpstate, *target_list = lappend(*target_list, te); } + /* + * Add in our additional variables tied to this edge var, if it is named - + * + * - edge id column var (EDGE_ID_COLUMN_SUFFIX) + * - edge properties column var (EDGE_PROPERTIES_COLUMN_SUFFIX) + * - edge start_id column var (EDGE_START_ID_COLUMN_SUFFIX) + * - edge end_id column var (EDGE_END_ID_COLUMN_SUFFIX) + */ + if (rel->name != NULL) + { + List *suffixes = NULL; + List *colnames = NULL; + + suffixes = list_make4(EDGE_ID_COLUMN_SUFFIX, + EDGE_PROPERTIES_COLUMN_SUFFIX, + EDGE_START_ID_COLUMN_SUFFIX, + EDGE_END_ID_COLUMN_SUFFIX); + + colnames = list_make4(AG_EDGE_COLNAME_ID, + AG_EDGE_COLNAME_PROPERTIES, + AG_EDGE_COLNAME_START_ID, + AG_EDGE_COLNAME_END_ID); + + target_list = add_additional_variables(pstate, pnsi, target_list, + rel->name, colnames, suffixes); + } + + return (Expr *)expr; } @@ -5479,9 +5511,63 @@ static Expr *transform_cypher_node(cypher_parsestate *cpstate, te = makeTargetEntry(expr, resno, node->name, false); *target_list = lappend(*target_list, te); + /* + * Add in our additional variables tied to this node var - + * + * - 
vertex id column var (VERTEX_ID_COLUMN_SUFFIX) + * - vertex properties column var (VERTEX_PROPERTIES_COLUMN_SUFFIX) + */ + if (node->name != NULL) + { + List *suffixes = NULL; + List *colnames = NULL; + + suffixes = list_make2(VERTEX_ID_COLUMN_SUFFIX, + VERTEX_PROPERTIES_COLUMN_SUFFIX); + + colnames = list_make2(AG_VERTEX_COLNAME_ID, + AG_VERTEX_COLNAME_PROPERTIES); + + target_list = add_additional_variables(pstate, pnsi, target_list, + node->name, colnames, suffixes); + } + return expr; } +/* helper: appends a "<name><suffix>" target entry per colname/suffix pair */ +static List **add_additional_variables(ParseState *pstate, + ParseNamespaceItem *pnsi, + List **target_list, char *name, + List *colnames, List *suffixes) +{ + ListCell *slc = NULL; + ListCell *clc = NULL; + + forboth (slc, suffixes, clc, colnames) + { + TargetEntry *te = NULL; + Node *column = NULL; + char *varname = NULL; + char *suffix = (char *)lfirst(slc); + char *colname = (char *)lfirst(clc); + int varnamelen = 0; + int resno = -1; + + /* the new variable's name is the entity name followed by the suffix */ + varnamelen = strlen(name) + strlen(suffix) +1; + varname = palloc0(varnamelen); + strcpy(varname, name); + strcat(varname, suffix); + column = scanNSItemForColumn(pstate, pnsi, 0, colname, -1); + resno = pstate->p_next_resno++; + te = makeTargetEntry((Expr*)column, resno, varname, false); + *target_list = lappend(*target_list, te); + } + + return target_list; +} + static Node *make_edge_expr(cypher_parsestate *cpstate, ParseNamespaceItem *pnsi) { diff --git a/src/backend/parser/cypher_item.c b/src/backend/parser/cypher_item.c index c2feb2720..1f8371750 100644 --- a/src/backend/parser/cypher_item.c +++ b/src/backend/parser/cypher_item.c @@ -30,6 +30,7 @@ #include "parser/cypher_expr.h" #include "parser/cypher_item.h" +#include "parser/cypher_transform_entity.h" static List *ExpandAllTables(ParseState *pstate, int location); static List *expand_pnsi_attrs(ParseState *pstate, ParseNamespaceItem *pnsi, @@ -168,6 +169,43 @@ static List *ExpandAllTables(ParseState *pstate, int location) return target; } +/* + * Function takes the name 
of the passed var and then checks it against all of + * the potential var suffixes. If one is found it returns the root/entity name. + */ +static char *get_parent_entity_name(char *varname) +{ + List *suffixes = NULL; + ListCell *slc = NULL; + int vlen = strlen(varname); + + /* list of current suffixes */ + suffixes = list_make5(VERTEX_ID_COLUMN_SUFFIX, + VERTEX_PROPERTIES_COLUMN_SUFFIX, + EDGE_ID_COLUMN_SUFFIX, + EDGE_PROPERTIES_COLUMN_SUFFIX, + EDGE_START_ID_COLUMN_SUFFIX); + suffixes = lappend(suffixes, EDGE_END_ID_COLUMN_SUFFIX); + + /* + * Check each suffix against the input varname. If a match is found, + * return the potential entity name. + */ + foreach (slc, suffixes) + { + char *suffix = (char*) lfirst(slc); + int slen = strlen(suffix); + + if (strncmp(&varname[vlen-slen], suffix, slen) == 0) + { + return strndup(varname, vlen-slen); + } + } + + /* nothing found, return null */ + return NULL; +} + /* * From PG's expandNSItemAttrs * Modified to exclude hidden variables and aliases in RETURN * @@ -176,6 +214,7 @@ static List *expand_pnsi_attrs(ParseState *pstate, ParseNamespaceItem *pnsi, int sublevels_up, bool require_col_privs, int location) { + cypher_parsestate *cpstate = (cypher_parsestate*)pstate; RangeTblEntry *rte = pnsi->p_rte; RTEPermissionInfo *perminfo = pnsi->p_perminfo; List *names, *vars; @@ -187,7 +226,7 @@ static List *expand_pnsi_attrs(ParseState *pstate, ParseNamespaceItem *pnsi, vars = expandNSItemVars(pstate, pnsi, sublevels_up, location, &names); /* - * Require read access to the table. This is normally redundant with the + * Require read access to the table. This is normally redundant with the * markVarForSelectPriv calls below, but not if the table has zero * columns. 
*/ @@ -203,14 +242,29 @@ static List *expand_pnsi_attrs(ParseState *pstate, ParseNamespaceItem *pnsi, char *label = strVal(lfirst(name)); Var *varnode = (Var *)lfirst(var); TargetEntry *te; + char *entity = NULL; /* we want to skip our "hidden" variables */ if (strncmp(AGE_DEFAULT_VARNAME_PREFIX, label, var_prefix_len) == 0) + { continue; + } - /* we want to skip out "hidden" aliases */ + /* we want to skip our "hidden" aliases */ if (strncmp(AGE_DEFAULT_ALIAS_PREFIX, label, alias_prefix_len) == 0) + { + continue; + } + + /* + * If we find a potential entity based off of this var and it exists, + * we need to skip it. + */ + entity = get_parent_entity_name(label); + if (entity != NULL && find_variable(cpstate, entity) != NULL) + { continue; + } /* add this variable to the list */ te = makeTargetEntry((Expr *)varnode, diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index d26929d33..e3fa9626c 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -12058,11 +12058,12 @@ Datum agtype_volatile_wrapper(PG_FUNCTION_ARGS) agtv_result.type = AGTV_BOOL; agtv_result.val.boolean = DatumGetBool(arg); } - else if (type == INT2OID || type == INT4OID || type == INT8OID) + else if (type == INT2OID || type == INT4OID || type == INT8OID || + type == GRAPHIDOID) { agtv_result.type = AGTV_INTEGER; - if (type == INT8OID) + if (type == INT8OID || type == GRAPHIDOID) { agtv_result.val.int_value = DatumGetInt64(arg); } diff --git a/src/include/parser/cypher_parse_node.h b/src/include/parser/cypher_parse_node.h index 263ea197b..7cc4388bf 100644 --- a/src/include/parser/cypher_parse_node.h +++ b/src/include/parser/cypher_parse_node.h @@ -31,6 +31,18 @@ #define AGE_DEFAULT_ALIAS_PREFIX AGE_DEFAULT_PREFIX"alias_" #define AGE_DEFAULT_VARNAME_PREFIX AGE_DEFAULT_PREFIX"varname_" +/* + * Every vertex or edge that is given a variable name will additionally add + * variables that point directly to their respective columns. 
Below are the + * suffixes used; each is appended to the node or edge variable name. + */ +#define VERTEX_ID_COLUMN_SUFFIX "_idc" /* vertex id column */ +#define VERTEX_PROPERTIES_COLUMN_SUFFIX "_propertiesc" /* vertex properties column */ +#define EDGE_ID_COLUMN_SUFFIX "_idc" /* edge id column */ +#define EDGE_START_ID_COLUMN_SUFFIX "_start_idc" /* edge start_id column */ +#define EDGE_END_ID_COLUMN_SUFFIX "_end_idc" /* edge end_id column */ +#define EDGE_PROPERTIES_COLUMN_SUFFIX "_propertiesc" /* edge properties column */ + typedef struct cypher_parsestate { ParseState pstate;