From 87dffcf14ae6785b260217ce3da2a8c61e77664c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 14 Nov 2025 22:20:25 -0500 Subject: [PATCH] Reverse sync from upstream --- config.yml | 2 + src/prism.c | 74 +++++++++++++++-------- templates/src/diagnostic.c.erb | 2 + test/prism/result/source_location_test.rb | 2 +- 4 files changed, 53 insertions(+), 27 deletions(-) diff --git a/config.yml b/config.yml index 4a160aa9cd..b5d0e9d0b9 100644 --- a/config.yml +++ b/config.yml @@ -217,8 +217,10 @@ errors: - PARAMETER_UNEXPECTED_FWD - PARAMETER_UNEXPECTED_NO_KW - PARAMETER_WILD_LOOSE_COMMA + - PATTERN_ALTERNATIVE_AFTER_CAPTURE - PATTERN_ARRAY_MULTIPLE_RESTS - PATTERN_CAPTURE_DUPLICATE + - PATTERN_CAPTURE_IN_ALTERNATIVE - PATTERN_EXPRESSION_AFTER_BRACKET - PATTERN_EXPRESSION_AFTER_COMMA - PATTERN_EXPRESSION_AFTER_HROCKET diff --git a/src/prism.c b/src/prism.c index ca94819c76..59d602dadb 100644 --- a/src/prism.c +++ b/src/prism.c @@ -16947,6 +16947,16 @@ parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint1 #define PM_PARSE_PATTERN_TOP 1 #define PM_PARSE_PATTERN_MULTI 2 +/** Information used to track the state of captures in patterns. */ +typedef struct { + /** Whether we're currently parsing an alternative pattern. This is used to + * disallow captures in alternative patterns. */ + bool in_alternative_pattern; + /** Whether we've seen a capture in this pattern. This is used to disallow + * alternative patterns after a capture. */ + bool capture_in_pattern; +} pm_pattern_capturing_t; + static pm_node_t * parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth); @@ -16956,13 +16966,16 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag * an error to the parser. 
*/ static void -parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) { +parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location, pm_pattern_capturing_t *capturing) { // Skip this capture if it starts with an underscore. if (*location->start == '_') return; if (pm_constant_id_list_includes(captures, capture)) { pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE); + } else if (capturing->in_alternative_pattern && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_5) { + pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE); } else { + capturing->capture_in_pattern = true; pm_constant_id_list_append(captures, capture); } } @@ -17091,7 +17104,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures * Parse a rest pattern. */ static pm_splat_node_t * -parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) { +parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_pattern_capturing_t *capturing) { assert(parser->previous.type == PM_TOKEN_USTAR); pm_token_t operator = parser->previous; pm_node_t *name = NULL; @@ -17108,7 +17121,7 @@ parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) { pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0); } - parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier)); + parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier), capturing); name = (pm_node_t *) pm_local_variable_target_node_create( parser, &PM_LOCATION_TOKEN_VALUE(&identifier), @@ -17125,7 +17138,7 @@ parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) { * Parse a keyword rest node. 
*/ static pm_node_t * -parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) { +parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_pattern_capturing_t *capturing) { assert(parser->current.type == PM_TOKEN_USTAR_STAR); parser_lex(parser); @@ -17144,7 +17157,7 @@ parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0); } - parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous)); + parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous), capturing); value = (pm_node_t *) pm_local_variable_target_node_create( parser, &PM_LOCATION_TOKEN_VALUE(&parser->previous), @@ -17188,7 +17201,7 @@ pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const u * value. This will use an implicit local variable target. */ static pm_node_t * -parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) { +parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key, pm_pattern_capturing_t *capturing) { const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc; pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end); @@ -17208,7 +17221,7 @@ parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *ca pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0); } - parse_pattern_capture(parser, captures, constant_id, value_loc); + parse_pattern_capture(parser, captures, constant_id, value_loc, capturing); pm_local_variable_target_node_t *target = pm_local_variable_target_node_create( parser, value_loc, @@ -17234,7 +17247,7 @@ parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_ * Parse a hash 
pattern. */ static pm_hash_pattern_node_t * -parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) { +parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_pattern_capturing_t *capturing, uint16_t depth) { pm_node_list_t assocs = { 0 }; pm_static_literals_t keys = { 0 }; pm_node_t *rest = NULL; @@ -17252,7 +17265,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) { // Otherwise, we will create an implicit local variable // target for the value. - value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node); + value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node, capturing); } else { // Here we have a value for the first assoc in the list, so // we will parse it now. 
@@ -17296,7 +17309,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node } if (match1(parser, PM_TOKEN_USTAR_STAR)) { - pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures); + pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures, capturing); if (rest == NULL) { rest = assoc; @@ -17324,7 +17337,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node pm_node_t *value = NULL; if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key); + value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key, capturing); } else { value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1)); } @@ -17351,7 +17364,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node * Parse a pattern expression primitive. 
*/ static pm_node_t * -parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) { +parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_pattern_capturing_t *capturing, pm_diagnostic_id_t diag_id, uint16_t depth) { switch (parser->current.type) { case PM_TOKEN_IDENTIFIER: case PM_TOKEN_METHOD_NAME: { @@ -17363,7 +17376,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0); } - parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous)); + parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous), capturing); return (pm_node_t *) pm_local_variable_target_node_create( parser, &PM_LOCATION_TOKEN_VALUE(&parser->previous), @@ -17447,7 +17460,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous); break; case PM_TOKEN_USTAR_STAR: - first_node = parse_pattern_keyword_rest(parser, captures); + first_node = parse_pattern_keyword_rest(parser, captures, capturing); break; case PM_TOKEN_STRING_BEGIN: first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1)); @@ -17461,7 +17474,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm } } - node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1)); + node = parse_pattern_hash(parser, captures, first_node, capturing, (uint16_t) (depth + 1)); accept1(parser, PM_TOKEN_NEWLINE); expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE); @@ -17629,10 +17642,18 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm static pm_node_t * parse_pattern_primitives(pm_parser_t *parser, 
pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) { pm_node_t *node = first_node; + pm_pattern_capturing_t capturing = { false, false }; while ((node == NULL) || accept1(parser, PM_TOKEN_PIPE)) { pm_token_t operator = parser->previous; + if (node) { + if (capturing.capture_in_pattern) { + pm_parser_err(parser, operator.start, operator.end, PM_ERR_PATTERN_ALTERNATIVE_AFTER_CAPTURE); + } + capturing.in_alternative_pattern = true; + } + switch (parser->current.type) { case PM_TOKEN_IDENTIFIER: case PM_TOKEN_BRACKET_LEFT_ARRAY: @@ -17644,9 +17665,9 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p case PM_TOKEN_UDOT_DOT_DOT: case PM_CASE_PRIMITIVE: { if (node == NULL) { - node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1)); + node = parse_pattern_primitive(parser, captures, &capturing, diag_id, (uint16_t) (depth + 1)); } else { - pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1)); + pm_node_t *right = parse_pattern_primitive(parser, captures, &capturing, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1)); node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator); } @@ -17698,7 +17719,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0); } - parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous)); + parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous), &capturing); pm_local_variable_target_node_t *target = pm_local_variable_target_node_create( parser, &PM_LOCATION_TOKEN_VALUE(&parser->previous), @@ -17721,12 +17742,13 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag bool leading_rest = false; bool 
trailing_rest = false; + pm_pattern_capturing_t capturing = { false, false }; switch (parser->current.type) { case PM_TOKEN_LABEL: { parser_lex(parser); pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous); - node = (pm_node_t *) parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)); + node = (pm_node_t *) parse_pattern_hash(parser, captures, key, &capturing, (uint16_t) (depth + 1)); if (!(flags & PM_PARSE_PATTERN_TOP)) { pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT); @@ -17735,8 +17757,8 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag return node; } case PM_TOKEN_USTAR_STAR: { - node = parse_pattern_keyword_rest(parser, captures); - node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)); + node = parse_pattern_keyword_rest(parser, captures, &capturing); + node = (pm_node_t *) parse_pattern_hash(parser, captures, node, &capturing, (uint16_t) (depth + 1)); if (!(flags & PM_PARSE_PATTERN_TOP)) { pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT); @@ -17747,10 +17769,10 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag case PM_TOKEN_STRING_BEGIN: { // We need special handling for string beginnings because they could // be dynamic symbols leading to hash patterns. 
- node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1)); + node = parse_pattern_primitive(parser, captures, &capturing, diag_id, (uint16_t) (depth + 1)); if (pm_symbol_node_label_p(node)) { - node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)); + node = (pm_node_t *) parse_pattern_hash(parser, captures, node, &capturing, (uint16_t) (depth + 1)); if (!(flags & PM_PARSE_PATTERN_TOP)) { pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT); @@ -17765,7 +17787,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag case PM_TOKEN_USTAR: { if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) { parser_lex(parser); - node = (pm_node_t *) parse_pattern_rest(parser, captures); + node = (pm_node_t *) parse_pattern_rest(parser, captures, &capturing); leading_rest = true; break; } @@ -17779,7 +17801,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag // If we got a dynamic label symbol, then we need to treat it like the // beginning of a hash pattern. if (pm_symbol_node_label_p(node)) { - return (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)); + return (pm_node_t *) parse_pattern_hash(parser, captures, node, &capturing, (uint16_t) (depth + 1)); } if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) { @@ -17800,7 +17822,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag } if (accept1(parser, PM_TOKEN_USTAR)) { - node = (pm_node_t *) parse_pattern_rest(parser, captures); + node = (pm_node_t *) parse_pattern_rest(parser, captures, &capturing); // If we have already parsed a splat pattern, then this is an // error. 
We will continue to parse the rest of the patterns, diff --git a/templates/src/diagnostic.c.erb b/templates/src/diagnostic.c.erb index 7c9e2e8886..87e1f341f6 100644 --- a/templates/src/diagnostic.c.erb +++ b/templates/src/diagnostic.c.erb @@ -299,8 +299,10 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_ERR_PARAMETER_UNEXPECTED_FWD] = { "unexpected `...` in parameters", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_PARAMETER_WILD_LOOSE_COMMA] = { "unexpected `,` in parameters", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_PARAMETER_UNEXPECTED_NO_KW] = { "unexpected **nil; no keywords marker disallowed after keywords", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_PATTERN_ALTERNATIVE_AFTER_CAPTURE] = { "alternative pattern after variable capture", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS] = { "unexpected multiple '*' rest patterns in an array pattern", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_PATTERN_CAPTURE_DUPLICATE] = { "duplicated variable name", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE] = { "variable capture in alternative pattern", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET] = { "expected a pattern expression after the `[` operator", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA] = { "expected a pattern expression after `,`", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET] = { "expected a pattern expression after `=>`", PM_ERROR_LEVEL_SYNTAX }, diff --git a/test/prism/result/source_location_test.rb b/test/prism/result/source_location_test.rb index 7bdc707658..38b971d02b 100644 --- a/test/prism/result/source_location_test.rb +++ b/test/prism/result/source_location_test.rb @@ -13,7 +13,7 @@ def test_AliasMethodNode end def test_AlternationPatternNode - assert_location(AlternationPatternNode, "foo => bar | baz", 7...16, &:pattern) + assert_location(AlternationPatternNode, "foo => 0 | 1", 7...12, &:pattern) end def test_AndNode