From b1314f159ec501f4fdc1c055293799b33c9ad134 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 29 Nov 2025 15:36:53 -0500 Subject: [PATCH 1/5] [ruby/prism] Fully destroy call operator write arguments If we are about to delete a call operator write argument, it needs to be removed from the list of block exits as well. https://github.com/ruby/prism/commit/ebc91c2e39 --- prism/prism.c | 38 +++++++++++++++++++ .../destroy_call_operator_write_arguments.txt | 11 ++++++ 2 files changed, 49 insertions(+) create mode 100644 test/prism/errors/destroy_call_operator_write_arguments.txt diff --git a/prism/prism.c b/prism/prism.c index 3d60e008c48a0a..60d45cbcd23ae6 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -21062,6 +21062,42 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding return value; } +static bool +parse_call_operator_write_block_exits_each(const pm_node_t *node, void *data) { + pm_parser_t *parser = (pm_parser_t *) data; + size_t index = 0; + + while (index < parser->current_block_exits->size) { + pm_node_t *block_exit = parser->current_block_exits->nodes[index]; + + if (block_exit == node) { + if (index + 1 < parser->current_block_exits->size) { + memmove( + &parser->current_block_exits->nodes[index], + &parser->current_block_exits->nodes[index + 1], + (parser->current_block_exits->size - index - 1) * sizeof(pm_node_t *) + ); + } + parser->current_block_exits->size--; + return false; + } + + index++; + } + + return true; +} + +/** + * When we are about to destroy a set of nodes that could potentially contain + * block exits for the current scope, we need to check if they are contained in + * the list of block exits and remove them if they are. + */ +static void +parse_call_operator_write_block_exits(pm_parser_t *parser, const pm_node_t *node) { + pm_visit_node(node, parse_call_operator_write_block_exits_each, parser); +} + /** * Ensure a call node that is about to become a call operator node does not * have arguments or a block attached. If it does, then we'll need to add an @@ -21073,12 +21109,14 @@ static void parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) { if (call_node->arguments != NULL) { pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS); + parse_call_operator_write_block_exits(parser, (pm_node_t *) call_node->arguments); pm_node_destroy(parser, (pm_node_t *) call_node->arguments); call_node->arguments = NULL; } if (call_node->block != NULL) { pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK); + parse_call_operator_write_block_exits(parser, (pm_node_t *) call_node->block); pm_node_destroy(parser, (pm_node_t *) call_node->block); call_node->block = NULL; } diff --git a/test/prism/errors/destroy_call_operator_write_arguments.txt b/test/prism/errors/destroy_call_operator_write_arguments.txt new file mode 100644 index 00000000000000..c3c72f92260d02 --- /dev/null +++ b/test/prism/errors/destroy_call_operator_write_arguments.txt @@ -0,0 +1,11 @@ +t next&&do end&= + ^~ unexpected 'do'; expected an expression after the operator + ^~~~ unexpected void value expression + ^~~~ unexpected void value expression +^~~~~~~~~~~~~~ unexpected write target + ^~ unexpected operator after a call with arguments + ^~ unexpected operator after a call with a block +''while= + ^~~~~ expected a predicate expression for the `while` statement + ^ unexpected '='; target cannot be written + From c4b5fa419ee266c973be9b676caa2cfad0400d58 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 29 Nov 2025 15:25:35 -0500 Subject: [PATCH 2/5] [ruby/prism] Ensure implicit parameter nodes are destroyed. When we are about to destroy a node because of a syntax error, we need to check if it is potentially containing an implicit parameter in its subtree. https://github.com/ruby/prism/commit/1531433e02 --- prism/prism.c | 65 +++++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/prism/prism.c b/prism/prism.c index 60d45cbcd23ae6..8c886bb050b06a 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -13454,28 +13454,43 @@ parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) { return (pm_node_t *) result; } +static bool +parse_target_implicit_parameter_each(const pm_node_t *node, void *data) { + switch (PM_NODE_TYPE(node)) { + case PM_LOCAL_VARIABLE_READ_NODE: + case PM_IT_LOCAL_VARIABLE_READ_NODE: { + pm_parser_t *parser = (pm_parser_t *) data; + pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters; + + for (size_t index = 0; index < implicit_parameters->size; index++) { + if (implicit_parameters->nodes[index] == node) { + // If the node is not the last one in the list, we need to + // shift the remaining nodes down to fill the gap. This is + // extremely unlikely to happen. + if (index != implicit_parameters->size - 1) { + memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *)); + } + + implicit_parameters->size--; + break; + } + } + + return false; + } + default: + return true; + } +} + /** * When an implicit local variable is written to or targeted, it becomes a * regular, named local variable. This function removes it from the list of * implicit parameters when that happens. */ static void -parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) { - pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters; - - for (size_t index = 0; index < implicit_parameters->size; index++) { - if (implicit_parameters->nodes[index] == node) { - // If the node is not the last one in the list, we need to shift the - // remaining nodes down to fill the gap. This is extremely unlikely - // to happen. - if (index != implicit_parameters->size - 1) { - memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *)); - } - - implicit_parameters->size--; - break; - } - } +parse_target_implicit_parameter(pm_parser_t *parser, const pm_node_t *node) { + pm_visit_node(node, parse_target_implicit_parameter_each, parser); } /** @@ -13851,18 +13866,12 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod // syntax error. In this case we'll fall through to our default // handling. We need to free the value that we parsed because there // is no way for us to attach it to the tree at this point. - switch (PM_NODE_TYPE(value)) { - case PM_LOCAL_VARIABLE_READ_NODE: - case PM_IT_LOCAL_VARIABLE_READ_NODE: - // Since it is possible for the value to be an implicit - // parameter, we need to remove it from the list of implicit - // parameters. - parse_target_implicit_parameter(parser, value); - break; - default: - break; - } - + // + // Since it is possible for the value to contain an implicit + // parameter somewhere in its subtree, we need to walk it and remove + // any implicit parameters from the list of implicit parameters for + // the current scope. + parse_target_implicit_parameter(parser, value); pm_node_destroy(parser, value); } PRISM_FALLTHROUGH From 3bd9583a52e9c3f55adb2097a84c4adb09cf74a3 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 29 Nov 2025 15:58:10 -0500 Subject: [PATCH 3/5] [ruby/prism] Update unicode tables to match that of CRuby The unicode version has been updated upstream, which means new codepoints mapped to alpha/alnum/isupper flags. We need to update our tables to match. I'm purposefully not adding a version check here, since that is such a large amount of code. It's possible that we could include different tables depending on a macro (like UNICODE_VERSION) or something to that effect, but it's such a minimal impact on the running of the actual parser that I don't think it's necessary. https://github.com/ruby/prism/commit/78925fe5b6 --- prism/encoding.c | 155 +++++++++++++++++++------- test/prism/encoding/encodings_test.rb | 18 +-- 2 files changed, 118 insertions(+), 55 deletions(-) diff --git a/prism/encoding.c b/prism/encoding.c index a4aeed104f89b9..424f149b8f833f 100644 --- a/prism/encoding.c +++ b/prism/encoding.c @@ -2,7 +2,7 @@ typedef uint32_t pm_unicode_codepoint_t; -#define UNICODE_ALPHA_CODEPOINTS_LENGTH 1450 +#define UNICODE_ALPHA_CODEPOINTS_LENGTH 1508 static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEPOINTS_LENGTH] = { 0x100, 0x2C1, 0x2C6, 0x2D1, @@ -10,7 +10,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP 0x2EC, 0x2EC, 0x2EE, 0x2EE, 0x345, 0x345, - 0x370, 0x374, + 0x363, 0x374, 0x376, 0x377, 0x37A, 0x37D, 0x37F, 0x37F, @@ -50,7 +50,8 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP 0x840, 0x858, 0x860, 0x86A, 0x870, 0x887, - 0x889, 0x88E, + 0x889, 0x88F, + 0x897, 0x897, 0x8A0, 0x8C9, 0x8D4, 0x8DF, 0x8E3, 0x8E9, @@ -140,7 +141,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP 0xC4A, 0xC4C, 0xC55, 0xC56, 0xC58, 0xC5A, - 0xC5D, 0xC5D, + 0xC5C, 0xC5D, 0xC60, 0xC63, 0xC80, 0xC83, 0xC85, 0xC8C, @@ -152,7 +153,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP 0xCC6, 0xCC8, 0xCCA, 0xCCC, 0xCD5, 0xCD6, - 0xCDD, 0xCDE, + 0xCDC, 0xCDE, 0xCE0, 0xCE3, 0xCF1, 0xCF3, 0xD00, 0xD0C, @@ -264,7 +265,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP 0x1C00, 0x1C36, 0x1C4D, 0x1C4F, 0x1C5A, 0x1C7D, - 0x1C80, 0x1C88, + 0x1C80, 0x1C8A, 0x1C90, 0x1CBA, 0x1CBD, 0x1CBF, 0x1CE9, 0x1CEC, @@ -272,7 +273,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP 0x1CF5, 0x1CF6, 0x1CFA, 0x1CFA, 0x1D00, 0x1DBF, - 0x1DE7, 0x1DF4, + 0x1DD3, 0x1DF4, 0x1E00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, @@ -352,11 +353,8 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP 0xA67F, 0xA6EF, 0xA717, 0xA71F, 0xA722, 0xA788, - 0xA78B, 0xA7CA, - 0xA7D0, 0xA7D1, - 0xA7D3, 0xA7D3, - 0xA7D5, 0xA7D9, - 0xA7F2, 0xA805, + 0xA78B, 0xA7DC, + 0xA7F1, 0xA805, 0xA807, 0xA827, 0xA840, 0xA873, 0xA880, 0xA8C3, @@ -446,6 +444,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP 0x105A3, 0x105B1, 0x105B3, 0x105B9, 0x105BB, 0x105BC, + 0x105C0, 0x105F3, 0x10600, 0x10736, 0x10740, 0x10755, 0x10760, 0x10767, @@ -464,6 +463,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP 0x108F4, 0x108F5, 0x10900, 0x10915, 0x10920, 0x10939, + 0x10940, 0x10959, 0x10980, 0x109B7, 0x109BE, 0x109BF, 0x10A00, 0x10A03, @@ -483,9 +483,14 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP 0x10C80, 0x10CB2, 0x10CC0, 0x10CF2, 0x10D00, 0x10D27, + 0x10D4A, 0x10D65, + 0x10D69, 0x10D69, + 0x10D6F, 0x10D85, 0x10E80, 0x10EA9, 0x10EAB, 0x10EAC, 0x10EB0, 0x10EB1, + 0x10EC2, 0x10EC7, + 0x10EFA, 0x10EFC, 0x10F00, 0x10F1C, 0x10F27, 0x10F27, 0x10F30, 0x10F45, @@ -529,6 +534,17 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP 0x11350, 0x11350, 0x11357, 0x11357, 0x1135D, 0x11363, + 0x11380, 0x11389, + 0x1138B, 0x1138B, + 0x1138E, 0x1138E, + 0x11390, 0x113B5, + 0x113B7, 0x113C0, + 0x113C2, 0x113C2, + 0x113C5, 0x113C5, + 0x113C7, 0x113CA, + 0x113CC, 0x113CD, + 0x113D1, 0x113D1, + 0x113D3, 0x113D3, 0x11400, 0x11441, 0x11443, 0x11445, 0x11447, 0x1144A, @@ -567,6 +583,8 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP 0x11A50, 0x11A97, 0x11A9D, 0x11A9D, 0x11AB0, 0x11AF8, + 0x11B60, 0x11B67, + 0x11BC0, 0x11BE0, 0x11C00, 0x11C08, 0x11C0A, 0x11C36, 0x11C38, 0x11C3E, @@ -588,6 +606,7 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP 0x11D90, 0x11D91, 0x11D93, 0x11D96, 0x11D98, 0x11D98, + 0x11DB0, 0x11DDB, 0x11EE0, 0x11EF6, 0x11F00, 0x11F10, 0x11F12, 0x11F3A, @@ -599,7 +618,9 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP 0x12F90, 0x12FF0, 0x13000, 0x1342F, 0x13441, 0x13446, + 0x13460, 0x143FA, 0x14400, 0x14646, + 0x16100, 0x1612E, 0x16800, 0x16A38, 0x16A40, 0x16A5E, 0x16A70, 0x16ABE, @@ -608,16 +629,19 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP 0x16B40, 0x16B43, 0x16B63, 0x16B77, 0x16B7D, 0x16B8F, + 0x16D40, 0x16D6C, 0x16E40, 0x16E7F, + 0x16EA0, 0x16EB8, + 0x16EBB, 0x16ED3, 0x16F00, 0x16F4A, 0x16F4F, 0x16F87, 0x16F8F, 0x16F9F, 0x16FE0, 0x16FE1, 0x16FE3, 0x16FE3, - 0x16FF0, 0x16FF1, - 0x17000, 0x187F7, - 0x18800, 0x18CD5, - 0x18D00, 0x18D08, + 0x16FF0, 0x16FF6, + 0x17000, 0x18CD5, + 0x18CFF, 0x18D1E, + 0x18D80, 0x18DF2, 0x1AFF0, 0x1AFF3, 0x1AFF5, 0x1AFFB, 0x1AFFD, 0x1AFFE, @@ -677,6 +701,11 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP 0x1E290, 0x1E2AD, 0x1E2C0, 0x1E2EB, 0x1E4D0, 0x1E4EB, + 0x1E5D0, 0x1E5ED, + 0x1E5F0, 0x1E5F0, + 0x1E6C0, 0x1E6DE, + 0x1E6E0, 0x1E6F5, + 0x1E6FE, 0x1E6FF, 0x1E7E0, 0x1E7E6, 0x1E7E8, 0x1E7EB, 0x1E7ED, 0x1E7EE, @@ -722,16 +751,16 @@ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP 0x1F150, 0x1F169, 0x1F170, 0x1F189, 0x20000, 0x2A6DF, - 0x2A700, 0x2B739, - 0x2B740, 0x2B81D, - 0x2B820, 0x2CEA1, + 0x2A700, 0x2B81D, + 0x2B820, 0x2CEAD, 0x2CEB0, 0x2EBE0, + 0x2EBF0, 0x2EE5D, 0x2F800, 0x2FA1D, 0x30000, 0x3134A, - 0x31350, 0x323AF, + 0x31350, 0x33479, }; -#define UNICODE_ALNUM_CODEPOINTS_LENGTH 1528 +#define UNICODE_ALNUM_CODEPOINTS_LENGTH 1598 static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEPOINTS_LENGTH] = { 0x100, 0x2C1, 0x2C6, 0x2D1, @@ -739,7 +768,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0x2EC, 0x2EC, 0x2EE, 0x2EE, 0x345, 0x345, - 0x370, 0x374, + 0x363, 0x374, 0x376, 0x377, 0x37A, 0x37D, 0x37F, 0x37F, @@ -778,7 +807,8 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0x840, 0x858, 0x860, 0x86A, 0x870, 0x887, - 0x889, 0x88E, + 0x889, 0x88F, + 0x897, 0x897, 0x8A0, 0x8C9, 0x8D4, 0x8DF, 0x8E3, 0x8E9, @@ -872,7 +902,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0xC4A, 0xC4C, 0xC55, 0xC56, 0xC58, 0xC5A, - 0xC5D, 0xC5D, + 0xC5C, 0xC5D, 0xC60, 0xC63, 0xC66, 0xC6F, 0xC80, 0xC83, @@ -885,7 +915,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0xCC6, 0xCC8, 0xCCA, 0xCCC, 0xCD5, 0xCD6, - 0xCDD, 0xCDE, + 0xCDC, 0xCDE, 0xCE0, 0xCE3, 0xCE6, 0xCEF, 0xCF1, 0xCF3, @@ -1007,7 +1037,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0x1C00, 0x1C36, 0x1C40, 0x1C49, 0x1C4D, 0x1C7D, - 0x1C80, 0x1C88, + 0x1C80, 0x1C8A, 0x1C90, 0x1CBA, 0x1CBD, 0x1CBF, 0x1CE9, 0x1CEC, @@ -1015,7 +1045,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0x1CF5, 0x1CF6, 0x1CFA, 0x1CFA, 0x1D00, 0x1DBF, - 0x1DE7, 0x1DF4, + 0x1DD3, 0x1DF4, 0x1E00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, @@ -1094,11 +1124,8 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0xA67F, 0xA6EF, 0xA717, 0xA71F, 0xA722, 0xA788, - 0xA78B, 0xA7CA, - 0xA7D0, 0xA7D1, - 0xA7D3, 0xA7D3, - 0xA7D5, 0xA7D9, - 0xA7F2, 0xA805, + 0xA78B, 0xA7DC, + 0xA7F1, 0xA805, 0xA807, 0xA827, 0xA840, 0xA873, 0xA880, 0xA8C3, @@ -1191,6 +1218,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0x105A3, 0x105B1, 0x105B3, 0x105B9, 0x105BB, 0x105BC, + 0x105C0, 0x105F3, 0x10600, 0x10736, 0x10740, 0x10755, 0x10760, 0x10767, @@ -1209,6 +1237,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0x108F4, 0x108F5, 0x10900, 0x10915, 0x10920, 0x10939, + 0x10940, 0x10959, 0x10980, 0x109B7, 0x109BE, 0x109BF, 0x10A00, 0x10A03, @@ -1229,9 +1258,14 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0x10CC0, 0x10CF2, 0x10D00, 0x10D27, 0x10D30, 0x10D39, + 0x10D40, 0x10D65, + 0x10D69, 0x10D69, + 0x10D6F, 0x10D85, 0x10E80, 0x10EA9, 0x10EAB, 0x10EAC, 0x10EB0, 0x10EB1, + 0x10EC2, 0x10EC7, + 0x10EFA, 0x10EFC, 0x10F00, 0x10F1C, 0x10F27, 0x10F27, 0x10F30, 0x10F45, @@ -1278,6 +1312,17 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0x11350, 0x11350, 0x11357, 0x11357, 0x1135D, 0x11363, + 0x11380, 0x11389, + 0x1138B, 0x1138B, + 0x1138E, 0x1138E, + 0x11390, 0x113B5, + 0x113B7, 0x113C0, + 0x113C2, 0x113C2, + 0x113C5, 0x113C5, + 0x113C7, 0x113CA, + 0x113CC, 0x113CD, + 0x113D1, 0x113D1, + 0x113D3, 0x113D3, 0x11400, 0x11441, 0x11443, 0x11445, 0x11447, 0x1144A, @@ -1297,6 +1342,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0x11680, 0x116B5, 0x116B8, 0x116B8, 0x116C0, 0x116C9, + 0x116D0, 0x116E3, 0x11700, 0x1171A, 0x1171D, 0x1172A, 0x11730, 0x11739, @@ -1322,6 +1368,9 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0x11A50, 0x11A97, 0x11A9D, 0x11A9D, 0x11AB0, 0x11AF8, + 0x11B60, 0x11B67, + 0x11BC0, 0x11BE0, + 0x11BF0, 0x11BF9, 0x11C00, 0x11C08, 0x11C0A, 0x11C36, 0x11C38, 0x11C3E, @@ -1346,6 +1395,8 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0x11D93, 0x11D96, 0x11D98, 0x11D98, 0x11DA0, 0x11DA9, + 0x11DB0, 0x11DDB, + 0x11DE0, 0x11DE9, 0x11EE0, 0x11EF6, 0x11F00, 0x11F10, 0x11F12, 0x11F3A, @@ -1358,7 +1409,10 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0x12F90, 0x12FF0, 0x13000, 0x1342F, 0x13441, 0x13446, + 0x13460, 0x143FA, 0x14400, 0x14646, + 0x16100, 0x1612E, + 0x16130, 0x16139, 0x16800, 0x16A38, 0x16A40, 0x16A5E, 0x16A60, 0x16A69, @@ -1370,16 +1424,20 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0x16B50, 0x16B59, 0x16B63, 0x16B77, 0x16B7D, 0x16B8F, + 0x16D40, 0x16D6C, + 0x16D70, 0x16D79, 0x16E40, 0x16E7F, + 0x16EA0, 0x16EB8, + 0x16EBB, 0x16ED3, 0x16F00, 0x16F4A, 0x16F4F, 0x16F87, 0x16F8F, 0x16F9F, 0x16FE0, 0x16FE1, 0x16FE3, 0x16FE3, - 0x16FF0, 0x16FF1, - 0x17000, 0x187F7, - 0x18800, 0x18CD5, - 0x18D00, 0x18D08, + 0x16FF0, 0x16FF6, + 0x17000, 0x18CD5, + 0x18CFF, 0x18D1E, + 0x18D80, 0x18DF2, 0x1AFF0, 0x1AFF3, 0x1AFF5, 0x1AFFB, 0x1AFFD, 0x1AFFE, @@ -1394,6 +1452,7 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0x1BC80, 0x1BC88, 0x1BC90, 0x1BC99, 0x1BC9E, 0x1BC9E, + 0x1CCF0, 0x1CCF9, 0x1D400, 0x1D454, 0x1D456, 0x1D49C, 0x1D49E, 0x1D49F, @@ -1443,6 +1502,11 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0x1E2F0, 0x1E2F9, 0x1E4D0, 0x1E4EB, 0x1E4F0, 0x1E4F9, + 0x1E5D0, 0x1E5ED, + 0x1E5F0, 0x1E5FA, + 0x1E6C0, 0x1E6DE, + 0x1E6E0, 0x1E6F5, + 0x1E6FE, 0x1E6FF, 0x1E7E0, 0x1E7E6, 0x1E7E8, 0x1E7EB, 0x1E7ED, 0x1E7EE, @@ -1490,16 +1554,16 @@ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP 0x1F170, 0x1F189, 0x1FBF0, 0x1FBF9, 0x20000, 0x2A6DF, - 0x2A700, 0x2B739, - 0x2B740, 0x2B81D, - 0x2B820, 0x2CEA1, + 0x2A700, 0x2B81D, + 0x2B820, 0x2CEAD, 0x2CEB0, 0x2EBE0, + 0x2EBF0, 0x2EE5D, 0x2F800, 0x2FA1D, 0x30000, 0x3134A, - 0x31350, 0x323AF, + 0x31350, 0x33479, }; -#define UNICODE_ISUPPER_CODEPOINTS_LENGTH 1302 +#define UNICODE_ISUPPER_CODEPOINTS_LENGTH 1320 static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_CODEPOINTS_LENGTH] = { 0x100, 0x100, 0x102, 0x102, @@ -1774,6 +1838,7 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C 0x10C7, 0x10C7, 0x10CD, 0x10CD, 0x13A0, 0x13F5, + 0x1C89, 0x1C89, 0x1C90, 0x1CBA, 0x1CBD, 0x1CBF, 0x1E00, 0x1E00, @@ -2103,9 +2168,15 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C 0xA7C2, 0xA7C2, 0xA7C4, 0xA7C7, 0xA7C9, 0xA7C9, + 0xA7CB, 0xA7CC, + 0xA7CE, 0xA7CE, 0xA7D0, 0xA7D0, + 0xA7D2, 0xA7D2, + 0xA7D4, 0xA7D4, 0xA7D6, 0xA7D6, 0xA7D8, 0xA7D8, + 0xA7DA, 0xA7DA, + 0xA7DC, 0xA7DC, 0xA7F5, 0xA7F5, 0xFF21, 0xFF3A, 0x10400, 0x10427, @@ -2115,8 +2186,10 @@ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C 0x1058C, 0x10592, 0x10594, 0x10595, 0x10C80, 0x10CB2, + 0x10D50, 0x10D65, 0x118A0, 0x118BF, 0x16E40, 0x16E5F, + 0x16EA0, 0x16EB8, 0x1D400, 0x1D419, 0x1D434, 0x1D44D, 0x1D468, 0x1D481, diff --git a/test/prism/encoding/encodings_test.rb b/test/prism/encoding/encodings_test.rb index 4ad2b465cc1842..b008fc3fa10238 100644 --- a/test/prism/encoding/encodings_test.rb +++ b/test/prism/encoding/encodings_test.rb @@ -56,21 +56,11 @@ def assert_encoding_identifier(name, character) # Check that we can properly parse every codepoint in the given encoding. def assert_encoding(encoding, name, range) - # I'm not entirely sure, but I believe these codepoints are incorrect in - # their parsing in CRuby. They all report as matching `[[:lower:]]` but - # then they are parsed as constants. This is because CRuby determines if - # an identifier is a constant or not by case folding it down to lowercase - # and checking if there is a difference. And even though they report - # themselves as lowercase, their case fold is different. I have reported - # this bug upstream. + unicode = false + case encoding when Encoding::UTF_8, Encoding::UTF_8_MAC, Encoding::UTF8_DoCoMo, Encoding::UTF8_KDDI, Encoding::UTF8_SoftBank, Encoding::CESU_8 - range = range.to_a - [ - 0x01c5, 0x01c8, 0x01cb, 0x01f2, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, - 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, - 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, - 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fbc, 0x1fcc, 0x1ffc, - ] + unicode = true when Encoding::Windows_1253 range = range.to_a - [0xb5] end @@ -79,7 +69,7 @@ def assert_encoding(encoding, name, range) character = codepoint.chr(encoding) if character.match?(/[[:alpha:]]/) - if character.match?(/[[:upper:]]/) + if character.match?(/[[:upper:]]/) || (unicode && character.match?(Regexp.new("\\p{Lt}".encode(encoding)))) assert_encoding_constant(name, character) else assert_encoding_identifier(name, character) From 806e554cc027b71cf0970fd4eb8f9ee5f1ccf128 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 30 Nov 2025 14:14:03 +0900 Subject: [PATCH 4/5] Compare with the upper bound of the loop variable Fix sign-compare warning --- gc/default/default.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gc/default/default.c b/gc/default/default.c index 530fc4df891acb..2d862b0b212489 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -9448,7 +9448,7 @@ rb_gc_impl_objspace_init(void *objspace_ptr) (bits_t)0x0101010101010101ULL, // i=3: every 8th bit (bits_t)0x0001000100010001ULL, // i=4: every 16th bit }; - GC_ASSERT(i < sizeof(slot_bits_masks) / sizeof(slot_bits_masks[0])); + GC_ASSERT(HEAP_COUNT == sizeof(slot_bits_masks) / sizeof(slot_bits_masks[0])); heap->slot_bits_mask = slot_bits_masks[i]; ccan_list_head_init(&heap->pages); From d7cfd275f881bbd29d95dfdba585f055dd89ba44 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 30 Nov 2025 14:31:34 +0900 Subject: [PATCH 5/5] Set DESTDIR if relative loading When relative loading, `prefix` makes no sense actually. Use the given (or default) path as `DESTDIR` instead. This change affects only when the relative loading is enabled and the destdir is not given, and does not change the final installation path, but makes the configuration options simpler a little. --- configure.ac | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/configure.ac b/configure.ac index 22baeabb32ab46..21f2e5a466e382 100644 --- a/configure.ac +++ b/configure.ac @@ -4753,6 +4753,11 @@ AC_ARG_WITH(destdir, [DESTDIR="$withval"]) AC_SUBST(DESTDIR) +AS_IF([test "x$load_relative:$DESTDIR" = xyes:], [ + AS_IF([test "x$prefix" = xNONE], [DESTDIR="$ac_default_prefix"], [DESTDIR="$prefix"]) + prefix=/. +]) + AC_OUTPUT }