From 8b63780a9d6b7debd4de656f68a0c666cfc2bda1 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Tue, 10 Feb 2026 18:28:18 +0100 Subject: [PATCH] Remove unneeded exit conditions for `parse_stream` As I understand it, the code operates line-by-line. When finding `__END__`, it has to decide if it is the end marker or just appears as part of of a multi-line literal (see tests). For a future PR I plan to make, `lex_modes` may be empty after parsing finished, even if it was syntax invalid. Currently there will _always_ be errors when lex modes are not popped. So these extra checks are redundant. Basically, if we found `__END__` on a line by itself and there are syntax errors, it's impossible to be the end marker. --- src/prism.c | 24 +------------- test/prism/api/parse_stream_test.rb | 51 +++++++++++++++++++++++++---- 2 files changed, 45 insertions(+), 30 deletions(-) diff --git a/src/prism.c b/src/prism.c index 8f3617adce..34e5d38b0a 100644 --- a/src/prism.c +++ b/src/prism.c @@ -22282,28 +22282,6 @@ pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t #undef LINE_SIZE } -/** - * Determine if there was an unterminated heredoc at the end of the input, which - * would mean the stream isn't finished and we should keep reading. - * - * For the other lex modes we can check if the lex mode has been closed, but for - * heredocs when we hit EOF we close the lex mode and then go back to parse the - * rest of the line after the heredoc declaration so that we get more of the - * syntax tree. - */ -static bool -pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) { - pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head; - - for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) { - if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) { - return true; - } - } - - return false; -} - /** * Parse a stream of Ruby source and return the tree. * @@ -22319,7 +22297,7 @@ pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options); pm_node_t *node = pm_parse(parser); - while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) { + while (!eof && parser->error_list.size > 0) { pm_node_destroy(parser, node); eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof); diff --git a/test/prism/api/parse_stream_test.rb b/test/prism/api/parse_stream_test.rb index 1c068c617c..3bc86fbd61 100644 --- a/test/prism/api/parse_stream_test.rb +++ b/test/prism/api/parse_stream_test.rb @@ -30,16 +30,28 @@ def test_multi_read end def test___END__ - io = StringIO.new("1 + 2\n3 + 4\n__END__\n5 + 6") + io = StringIO.new(<<~RUBY) + 1 + 2 + 3 + 4 + __END__ + 5 + 6 + RUBY result = Prism.parse_stream(io) assert result.success? assert_equal 2, result.value.statements.body.length - assert_equal "5 + 6", io.read + assert_equal "5 + 6\n", io.read end def test_false___END___in_string - io = StringIO.new("1 + 2\n3 + 4\n\"\n__END__\n\"\n5 + 6") + io = StringIO.new(<<~RUBY) + 1 + 2 + 3 + 4 + " + __END__ + " + 5 + 6 + RUBY result = Prism.parse_stream(io) assert result.success? @@ -47,7 +59,14 @@ def test_false___END___in_string end def test_false___END___in_regexp - io = StringIO.new("1 + 2\n3 + 4\n/\n__END__\n/\n5 + 6") + io = StringIO.new(<<~RUBY) + 1 + 2 + 3 + 4 + / + __END__ + / + 5 + 6 + RUBY result = Prism.parse_stream(io) assert result.success? @@ -55,7 +74,14 @@ def test_false___END___in_regexp end def test_false___END___in_list - io = StringIO.new("1 + 2\n3 + 4\n%w[\n__END__\n]\n5 + 6") + io = StringIO.new(<<~RUBY) + 1 + 2 + 3 + 4 + %w[ + __END__ + ] + 5 + 6 + RUBY result = Prism.parse_stream(io) assert result.success? @@ -63,7 +89,14 @@ def test_false___END___in_list end def test_false___END___in_heredoc - io = StringIO.new("1 + 2\n3 + 4\n<<-EOF\n__END__\nEOF\n5 + 6") + io = StringIO.new(<<~RUBY) + 1 + 2 + 3 + 4 + <<-EOF + __END__ + EOF + 5 + 6 + RUBY result = Prism.parse_stream(io) assert result.success? @@ -71,7 +104,11 @@ def test_false___END___in_heredoc end def test_nul_bytes - io = StringIO.new("1 # \0\0\0 \n2 # \0\0\0\n3") + io = StringIO.new(<<~RUBY) + 1 # \0\0\0\t + 2 # \0\0\0 + 3 + RUBY result = Prism.parse_stream(io) assert result.success?