From e6a1efd152c068abe607b5a6439fe2eae32dd3ea Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Wed, 11 Feb 2026 13:03:49 +0100
Subject: [PATCH 1/2] Fix lexing for unterminated strings/heredocs etc.
When we hit EOF and still have lex modes left, it means some content was unterminated.
Heredocs specifically have logic that needs to run once the body has finished lexing.
If we don't reset the mode back to how it was before, the lexer will not continue lexing at the correct place.
---
src/prism.c | 9 ++++
.../unterminated_heredoc_and_embexpr.txt | 11 ++++
.../unterminated_heredoc_and_embexpr_2.txt | 9 ++++
test/prism/lex_test.rb | 53 +++++++++++++++++--
4 files changed, 79 insertions(+), 3 deletions(-)
create mode 100644 test/prism/errors/unterminated_heredoc_and_embexpr.txt
create mode 100644 test/prism/errors/unterminated_heredoc_and_embexpr_2.txt
diff --git a/src/prism.c b/src/prism.c
index 34e5d38b0a..097b4c7305 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -9856,6 +9856,15 @@ parser_lex(pm_parser_t *parser) {
// We'll check if we're at the end of the file. If we are, then we
// need to return the EOF token.
if (parser->current.end >= parser->end) {
+ // We may be missing closing tokens. We should pop modes one by one
+ // to do the appropriate cleanup like moving next_start for heredocs.
+ // Only when no mode is remaining will we actually emit the EOF token.
+ if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) {
+ lex_mode_pop(parser);
+ parser_lex(parser);
+ return;
+ }
+
// If we hit EOF, but the EOF came immediately after a newline,
// set the start of the token to the newline. This way any EOF
// errors will be reported as happening on that line rather than
diff --git a/test/prism/errors/unterminated_heredoc_and_embexpr.txt b/test/prism/errors/unterminated_heredoc_and_embexpr.txt
new file mode 100644
index 0000000000..bed7fcd24e
--- /dev/null
+++ b/test/prism/errors/unterminated_heredoc_and_embexpr.txt
@@ -0,0 +1,11 @@
+<= "3.3"
- def test_lex_compare
- prism = Prism.lex_compat(File.read(__FILE__), version: "current").value
- ripper = Ripper.lex(File.read(__FILE__))
+ def test_lex_compat
+ source = "foo bar"
+ prism = Prism.lex_compat(source, version: "current").value
+ ripper = Ripper.lex(source)
assert_equal(ripper, prism)
end
end
+
+ def test_lex_interpolation_unterminated
+ assert_equal(
+ %i[STRING_BEGIN EMBEXPR_BEGIN EOF],
+ token_types('"#{')
+ )
+
+ assert_equal(
+ %i[STRING_BEGIN EMBEXPR_BEGIN IGNORED_NEWLINE EOF],
+ token_types('"#{' + "\n")
+ )
+ end
+
+ def test_lex_interpolation_unterminated_with_content
+ # FIXME: Emits EOF twice.
+ assert_equal(
+ %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT EOF EOF],
+ token_types('"#{C')
+ )
+
+ assert_equal(
+ %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT NEWLINE EOF],
+ token_types('"#{C' + "\n")
+ )
+ end
+
+ def test_lex_heredoc_unterminated
+ code = <<~'RUBY'.strip
+ <
Date: Wed, 11 Feb 2026 17:01:06 +0100
Subject: [PATCH 2/2] Skip missing heredoc end in ripper translator
Prism inserts these to make bookkeeping easier. Ripper does not do so.
---
lib/prism/lex_compat.rb | 9 ++++++---
test/prism/ruby/ripper_test.rb | 10 ++++++++++
2 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index e8d2ce1b19..3d5cbfcddc 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -768,21 +768,24 @@ def result
source.byte_offset(line, column)
end
- # Add :on_sp tokens
- tokens = insert_on_sp(tokens, source, result.data_loc, bom, eof_token)
+ tokens = post_process_tokens(tokens, source, result.data_loc, bom, eof_token)
Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, source)
end
private
- def insert_on_sp(tokens, source, data_loc, bom, eof_token)
+ def post_process_tokens(tokens, source, data_loc, bom, eof_token)
new_tokens = []
prev_token_state = Translation::Ripper::Lexer::State[Translation::Ripper::EXPR_BEG]
prev_token_end = bom ? 3 : 0
tokens.each do |token|
+ # Skip missing heredoc ends.
+ next if token[1] == :on_heredoc_end && token[2] == ""
+
+ # Add :on_sp tokens.
line, column = token[0]
start_offset = source.byte_offset(line, column)
diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb
index 15f535f3d6..39cb9395ab 100644
--- a/test/prism/ruby/ripper_test.rb
+++ b/test/prism/ruby/ripper_test.rb
@@ -81,6 +81,16 @@ class RipperTest < TestCase
define_method("#{fixture.test_name}_lex") { assert_ripper_lex(fixture.read) }
end
+ def test_lex_ignored_missing_heredoc_end
+ ["", "-", "~"].each do |type|
+ source = "<<#{type}FOO\n"
+ assert_ripper_lex(source)
+
+ source = "<<#{type}'FOO'\n"
+ assert_ripper_lex(source)
+ end
+ end
+
module Events
attr_reader :events