diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index e8d2ce1b19..3d5cbfcddc 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -768,21 +768,24 @@ def result source.byte_offset(line, column) end - # Add :on_sp tokens - tokens = insert_on_sp(tokens, source, result.data_loc, bom, eof_token) + tokens = post_process_tokens(tokens, source, result.data_loc, bom, eof_token) Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, source) end private - def insert_on_sp(tokens, source, data_loc, bom, eof_token) + def post_process_tokens(tokens, source, data_loc, bom, eof_token) new_tokens = [] prev_token_state = Translation::Ripper::Lexer::State[Translation::Ripper::EXPR_BEG] prev_token_end = bom ? 3 : 0 tokens.each do |token| + # Skip missing heredoc ends. + next if token[1] == :on_heredoc_end && token[2] == "" + + # Add :on_sp tokens. line, column = token[0] start_offset = source.byte_offset(line, column) diff --git a/src/prism.c b/src/prism.c index 34e5d38b0a..097b4c7305 100644 --- a/src/prism.c +++ b/src/prism.c @@ -9856,6 +9856,15 @@ parser_lex(pm_parser_t *parser) { // We'll check if we're at the end of the file. If we are, then we // need to return the EOF token. if (parser->current.end >= parser->end) { + // We may be missing closing tokens. We should pop modes one by one + // to do the appropriate cleanup like moving next_start for heredocs. + // Only when no mode is remaining will we actually emit the EOF token. + if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) { + lex_mode_pop(parser); + parser_lex(parser); + return; + } + // If we hit EOF, but the EOF came immediately after a newline, // set the start of the token to the newline. 
This way any EOF // errors will be reported as happening on that line rather than diff --git a/test/prism/errors/unterminated_heredoc_and_embexpr.txt b/test/prism/errors/unterminated_heredoc_and_embexpr.txt new file mode 100644 index 0000000000..bed7fcd24e --- /dev/null +++ b/test/prism/errors/unterminated_heredoc_and_embexpr.txt @@ -0,0 +1,11 @@ +<= "3.3" - def test_lex_compare - prism = Prism.lex_compat(File.read(__FILE__), version: "current").value - ripper = Ripper.lex(File.read(__FILE__)) + def test_lex_compat + source = "foo bar" + prism = Prism.lex_compat(source, version: "current").value + ripper = Ripper.lex(source) assert_equal(ripper, prism) end end + + def test_lex_interpolation_unterminated + assert_equal( + %i[STRING_BEGIN EMBEXPR_BEGIN EOF], + token_types('"#{') + ) + + assert_equal( + %i[STRING_BEGIN EMBEXPR_BEGIN IGNORED_NEWLINE EOF], + token_types('"#{' + "\n") + ) + end + + def test_lex_interpolation_unterminated_with_content + # FIXME: Emits EOF twice. + assert_equal( + %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT EOF EOF], + token_types('"#{C') + ) + + assert_equal( + %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT NEWLINE EOF], + token_types('"#{C' + "\n") + ) + end + + def test_lex_heredoc_unterminated + code = <<~'RUBY'.strip + <