From 076abc18f3c970707b8676ff199549b1769a391b Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Sun, 22 Dec 2024 15:37:55 +0100
Subject: [PATCH] Fix parser translator simplifying strings when it shouldn't

For example, this string literal spanning two lines:
```rb
_buf << ':
'
```

Prism's parser translator currently collapses the token sequence `tSTRING_BEG`, `tSTRING_CONTENT`, `tSTRING_END` into a single `tSTRING` token.

However, the parser gem doesn't do that when the string content ends with a newline.

Note that this is only an issue for strings spanning exactly two lines.
When there are more lines, there are two `tSTRING_CONTENT` tokens, so it falls through.
---
 lib/prism/translation/parser/lexer.rb | 21 ++++++++++++++-------
 test/prism/ruby/parser_test.rb        |  4 ----
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
index 61e22159a1..4334540381 100644
--- a/lib/prism/translation/parser/lexer.rb
+++ b/lib/prism/translation/parser/lexer.rb
@@ -302,18 +302,25 @@ def to_a
               if token.type == :HEREDOC_START
                 heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier])
               end
-              if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
+              next_token = lexed[index][0]
+              next_next_token = lexed[index + 1][0]
+              basic_quotes = ["\"", "'"].include?(value)
+
+              if basic_quotes && next_token&.type == :STRING_END
                 next_location = token.location.join(next_token.location)
                 type = :tSTRING
                 value = ""
                 location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
                 index += 1
-              elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END
-                next_location = token.location.join(next_next_token.location)
-                type = :tSTRING
-                value = next_token.value.gsub("\\\\", "\\")
-                location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
-                index += 2
+              elsif basic_quotes && next_token&.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && next_next_token&.type == :STRING_END
+                # the parser gem doesn't simplify strings when its value ends in a newline
+                unless (string_value = next_token.value).end_with?("\n")
+                  next_location = token.location.join(next_next_token.location)
+                  value = string_value.gsub("\\\\", "\\")
+                  type = :tSTRING
+                  location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
+                  index += 2
+                end
               elsif value.start_with?("<<")
                 quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
                 if quote == "`"
diff --git a/test/prism/ruby/parser_test.rb b/test/prism/ruby/parser_test.rb
index 606a0e54f6..8d5c02ff51 100644
--- a/test/prism/ruby/parser_test.rb
+++ b/test/prism/ruby/parser_test.rb
@@ -104,7 +104,6 @@ class ParserTest < TestCase
       "seattlerb/dsym_esc_to_sym.txt",
       "seattlerb/heredoc__backslash_dos_format.txt",
       "seattlerb/heredoc_backslash_nl.txt",
-      "seattlerb/heredoc_comma_arg.txt",
       "seattlerb/heredoc_squiggly_blank_line_plus_interpolation.txt",
       "seattlerb/heredoc_squiggly_blank_lines.txt",
       "seattlerb/heredoc_squiggly_interp.txt",
@@ -119,7 +118,6 @@ class ParserTest < TestCase
       "seattlerb/heredoc_with_interpolation_and_carriage_return_escapes.txt",
       "seattlerb/interpolated_symbol_array_line_breaks.txt",
       "seattlerb/interpolated_word_array_line_breaks.txt",
-      "seattlerb/label_vs_string.txt",
       "seattlerb/module_comments.txt",
       "seattlerb/non_interpolated_symbol_array_line_breaks.txt",
       "seattlerb/non_interpolated_word_array_line_breaks.txt",
@@ -139,10 +137,8 @@ class ParserTest < TestCase
       "seattlerb/required_kwarg_no_value.txt",
       "seattlerb/slashy_newlines_within_string.txt",
       "seattlerb/str_double_escaped_newline.txt",
-      "seattlerb/str_double_newline.txt",
       "seattlerb/str_evstr_escape.txt",
       "seattlerb/str_newline_hash_line_number.txt",
-      "seattlerb/str_single_newline.txt",
       "seattlerb/symbols_empty_space.txt",
       "seattlerb/TestRubyParserShared.txt",
       "unparser/corpus/literal/assignment.txt",