From 076abc18f3c970707b8676ff199549b1769a391b Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Sun, 22 Dec 2024 15:37:55 +0100 Subject: [PATCH] Fix parser translator simplifying strings when it shouldn't For example this: ```rb _buf << ': ' ``` Prism currently converts `tSTRING_BEG` => `tSTRING_CONTENT` => `tSTRING_END` into a simple `tSTRING` token. But, the parser gem doesn't do that when the string content ends with a newline. Note that this is only an issue for strings spanning only two lines. When there are more lines, there are two `tSTRING_CONTENT` tokens so it falls through. --- lib/prism/translation/parser/lexer.rb | 21 ++++++++++++++------- test/prism/ruby/parser_test.rb | 4 ---- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb index 61e22159a1..4334540381 100644 --- a/lib/prism/translation/parser/lexer.rb +++ b/lib/prism/translation/parser/lexer.rb @@ -302,18 +302,25 @@ def to_a if token.type == :HEREDOC_START heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier]) end - if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END + next_token = lexed[index][0] + next_next_token = lexed[index + 1][0] + basic_quotes = ["\"", "'"].include?(value) + + if basic_quotes && next_token&.type == :STRING_END next_location = token.location.join(next_token.location) type = :tSTRING value = "" location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset]) index += 1 - elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END - next_location = token.location.join(next_next_token.location) - type = :tSTRING - value = 
next_token.value.gsub("\\\\", "\\") - location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset]) - index += 2 + elsif basic_quotes && next_token&.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && next_next_token&.type == :STRING_END + # the parser gem doesn't simplify strings when its value ends in a newline + unless (string_value = next_token.value).end_with?("\n") + next_location = token.location.join(next_next_token.location) + value = string_value.gsub("\\\\", "\\") + type = :tSTRING + location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset]) + index += 2 + end elsif value.start_with?("<<") quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2] if quote == "`" diff --git a/test/prism/ruby/parser_test.rb b/test/prism/ruby/parser_test.rb index 606a0e54f6..8d5c02ff51 100644 --- a/test/prism/ruby/parser_test.rb +++ b/test/prism/ruby/parser_test.rb @@ -104,7 +104,6 @@ class ParserTest < TestCase "seattlerb/dsym_esc_to_sym.txt", "seattlerb/heredoc__backslash_dos_format.txt", "seattlerb/heredoc_backslash_nl.txt", - "seattlerb/heredoc_comma_arg.txt", "seattlerb/heredoc_squiggly_blank_line_plus_interpolation.txt", "seattlerb/heredoc_squiggly_blank_lines.txt", "seattlerb/heredoc_squiggly_interp.txt", @@ -119,7 +118,6 @@ class ParserTest < TestCase "seattlerb/heredoc_with_interpolation_and_carriage_return_escapes.txt", "seattlerb/interpolated_symbol_array_line_breaks.txt", "seattlerb/interpolated_word_array_line_breaks.txt", - "seattlerb/label_vs_string.txt", "seattlerb/module_comments.txt", "seattlerb/non_interpolated_symbol_array_line_breaks.txt", "seattlerb/non_interpolated_word_array_line_breaks.txt", @@ -139,10 +137,8 @@ class ParserTest < TestCase "seattlerb/required_kwarg_no_value.txt", "seattlerb/slashy_newlines_within_string.txt", "seattlerb/str_double_escaped_newline.txt", - "seattlerb/str_double_newline.txt", 
"seattlerb/str_evstr_escape.txt", "seattlerb/str_newline_hash_line_number.txt", - "seattlerb/str_single_newline.txt", "seattlerb/symbols_empty_space.txt", "seattlerb/TestRubyParserShared.txt", "unparser/corpus/literal/assignment.txt",