From 3bfc86558b7a314417399470b5204a914f2ca3ff Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:53:42 +0100 Subject: [PATCH 1/3] [ruby/prism] Move `LexRipper` into its own file It has a hard dependency on ripper that can't be removed. This makes it so that ripper can be loaded only when the class is actually used. https://github.com/ruby/prism/commit/3b5b4a8a6d --- lib/prism.rb | 2 +- lib/prism/lex_compat.rb | 58 ------------------------------------- lib/prism/lex_ripper.rb | 64 +++++++++++++++++++++++++++++++++++++++++ lib/prism/prism.gemspec | 1 + 4 files changed, 66 insertions(+), 59 deletions(-) create mode 100644 lib/prism/lex_ripper.rb diff --git a/lib/prism.rb b/lib/prism.rb index f6ad0c1fd10155..d809557fce101f 100644 --- a/lib/prism.rb +++ b/lib/prism.rb @@ -20,7 +20,7 @@ module Prism autoload :DSL, "prism/dsl" autoload :InspectVisitor, "prism/inspect_visitor" autoload :LexCompat, "prism/lex_compat" - autoload :LexRipper, "prism/lex_compat" + autoload :LexRipper, "prism/lex_ripper" autoload :MutationCompiler, "prism/mutation_compiler" autoload :Pack, "prism/pack" autoload :Pattern, "prism/pattern" diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index 9b3f025ab6a5b4..48ac768b03df2f 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -867,62 +867,4 @@ def result end private_constant :LexCompat - - # This is a class that wraps the Ripper lexer to produce almost exactly the - # same tokens. 
- class LexRipper # :nodoc: - attr_reader :source - - def initialize(source) - @source = source - end - - def result - previous = [] #: [[Integer, Integer], Symbol, String, untyped] | [] - results = [] #: Array[[[Integer, Integer], Symbol, String, untyped]] - - lex(source).each do |token| - case token[1] - when :on_sp - # skip - when :on_tstring_content - if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@")) - previous[2] << token[2] - else - results << token - previous = token - end - when :on_words_sep - if previous[1] == :on_words_sep - previous[2] << token[2] - else - results << token - previous = token - end - else - results << token - previous = token - end - end - - results - end - - private - - if Ripper.method(:lex).parameters.assoc(:keyrest) - def lex(source) - Ripper.lex(source, raise_errors: true) - end - else - def lex(source) - ripper = Ripper::Lexer.new(source) - ripper.lex.tap do |result| - raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any? - end - end - end - end - - private_constant :LexRipper end diff --git a/lib/prism/lex_ripper.rb b/lib/prism/lex_ripper.rb new file mode 100644 index 00000000000000..4b5c3b77fd6112 --- /dev/null +++ b/lib/prism/lex_ripper.rb @@ -0,0 +1,64 @@ +# frozen_string_literal: true +# :markup: markdown + +require "ripper" + +module Prism + # This is a class that wraps the Ripper lexer to produce almost exactly the + # same tokens. 
# Wraps the Ripper lexer and post-processes its token stream: whitespace
# tokens are dropped and certain adjacent tokens are glued together.
class LexRipper # :nodoc:
  attr_reader :source

  def initialize(source)
    @source = source
  end

  # Lex the source with Ripper and return the normalized token list. Each
  # token is the array produced by Ripper: [[line, column], event, value, state].
  def result
    lex(source).each_with_object([]) do |token, merged|
      event = token[1]

      # Whitespace tokens are dropped entirely.
      next if event == :on_sp

      # The most recently kept token is always the last element of the
      # accumulator, so no separate "previous" variable is needed.
      tail = merged.last

      if tail && tail[1] == event && mergeable?(event, token[2])
        tail[2] << token[2]
      else
        merged << token
      end
    end
  end

  private

  # Whether a token of the given event type and text is folded into an
  # immediately preceding token of the same event type.
  def mergeable?(event, text)
    case event
    when :on_words_sep
      true
    when :on_tstring_content
      text.start_with?("\#$", "\#@")
    else
      false
    end
  end

  # Pick the lexing strategy once, at class-definition time, depending on
  # whether Ripper.lex accepts keyword arguments.
  if Ripper.method(:lex).parameters.assoc(:keyrest)
    def lex(source)
      Ripper.lex(source, raise_errors: true)
    end
  else
    def lex(source)
      lexer = Ripper::Lexer.new(source)
      tokens = lexer.lex
      raise SyntaxError, lexer.errors.map(&:message).join(' ;') if lexer.errors.any?
      tokens
    end
  end
end
It diff --git a/test/prism/magic_comment_test.rb b/test/prism/magic_comment_test.rb index ab4b5f56e5169b..ccfe5a5d0a5bc2 100644 --- a/test/prism/magic_comment_test.rb +++ b/test/prism/magic_comment_test.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative "test_helper" +require "ripper" module Prism class MagicCommentTest < TestCase diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index bd63302efcf908..defa95b6a84f8f 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -3,6 +3,7 @@ return if RUBY_VERSION < "3.3" || RUBY_ENGINE != "ruby" require_relative "../test_helper" +require "ripper" module Prism class RipperTest < TestCase diff --git a/test/prism/test_helper.rb b/test/prism/test_helper.rb index 43771110b4284f..406582c0a5b1ec 100644 --- a/test/prism/test_helper.rb +++ b/test/prism/test_helper.rb @@ -2,7 +2,6 @@ require "prism" require "pp" -require "ripper" require "stringio" require "test/unit" require "tempfile" From 16863f2ec1c8cefd852965e58acfcfd61b0194b9 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Thu, 8 Jan 2026 13:47:35 +0100 Subject: [PATCH 3/3] [ruby/prism] Decouple ripper translator from ripper library Ripper exposes Ripper::Lexer::State in its output, which is a bit of a problem. To make this work, I basically copy-pasted the implementation. I'm unsure if that is acceptable and added a test to make sure that these values never go out of sync. I don't imagine them changing often, prism maps them 1:1 for its own usage. This also fixed the shim by accident. `Ripper.lex` went to `Translation::Ripper.lex` when it should have been the original. Removing the need for the original resolves that issue. 
# Pretty much a 1:1 copy of Ripper::Lexer::State. We list all the available
# states to reimplement to_s without using Ripper.
#
# An instance wraps an integer bitmask of lexer-state flags (+to_int+) together
# with its human-readable rendering (+to_s+), e.g. "ARG|LABELED". Instances are
# frozen on construction.
class State
  # Ripper-internal bitflags, keyed by flag value. The flag at index i of the
  # list below has the value 2**i; the zero state is registered as :NONE.
  ALL = %i[
    BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM
  ].map.with_index.to_h { |name, i| [2 ** i, name] }
  ALL[0] = :NONE
  ALL.freeze

  # Expose every flag as a constant (State::BEG, State::END, ...).
  ALL.each { |value, name| const_set(name, value) }

  # :stopdoc:

  attr_reader :to_int, :to_s

  # i is the integer bitmask of lexer-state flags.
  def initialize(i)
    @to_int = i
    @to_s = state_name(i)
    freeze
  end

  # Array-style access: index 0 / :to_int is the bitmask, index 1 / :to_s is
  # the rendered name. Any other index returns nil.
  def [](index)
    case index
    when 0, :to_int
      @to_int
    when 1, :to_s
      @to_s
    else
      nil
    end
  end

  alias to_i to_int
  alias inspect to_s

  def pretty_print(q)
    q.text(to_s)
  end

  # Equal to itself (identity) or to anything the bitmask compares equal to,
  # which allows comparison against plain integers.
  def ==(i)
    super(i) || to_int == i
  end

  def &(i)
    self.class.new(to_int & i)
  end

  def |(i)
    self.class.new(to_int | i)
  end

  def allbits?(i)
    to_int.allbits?(i)
  end

  def anybits?(i)
    to_int.anybits?(i)
  end

  def nobits?(i)
    to_int.nobits?(i)
  end

  # :startdoc:

  private

  # Convert the state flags into the format exposed by ripper: the names of all
  # set flags joined by "|".
  #
  # The zero state has no bits set, so the bitmask scan below can never select
  # the :NONE entry (bits & 0 is always 0). It is therefore named explicitly
  # instead of rendering as an empty string.
  def state_name(bits)
    names = ALL.filter_map { |flag, name| name if bits & flag != 0 }
    names.empty? ? "NONE" : names.join("|")
  end
end
@@ -249,8 +300,8 @@ def ==(other) # :nodoc: class IdentToken < Token def ==(other) # :nodoc: (self[0...-1] == other[0...-1]) && ( - (other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) || - (other[3] & Ripper::EXPR_ARG_ANY != 0) + (other[3] == State::LABEL | State::END) || + (other[3] & (State::ARG | State::CMDARG) != 0) ) end end @@ -261,8 +312,8 @@ class IgnoredNewlineToken < Token def ==(other) # :nodoc: return false unless self[0...-1] == other[0...-1] - if self[3] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED - other[3] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED != 0 + if self[3] == State::ARG | State::LABELED + other[3] & State::ARG | State::LABELED != 0 else self[3] == other[3] end @@ -280,8 +331,8 @@ def ==(other) # :nodoc: class ParamToken < Token def ==(other) # :nodoc: (self[0...-1] == other[0...-1]) && ( - (other[3] == Ripper::EXPR_END) || - (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL) + (other[3] == State::END) || + (other[3] == State::END | State::LABEL) ) end end @@ -615,6 +666,11 @@ def self.build(opening) private_constant :Heredoc + # In previous versions of Ruby, Ripper wouldn't flush the bom before the + # first token, so we had to have a hack in place to account for that. + BOM_FLUSHED = RUBY_VERSION >= "3.3.0" + private_constant :BOM_FLUSHED + attr_reader :source, :options def initialize(source, **options) @@ -630,13 +686,9 @@ def result result = Prism.lex(source, **options) result_value = result.value - previous_state = nil #: Ripper::Lexer::State? + previous_state = nil #: State? last_heredoc_end = nil #: Integer? - # In previous versions of Ruby, Ripper wouldn't flush the bom before the - # first token, so we had to have a hack in place to account for that. This - # checks for that behavior. 
- bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0 bom = source.byteslice(0..2) == "\xEF\xBB\xBF" result_value.each_with_index do |(token, lex_state), index| @@ -651,7 +703,7 @@ def result if bom && lineno == 1 column -= 3 - if index == 0 && column == 0 && !bom_flushed + if index == 0 && column == 0 && !BOM_FLUSHED flushed = case token.type when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE, @@ -675,7 +727,7 @@ def result event = RIPPER.fetch(token.type) value = token.value - lex_state = Ripper::Lexer::State.new(lex_state) + lex_state = State.new(lex_state) token = case event @@ -689,7 +741,7 @@ def result last_heredoc_end = token.location.end_offset IgnoreStateToken.new([[lineno, column], event, value, lex_state]) when :on_ident - if lex_state == Ripper::EXPR_END + if lex_state == State::END # If we have an identifier that follows a method name like: # # def foo bar @@ -699,7 +751,7 @@ def result # yet. We do this more accurately, so we need to allow comparing # against both END and END|LABEL. ParamToken.new([[lineno, column], event, value, lex_state]) - elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL + elsif lex_state == State::END | State::LABEL # In the event that we're comparing identifiers, we're going to # allow a little divergence. 
# Guard against the hardcoded lexer-state table drifting away from Ripper:
# every EXPR_* constant that Ripper defines (minus the ones deliberately left
# out of the table) must map to the same name under the same integer value.
def test_internals
  # EXPR_* constants that have no entry of their own in State::ALL.
  excluded = %i[EXPR_VALUE EXPR_BEG_ANY EXPR_ARG_ANY EXPR_END_ANY]
  ripper_names = Ripper.constants.grep(/\AEXPR_/) - excluded
  table = LexCompat::State::ALL

  assert_equal(ripper_names.size, table.size)

  ripper_names.each do |const_name|
    flag_name = const_name.to_s.delete_prefix("EXPR_").to_sym
    assert_equal(flag_name, table[Ripper.const_get(const_name)])
  end
end