From 3b5b4a8a6d3b06d020231d7d9e15772236a00b13 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Thu, 8 Jan 2026 10:53:42 +0100 Subject: [PATCH 1/3] Move `LexRipper` into its own file It has a hard dependency on ripper that can't be removed. This makes it so that ripper can be loaded only when the class is actually used. --- Steepfile | 1 + lib/prism.rb | 2 +- lib/prism/lex_compat.rb | 58 ------------------------------------- lib/prism/lex_ripper.rb | 64 +++++++++++++++++++++++++++++++++++++++++ prism.gemspec | 1 + rakelib/typecheck.rake | 1 + 6 files changed, 68 insertions(+), 59 deletions(-) create mode 100644 lib/prism/lex_ripper.rb diff --git a/Steepfile b/Steepfile index 433e53cd29..1aafafc523 100644 --- a/Steepfile +++ b/Steepfile @@ -10,6 +10,7 @@ target :lib do # TODO: Type-checking these files is still WIP ignore "lib/prism/desugar_compiler.rb" ignore "lib/prism/lex_compat.rb" + ignore "lib/prism/lex_ripper.rb" ignore "lib/prism/serialize.rb" ignore "lib/prism/ffi.rb" ignore "lib/prism/translation" diff --git a/lib/prism.rb b/lib/prism.rb index f6ad0c1fd1..d809557fce 100644 --- a/lib/prism.rb +++ b/lib/prism.rb @@ -20,7 +20,7 @@ module Prism autoload :DSL, "prism/dsl" autoload :InspectVisitor, "prism/inspect_visitor" autoload :LexCompat, "prism/lex_compat" - autoload :LexRipper, "prism/lex_compat" + autoload :LexRipper, "prism/lex_ripper" autoload :MutationCompiler, "prism/mutation_compiler" autoload :Pack, "prism/pack" autoload :Pattern, "prism/pattern" diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index 9b3f025ab6..48ac768b03 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -867,62 +867,4 @@ def result end private_constant :LexCompat - - # This is a class that wraps the Ripper lexer to produce almost exactly the - # same tokens. 
- class LexRipper # :nodoc: - attr_reader :source - - def initialize(source) - @source = source - end - - def result - previous = [] #: [[Integer, Integer], Symbol, String, untyped] | [] - results = [] #: Array[[[Integer, Integer], Symbol, String, untyped]] - - lex(source).each do |token| - case token[1] - when :on_sp - # skip - when :on_tstring_content - if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@")) - previous[2] << token[2] - else - results << token - previous = token - end - when :on_words_sep - if previous[1] == :on_words_sep - previous[2] << token[2] - else - results << token - previous = token - end - else - results << token - previous = token - end - end - - results - end - - private - - if Ripper.method(:lex).parameters.assoc(:keyrest) - def lex(source) - Ripper.lex(source, raise_errors: true) - end - else - def lex(source) - ripper = Ripper::Lexer.new(source) - ripper.lex.tap do |result| - raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any? - end - end - end - end - - private_constant :LexRipper end diff --git a/lib/prism/lex_ripper.rb b/lib/prism/lex_ripper.rb new file mode 100644 index 0000000000..4b5c3b77fd --- /dev/null +++ b/lib/prism/lex_ripper.rb @@ -0,0 +1,64 @@ +# frozen_string_literal: true +# :markup: markdown + +require "ripper" + +module Prism + # This is a class that wraps the Ripper lexer to produce almost exactly the + # same tokens. 
+ class LexRipper # :nodoc: + attr_reader :source + + def initialize(source) + @source = source + end + + def result + previous = [] #: [[Integer, Integer], Symbol, String, untyped] | [] + results = [] #: Array[[[Integer, Integer], Symbol, String, untyped]] + + lex(source).each do |token| + case token[1] + when :on_sp + # skip + when :on_tstring_content + if previous[1] == :on_tstring_content && (token[2].start_with?("\#$") || token[2].start_with?("\#@")) + previous[2] << token[2] + else + results << token + previous = token + end + when :on_words_sep + if previous[1] == :on_words_sep + previous[2] << token[2] + else + results << token + previous = token + end + else + results << token + previous = token + end + end + + results + end + + private + + if Ripper.method(:lex).parameters.assoc(:keyrest) + def lex(source) + Ripper.lex(source, raise_errors: true) + end + else + def lex(source) + ripper = Ripper::Lexer.new(source) + ripper.lex.tap do |result| + raise SyntaxError, ripper.errors.map(&:message).join(' ;') if ripper.errors.any? 
+ end + end + end + end + + private_constant :LexRipper +end diff --git a/prism.gemspec b/prism.gemspec index 2fb5d1d0b3..a45e0d93e7 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -77,6 +77,7 @@ Gem::Specification.new do |spec| "lib/prism/ffi.rb", "lib/prism/inspect_visitor.rb", "lib/prism/lex_compat.rb", + "lib/prism/lex_ripper.rb", "lib/prism/mutation_compiler.rb", "lib/prism/node_ext.rb", "lib/prism/node.rb", diff --git a/rakelib/typecheck.rake b/rakelib/typecheck.rake index 4f3fb1684e..4a83bad7d0 100644 --- a/rakelib/typecheck.rake +++ b/rakelib/typecheck.rake @@ -20,6 +20,7 @@ namespace :typecheck do File.write("sorbet/typed_overrides.yml", ERB.new(<<~YAML, trim_mode: "-").result_with_hash(locals)) false: - ./lib/prism/lex_compat.rb + - ./lib/prism/lex_ripper.rb - ./lib/prism/node_ext.rb - ./lib/prism/parse_result.rb - ./lib/prism/visitor.rb From a73a4fb00c094a50aeadbb4b4d6a40ef97376114 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Thu, 8 Jan 2026 11:12:15 +0100 Subject: [PATCH 2/3] Remove unneeded `ripper` requires Ripper is either not used or loaded where it is actually needed --- lib/prism/translation/ripper.rb | 2 -- rakelib/lex.rake | 4 ---- test/prism/magic_comment_test.rb | 1 + test/prism/ruby/ripper_test.rb | 1 + test/prism/test_helper.rb | 1 - 5 files changed, 2 insertions(+), 7 deletions(-) diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index e488b7c5cf..00d5f80af4 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -1,8 +1,6 @@ # frozen_string_literal: true # :markup: markdown -require "ripper" - module Prism module Translation # This class provides a compatibility layer between prism and Ripper. 
It diff --git a/rakelib/lex.rake b/rakelib/lex.rake index 23807a81b6..59f7a52dd4 100644 --- a/rakelib/lex.rake +++ b/rakelib/lex.rake @@ -126,7 +126,6 @@ TARGETS.each do |name, target| desc "Lex #{repo} and compare with lex_compat" task "lex:#{name}" => [dirpath, :compile] do $:.unshift(File.expand_path("../lib", __dir__)) - require "ripper" require "prism" plain_text = ENV.fetch("CI", false) @@ -169,7 +168,6 @@ end desc "Lex files and compare with lex_compat" task lex: :compile do $:.unshift(File.expand_path("../lib", __dir__)) - require "ripper" require "prism" plain_text = ENV.fetch("CI", false) @@ -201,7 +199,6 @@ desc "Lex against the most recent version of various rubygems" task "lex:rubygems": [:compile, "tmp/failing"] do $:.unshift(File.expand_path("../lib", __dir__)) require "net/http" - require "ripper" require "rubygems/package" require "tmpdir" require "prism" @@ -333,7 +330,6 @@ desc "Lex against the top 100 rubygems" task "lex:topgems": ["download:topgems", :compile] do $:.unshift(File.expand_path("../lib", __dir__)) require "net/http" - require "ripper" require "rubygems/package" require "tmpdir" require "prism" diff --git a/test/prism/magic_comment_test.rb b/test/prism/magic_comment_test.rb index ab4b5f56e5..ccfe5a5d0a 100644 --- a/test/prism/magic_comment_test.rb +++ b/test/prism/magic_comment_test.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative "test_helper" +require "ripper" module Prism class MagicCommentTest < TestCase diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index bd63302efc..defa95b6a8 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -3,6 +3,7 @@ return if RUBY_VERSION < "3.3" || RUBY_ENGINE != "ruby" require_relative "../test_helper" +require "ripper" module Prism class RipperTest < TestCase diff --git a/test/prism/test_helper.rb b/test/prism/test_helper.rb index 43771110b4..406582c0a5 100644 --- a/test/prism/test_helper.rb +++ 
b/test/prism/test_helper.rb @@ -2,7 +2,6 @@ require "prism" require "pp" -require "ripper" require "stringio" require "test/unit" require "tempfile" From 2c0bea076d75d6429a93c72c1436323e91c59d4e Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Thu, 8 Jan 2026 13:47:35 +0100 Subject: [PATCH 3/3] Decouple ripper translator from ripper library Ripper exposes Ripper::Lexer::State in its output, which is a bit of a problem. To make this work, I basically copy-pasted the implementation. I'm unsure if that is acceptable and added a test to make sure that these values never go out of sync. I don't imagine them changing often, prism maps them 1:1 for its own usage. This also fixed the shim by accident. `Ripper.lex` went to `Translation::Ripper.lex` when it should have been the original. Removing the need for the original resolves that issue. --- lib/prism/lex_compat.rb | 86 +++++++++++++++++++++++++++------- test/prism/ruby/ripper_test.rb | 12 +++++ 2 files changed, 81 insertions(+), 17 deletions(-) diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index 48ac768b03..ebfb19e56d 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -2,7 +2,6 @@ # :markup: markdown require "delegate" -require "ripper" module Prism # This class is responsible for lexing the source using prism and then @@ -199,6 +198,58 @@ def deconstruct_keys(keys) "__END__": :on___end__ }.freeze + # Pretty much a 1:1 copy of Ripper::Lexer::State. We list all the available states + # to reimplement to_s without using Ripper. + class State + # Ripper-internal bitflags. 
+ ALL = %i[ + BEG END ENDARG ENDFN ARG CMDARG MID FNAME DOT CLASS LABEL LABELED FITEM + ].map.with_index.to_h { |name, i| [2 ** i, name] } + ALL[0] = :NONE + ALL.freeze + ALL.each { |value, name| const_set(name, value) } + + # :stopdoc: + + attr_reader :to_int, :to_s + + def initialize(i) + @to_int = i + @to_s = state_name(i) + freeze + end + + def [](index) + case index + when 0, :to_int + @to_int + when 1, :to_s + @to_s + else + nil + end + end + + alias to_i to_int + alias inspect to_s + def pretty_print(q) q.text(to_s) end + def ==(i) super or to_int == i end + def &(i) self.class.new(to_int & i) end + def |(i) self.class.new(to_int | i) end + def allbits?(i) to_int.allbits?(i) end + def anybits?(i) to_int.anybits?(i) end + def nobits?(i) to_int.nobits?(i) end + + # :startdoc: + + private + + # Convert the state flags into the format exposed by ripper. + def state_name(bits) + ALL.filter_map { |flag, name| name if bits & flag != 0 }.join("|") + end + end + # When we produce tokens, we produce the same arrays that Ripper does. # However, we add a couple of convenience methods onto them to make them a # little easier to work with. We delegate all other methods to the array. 
@@ -249,8 +300,8 @@ def ==(other) # :nodoc: class IdentToken < Token def ==(other) # :nodoc: (self[0...-1] == other[0...-1]) && ( - (other[3] == Ripper::EXPR_LABEL | Ripper::EXPR_END) || - (other[3] & Ripper::EXPR_ARG_ANY != 0) + (other[3] == State::LABEL | State::END) || + (other[3] & (State::ARG | State::CMDARG) != 0) ) end end @@ -261,8 +312,8 @@ class IgnoredNewlineToken < Token def ==(other) # :nodoc: return false unless self[0...-1] == other[0...-1] - if self[3] == Ripper::EXPR_ARG | Ripper::EXPR_LABELED - other[3] & Ripper::EXPR_ARG | Ripper::EXPR_LABELED != 0 + if self[3] == State::ARG | State::LABELED + other[3] & State::ARG | State::LABELED != 0 else self[3] == other[3] end @@ -280,8 +331,8 @@ def ==(other) # :nodoc: class ParamToken < Token def ==(other) # :nodoc: (self[0...-1] == other[0...-1]) && ( - (other[3] == Ripper::EXPR_END) || - (other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL) + (other[3] == State::END) || + (other[3] == State::END | State::LABEL) ) end end @@ -615,6 +666,11 @@ def self.build(opening) private_constant :Heredoc + # In previous versions of Ruby, Ripper wouldn't flush the bom before the + # first token, so we had to have a hack in place to account for that. + BOM_FLUSHED = RUBY_VERSION >= "3.3.0" + private_constant :BOM_FLUSHED + attr_reader :source, :options def initialize(source, **options) @@ -630,13 +686,9 @@ def result result = Prism.lex(source, **options) result_value = result.value - previous_state = nil #: Ripper::Lexer::State? + previous_state = nil #: State? last_heredoc_end = nil #: Integer? - # In previous versions of Ruby, Ripper wouldn't flush the bom before the - # first token, so we had to have a hack in place to account for that. This - # checks for that behavior. 
- bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0 bom = source.byteslice(0..2) == "\xEF\xBB\xBF" result_value.each_with_index do |(token, lex_state), index| @@ -651,7 +703,7 @@ def result if bom && lineno == 1 column -= 3 - if index == 0 && column == 0 && !bom_flushed + if index == 0 && column == 0 && !BOM_FLUSHED flushed = case token.type when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE, @@ -675,7 +727,7 @@ def result event = RIPPER.fetch(token.type) value = token.value - lex_state = Ripper::Lexer::State.new(lex_state) + lex_state = State.new(lex_state) token = case event @@ -689,7 +741,7 @@ def result last_heredoc_end = token.location.end_offset IgnoreStateToken.new([[lineno, column], event, value, lex_state]) when :on_ident - if lex_state == Ripper::EXPR_END + if lex_state == State::END # If we have an identifier that follows a method name like: # # def foo bar @@ -699,7 +751,7 @@ def result # yet. We do this more accurately, so we need to allow comparing # against both END and END|LABEL. ParamToken.new([[lineno, column], event, value, lex_state]) - elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL + elsif lex_state == State::END | State::LABEL # In the event that we're comparing identifiers, we're going to # allow a little divergence. Ripper doesn't account for local # variables introduced through named captures in regexes, and we @@ -739,7 +791,7 @@ def result counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0 end - Ripper::Lexer::State.new(result_value[current_index][1]) + State.new(result_value[current_index][1]) else previous_state end diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index defa95b6a8..9d64c5c70c 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -63,6 +63,18 @@ class RipperTest < TestCase define_method(fixture.test_name) { assert_ripper(fixture.read) } end + # Check that the hardcoded values don't change without us noticing. 
+ def test_internals + actual = LexCompat::State::ALL + expected = Ripper.constants.select { |name| name.start_with?("EXPR_") } + expected -= %i[EXPR_VALUE EXPR_BEG_ANY EXPR_ARG_ANY EXPR_END_ANY] + + assert_equal(expected.size, actual.size) + expected.each do |const_name| + assert_equal(const_name.to_s.delete_prefix("EXPR_").to_sym, actual[Ripper.const_get(const_name)]) + end + end + private def assert_ripper(source)