From 1c7e19f961a3e9051cf728deeea196d4d72a89cd Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 16 Jan 2026 20:37:42 +0100 Subject: [PATCH 1/5] rb_free_tmp_buffer: use ruby_sized_xfree We know the buffer length, we might as well feed that information back to the GC. --- imemo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/imemo.c b/imemo.c index 8b3018523f0155..d949466a776f3d 100644 --- a/imemo.c +++ b/imemo.c @@ -94,8 +94,9 @@ rb_free_tmp_buffer(volatile VALUE *store) rb_imemo_tmpbuf_t *s = (rb_imemo_tmpbuf_t*)ATOMIC_VALUE_EXCHANGE(*store, 0); if (s) { void *ptr = ATOMIC_PTR_EXCHANGE(s->ptr, 0); + long cnt = s->cnt; s->cnt = 0; - ruby_xfree(ptr); + ruby_sized_xfree(ptr, sizeof(VALUE) * cnt); } } From 87147ba5e983f554fb56f859af0d858d5eff0896 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Fri, 16 Jan 2026 10:47:18 +0100 Subject: [PATCH 2/5] [ruby/prism] Make the ripper shim work with rdoc The filter class is a 1:1 copy of ruby. rdoc has 32 test failures. It seems to expect `on_sp` in some cases to render code as written. 
https://github.com/ruby/prism/commit/74bb12c825 --- lib/prism/prism.gemspec | 1 + lib/prism/translation/ripper.rb | 1 + lib/prism/translation/ripper/filter.rb | 53 ++++++++++++++++++++++++++ lib/prism/translation/ripper/lexer.rb | 16 +++----- test/prism/ruby/ripper_test.rb | 32 +++++++++++----- 5 files changed, 83 insertions(+), 20 deletions(-) create mode 100644 lib/prism/translation/ripper/filter.rb diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec index 463387e55c814b..283c7b04aa95e6 100644 --- a/lib/prism/prism.gemspec +++ b/lib/prism/prism.gemspec @@ -104,6 +104,7 @@ Gem::Specification.new do |spec| "lib/prism/translation/parser/compiler.rb", "lib/prism/translation/parser/lexer.rb", "lib/prism/translation/ripper.rb", + "lib/prism/translation/ripper/filter.rb", "lib/prism/translation/ripper/lexer.rb", "lib/prism/translation/ripper/sexp.rb", "lib/prism/translation/ripper/shim.rb", diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index 6552d2dbb80794..c8f9fa7731a539 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -437,6 +437,7 @@ def self.sexp_raw(src, filename = "-", lineno = 1, raise_errors: false) end end + autoload :Filter, "prism/translation/ripper/filter" autoload :Lexer, "prism/translation/ripper/lexer" autoload :SexpBuilder, "prism/translation/ripper/sexp" autoload :SexpBuilderPP, "prism/translation/ripper/sexp" diff --git a/lib/prism/translation/ripper/filter.rb b/lib/prism/translation/ripper/filter.rb new file mode 100644 index 00000000000000..19deef2d37dfaa --- /dev/null +++ b/lib/prism/translation/ripper/filter.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +module Prism + module Translation + class Ripper + class Filter # :nodoc: + # :stopdoc: + def initialize(src, filename = '-', lineno = 1) + @__lexer = Lexer.new(src, filename, lineno) + @__line = nil + @__col = nil + @__state = nil + end + + def filename + @__lexer.filename + end + + def lineno + @__line + end + 
+ def column + @__col + end + + def state + @__state + end + + def parse(init = nil) + data = init + @__lexer.lex.each do |pos, event, tok, state| + @__line, @__col = *pos + @__state = state + data = if respond_to?(event, true) + then __send__(event, tok, data) + else on_default(event, tok, data) + end + end + data + end + + private + + def on_default(event, token, data) + data + end + # :startdoc: + end + end + end +end diff --git a/lib/prism/translation/ripper/lexer.rb b/lib/prism/translation/ripper/lexer.rb index 787181b5a7fc42..bd40fb4c5a2de0 100644 --- a/lib/prism/translation/ripper/lexer.rb +++ b/lib/prism/translation/ripper/lexer.rb @@ -100,21 +100,17 @@ def to_a end end - def initialize(...) - super - @lex_compat = Prism.lex_compat(@source, filepath: filename, line: lineno) + # Pretty much just the same as Prism.lex_compat. + def lex(raise_errors: false) + Ripper.lex(@source, filename, lineno, raise_errors: raise_errors) end # Returns the lex_compat result wrapped in `Elem`. Errors are omitted. # Since ripper is a streaming parser, tokens are expected to be emitted in the order # that the parser encounters them. This is not implemented. - def parse(raise_errors: false) - if @lex_compat.failure? && raise_errors - raise SyntaxError, @lex_compat.errors.first.message - else - @lex_compat.value.map do |position, event, token, state| - Elem.new(position, event, token, state.to_int) - end + def parse(...) 
+ lex(...).map do |position, event, token, state| + Elem.new(position, event, token, state.to_int) end end diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index cac20a073db61b..2a0504c19f35f0 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -59,7 +59,7 @@ class RipperTest < TestCase "whitequark/slash_newline_in_heredocs.txt" ] - omitted_lexer_parse = [ + omitted_lex = [ "comments.txt", "heredoc_percent_q_newline_delimiter.txt", "heredoc_with_escaped_newline_at_start.txt", @@ -80,8 +80,20 @@ class RipperTest < TestCase define_method("#{fixture.test_name}_sexp_raw") { assert_ripper_sexp_raw(fixture.read) } end - Fixture.each_for_current_ruby(except: incorrect | omitted_lexer_parse) do |fixture| - define_method("#{fixture.test_name}_lexer_parse") { assert_ripper_lexer_parse(fixture.read) } + Fixture.each_for_current_ruby(except: incorrect | omitted_lex) do |fixture| + define_method("#{fixture.test_name}_lex") { assert_ripper_lex(fixture.read) } + end + + def test_lexer + lexer = Translation::Ripper::Lexer.new("foo") + expected = [[1, 0], :on_ident, "foo", Translation::Ripper::EXPR_CMDARG] + + assert_equal([expected], lexer.lex) + assert_equal(expected, lexer.parse[0].to_a) + assert_equal(lexer.parse[0].to_a, lexer.scan[0].to_a) + + assert_equal(%i[on_int on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event)) + assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) } end def test_tokenize @@ -106,15 +118,15 @@ def assert_ripper_sexp_raw(source) assert_equal Ripper.sexp_raw(source), Prism::Translation::Ripper.sexp_raw(source) end - def assert_ripper_lexer_parse(source) - prism = Translation::Ripper::Lexer.new(source).parse - ripper = Ripper::Lexer.new(source).parse - ripper.reject! 
{ |elem| elem.event == :on_sp } # Prism doesn't emit on_sp - ripper.sort_by!(&:pos) # Prism emits tokens by their order in the code, not in parse order + def assert_ripper_lex(source) + prism = Translation::Ripper.lex(source) + ripper = Ripper.lex(source) + ripper.reject! { |elem| elem[1] == :on_sp } # Prism doesn't emit on_sp + ripper.sort_by! { |elem| elem[0] } # Prism emits tokens by their order in the code, not in parse order [prism.size, ripper.size].max.times do |i| - expected = ripper[i].to_a - actual = prism[i].to_a + expected = ripper[i] + actual = prism[i] # Since tokens related to heredocs are not emitted in the same order, # the state also doesn't line up. if expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end From c56ce8a6c19334265b1a4a64876fffff9b98b2b1 Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Fri, 16 Jan 2026 12:01:15 -0800 Subject: [PATCH 3/5] Remove objspace->flags.has_newobj_hook We aren't using this anymore and the hook is called in gc.c --- gc/default/default.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index be5385f166fde1..132dbcb5faed8b 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -505,7 +505,6 @@ typedef struct rb_objspace { unsigned int during_compacting : 1; unsigned int during_reference_updating : 1; unsigned int gc_stressful: 1; - unsigned int has_newobj_hook: 1; unsigned int during_minor_gc : 1; unsigned int during_incremental_marking : 1; unsigned int measure_gc : 1; @@ -1520,7 +1519,6 @@ rb_gc_impl_set_event_hook(void *objspace_ptr, const rb_event_flag_t event) { rb_objspace_t *objspace = objspace_ptr; objspace->hook_events = event & RUBY_INTERNAL_EVENT_OBJSPACE_MASK; - objspace->flags.has_newobj_hook = !!(objspace->hook_events & RUBY_INTERNAL_EVENT_NEWOBJ); } unsigned long long From 1f3c52dc155fb7fbc42fc8e146924091ba1dfa20 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 16 Jan 2026 21:15:05 +0100 Subject: [PATCH 4/5] Fix 
rb_interned_str: create strings with BINARY (aka ASCII_8BIT) encoding [Bug #21842] The documentation always stated as much, and it's consistent with the rb_str_* family of functions. --- encoding.c | 6 +++--- ext/io/console/console.c | 8 ++++---- ext/io/console/extconf.rb | 2 +- gc/default/default.c | 2 +- string.c | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/encoding.c b/encoding.c index 8bb393b471ed54..e9510fe3c146c5 100644 --- a/encoding.c +++ b/encoding.c @@ -1367,7 +1367,7 @@ enc_names_i(st_data_t name, st_data_t idx, st_data_t args) VALUE *arg = (VALUE *)args; if ((int)idx == (int)arg[0]) { - VALUE str = rb_interned_str_cstr((char *)name); + VALUE str = rb_enc_interned_str_cstr((char *)name, rb_usascii_encoding()); rb_ary_push(arg[1], str); } return ST_CONTINUE; @@ -1873,7 +1873,7 @@ static int rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg) { VALUE ary = (VALUE)arg; - VALUE str = rb_interned_str_cstr((char *)name); + VALUE str = rb_enc_interned_str_cstr((char *)name, rb_usascii_encoding()); rb_ary_push(ary, str); return ST_CONTINUE; } @@ -1921,7 +1921,7 @@ rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg) str = rb_fstring_cstr(rb_enc_name(enc)); rb_ary_store(ary, idx, str); } - key = rb_interned_str_cstr((char *)name); + key = rb_enc_interned_str_cstr((char *)name, rb_usascii_encoding()); rb_hash_aset(aliases, key, str); return ST_CONTINUE; } diff --git a/ext/io/console/console.c b/ext/io/console/console.c index 7ddaf071a8833a..2b0193bb90b631 100644 --- a/ext/io/console/console.c +++ b/ext/io/console/console.c @@ -84,9 +84,9 @@ getattr(int fd, conmode *t) static ID id_getc, id_close; static ID id_gets, id_flush, id_chomp_bang; -#ifndef HAVE_RB_INTERNED_STR_CSTR +#ifndef HAVE_RB_ENC_INTERNED_STR_CSTR # define rb_str_to_interned_str(str) rb_str_freeze(str) -# define rb_interned_str_cstr(str) rb_str_freeze(rb_usascii_str_new_cstr(str)) +# define rb_enc_interned_str_cstr(str, enc) 
rb_str_freeze(rb_usascii_str_new_cstr(str)) #endif #if defined HAVE_RUBY_FIBER_SCHEDULER_H @@ -1897,7 +1897,7 @@ console_ttyname(VALUE io) size_t size = sizeof(termname); int e; if (ttyname_r(fd, tn, size) == 0) - return rb_interned_str_cstr(tn); + return rb_enc_interned_str_cstr(tn, rb_usascii_encoding()); if ((e = errno) == ERANGE) { VALUE s = rb_str_new(0, size); while (1) { @@ -1921,7 +1921,7 @@ console_ttyname(VALUE io) int e = errno; rb_syserr_fail_str(e, rb_sprintf("ttyname(%d)", fd)); } - return rb_interned_str_cstr(tn); + return rb_enc_interned_str_cstr(tn, rb_usascii_encoding()); } # else # error No ttyname function diff --git a/ext/io/console/extconf.rb b/ext/io/console/extconf.rb index dd3d221ae51df3..e6254c9e90fe98 100644 --- a/ext/io/console/extconf.rb +++ b/ext/io/console/extconf.rb @@ -9,7 +9,7 @@ have_func("rb_syserr_new_str(0, Qnil)") or abort -have_func("rb_interned_str_cstr") +have_func("rb_enc_interned_str_cstr") have_func("rb_io_path", "ruby/io.h") have_func("rb_io_descriptor", "ruby/io.h") have_func("rb_io_get_write_io", "ruby/io.h") diff --git a/gc/default/default.c b/gc/default/default.c index 132dbcb5faed8b..14ad33298c18ff 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -9582,7 +9582,7 @@ rb_gc_impl_init(void) VALUE opts; /* \GC build options */ rb_define_const(rb_mGC, "OPTS", opts = rb_ary_new()); -#define OPT(o) if (o) rb_ary_push(opts, rb_interned_str(#o, sizeof(#o) - 1)) +#define OPT(o) if (o) rb_ary_push(opts, rb_enc_interned_str(#o, sizeof(#o) - 1, rb_usascii_encoding())) OPT(GC_DEBUG); OPT(USE_RGENGC); OPT(RGENGC_DEBUG); diff --git a/string.c b/string.c index 6f4ea03fb37a41..d564c2e2e1bf94 100644 --- a/string.c +++ b/string.c @@ -12709,7 +12709,7 @@ VALUE rb_interned_str(const char *ptr, long len) { struct RString fake_str = {RBASIC_INIT}; - return register_fstring(setup_fake_str(&fake_str, ptr, len, ENCINDEX_US_ASCII), true, false); + return register_fstring(setup_fake_str(&fake_str, ptr, len, ENCINDEX_ASCII_8BIT), 
true, false); } VALUE From 8a586af33b59cae93a1bee13c39e87dd087a4a6b Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 15 Jan 2026 21:02:05 -0500 Subject: [PATCH 5/5] Don't force major GC when there are allocatable slots [Bug #21838] When we have allocatable slots, we can grow the heap instead of forcing a major GC. This prevents major GC from being run very often in certain situations. See the ticket for more details. On ruby-bench, we can see that this patch doesn't cause any significant regressions: -------------- ----------- ---------- --------- ----------- ---------- --------- -------------- ------------- bench master (ms) stddev (%) RSS (MiB) branch (ms) stddev (%) RSS (MiB) branch 1st itr master/branch activerecord 148.2 0.3 59.2 150.0 0.8 69.7 1.015 0.988 chunky-png 435.2 0.3 72.9 438.8 0.1 66.7 0.993 0.992 erubi-rails 733.8 1.2 118.7 704.8 0.2 98.3 1.077 1.041 hexapdf 1400.4 1.1 247.0 1405.0 0.9 223.7 0.986 0.997 liquid-c 32.5 3.3 32.8 32.5 2.1 30.7 1.042 0.999 liquid-compile 31.0 1.7 35.1 33.4 3.9 32.8 0.938 0.928 liquid-render 84.7 0.4 30.8 86.3 0.4 30.8 0.981 0.982 lobsters 594.7 0.6 310.5 596.6 0.4 306.0 1.057 0.997 mail 75.6 2.8 53.3 76.9 0.7 53.2 0.968 0.982 psych-load 1122.8 1.2 29.2 1145.1 0.4 31.7 0.964 0.981 railsbench 1244.7 0.3 115.5 1254.8 1.1 115.2 0.939 0.992 rubocop 103.7 0.5 94.1 104.3 0.5 92.4 0.985 0.994 ruby-lsp 88.3 0.6 78.5 88.5 1.2 77.9 0.992 0.997 sequel 26.9 0.9 33.6 28.3 1.4 32.1 0.954 0.952 shipit 1119.3 1.5 171.4 1075.7 2.1 162.5 1.873 1.040 -------------- ----------- ---------- --------- ----------- ---------- --------- -------------- ------------- --- gc/default/default.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gc/default/default.c b/gc/default/default.c index 14ad33298c18ff..45c0d3e2552fef 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -3810,7 +3810,7 @@ gc_sweep_finish_heap(rb_objspace_t *objspace, rb_heap_t *heap) heap_allocatable_slots_expand(objspace, heap, swept_slots, 
heap->total_slots); } } - else { + else if (objspace->heap_pages.allocatable_slots < (min_free_slots - swept_slots)) { gc_needs_major_flags |= GPR_FLAG_MAJOR_BY_NOFREE; heap->force_major_gc_count++; }