From 74887a2c121bef639b7aae8d81d1e758994b5062 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Tue, 29 Jul 2025 16:50:58 -0400 Subject: [PATCH 01/12] [ruby/mmtk] Skip weak references that are special consts If a reference marked weak becomes a special const, it will crash because it is not a GC handled object. We should skip special consts here. https://github.com/ruby/mmtk/commit/870a79426b --- gc/mmtk/mmtk.c | 9 +++++++++ gc/mmtk/mmtk.h | 1 + gc/mmtk/src/abi.rs | 1 + gc/mmtk/src/weak_proc.rs | 4 ++++ 4 files changed, 15 insertions(+) diff --git a/gc/mmtk/mmtk.c b/gc/mmtk/mmtk.c index c318c6fe48cfd8..1c7d2d94559e91 100644 --- a/gc/mmtk/mmtk.c +++ b/gc/mmtk/mmtk.c @@ -342,6 +342,14 @@ rb_mmtk_update_global_tables(int table) rb_gc_vm_weak_table_foreach(rb_mmtk_update_table_i, NULL, NULL, true, (enum rb_gc_vm_weak_tables)table); } +static bool +rb_mmtk_special_const_p(MMTk_ObjectReference object) +{ + VALUE obj = (VALUE)object; + + return RB_SPECIAL_CONST_P(obj); +} + // Bootup MMTk_RubyUpcalls ruby_upcalls = { rb_mmtk_init_gc_worker_thread, @@ -360,6 +368,7 @@ MMTk_RubyUpcalls ruby_upcalls = { rb_mmtk_update_global_tables, rb_mmtk_global_tables_count, rb_mmtk_update_finalizer_table, + rb_mmtk_special_const_p, }; // Use max 80% of the available memory by default for MMTk diff --git a/gc/mmtk/mmtk.h b/gc/mmtk/mmtk.h index 72b4d9df0377fe..b00133a820c546 100644 --- a/gc/mmtk/mmtk.h +++ b/gc/mmtk/mmtk.h @@ -68,6 +68,7 @@ typedef struct MMTk_RubyUpcalls { void (*update_global_tables)(int tbl_idx); int (*global_tables_count)(void); void (*update_finalizer_table)(void); + bool (*special_const_p)(MMTk_ObjectReference object); } MMTk_RubyUpcalls; typedef struct MMTk_RawVecOfObjRef { diff --git a/gc/mmtk/src/abi.rs b/gc/mmtk/src/abi.rs index 81e24679f051db..2214441c9d6db7 100644 --- a/gc/mmtk/src/abi.rs +++ b/gc/mmtk/src/abi.rs @@ -313,6 +313,7 @@ pub struct RubyUpcalls { pub update_global_tables: extern "C" fn(tbl_idx: c_int), pub global_tables_count: extern "C" fn() -> c_int, pub update_finalizer_table: extern "C" fn(), + pub special_const_p: extern "C" fn(object: ObjectReference) -> bool, } unsafe impl Sync for RubyUpcalls {} diff --git a/gc/mmtk/src/weak_proc.rs b/gc/mmtk/src/weak_proc.rs index 0217673e36b842..68d3dd3273a135 100644 --- a/gc/mmtk/src/weak_proc.rs +++ b/gc/mmtk/src/weak_proc.rs @@ -134,6 +134,10 @@ impl GCWork for ProcessWeakReferences { .expect("Mutators should not be holding the lock."); for ptr_ptr in weak_references.iter_mut() { + if (upcalls().special_const_p)(**ptr_ptr) { + continue; + } + if !(**ptr_ptr).is_reachable() { **ptr_ptr = crate::binding().weak_reference_dead_value; } From 4263c49d1ce61a4e8d1a84cc144d4185ccf935f9 Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Wed, 30 Jul 2025 17:33:25 +0100 Subject: [PATCH 02/12] YJIT: Remove a dead test for getinlinecaches (#14033) The test was added in #5221 4 years ago but: 1. The insn it targets was removed in 2022 in #6187 2. The YJIT API `blocks_for` seems to be dropped in 2022 when it switched to use Rust in #5826 So this test has not been run in more than 3 years and can't be run anymore. I think we can remove it. --- bootstraptest/test_yjit.rb | 85 -------------------------------------- 1 file changed, 85 deletions(-) diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index d480369c759119..3e3936942d67bb 100644 --- a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -468,91 +468,6 @@ def getter end } -assert_equal '0', %q{ - # This is a regression test for incomplete invalidation from - # opt_setinlinecache. This test might be brittle, so - # feel free to remove it in the future if it's too annoying. - # This test assumes --yjit-call-threshold=2. - module M - Foo = 1 - def foo - Foo - end - - def pin_self_type_then_foo - _ = @foo - foo - end - - def only_ints - 1 + self - foo - end - end - - class Integer - include M - end - - class Sub - include M - end - - foo_method = M.instance_method(:foo) - - dbg = ->(message) do - return # comment this out to get printouts - - $stderr.puts RubyVM::YJIT.disasm(foo_method) - $stderr.puts message - end - - 2.times { 42.only_ints } - - dbg["There should be two versions of getinlineache"] - - module M - remove_const(:Foo) - end - - dbg["There should be no getinlinecaches"] - - 2.times do - 42.only_ints - rescue NameError => err - _ = "caught name error #{err}" - end - - dbg["There should be one version of getinlineache"] - - 2.times do - Sub.new.pin_self_type_then_foo - rescue NameError - _ = 'second specialization' - end - - dbg["There should be two versions of getinlineache"] - - module M - Foo = 1 - end - - dbg["There should still be two versions of getinlineache"] - - 42.only_ints - - dbg["There should be no getinlinecaches"] - - # Find name of the first VM instruction in M#foo. - insns = RubyVM::InstructionSequence.of(foo_method).to_a - if defined?(RubyVM::YJIT.blocks_for) && (insns.last.find { Array === _1 }&.first == :opt_getinlinecache) - RubyVM::YJIT.blocks_for(RubyVM::InstructionSequence.of(foo_method)) - .filter { _1.iseq_start_index == 0 }.count - else - 0 # skip the test - end -} - # Check that frozen objects are respected assert_equal 'great', %q{ class Foo From 2cd10de33097d44f85a42bcde0f8bf17c90cd53a Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Wed, 30 Jul 2025 10:11:10 -0700 Subject: [PATCH 03/12] ZJIT: Prepare for sharing JIT hooks with ZJIT (#14044) --- array.rb | 2 +- common.mk | 3 +- depend | 6 ++- inits.c | 7 +-- internal/cmdlineopt.h | 6 +-- jit_hook.rb | 13 ++++++ jit_undef.rb | 4 ++ kernel.rb | 10 ---- numeric.rb | 2 +- ruby.c | 19 ++++---- test/ruby/test_zjit.rb | 7 +++ vm.c | 13 ++++-- vm_method.c | 2 +- yjit.rb | 14 +++--- yjit/src/options.rs | 2 +- yjit/src/yjit.rs | 2 +- yjit_hook.rb | 4 -- zjit/src/cruby.rs | 5 +- zjit/src/options.rs | 103 +++++++++++++++++++++++------------------ zjit/src/state.rs | 34 ++++++-------- 20 files changed, 141 insertions(+), 117 deletions(-) create mode 100644 jit_hook.rb create mode 100644 jit_undef.rb delete mode 100644 yjit_hook.rb diff --git a/array.rb b/array.rb index 5f31693cabf2de..03663dbb0b7ff1 100644 --- a/array.rb +++ b/array.rb @@ -212,7 +212,7 @@ def fetch_values(*indexes, &block) indexes end - with_yjit do + with_jit do if Primitive.rb_builtin_basic_definition_p(:each) undef :each diff --git a/common.mk b/common.mk index 4133f90aa80130..b5a4526ccce8f5 100644 --- a/common.mk +++ b/common.mk @@ -1236,8 +1236,9 @@ BUILTIN_RB_SRCS = \ $(srcdir)/nilclass.rb \ $(srcdir)/prelude.rb \ $(srcdir)/gem_prelude.rb \ + $(srcdir)/jit_hook.rb \ + $(srcdir)/jit_undef.rb \ $(srcdir)/yjit.rb \ - $(srcdir)/yjit_hook.rb \ $(srcdir)/zjit.rb \ $(empty) BUILTIN_RB_INCS = $(BUILTIN_RB_SRCS:.rb=.rbinc) diff --git a/depend b/depend index df0ae1e610b1c3..ec8c2771c92104 100644 --- a/depend +++ b/depend @@ -9196,6 +9196,8 @@ miniinit.$(OBJEXT): {$(VPATH)}internal/warning_push.h miniinit.$(OBJEXT): {$(VPATH)}internal/xmalloc.h miniinit.$(OBJEXT): {$(VPATH)}io.rb miniinit.$(OBJEXT): {$(VPATH)}iseq.h +miniinit.$(OBJEXT): {$(VPATH)}jit_hook.rb +miniinit.$(OBJEXT): {$(VPATH)}jit_undef.rb miniinit.$(OBJEXT): {$(VPATH)}kernel.rb miniinit.$(OBJEXT): {$(VPATH)}marshal.rb miniinit.$(OBJEXT): {$(VPATH)}method.h @@ -9232,7 +9234,6 @@ miniinit.$(OBJEXT): {$(VPATH)}vm_core.h miniinit.$(OBJEXT): {$(VPATH)}vm_opts.h miniinit.$(OBJEXT): {$(VPATH)}warning.rb miniinit.$(OBJEXT): {$(VPATH)}yjit.rb -miniinit.$(OBJEXT): {$(VPATH)}yjit_hook.rb miniinit.$(OBJEXT): {$(VPATH)}zjit.rb namespace.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h namespace.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h @@ -18755,6 +18756,8 @@ vm.$(OBJEXT): {$(VPATH)}internal/variable.h vm.$(OBJEXT): {$(VPATH)}internal/warning_push.h vm.$(OBJEXT): {$(VPATH)}internal/xmalloc.h vm.$(OBJEXT): {$(VPATH)}iseq.h +vm.$(OBJEXT): {$(VPATH)}jit_hook.rbinc +vm.$(OBJEXT): {$(VPATH)}jit_undef.rbinc vm.$(OBJEXT): {$(VPATH)}method.h vm.$(OBJEXT): {$(VPATH)}missing.h vm.$(OBJEXT): {$(VPATH)}node.h @@ -18797,7 +18800,6 @@ vm.$(OBJEXT): {$(VPATH)}vm_opts.h vm.$(OBJEXT): {$(VPATH)}vm_sync.h vm.$(OBJEXT): {$(VPATH)}vmtc.inc vm.$(OBJEXT): {$(VPATH)}yjit.h -vm.$(OBJEXT): {$(VPATH)}yjit_hook.rbinc vm.$(OBJEXT): {$(VPATH)}zjit.h vm_backtrace.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h vm_backtrace.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h diff --git a/inits.c b/inits.c index b4e58ea25a1cec..e0dab9e890fbd8 100644 --- a/inits.c +++ b/inits.c @@ -88,8 +88,10 @@ void rb_call_builtin_inits(void) { #define BUILTIN(n) CALL(builtin_##n) - BUILTIN(kernel); + BUILTIN(jit_hook); BUILTIN(yjit); + BUILTIN(zjit); + BUILTIN(kernel); BUILTIN(gc); BUILTIN(ractor); BUILTIN(numeric); @@ -107,8 +109,7 @@ rb_call_builtin_inits(void) BUILTIN(thread_sync); BUILTIN(nilclass); BUILTIN(marshal); - BUILTIN(zjit); - BUILTIN(yjit_hook); + BUILTIN(jit_undef); Init_builtin_prelude(); } #undef CALL diff --git a/internal/cmdlineopt.h b/internal/cmdlineopt.h index 667fd6df2e976f..aed209e2a21f19 100644 --- a/internal/cmdlineopt.h +++ b/internal/cmdlineopt.h @@ -23,9 +23,6 @@ typedef struct ruby_cmdline_options { ruby_features_t warn; unsigned int dump; long backtrace_length_limit; -#if USE_ZJIT - void *zjit; -#endif const char *crash_report; @@ -42,6 +39,9 @@ typedef struct ruby_cmdline_options { #if USE_YJIT unsigned int yjit: 1; #endif +#if USE_ZJIT + unsigned int zjit: 1; +#endif } ruby_cmdline_options_t; struct ruby_opt_message { diff --git a/jit_hook.rb b/jit_hook.rb new file mode 100644 index 00000000000000..487361c049ed32 --- /dev/null +++ b/jit_hook.rb @@ -0,0 +1,13 @@ +class Module + # Internal helper for built-in initializations to define methods only when JIT is enabled. + # This method is removed in jit_undef.rb. + private def with_jit(&block) # :nodoc: + # ZJIT currently doesn't compile Array#each properly, so it's disabled for now. + if defined?(RubyVM::ZJIT) && Primitive.rb_zjit_option_enabled_p && false # TODO: remove `&& false` (Shopify/ruby#667) + # We don't support lazily enabling ZJIT yet, so we can call the block right away. + block.call + elsif defined?(RubyVM::YJIT) + RubyVM::YJIT.send(:add_jit_hook, block) + end + end +end diff --git a/jit_undef.rb b/jit_undef.rb new file mode 100644 index 00000000000000..0e855fe7a23077 --- /dev/null +++ b/jit_undef.rb @@ -0,0 +1,4 @@ +# Remove the helper defined in jit_hook.rb +class Module + undef :with_jit +end diff --git a/kernel.rb b/kernel.rb index 554de49977c5e3..888ef0c531d7e6 100644 --- a/kernel.rb +++ b/kernel.rb @@ -291,13 +291,3 @@ def Integer(arg, base = 0, exception: true) end end end - -class Module - # Internal helper for built-in initializations to define methods only when YJIT is enabled. - # This method is removed in yjit_hook.rb. - private def with_yjit(&block) # :nodoc: - if defined?(RubyVM::YJIT) - RubyVM::YJIT.send(:add_yjit_hook, block) - end - end -end diff --git a/numeric.rb b/numeric.rb index 27e9951fd3bd8d..552a3dd687aedc 100644 --- a/numeric.rb +++ b/numeric.rb @@ -322,7 +322,7 @@ def denominator 1 end - with_yjit do + with_jit do if Primitive.rb_builtin_basic_definition_p(:downto) undef :downto diff --git a/ruby.c b/ruby.c index cc67b0b25db8f8..a01e3d8afa9524 100644 --- a/ruby.c +++ b/ruby.c @@ -1196,14 +1196,12 @@ setup_yjit_options(const char *s) #if USE_ZJIT static void -setup_zjit_options(ruby_cmdline_options_t *opt, const char *s) +setup_zjit_options(const char *s) { // The option parsing is done in zjit/src/options.rs - extern void *rb_zjit_init_options(void); - extern bool rb_zjit_parse_option(void *options, const char *s); + extern bool rb_zjit_parse_option(const char *s); - if (!opt->zjit) opt->zjit = rb_zjit_init_options(); - if (!rb_zjit_parse_option(opt->zjit, s)) { + if (!rb_zjit_parse_option(s)) { rb_raise(rb_eRuntimeError, "invalid ZJIT option '%s' (--help will show valid zjit options)", s); } } @@ -1481,7 +1479,7 @@ proc_long_options(ruby_cmdline_options_t *opt, const char *s, long argc, char ** else if (is_option_with_optarg("zjit", '-', true, false, false)) { #if USE_ZJIT FEATURE_SET(opt->features, FEATURE_BIT(zjit)); - setup_zjit_options(opt, s); + setup_zjit_options(s); #else rb_warn("Ruby was built without ZJIT support." " You may need to install rustc to build Ruby with ZJIT."); @@ -1828,8 +1826,8 @@ ruby_opt_init(ruby_cmdline_options_t *opt) #endif #if USE_ZJIT if (opt->zjit) { - extern void rb_zjit_init(void *options); - rb_zjit_init(opt->zjit); + extern void rb_zjit_init(void); + rb_zjit_init(); } #endif @@ -2370,8 +2368,9 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt) #endif #if USE_ZJIT if (FEATURE_SET_P(opt->features, zjit) && !opt->zjit) { - extern void *rb_zjit_init_options(void); - opt->zjit = rb_zjit_init_options(); + extern void rb_zjit_prepare_options(void); + rb_zjit_prepare_options(); + opt->zjit = true; } #endif diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index b78d53e682233c..2d18759f50fbcb 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -1084,6 +1084,13 @@ def test_require_rubygems_with_auto_compact }, call_threshold: 2 end + def test_zjit_option_uses_array_each_in_ruby + omit 'ZJIT wrongly compiles Array#each, so it is disabled for now' + assert_runs '""', %q{ + Array.instance_method(:each).source_location&.first + } + end + def test_profile_under_nested_jit_call assert_compiles '[nil, nil, 3]', %q{ def profile diff --git a/vm.c b/vm.c index da5a51d25b73ba..c46efafb0db0c1 100644 --- a/vm.c +++ b/vm.c @@ -4509,14 +4509,21 @@ Init_vm_objects(void) vm->cc_refinement_table = rb_set_init_numtable(); } +#if USE_ZJIT +extern VALUE rb_zjit_option_enabled_p(rb_execution_context_t *ec, VALUE self); +#else +static VALUE rb_zjit_option_enabled_p(rb_execution_context_t *ec, VALUE self) { return Qfalse; } +#endif + +// Whether JIT is enabled or not, we need to load/undef `#with_jit` for other builtins. +#include "jit_hook.rbinc" +#include "jit_undef.rbinc" + // Stub for builtin function when not building YJIT units #if !USE_YJIT void Init_builtin_yjit(void) {} #endif -// Whether YJIT is enabled or not, we load yjit_hook.rb to remove Kernel#with_yjit. -#include "yjit_hook.rbinc" - // Stub for builtin function when not building ZJIT units #if !USE_ZJIT void Init_builtin_zjit(void) {} diff --git a/vm_method.c b/vm_method.c index fa81d56c74119d..faf327b36c7283 100644 --- a/vm_method.c +++ b/vm_method.c @@ -775,7 +775,7 @@ rb_method_definition_set(const rb_method_entry_t *me, rb_method_definition_t *de /* setup iseq first (before invoking GC) */ RB_OBJ_WRITE(me, &def->body.iseq.iseqptr, iseq); - // Methods defined in `with_yjit` should be considered METHOD_ENTRY_BASIC + // Methods defined in `with_jit` should be considered METHOD_ENTRY_BASIC if (rb_iseq_attr_p(iseq, BUILTIN_ATTR_C_TRACE)) { METHOD_ENTRY_BASIC_SET((rb_method_entry_t *)me, TRUE); } diff --git a/yjit.rb b/yjit.rb index e4fafa729eea75..1655529b5ee7f5 100644 --- a/yjit.rb +++ b/yjit.rb @@ -264,23 +264,23 @@ def self.simulate_oom! # :nodoc: end # Blocks that are called when YJIT is enabled - @yjit_hooks = [] + @jit_hooks = [] class << self # :stopdoc: private # Register a block to be called when YJIT is enabled - def add_yjit_hook(hook) - @yjit_hooks << hook + def add_jit_hook(hook) + @jit_hooks << hook end - # Run YJIT hooks registered by RubyVM::YJIT.with_yjit - def call_yjit_hooks + # Run YJIT hooks registered by `#with_jit` + def call_jit_hooks # Skip using builtin methods in Ruby if --yjit-c-builtin is given return if Primitive.yjit_c_builtin_p - @yjit_hooks.each(&:call) - @yjit_hooks.clear + @jit_hooks.each(&:call) + @jit_hooks.clear end # Print stats and dump exit locations diff --git a/yjit/src/options.rs b/yjit/src/options.rs index 9f7c70536966a8..c87a436091279f 100644 --- a/yjit/src/options.rs +++ b/yjit/src/options.rs @@ -46,7 +46,7 @@ pub struct Options { // The number of registers allocated for stack temps pub num_temp_regs: usize, - // Disable Ruby builtin methods defined by `with_yjit` hooks, e.g. Array#each in Ruby + // Disable Ruby builtin methods defined by `with_jit` hooks, e.g. Array#each in Ruby pub c_builtin: bool, // Capture stats diff --git a/yjit/src/yjit.rs b/yjit/src/yjit.rs index 8df1163d64b725..517a0daae5b9e2 100644 --- a/yjit/src/yjit.rs +++ b/yjit/src/yjit.rs @@ -57,7 +57,7 @@ fn yjit_init() { // Call YJIT hooks before enabling YJIT to avoid compiling the hooks themselves unsafe { let yjit = rb_const_get(rb_cRubyVM, rust_str_to_id("YJIT")); - rb_funcall(yjit, rust_str_to_id("call_yjit_hooks"), 0); + rb_funcall(yjit, rust_str_to_id("call_jit_hooks"), 0); } // Catch panics to avoid UB for unwinding into C frames. diff --git a/yjit_hook.rb b/yjit_hook.rb deleted file mode 100644 index 610a7be3303e2c..00000000000000 --- a/yjit_hook.rb +++ /dev/null @@ -1,4 +0,0 @@ -# Remove the helper defined in kernel.rb -class Module - undef :with_yjit -end diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 56e342cba03001..1d4ba4c9c34877 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -957,7 +957,7 @@ pub use manual_defs::*; pub mod test_utils { use std::{ptr::null, sync::Once}; - use crate::{options::init_options, state::rb_zjit_enabled_p, state::ZJITState}; + use crate::{options::rb_zjit_prepare_options, state::rb_zjit_enabled_p, state::ZJITState}; use super::*; @@ -979,6 +979,7 @@ pub mod test_utils { // , though let mut var: VALUE = Qnil; ruby_init_stack(&mut var as *mut VALUE as *mut _); + rb_zjit_prepare_options(); // enable `#with_jit` on builtins ruby_init(); // Pass command line options so the VM loads core library methods defined in @@ -994,7 +995,7 @@ pub mod test_utils { } // Set up globals for convenience - ZJITState::init(init_options()); + ZJITState::init(); // Enable zjit_* instructions unsafe { rb_zjit_enabled_p = true; } diff --git a/zjit/src/options.rs b/zjit/src/options.rs index bb26cc2deeb27e..340812f089acf3 100644 --- a/zjit/src/options.rs +++ b/zjit/src/options.rs @@ -16,9 +16,9 @@ pub static mut rb_zjit_profile_threshold: u64 = 1; #[allow(non_upper_case_globals)] pub static mut rb_zjit_call_threshold: u64 = 2; -/// True if --zjit-stats is enabled. -#[allow(non_upper_case_globals)] -static mut zjit_stats_enabled_p: bool = false; +/// ZJIT command-line options. This is set before rb_zjit_init() sets +/// ZJITState so that we can query some options while loading builtins. +pub static mut OPTIONS: Option = None; #[derive(Clone, Debug)] pub struct Options { @@ -53,19 +53,20 @@ pub struct Options { pub log_compiled_iseqs: Option, } -/// Return an Options with default values -pub fn init_options() -> Options { - Options { - num_profiles: 1, - stats: false, - debug: false, - dump_hir_init: None, - dump_hir_opt: None, - dump_lir: false, - dump_disasm: false, - perf: false, - allowed_iseqs: None, - log_compiled_iseqs: None, +impl Default for Options { + fn default() -> Self { + Options { + num_profiles: 1, + stats: false, + debug: false, + dump_hir_init: None, + dump_hir_opt: None, + dump_lir: false, + dump_disasm: false, + perf: false, + allowed_iseqs: None, + log_compiled_iseqs: None, + } } } @@ -95,28 +96,26 @@ macro_rules! get_option { // Unsafe is ok here because options are initialized // once before any Ruby code executes ($option_name:ident) => { - { - use crate::state::ZJITState; - ZJITState::get_options().$option_name - } + unsafe { crate::options::OPTIONS.as_ref() }.unwrap().$option_name }; } pub(crate) use get_option; -/// Allocate Options on the heap, initialize it, and return the address of it. -/// The return value will be modified by rb_zjit_parse_option() and then -/// passed to rb_zjit_init() for initialization. +/// Set default values to ZJIT options. Setting Some to OPTIONS will make `#with_jit` +/// enable the JIT hook while not enabling compilation yet. #[unsafe(no_mangle)] -pub extern "C" fn rb_zjit_init_options() -> *const u8 { - let options = init_options(); - Box::into_raw(Box::new(options)) as *const u8 +pub extern "C" fn rb_zjit_prepare_options() { + // rb_zjit_prepare_options() could be called for feature flags or $RUBY_ZJIT_ENABLE + // after rb_zjit_parse_option() is called, so we need to handle the already-initialized case. + if unsafe { OPTIONS.is_none() } { + unsafe { OPTIONS = Some(Options::default()); } + } } /// Parse a --zjit* command-line flag #[unsafe(no_mangle)] -pub extern "C" fn rb_zjit_parse_option(options: *const u8, str_ptr: *const c_char) -> bool { - let options = unsafe { &mut *(options as *mut Options) }; - parse_option(options, str_ptr).is_some() +pub extern "C" fn rb_zjit_parse_option(str_ptr: *const c_char) -> bool { + parse_option(str_ptr).is_some() } fn parse_jit_list(path_like: &str) -> HashSet { @@ -142,7 +141,10 @@ fn parse_jit_list(path_like: &str) -> HashSet { /// Expected to receive what comes after the third dash in "--zjit-*". /// Empty string means user passed only "--zjit". C code rejects when /// they pass exact "--zjit-". -fn parse_option(options: &mut Options, str_ptr: *const std::os::raw::c_char) -> Option<()> { +fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { + rb_zjit_prepare_options(); + let options = unsafe { OPTIONS.as_mut().unwrap() }; + let c_str: &CStr = unsafe { CStr::from_ptr(str_ptr) }; let opt_str: &str = c_str.to_str().ok()?; @@ -161,7 +163,7 @@ fn parse_option(options: &mut Options, str_ptr: *const std::os::raw::c_char) -> ("call-threshold", _) => match opt_val.parse() { Ok(n) => { unsafe { rb_zjit_call_threshold = n; } - update_profile_threshold(options); + update_profile_threshold(); }, Err(_) => return None, }, @@ -169,13 +171,12 @@ fn parse_option(options: &mut Options, str_ptr: *const std::os::raw::c_char) -> ("num-profiles", _) => match opt_val.parse() { Ok(n) => { options.num_profiles = n; - update_profile_threshold(options); + update_profile_threshold(); }, Err(_) => return None, }, ("stats", "") => { - unsafe { zjit_stats_enabled_p = true; } options.stats = true; } @@ -217,15 +218,14 @@ fn parse_option(options: &mut Options, str_ptr: *const std::os::raw::c_char) -> } /// Update rb_zjit_profile_threshold based on rb_zjit_call_threshold and options.num_profiles -fn update_profile_threshold(options: &Options) { - unsafe { - if rb_zjit_call_threshold == 1 { - // If --zjit-call-threshold=1, never rewrite ISEQs to profile instructions. - rb_zjit_profile_threshold = 0; - } else { - // Otherwise, profile instructions at least once. - rb_zjit_profile_threshold = rb_zjit_call_threshold.saturating_sub(options.num_profiles as u64).max(1); - } +fn update_profile_threshold() { + if unsafe { rb_zjit_call_threshold == 1 } { + // If --zjit-call-threshold=1, never rewrite ISEQs to profile instructions. + unsafe { rb_zjit_profile_threshold = 0; } + } else { + // Otherwise, profile instructions at least once. + let num_profiles = get_option!(num_profiles) as u64; + unsafe { rb_zjit_profile_threshold = rb_zjit_call_threshold.saturating_sub(num_profiles).max(1) }; } } @@ -254,12 +254,23 @@ macro_rules! debug { } pub(crate) use debug; -/// Return Qtrue if --zjit-stats has been enabled +/// Return Qtrue if --zjit* has been specified. For the `#with_jit` hook, +/// this becomes Qtrue before ZJIT is actually initialized and enabled. +#[unsafe(no_mangle)] +pub extern "C" fn rb_zjit_option_enabled_p(_ec: EcPtr, _self: VALUE) -> VALUE { + // If any --zjit* option is specified, OPTIONS becomes Some. + if unsafe { OPTIONS.is_some() } { + Qtrue + } else { + Qfalse + } +} + +/// Return Qtrue if --zjit-stats has been specified. #[unsafe(no_mangle)] pub extern "C" fn rb_zjit_stats_enabled_p(_ec: EcPtr, _self: VALUE) -> VALUE { - // ZJITState is not initialized yet when loading builtins, so this relies - // on a separate global variable. - if unsafe { zjit_stats_enabled_p } { + // Builtin zjit.rb calls this even if ZJIT is disabled, so OPTIONS may not be set. + if unsafe { OPTIONS.as_ref() }.map_or(false, |opts| opts.stats) { Qtrue } else { Qfalse diff --git a/zjit/src/state.rs b/zjit/src/state.rs index ee7cd15d5fb9b1..cd39e07c57aa42 100644 --- a/zjit/src/state.rs +++ b/zjit/src/state.rs @@ -1,8 +1,8 @@ use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insns_count, EcPtr, Qnil, VALUE}; use crate::cruby_methods; use crate::invariants::Invariants; -use crate::options::Options; use crate::asm::CodeBlock; +use crate::options::get_option; use crate::stats::Counters; #[allow(non_upper_case_globals)] @@ -19,9 +19,6 @@ pub struct ZJITState { /// Inline code block (fast path) code_block: CodeBlock, - /// ZJIT command-line options - options: Options, - /// ZJIT statistics counters: Counters, @@ -39,11 +36,12 @@ pub struct ZJITState { static mut ZJIT_STATE: Option = None; impl ZJITState { - /// Initialize the ZJIT globals, given options allocated by rb_zjit_init_options() - pub fn init(options: Options) { + /// Initialize the ZJIT globals + pub fn init() { #[cfg(not(test))] let cb = { use crate::cruby::*; + use crate::options::*; let exec_mem_size: usize = 64 * 1024 * 1024; // TODO: implement the option let virt_block: *mut u8 = unsafe { rb_zjit_reserve_addr_space(64 * 1024 * 1024) }; @@ -75,7 +73,7 @@ impl ZJITState { ); let mem_block = Rc::new(RefCell::new(mem_block)); - CodeBlock::new(mem_block.clone(), options.dump_disasm) + CodeBlock::new(mem_block.clone(), get_option!(dump_disasm)) }; #[cfg(test)] let cb = CodeBlock::new_dummy(); @@ -83,7 +81,6 @@ impl ZJITState { // Initialize the codegen globals instance let zjit_state = ZJITState { code_block: cb, - options, counters: Counters::default(), invariants: Invariants::default(), assert_compiles: false, @@ -107,11 +104,6 @@ impl ZJITState { &mut ZJITState::get_instance().code_block } - /// Get a mutable reference to the options - pub fn get_options() -> &'static mut Options { - &mut ZJITState::get_instance().options - } - /// Get a mutable reference to the invariants pub fn get_invariants() -> &'static mut Invariants { &mut ZJITState::get_instance().invariants @@ -139,13 +131,13 @@ impl ZJITState { /// Was --zjit-save-compiled-iseqs specified? pub fn should_log_compiled_iseqs() -> bool { - ZJITState::get_instance().options.log_compiled_iseqs.is_some() + get_option!(log_compiled_iseqs).is_some() } /// Log the name of a compiled ISEQ to the file specified in options.log_compiled_iseqs pub fn log_compile(iseq_name: String) { assert!(ZJITState::should_log_compiled_iseqs()); - let filename = ZJITState::get_instance().options.log_compiled_iseqs.as_ref().unwrap(); + let filename = get_option!(log_compiled_iseqs).as_ref().unwrap(); use std::io::Write; let mut file = match std::fs::OpenOptions::new().create(true).append(true).open(filename) { Ok(f) => f, @@ -161,7 +153,7 @@ impl ZJITState { /// Check if we are allowed to compile a given ISEQ based on --zjit-allowed-iseqs pub fn can_compile_iseq(iseq: cruby::IseqPtr) -> bool { - if let Some(ref allowed_iseqs) = ZJITState::get_instance().options.allowed_iseqs { + if let Some(ref allowed_iseqs) = get_option!(allowed_iseqs) { let name = cruby::iseq_get_location(iseq, 0); allowed_iseqs.contains(&name) } else { @@ -170,17 +162,17 @@ impl ZJITState { } } -/// Initialize ZJIT, given options allocated by rb_zjit_init_options() +/// Initialize ZJIT #[unsafe(no_mangle)] -pub extern "C" fn rb_zjit_init(options: *const u8) { +pub extern "C" fn rb_zjit_init() { // Catch panics to avoid UB for unwinding into C frames. // See https://doc.rust-lang.org/nomicon/exception-safety.html let result = std::panic::catch_unwind(|| { + // Initialize ZJIT states cruby::ids::init(); + ZJITState::init(); - let options = unsafe { Box::from_raw(options as *mut Options) }; - ZJITState::init(*options); - + // Install a panic hook for ZJIT rb_bug_panic_hook(); // Discard the instruction count for boot which we never compile From 0aac763bf01ee3a85550d5a7651edc297b48f474 Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Tue, 29 Jul 2025 12:17:49 -0700 Subject: [PATCH 04/12] Convert cross_ractor_requires to DECL_MARKING --- ractor.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/ractor.c b/ractor.c index 12ffced0a3546f..0f4da1599a05c3 100644 --- a/ractor.c +++ b/ractor.c @@ -2267,26 +2267,24 @@ struct cross_ractor_require { bool silent; }; -static void -cross_ractor_require_mark(void *ptr) -{ - struct cross_ractor_require *crr = (struct cross_ractor_require *)ptr; - rb_gc_mark(crr->port); - rb_gc_mark(crr->result); - rb_gc_mark(crr->exception); - rb_gc_mark(crr->feature); - rb_gc_mark(crr->module); -} +RUBY_REFERENCES(cross_ractor_require_refs) = { + RUBY_REF_EDGE(struct cross_ractor_require, port), + RUBY_REF_EDGE(struct cross_ractor_require, result), + RUBY_REF_EDGE(struct cross_ractor_require, exception), + RUBY_REF_EDGE(struct cross_ractor_require, feature), + RUBY_REF_EDGE(struct cross_ractor_require, module), + RUBY_REF_END +}; static const rb_data_type_t cross_ractor_require_data_type = { "ractor/cross_ractor_require", { - cross_ractor_require_mark, + RUBY_REFS_LIST_PTR(cross_ractor_require_refs), RUBY_DEFAULT_FREE, NULL, // memsize NULL, // compact }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_DECL_MARKING }; static VALUE From 9a3055479640c8b421621c34946288a3c8c49c87 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Wed, 30 Jul 2025 11:49:51 -0400 Subject: [PATCH 05/12] ZJIT: Remove unused ArraySet instruction --- zjit/src/hir.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 06c00e3d99a10b..524de334f8cb00 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -462,7 +462,6 @@ pub enum Insn { /// NewHash contains a vec of (key, value) pairs NewHash { elements: Vec<(InsnId,InsnId)>, state: InsnId }, NewRange { low: InsnId, high: InsnId, flag: RangeType, state: InsnId }, - ArraySet { array: InsnId, idx: usize, val: InsnId }, ArrayDup { val: InsnId, state: InsnId }, ArrayMax { elements: Vec, state: InsnId }, /// Extend `left` with the elements from `right`. `left` and `right` must both be `Array`. @@ -575,7 +574,7 @@ impl Insn { /// Not every instruction returns a value. Return true if the instruction does and false otherwise. pub fn has_output(&self) -> bool { match self { - Insn::ArraySet { .. } | Insn::Jump(_) + Insn::Jump(_) | Insn::IfTrue { .. } | Insn::IfFalse { .. } | Insn::Return { .. } | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetGlobal { .. } @@ -673,7 +672,6 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { } Ok(()) } - Insn::ArraySet { array, idx, val } => { write!(f, "ArraySet {array}, {idx}, {val}") } Insn::ArrayDup { val, .. } => { write!(f, "ArrayDup {val}") } Insn::HashDup { val, .. } => { write!(f, "HashDup {val}") } Insn::StringCopy { val, .. } => { write!(f, "StringCopy {val}") } @@ -1172,7 +1170,6 @@ impl Function { state: *state, }, InvokeBuiltin { bf, args, state } => InvokeBuiltin { bf: *bf, args: find_vec!(*args), state: *state }, - ArraySet { array, idx, val } => ArraySet { array: find!(*array), idx: *idx, val: find!(*val) }, ArrayDup { val , state } => ArrayDup { val: find!(*val), state: *state }, &HashDup { val , state } => HashDup { val: find!(val), state }, &CCall { cfun, ref args, name, return_type, elidable } => CCall { cfun, args: find_vec!(args), name, return_type, elidable }, @@ -1219,7 +1216,7 @@ impl Function { assert!(self.insns[insn.0].has_output()); match &self.insns[insn.0] { Insn::Param { .. } => unimplemented!("params should not be present in block.insns"), - Insn::SetGlobal { .. } | Insn::ArraySet { .. } | Insn::Jump(_) + Insn::SetGlobal { .. } | Insn::Jump(_) | Insn::IfTrue { .. } | Insn::IfFalse { .. } | Insn::Return { .. } | Insn::Throw { .. } | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetLocal { .. } | Insn::IncrCounter(_) => @@ -1881,10 +1878,6 @@ impl Function { worklist.push_back(val); worklist.push_back(state); } - &Insn::ArraySet { array, val, .. } => { - worklist.push_back(array); - worklist.push_back(val); - } &Insn::Snapshot { ref state } => { worklist.extend(&state.stack); worklist.extend(&state.locals); From 7b10dbd55f4b00fc4268e031a790306c12485b99 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Wed, 30 Jul 2025 11:54:30 -0400 Subject: [PATCH 06/12] ZJIT: Remove catch-all case to make it clearer what's unimplemented --- zjit/src/codegen.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 23b2dc2bd4192d..5fef220a4a43b7 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -367,7 +367,22 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::AnyToString { val, str, state } => gen_anytostring(asm, opnd!(val), opnd!(str), &function.frame_state(*state))?, Insn::Defined { op_type, obj, pushval, v } => gen_defined(jit, asm, *op_type, *obj, *pushval, opnd!(v))?, &Insn::IncrCounter(counter) => return Some(gen_incr_counter(asm, counter)), - _ => { + Insn::ArrayExtend { .. } + | Insn::ArrayMax { .. } + | Insn::ArrayPush { .. } + | Insn::DefinedIvar { .. } + | Insn::FixnumDiv { .. } + | Insn::FixnumMod { .. } + | Insn::HashDup { .. } + | Insn::NewHash { .. } + | Insn::ObjToString { .. } + | Insn::Send { .. } + | Insn::StringIntern { .. } + | Insn::Throw { .. } + | Insn::ToArray { .. } + | Insn::ToNewArray { .. } + | Insn::Const { .. } + => { debug!("ZJIT: gen_function: unexpected insn {insn}"); return None; } From 096d48d7db1b1099dda4ccb05be401c97368d64b Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Wed, 30 Jul 2025 11:58:31 -0400 Subject: [PATCH 07/12] ZJIT: Deref struct in find() --- zjit/src/hir.rs | 108 ++++++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 524de334f8cb00..3abe3baa74d8f9 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -1100,82 +1100,82 @@ impl Function { | GetLocal {..} | SideExit {..} | IncrCounter(_)) => result.clone(), - Snapshot { state: FrameState { iseq, insn_idx, pc, stack, locals } } => + &Snapshot { state: FrameState { iseq, insn_idx, pc, ref stack, ref locals } } => Snapshot { state: FrameState { - iseq: *iseq, - insn_idx: *insn_idx, - pc: *pc, + iseq, + insn_idx, + pc, stack: find_vec!(stack), locals: find_vec!(locals), } }, - Return { val } => Return { val: find!(*val) }, + &Return { val } => Return { val: find!(val) }, &Throw { throw_state, val } => Throw { throw_state, val: find!(val) }, - StringCopy { val, chilled } => StringCopy { val: find!(*val), chilled: *chilled }, - StringIntern { val } => StringIntern { val: find!(*val) }, - Test { val } => Test { val: find!(*val) }, + &StringCopy { val, chilled } => StringCopy { val: find!(val), chilled }, + &StringIntern { val } => StringIntern { val: find!(val) }, + &Test { val } => Test { val: find!(val) }, &IsNil { val } => IsNil { val: find!(val) }, - Jump(target) => Jump(find_branch_edge!(target)), - IfTrue { val, target } => IfTrue { val: find!(*val), target: find_branch_edge!(target) }, - IfFalse { val, target } => IfFalse { val: find!(*val), target: find_branch_edge!(target) }, - GuardType { val, guard_type, state } => GuardType { val: find!(*val), guard_type: *guard_type, state: *state }, - GuardBitEquals { val, expected, state } => GuardBitEquals { val: find!(*val), expected: *expected, state: *state }, - FixnumAdd { left, right, state } => FixnumAdd { left: find!(*left), right: find!(*right), state: *state }, - FixnumSub { left, right, state } => FixnumSub { left: find!(*left), right: find!(*right), state: *state }, - FixnumMult { left, right, state } => FixnumMult { left: find!(*left), right: find!(*right), state: *state }, - FixnumDiv { left, right, state } => FixnumDiv { left: find!(*left), right: find!(*right), state: *state }, - FixnumMod { left, right, state } => FixnumMod { left: find!(*left), right: find!(*right), state: *state }, - FixnumNeq { left, right } => FixnumNeq { left: find!(*left), right: find!(*right) }, - FixnumEq { left, right } => FixnumEq { left: find!(*left), right: find!(*right) }, - FixnumGt { left, right } => FixnumGt { left: find!(*left), right: find!(*right) }, - FixnumGe { left, right } => FixnumGe { left: find!(*left), right: find!(*right) }, - FixnumLt { left, right } => FixnumLt { left: find!(*left), right: find!(*right) }, - FixnumLe { left, right } => FixnumLe { left: find!(*left), right: find!(*right) }, - FixnumAnd { left, right } => FixnumAnd { left: find!(*left), right: find!(*right) }, - FixnumOr { left, right } => FixnumOr { left: find!(*left), right: find!(*right) }, - ObjToString { val, call_info, cd, state } => ObjToString { - val: find!(*val), + &Jump(ref target) => Jump(find_branch_edge!(target)), + &IfTrue { val, ref target } => IfTrue { val: find!(val), target: find_branch_edge!(target) }, + &IfFalse { val, ref target } => IfFalse { val: find!(val), target: find_branch_edge!(target) }, + &GuardType { val, guard_type, state } => GuardType { val: find!(val), guard_type: guard_type, state }, + &GuardBitEquals { val, expected, state } => GuardBitEquals { val: find!(val), expected: expected, state }, + &FixnumAdd { left, right, state } => FixnumAdd { left: find!(left), right: find!(right), state }, + &FixnumSub { left, right, state } => FixnumSub { left: find!(left), right: find!(right), state }, + &FixnumMult { left, right, state } => FixnumMult { left: find!(left), right: find!(right), state }, + &FixnumDiv { left, right, state } => FixnumDiv { left: find!(left), right: find!(right), state }, + &FixnumMod { left, right, state } => FixnumMod { left: find!(left), right: find!(right), state }, + &FixnumNeq { left, right } => FixnumNeq { left: find!(left), right: find!(right) }, + &FixnumEq { left, right } => FixnumEq { left: find!(left), right: find!(right) }, + &FixnumGt { left, right } => FixnumGt { left: find!(left), right: find!(right) }, + &FixnumGe { left, right } => FixnumGe { left: find!(left), right: find!(right) }, + &FixnumLt { left, right } => FixnumLt { left: find!(left), right: find!(right) }, + &FixnumLe { left, right } => FixnumLe { left: find!(left), right: find!(right) }, + &FixnumAnd { left, right } => FixnumAnd { left: find!(left), right: find!(right) }, + &FixnumOr { left, right } => FixnumOr { left: find!(left), right: find!(right) }, + &ObjToString { val, ref call_info, cd, state } => ObjToString { + val: find!(val), call_info: call_info.clone(), - cd: *cd, - state: *state, + cd: cd, + state, }, - AnyToString { val, str, state } => AnyToString { - val: find!(*val), - str: find!(*str), - state: *state, + &AnyToString { val, str, state } => AnyToString { + val: find!(val), + str: find!(str), + state, }, - SendWithoutBlock { self_val, call_info, cd, args, state } => SendWithoutBlock { - self_val: find!(*self_val), + &SendWithoutBlock { self_val, ref call_info, cd, ref args, state } => SendWithoutBlock { + self_val: find!(self_val), call_info: call_info.clone(), - cd: *cd, + cd: cd, args: find_vec!(args), - state: *state, + state, }, - SendWithoutBlockDirect { self_val, call_info, cd, cme, iseq, args, state } => SendWithoutBlockDirect { - self_val: find!(*self_val), + &SendWithoutBlockDirect { self_val, ref call_info, cd, cme, iseq, ref args, state } => SendWithoutBlockDirect { + self_val: find!(self_val), call_info: call_info.clone(), - cd: *cd, - cme: *cme, - iseq: *iseq, + cd: cd, + cme: cme, + iseq: iseq, args: find_vec!(args), - state: *state, + state, }, - Send { self_val, call_info, cd, blockiseq, args, state } => Send { - self_val: find!(*self_val), + &Send { self_val, ref call_info, cd, blockiseq, ref args, state } => Send { + self_val: find!(self_val), call_info: call_info.clone(), - cd: *cd, - blockiseq: *blockiseq, + cd: cd, + blockiseq: blockiseq, args: find_vec!(args), - state: *state, + state, }, - InvokeBuiltin { bf, args, state } => InvokeBuiltin { bf: *bf, args: find_vec!(*args), state: *state }, - ArrayDup { val , state } => ArrayDup { val: find!(*val), state: *state }, - &HashDup { val , state } => HashDup { val: find!(val), state }, + &InvokeBuiltin { bf, ref args, state } => InvokeBuiltin { bf: bf, args: find_vec!(args), state }, + &ArrayDup { val, state } => ArrayDup { val: find!(val), state }, + &HashDup { val, state } => HashDup { val: find!(val), state }, &CCall { cfun, ref args, name, return_type, elidable } => CCall { cfun, args: find_vec!(args), name, return_type, elidable }, &Defined { op_type, obj, pushval, v } => Defined { op_type, obj, pushval, v: find!(v) }, &DefinedIvar { self_val, pushval, id, state } => DefinedIvar { self_val: find!(self_val), pushval, id, state }, - NewArray { elements, state } => NewArray { elements: find_vec!(*elements), state: find!(*state) }, + &NewArray { ref elements, state } => NewArray { elements: find_vec!(elements), state: find!(state) }, &NewHash { ref elements, state } => { let mut found_elements = vec![]; for &(key, value) in elements { @@ -1184,7 +1184,7 @@ impl Function { NewHash { elements: found_elements, state: find!(state) } } &NewRange { low, high, flag, state } => NewRange { low: find!(low), high: find!(high), flag, state: find!(state) }, - ArrayMax { elements, state } => ArrayMax { elements: find_vec!(*elements), state: find!(*state) }, + &ArrayMax { ref elements, state } => ArrayMax { elements: find_vec!(elements), state: find!(state) }, &SetGlobal { id, val, state } => SetGlobal { id, val: find!(val), state }, &GetIvar { self_val, id, state } => GetIvar { self_val: find!(self_val), id, state }, &SetIvar { self_val, id, val, state } => SetIvar { self_val: find!(self_val), id, val: find!(val), state }, From 0f7ee8e7a4619f6ebbb9858c53c7414e0001e905 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Wed, 30 Jul 2025 12:16:51 -0400 Subject: [PATCH 08/12] ZJIT: Get rid of CallInfo --- zjit/src/codegen.rs | 11 ++--- zjit/src/cruby.rs | 25 ++++++++++ zjit/src/hir.rs | 117 +++++++++++++++----------------------------- 3 files changed, 69 insertions(+), 84 deletions(-) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 5fef220a4a43b7..607bc560d2f7d5 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -10,7 +10,7 @@ use crate::state::ZJITState; use crate::stats::{counter_ptr, Counter}; use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr}; use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, SideExitContext, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SP}; -use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, CallInfo, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SELF_PARAM_IDX}; +use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SELF_PARAM_IDX}; use crate::hir::{Const, FrameState, Function, Insn, InsnId}; use crate::hir_type::{types, Type}; use crate::options::get_option; @@ -331,10 +331,10 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::Jump(branch) => return gen_jump(jit, asm, branch), Insn::IfTrue { val, target } => return gen_if_true(jit, asm, opnd!(val), target), Insn::IfFalse { val, target } => return gen_if_false(jit, asm, opnd!(val), target), - Insn::SendWithoutBlock { call_info, cd, state, self_val, args, .. } => gen_send_without_block(jit, asm, call_info, *cd, &function.frame_state(*state), opnd!(self_val), opnds!(args))?, + Insn::SendWithoutBlock { cd, state, self_val, args, .. } => gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), opnd!(self_val), opnds!(args))?, // Give up SendWithoutBlockDirect for 6+ args since asm.ccall() doesn't support it. - Insn::SendWithoutBlockDirect { call_info, cd, state, self_val, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() => // +1 for self - gen_send_without_block(jit, asm, call_info, *cd, &function.frame_state(*state), opnd!(self_val), opnds!(args))?, + Insn::SendWithoutBlockDirect { cd, state, self_val, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() => // +1 for self + gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), opnd!(self_val), opnds!(args))?, Insn::SendWithoutBlockDirect { cme, iseq, self_val, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(self_val), opnds!(args), &function.frame_state(*state))?, Insn::InvokeBuiltin { bf, args, state } => gen_invokebuiltin(asm, &function.frame_state(*state), bf, opnds!(args))?, Insn::Return { val } => return Some(gen_return(asm, opnd!(val))?), @@ -763,7 +763,6 @@ fn gen_if_false(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, branch: fn gen_send_without_block( jit: &mut JITState, asm: &mut Assembler, - call_info: &CallInfo, cd: *const rb_call_data, state: &FrameState, self_val: Opnd, @@ -789,7 +788,7 @@ fn gen_send_without_block( gen_save_pc(asm, state); gen_save_sp(asm, 1 + args.len()); // +1 for receiver - asm_comment!(asm, "call #{} with dynamic dispatch", call_info.method_name); + asm_comment!(asm, "call #{} with dynamic dispatch", ruby_call_method_name(cd)); unsafe extern "C" { fn rb_vm_opt_send_without_block(ec: EcPtr, cfp: CfpPtr, cd: VALUE) -> VALUE; } diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 1d4ba4c9c34877..582bd49c965ddb 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -750,6 +750,16 @@ pub fn ruby_sym_to_rust_string(v: VALUE) -> String { ruby_str_to_rust_string(ruby_str) } +pub fn ruby_call_method_id(cd: *const rb_call_data) -> ID { + let call_info = unsafe { rb_get_call_data_ci(cd) }; + unsafe { rb_vm_ci_mid(call_info) } +} + +pub fn ruby_call_method_name(cd: *const rb_call_data) -> String { + let mid = ruby_call_method_id(cd); + mid.contents_lossy().to_string() +} + /// A location in Rust code for integrating with debugging facilities defined in C. /// Use the [src_loc!] macro to crate an instance. pub struct SourceLocation { @@ -1211,6 +1221,21 @@ pub(crate) mod ids { name: to_s name: compile name: eval + name: plus content: b"+" + name: minus content: b"-" + name: mult content: b"*" + name: div content: b"/" + name: modulo content: b"%" + name: neq content: b"!=" + name: lt content: b"<" + name: le content: b"<=" + name: gt content: b">" + name: ge content: b">=" + name: and content: b"&" + name: or content: b"|" + name: freeze + name: minusat content: b"-@" + name: aref content: b"[]" } /// Get an CRuby `ID` to an interned string, e.g. a particular method name. diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 3abe3baa74d8f9..62023e5ea6f863 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -107,11 +107,6 @@ impl std::fmt::Display for BranchEdge { } } -#[derive(Debug, PartialEq, Clone)] -pub struct CallInfo { - pub method_name: String, -} - /// Invalidation reasons #[derive(Debug, Clone, Copy)] pub enum Invariant { @@ -515,11 +510,10 @@ pub enum Insn { /// Send without block with dynamic dispatch /// Ignoring keyword arguments etc for now - SendWithoutBlock { self_val: InsnId, call_info: CallInfo, cd: *const rb_call_data, args: Vec, state: InsnId }, - Send { self_val: InsnId, call_info: CallInfo, cd: *const rb_call_data, blockiseq: IseqPtr, args: Vec, state: InsnId }, + SendWithoutBlock { self_val: InsnId, cd: *const rb_call_data, args: Vec, state: InsnId }, + Send { self_val: InsnId, cd: *const rb_call_data, blockiseq: IseqPtr, args: Vec, state: InsnId }, SendWithoutBlockDirect { self_val: InsnId, - call_info: CallInfo, cd: *const rb_call_data, cme: *const rb_callable_method_entry_t, iseq: IseqPtr, @@ -551,7 +545,7 @@ pub enum Insn { FixnumOr { left: InsnId, right: InsnId }, // Distinct from `SendWithoutBlock` with `mid:to_s` because does not have a patch point for String to_s being redefined - ObjToString { val: InsnId, call_info: CallInfo, cd: *const rb_call_data, state: InsnId }, + ObjToString { val: InsnId, cd: *const rb_call_data, state: InsnId }, AnyToString { val: InsnId, str: InsnId, state: InsnId }, /// Side-exit if val doesn't have the expected type. @@ -680,25 +674,25 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::Jump(target) => { write!(f, "Jump {target}") } Insn::IfTrue { val, target } => { write!(f, "IfTrue {val}, {target}") } Insn::IfFalse { val, target } => { write!(f, "IfFalse {val}, {target}") } - Insn::SendWithoutBlock { self_val, call_info, args, .. } => { - write!(f, "SendWithoutBlock {self_val}, :{}", call_info.method_name)?; + Insn::SendWithoutBlock { self_val, cd, args, .. } => { + write!(f, "SendWithoutBlock {self_val}, :{}", ruby_call_method_name(*cd))?; for arg in args { write!(f, ", {arg}")?; } Ok(()) } - Insn::SendWithoutBlockDirect { self_val, call_info, iseq, args, .. } => { - write!(f, "SendWithoutBlockDirect {self_val}, :{} ({:?})", call_info.method_name, self.ptr_map.map_ptr(iseq))?; + Insn::SendWithoutBlockDirect { self_val, cd, iseq, args, .. } => { + write!(f, "SendWithoutBlockDirect {self_val}, :{} ({:?})", ruby_call_method_name(*cd), self.ptr_map.map_ptr(iseq))?; for arg in args { write!(f, ", {arg}")?; } Ok(()) } - Insn::Send { self_val, call_info, args, blockiseq, .. } => { + Insn::Send { self_val, cd, args, blockiseq, .. } => { // For tests, we want to check HIR snippets textually. Addresses change // between runs, making tests fail. Instead, pick an arbitrary hex value to // use as a "pointer" so we can check the rest of the HIR. - write!(f, "Send {self_val}, {:p}, :{}", self.ptr_map.map_ptr(blockiseq), call_info.method_name)?; + write!(f, "Send {self_val}, {:p}, :{}", self.ptr_map.map_ptr(blockiseq), ruby_call_method_name(*cd))?; for arg in args { write!(f, ", {arg}")?; } @@ -1134,9 +1128,8 @@ impl Function { &FixnumLe { left, right } => FixnumLe { left: find!(left), right: find!(right) }, &FixnumAnd { left, right } => FixnumAnd { left: find!(left), right: find!(right) }, &FixnumOr { left, right } => FixnumOr { left: find!(left), right: find!(right) }, - &ObjToString { val, ref call_info, cd, state } => ObjToString { + &ObjToString { val, cd, state } => ObjToString { val: find!(val), - call_info: call_info.clone(), cd: cd, state, }, @@ -1145,25 +1138,22 @@ impl Function { str: find!(str), state, }, - &SendWithoutBlock { self_val, ref call_info, cd, ref args, state } => SendWithoutBlock { + &SendWithoutBlock { self_val, cd, ref args, state } => SendWithoutBlock { self_val: find!(self_val), - call_info: call_info.clone(), cd: cd, args: find_vec!(args), state, }, - &SendWithoutBlockDirect { self_val, ref call_info, cd, cme, iseq, ref args, state } => SendWithoutBlockDirect { + &SendWithoutBlockDirect { self_val, cd, cme, iseq, ref args, state } => SendWithoutBlockDirect { self_val: find!(self_val), - call_info: call_info.clone(), cd: cd, cme: cme, iseq: iseq, args: find_vec!(args), state, }, - &Send { self_val, ref call_info, cd, blockiseq, ref args, state } => Send { + &Send { self_val, cd, blockiseq, ref args, state } => Send { self_val: find!(self_val), - call_info: call_info.clone(), cd: cd, blockiseq: blockiseq, args: find_vec!(args), @@ -1477,39 +1467,39 @@ impl Function { assert!(self.blocks[block.0].insns.is_empty()); for insn_id in old_insns { match self.find(insn_id) { - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "+" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(plus) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumAdd { left, right, state }, BOP_PLUS, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "-" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(minus) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumSub { left, right, state }, BOP_MINUS, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "*" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(mult) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumMult { left, right, state }, BOP_MULT, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "/" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(div) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumDiv { left, right, state }, BOP_DIV, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "%" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(modulo) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumMod { left, right, state }, BOP_MOD, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "==" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(eq) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumEq { left, right }, BOP_EQ, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "!=" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(neq) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumNeq { left, right }, BOP_NEQ, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "<" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(lt) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumLt { left, right }, BOP_LT, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "<=" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(le) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumLe { left, right }, BOP_LE, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == ">" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(gt) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumGt { left, right }, BOP_GT, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == ">=" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(ge) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumGe { left, right }, BOP_GE, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "&" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(and) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumAnd { left, right }, BOP_AND, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "|" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(or) && args.len() == 1 => self.try_rewrite_fixnum_op(block, insn_id, &|left, right| Insn::FixnumOr { left, right }, BOP_OR, self_val, args[0], state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "freeze" && args.len() == 0 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(freeze) && args.len() == 0 => self.try_rewrite_freeze(block, insn_id, self_val, state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "-@" && args.len() == 0 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(minusat) && args.len() == 0 => self.try_rewrite_uminus(block, insn_id, self_val, state), - Insn::SendWithoutBlock { self_val, call_info: CallInfo { method_name }, args, state, .. } if method_name == "[]" && args.len() == 1 => + Insn::SendWithoutBlock { self_val, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(aref) && args.len() == 1 => self.try_rewrite_aref(block, insn_id, self_val, args[0], state), - Insn::SendWithoutBlock { mut self_val, call_info, cd, args, state } => { + Insn::SendWithoutBlock { mut self_val, cd, args, state } => { let frame_state = self.frame_state(state); let (klass, guard_equal_to) = if let Some(klass) = self.type_of(self_val).runtime_exact_ruby_class() { // If we know the class statically, use it to fold the lookup at compile-time. @@ -1546,7 +1536,7 @@ impl Function { if let Some(expected) = guard_equal_to { self_val = self.push_insn(block, Insn::GuardBitEquals { val: self_val, expected, state }); } - let send_direct = self.push_insn(block, Insn::SendWithoutBlockDirect { self_val, call_info, cd, cme, iseq, args, state }); + let send_direct = self.push_insn(block, Insn::SendWithoutBlockDirect { self_val, cd, cme, iseq, args, state }); self.make_equal_to(insn_id, send_direct); } Insn::GetConstantPath { ic, state, .. } => { @@ -1569,12 +1559,12 @@ impl Function { self.insn_types[replacement.0] = self.infer_type(replacement); self.make_equal_to(insn_id, replacement); } - Insn::ObjToString { val, call_info, cd, state, .. } => { + Insn::ObjToString { val, cd, state, .. } => { if self.is_a(val, types::String) { // behaves differently from `SendWithoutBlock` with `mid:to_s` because ObjToString should not have a patch point for String to_s being redefined self.make_equal_to(insn_id, val); } else { - let replacement = self.push_insn(block, Insn::SendWithoutBlock { self_val: val, call_info, cd, args: vec![], state }); + let replacement = self.push_insn(block, Insn::SendWithoutBlock { self_val: val, cd, args: vec![], state }); self.make_equal_to(insn_id, replacement) } } @@ -2934,18 +2924,13 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } let argc = unsafe { vm_ci_argc((*cd).ci) }; - let method_name = unsafe { - let mid = rb_vm_ci_mid(call_info); - mid.contents_lossy().into_owned() - }; - assert_eq!(1, argc, "opt_aref_with should only be emitted for argc=1"); let aref_arg = fun.push_insn(block, Insn::Const { val: Const::Value(get_arg(pc, 0)) }); let args = vec![aref_arg]; let recv = state.stack_pop()?; let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let send = fun.push_insn(block, Insn::SendWithoutBlock { self_val: recv, call_info: CallInfo { method_name }, cd, args, state: exit_id }); + let send = fun.push_insn(block, Insn::SendWithoutBlock { self_val: recv, cd, args, state: exit_id }); state.stack_push(send); } YARVINSN_opt_neq => { @@ -2960,11 +2945,6 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } let argc = unsafe { vm_ci_argc((*cd).ci) }; - - let method_name = unsafe { - let mid = rb_vm_ci_mid(call_info); - mid.contents_lossy().into_owned() - }; let mut args = vec![]; for _ in 0..argc { args.push(state.stack_pop()?); @@ -2973,7 +2953,7 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let recv = state.stack_pop()?; let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let send = fun.push_insn(block, Insn::SendWithoutBlock { self_val: recv, call_info: CallInfo { method_name }, cd, args, state: exit_id }); + let send = fun.push_insn(block, Insn::SendWithoutBlock { self_val: recv, cd, args, state: exit_id }); state.stack_push(send); } YARVINSN_opt_hash_freeze | @@ -2994,14 +2974,9 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { assert_eq!(0, argc, "{name} should not have args"); let args = vec![]; - let method_name = unsafe { - let mid = rb_vm_ci_mid(call_info); - mid.contents_lossy().into_owned() - }; - let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); let recv = fun.push_insn(block, Insn::Const { val: Const::Value(get_arg(pc, 0)) }); - let send = fun.push_insn(block, Insn::SendWithoutBlock { self_val: recv, call_info: CallInfo { method_name }, cd, args, state: exit_id }); + let send = fun.push_insn(block, Insn::SendWithoutBlock { self_val: recv, cd, args, state: exit_id }); state.stack_push(send); } @@ -3051,11 +3026,6 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } let argc = unsafe { vm_ci_argc((*cd).ci) }; - - let method_name = unsafe { - let mid = rb_vm_ci_mid(call_info); - mid.contents_lossy().into_owned() - }; let mut args = vec![]; for _ in 0..argc { args.push(state.stack_pop()?); @@ -3064,7 +3034,7 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let recv = state.stack_pop()?; let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let send = fun.push_insn(block, Insn::SendWithoutBlock { self_val: recv, call_info: CallInfo { method_name }, cd, args, state: exit_id }); + let send = fun.push_insn(block, Insn::SendWithoutBlock { self_val: recv, cd, args, state: exit_id }); state.stack_push(send); } YARVINSN_send => { @@ -3079,10 +3049,6 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { } let argc = unsafe { vm_ci_argc((*cd).ci) }; - let method_name = unsafe { - let mid = rb_vm_ci_mid(call_info); - mid.contents_lossy().into_owned() - }; let mut args = vec![]; for _ in 0..argc { args.push(state.stack_pop()?); @@ -3091,7 +3057,7 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let recv = state.stack_pop()?; let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let send = fun.push_insn(block, Insn::Send { self_val: recv, call_info: CallInfo { method_name }, cd, blockiseq, args, state: exit_id }); + let send = fun.push_insn(block, Insn::Send { self_val: recv, cd, blockiseq, args, state: exit_id }); state.stack_push(send); } YARVINSN_getglobal => { @@ -3177,14 +3143,9 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let argc = unsafe { vm_ci_argc((*cd).ci) }; assert_eq!(0, argc, "objtostring should not have args"); - let method_name: String = unsafe { - let mid = rb_vm_ci_mid(call_info); - mid.contents_lossy().into_owned() - }; - let recv = state.stack_pop()?; let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let objtostring = fun.push_insn(block, Insn::ObjToString { val: recv, call_info: CallInfo { method_name }, cd, state: exit_id }); + let objtostring = fun.push_insn(block, Insn::ObjToString { val: recv, cd, state: exit_id }); state.stack_push(objtostring) } YARVINSN_anytostring => { From 8c73b103cd268ee081d7c688c48c7073acc80eea Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Wed, 30 Jul 2025 12:27:57 -0400 Subject: [PATCH 09/12] ZJIT: Don't write to String --- zjit/src/hir.rs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 62023e5ea6f863..e5d4f031a399f1 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -763,23 +763,23 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::SideExit { reason, .. } => write!(f, "SideExit {reason}"), Insn::PutSpecialObject { value_type } => write!(f, "PutSpecialObject {value_type}"), Insn::Throw { throw_state, val } => { - let mut state_string = match throw_state & VM_THROW_STATE_MASK { - RUBY_TAG_NONE => "TAG_NONE".to_string(), - RUBY_TAG_RETURN => "TAG_RETURN".to_string(), - RUBY_TAG_BREAK => "TAG_BREAK".to_string(), - RUBY_TAG_NEXT => "TAG_NEXT".to_string(), - RUBY_TAG_RETRY => "TAG_RETRY".to_string(), - RUBY_TAG_REDO => "TAG_REDO".to_string(), - RUBY_TAG_RAISE => "TAG_RAISE".to_string(), - RUBY_TAG_THROW => "TAG_THROW".to_string(), - RUBY_TAG_FATAL => "TAG_FATAL".to_string(), - tag => format!("{tag}") - }; + write!(f, "Throw ")?; + match throw_state & VM_THROW_STATE_MASK { + RUBY_TAG_NONE => write!(f, "TAG_NONE"), + RUBY_TAG_RETURN => write!(f, "TAG_RETURN"), + RUBY_TAG_BREAK => write!(f, "TAG_BREAK"), + RUBY_TAG_NEXT => write!(f, "TAG_NEXT"), + RUBY_TAG_RETRY => write!(f, "TAG_RETRY"), + RUBY_TAG_REDO => write!(f, "TAG_REDO"), + RUBY_TAG_RAISE => write!(f, "TAG_RAISE"), + RUBY_TAG_THROW => write!(f, "TAG_THROW"), + RUBY_TAG_FATAL => write!(f, "TAG_FATAL"), + tag => write!(f, "{tag}") + }?; if throw_state & VM_THROW_NO_ESCAPE_FLAG != 0 { - use std::fmt::Write; - write!(state_string, "|NO_ESCAPE")?; + write!(f, "|NO_ESCAPE")?; } - write!(f, "Throw {state_string}, {val}") + write!(f, ", {val}") } Insn::IncrCounter(counter) => write!(f, "IncrCounter {counter:?}"), insn => { write!(f, "{insn:?}") } From 1b700c56d8556eaba0b6b9a637a0df8d91d603a2 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Wed, 30 Jul 2025 12:29:59 -0400 Subject: [PATCH 10/12] ZJIT: Don't make unnecessary Cow --- zjit/src/hir.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index e5d4f031a399f1..2031b9fdba33a8 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -734,18 +734,18 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::Defined { op_type, v, .. } => { // op_type (enum defined_type) printing logic from iseq.c. // Not sure why rb_iseq_defined_string() isn't exhaustive. - use std::borrow::Cow; + write!(f, "Defined ")?; let op_type = *op_type as u32; - let op_type = if op_type == DEFINED_FUNC { - Cow::Borrowed("func") + if op_type == DEFINED_FUNC { + write!(f, "func")?; } else if op_type == DEFINED_REF { - Cow::Borrowed("ref") + write!(f, "ref")?; } else if op_type == DEFINED_CONST_FROM { - Cow::Borrowed("constant-from") + write!(f, "constant-from")?; } else { - String::from_utf8_lossy(unsafe { rb_iseq_defined_string(op_type).as_rstring_byte_slice().unwrap() }) + write!(f, "{}", String::from_utf8_lossy(unsafe { rb_iseq_defined_string(op_type).as_rstring_byte_slice().unwrap() }))?; }; - write!(f, "Defined {op_type}, {v}") + write!(f, ", {v}") } Insn::DefinedIvar { self_val, id, .. } => write!(f, "DefinedIvar {self_val}, :{}", id.contents_lossy().into_owned()), Insn::GetIvar { self_val, id, .. } => write!(f, "GetIvar {self_val}, :{}", id.contents_lossy().into_owned()), From 75f25e5c4944a34a543285354b8d7953cb1b047d Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Wed, 30 Jul 2025 12:46:30 -0400 Subject: [PATCH 11/12] ZJIT: Don't create owned Cow/String when printing --- zjit/src/hir.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 2031b9fdba33a8..6d45383713643b 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -292,7 +292,7 @@ impl std::fmt::Display for RangeType { impl std::fmt::Debug for RangeType { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}", self.to_string()) + write!(f, "{}", self) } } @@ -747,11 +747,11 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { }; write!(f, ", {v}") } - Insn::DefinedIvar { self_val, id, .. } => write!(f, "DefinedIvar {self_val}, :{}", id.contents_lossy().into_owned()), - Insn::GetIvar { self_val, id, .. } => write!(f, "GetIvar {self_val}, :{}", id.contents_lossy().into_owned()), - Insn::SetIvar { self_val, id, val, .. } => write!(f, "SetIvar {self_val}, :{}, {val}", id.contents_lossy().into_owned()), - Insn::GetGlobal { id, .. } => write!(f, "GetGlobal :{}", id.contents_lossy().into_owned()), - Insn::SetGlobal { id, val, .. } => write!(f, "SetGlobal :{}, {val}", id.contents_lossy().into_owned()), + Insn::DefinedIvar { self_val, id, .. } => write!(f, "DefinedIvar {self_val}, :{}", id.contents_lossy()), + Insn::GetIvar { self_val, id, .. } => write!(f, "GetIvar {self_val}, :{}", id.contents_lossy()), + Insn::SetIvar { self_val, id, val, .. } => write!(f, "SetIvar {self_val}, :{}, {val}", id.contents_lossy()), + Insn::GetGlobal { id, .. } => write!(f, "GetGlobal :{}", id.contents_lossy()), + Insn::SetGlobal { id, val, .. } => write!(f, "SetGlobal :{}, {val}", id.contents_lossy()), Insn::GetLocal { level, ep_offset } => write!(f, "GetLocal l{level}, EP@{ep_offset}"), Insn::SetLocal { val, level, ep_offset } => write!(f, "SetLocal l{level}, EP@{ep_offset}, {val}"), Insn::ToArray { val, .. } => write!(f, "ToArray {val}"), From 7cece235ab9d3834b314f4395d45b8c97399470c Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Wed, 30 Jul 2025 09:57:37 -0400 Subject: [PATCH 12/12] Don't check the symbol's fstr at shutdown During Ruby's shutdown, we no longer need to check the fstr of the symbol because we don't use the fstr anymore for freeing the symbol. This can also fix the following ASAN error: ==2721247==ERROR: AddressSanitizer: use-after-poison on address 0x75fa90a627b8 at pc 0x64a7b06fb4bc bp 0x7ffdf95ba9b0 sp 0x7ffdf95ba9a8 READ of size 8 at 0x75fa90a627b8 thread T0 #0 0x64a7b06fb4bb in RB_BUILTIN_TYPE include/ruby/internal/value_type.h:191:30 #1 0x64a7b06fb4bb in rb_gc_shutdown_call_finalizer_p gc.c:357:18 #2 0x64a7b06fb4bb in rb_gc_impl_shutdown_call_finalizer gc/default/default.c:3045:21 #3 0x64a7b06fb4bb in rb_objspace_call_finalizer gc.c:1739:5 #4 0x64a7b06ca1b2 in rb_ec_finalize eval.c:165:5 #5 0x64a7b06ca1b2 in rb_ec_cleanup eval.c:256:5 #6 0x64a7b06c98a3 in ruby_cleanup eval.c:179:12 --- gc.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/gc.c b/gc.c index cdc8891d7c6016..499d0cda8d7f87 100644 --- a/gc.c +++ b/gc.c @@ -353,11 +353,6 @@ rb_gc_shutdown_call_finalizer_p(VALUE obj) return true; case T_SYMBOL: - if (RSYMBOL(obj)->fstr && - (BUILTIN_TYPE(RSYMBOL(obj)->fstr) == T_NONE || - BUILTIN_TYPE(RSYMBOL(obj)->fstr) == T_ZOMBIE)) { - RSYMBOL(obj)->fstr = 0; - } return true; case T_NONE: