From a8b49ab48703dfb8862ca28a443da0e764d692c6 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Mon, 1 Dec 2025 16:51:29 -0500 Subject: [PATCH 01/15] Test CC invalidation for singleton classes of objects (#15360) I made a recent change where all the tests passed but it turns out it was still wrong. We didn't have any tests for CC invalidation on singletons of objects that aren't classes or modules. --- test/ruby/test_class.rb | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/test/ruby/test_class.rb b/test/ruby/test_class.rb index f40817e7a1ef54..10b7655e9a5562 100644 --- a/test/ruby/test_class.rb +++ b/test/ruby/test_class.rb @@ -887,4 +887,34 @@ def test_method_table_assignment_just_after_class_init class C; end end; end + + def test_singleton_cc_invalidation + assert_separately([], "#{<<~"begin;"}\n#{<<~"end;"}") + begin; + class T + def hi + "hi" + end + end + + t = T.new + t.singleton_class + + def hello(t) + t.hi + end + + 5.times do + hello(t) # populate inline cache on `t.singleton_class`. + end + + class T + remove_method :hi # invalidate `t.singleton_class` ccs for `hi` + end + + assert_raise NoMethodError do + hello(t) + end + end; + end end From e68fcf111b48a25ccf465e7acef13e4e145bcfb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Berkan=20=C3=9Cnal?= <32255826+brkn@users.noreply.github.com> Date: Tue, 2 Dec 2025 01:02:03 +0300 Subject: [PATCH 02/15] [ruby/strscan] [DOC] Fix broken link to helper methods (https://github.com/ruby/strscan/pull/179) ### Helper methods link is broken at master branch To reproduce 1. go to [StringScanner docs](https://docs.ruby-lang.org/en/master/StringScanner.html) 2. Click to link at line > See examples at **helper_methods** 3. Resolved url gives 404: https://docs.ruby-lang.org/en/master/strscan/helper_methods_md.html ### Fix Currently link resolves as `href="doc/strscan/helper_methods_md.html"` Correct link should be resolved as `href="helper_methods_md.html"` https://github.com/ruby/strscan/commit/adb8678aa6 --- doc/strscan/strscan.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/strscan/strscan.md b/doc/strscan/strscan.md index c0bf5413623b07..c4ab2034624947 100644 --- a/doc/strscan/strscan.md +++ b/doc/strscan/strscan.md @@ -37,7 +37,7 @@ Some examples here assume that certain helper methods are defined: - `match_values_cleared?(scanner)`: Returns whether the scanner's [match values][9] are cleared. -See examples at [helper methods](doc/strscan/helper_methods.md). +See examples at [helper methods](helper_methods.md). ## The `StringScanner` \Object From 4161c78a9d0c84f5afe6eb40b2cea6266cd59987 Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Fri, 28 Nov 2025 15:01:37 -0800 Subject: [PATCH 03/15] Add remembered flag to heap dump This should be less common than than many of the other flags, so should not inflate the heap too much. This is desirable because reducing the number of remembered objects will improve minor GC speeds. --- gc/default/default.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gc/default/default.c b/gc/default/default.c index 2d862b0b212489..54b93dc8d07cf6 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -6197,7 +6197,7 @@ rb_gc_impl_writebarrier_remember(void *objspace_ptr, VALUE obj) struct rb_gc_object_metadata_names { // Must be ID only ID ID_wb_protected, ID_age, ID_old, ID_uncollectible, ID_marking, - ID_marked, ID_pinned, ID_object_id, ID_shareable; + ID_marked, ID_pinned, ID_remembered, ID_object_id, ID_shareable; }; #define RB_GC_OBJECT_METADATA_ENTRY_COUNT (sizeof(struct rb_gc_object_metadata_names) / sizeof(ID)) @@ -6219,6 +6219,7 @@ rb_gc_impl_object_metadata(void *objspace_ptr, VALUE obj) I(marking); I(marked); I(pinned); + I(remembered); I(object_id); I(shareable); #undef I @@ -6238,6 +6239,7 @@ rb_gc_impl_object_metadata(void *objspace_ptr, VALUE obj) if (RVALUE_MARKING(objspace, obj)) SET_ENTRY(marking, Qtrue); if (RVALUE_MARKED(objspace, obj)) SET_ENTRY(marked, Qtrue); if (RVALUE_PINNED(objspace, obj)) SET_ENTRY(pinned, Qtrue); + if (RVALUE_REMEMBERED(objspace, obj)) SET_ENTRY(remembered, Qtrue); if (rb_obj_id_p(obj)) SET_ENTRY(object_id, rb_obj_id(obj)); if (FL_TEST(obj, FL_SHAREABLE)) SET_ENTRY(shareable, Qtrue); From 8ce78821cd8713d149ff27d9fbed38799e983349 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 24 Nov 2025 22:18:16 -0500 Subject: [PATCH 04/15] ZJIT: Mark Integer#to_s as returning StringExact --- zjit/src/cruby_methods.rs | 1 + zjit/src/hir/opt_tests.rs | 50 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs index 56d4de280bff71..f7bf92b31aa6ba 100644 --- a/zjit/src/cruby_methods.rs +++ b/zjit/src/cruby_methods.rs @@ -242,6 +242,7 @@ pub fn init() -> Annotations { annotate!(rb_cInteger, "<", inline_integer_lt); annotate!(rb_cInteger, "<=", inline_integer_le); annotate!(rb_cInteger, "<<", inline_integer_lshift); + annotate!(rb_cInteger, "to_s", types::StringExact); annotate!(rb_cString, "to_s", inline_string_to_s, types::StringExact); let thread_singleton = unsafe { rb_singleton_class(rb_cThread) }; annotate!(thread_singleton, "current", inline_thread_current, types::BasicObject, no_gc, leaf); diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index e8c8d50dbe9256..4faaa402d87543 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -5900,6 +5900,56 @@ mod hir_opt_tests { "); } + #[test] + fn test_fixnum_to_s_returns_string() { + eval(r#" + def test(x) = x.to_s + test 5 + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + PatchPoint MethodRedefined(Integer@0x1000, to_s@0x1008, cme:0x1010) + v21:Fixnum = GuardType v9, Fixnum + v22:StringExact = CCallVariadic Integer#to_s@0x1038, v21 + CheckInterrupts + Return v22 + "); + } + + #[test] + fn test_bignum_to_s_returns_string() { + eval(r#" + def test(x) = x.to_s + test (2**65) + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + PatchPoint MethodRedefined(Integer@0x1000, to_s@0x1008, cme:0x1010) + v21:Integer = GuardType v9, Integer + v22:StringExact = CCallVariadic Integer#to_s@0x1038, v21 + CheckInterrupts + Return v22 + "); + } + #[test] fn test_array_aref_fixnum_literal() { eval(" From fd7d17abde4943edaefac7840f7258c8283d1d8e Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 24 Nov 2025 22:18:43 -0500 Subject: [PATCH 05/15] ZJIT: Don't use GuardTypeNot Use actual receiver type. This gives us better method lookup. --- zjit/src/hir.rs | 4 ++-- zjit/src/hir/opt_tests.rs | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index e5c1f7a54293da..bab6d1c841e136 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -2677,8 +2677,8 @@ impl Function { self.insn_types[guard.0] = self.infer_type(guard); self.make_equal_to(insn_id, guard); } else { - self.push_insn(block, Insn::GuardTypeNot { val, guard_type: types::String, state}); - let send_to_s = self.push_insn(block, Insn::SendWithoutBlock { recv: val, cd, args: vec![], state, reason: ObjToStringNotString }); + let recv = self.push_insn(block, Insn::GuardType { val, guard_type: Type::from_profiled_type(recv_type), state}); + let send_to_s = self.push_insn(block, Insn::SendWithoutBlock { recv, cd, args: vec![], state, reason: ObjToStringNotString }); self.make_equal_to(insn_id, send_to_s); } } diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 4faaa402d87543..029b3735f565ee 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -4137,12 +4137,11 @@ mod hir_opt_tests { Jump bb2(v5, v6) bb2(v8:BasicObject, v9:BasicObject): v13:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) - v26:BasicObject = GuardTypeNot v9, String + v26:ArrayExact = GuardType v9, ArrayExact PatchPoint MethodRedefined(Array@0x1008, to_s@0x1010, cme:0x1018) PatchPoint NoSingletonClass(Array@0x1008) - v31:ArrayExact = GuardType v9, ArrayExact - v32:BasicObject = CCallWithFrame v31, :Array#to_s@0x1040 - v19:String = AnyToString v9, str: v32 + v31:BasicObject = CCallWithFrame v26, :Array#to_s@0x1040 + v19:String = AnyToString v9, str: v31 v21:StringExact = StringConcat v13, v19 CheckInterrupts Return v21 @@ -5919,7 +5918,7 @@ mod hir_opt_tests { bb2(v8:BasicObject, v9:BasicObject): PatchPoint MethodRedefined(Integer@0x1000, to_s@0x1008, cme:0x1010) v21:Fixnum = GuardType v9, Fixnum - v22:StringExact = CCallVariadic Integer#to_s@0x1038, v21 + v22:StringExact = CCallVariadic v21, :Integer#to_s@0x1038 CheckInterrupts Return v22 "); @@ -5944,7 +5943,7 @@ mod hir_opt_tests { bb2(v8:BasicObject, v9:BasicObject): PatchPoint MethodRedefined(Integer@0x1000, to_s@0x1008, cme:0x1010) v21:Integer = GuardType v9, Integer - v22:StringExact = CCallVariadic Integer#to_s@0x1038, v21 + v22:StringExact = CCallVariadic v21, :Integer#to_s@0x1038 CheckInterrupts Return v22 "); From 91432a6bad7c52859e2920e780d578adcf4ac3f3 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 24 Nov 2025 22:19:15 -0500 Subject: [PATCH 06/15] ZJIT: Add late pass to fold AnyToString This otherwise would miss annotations of C methods. --- zjit/src/hir.rs | 5 +++++ zjit/src/hir/opt_tests.rs | 27 +++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index bab6d1c841e136..4bbbbd150e444b 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -3399,6 +3399,11 @@ impl Function { // Don't bother re-inferring the type of val; we already know it. continue; } + Insn::AnyToString { str, .. } if self.is_a(str, types::String) => { + self.make_equal_to(insn_id, str); + // Don't bother re-inferring the type of str; we already know it. + continue; + } Insn::FixnumAdd { left, right, .. } => { self.fold_fixnum_bop(insn_id, left, right, |l, r| match (l, r) { (Some(l), Some(r)) => l.checked_add(r), diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 029b3735f565ee..90675965c2f67a 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -5949,6 +5949,33 @@ mod hir_opt_tests { "); } + #[test] + fn test_fold_any_to_string_with_known_string_exact() { + eval(r##" + def test(x) = "#{x}" + test 123 + "##); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v13:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) + v26:Fixnum = GuardType v9, Fixnum + PatchPoint MethodRedefined(Integer@0x1008, to_s@0x1010, cme:0x1018) + v30:StringExact = CCallVariadic Integer#to_s@0x1040, v26 + v21:StringExact = StringConcat v13, v30 + CheckInterrupts + Return v21 + "); + } + #[test] fn test_array_aref_fixnum_literal() { eval(" From 8aed31103874524395912fb7ffaee88b5e59a9c3 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 24 Nov 2025 22:24:45 -0500 Subject: [PATCH 07/15] ZJIT: Specialize String#<< with Fixnum Append a codepoint. --- jit.c | 2 ++ string.c | 4 ++-- yjit/bindgen/src/main.rs | 2 +- yjit/src/codegen.rs | 2 +- yjit/src/cruby.rs | 1 - yjit/src/cruby_bindings.inc.rs | 1 + zjit/bindgen/src/main.rs | 1 + zjit/src/codegen.rs | 6 ++++++ zjit/src/cruby.rs | 1 - zjit/src/cruby_bindings.inc.rs | 1 + zjit/src/cruby_methods.rs | 11 +++++++--- zjit/src/hir.rs | 14 ++++++++++++- zjit/src/hir/opt_tests.rs | 37 +++++++++++++++++++++++++++++----- 13 files changed, 68 insertions(+), 15 deletions(-) diff --git a/jit.c b/jit.c index 43c932e5a00ffa..dd28bd6ad02aef 100644 --- a/jit.c +++ b/jit.c @@ -765,3 +765,5 @@ rb_yarv_str_eql_internal(VALUE str1, VALUE str2) // We wrap this since it's static inline return rb_str_eql_internal(str1, str2); } + +void rb_jit_str_concat_codepoint(VALUE str, VALUE codepoint); diff --git a/string.c b/string.c index 2dec3a11e61246..c794b36748e6e6 100644 --- a/string.c +++ b/string.c @@ -12580,9 +12580,9 @@ rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc) return rb_enc_interned_str(ptr, strlen(ptr), enc); } -#if USE_YJIT +#if USE_YJIT || USE_ZJIT void -rb_yjit_str_concat_codepoint(VALUE str, VALUE codepoint) +rb_jit_str_concat_codepoint(VALUE str, VALUE codepoint) { if (RB_LIKELY(ENCODING_GET_INLINED(str) == rb_ascii8bit_encindex())) { ssize_t code = RB_NUM2SSIZE(codepoint); diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index 67a461cd16d95c..9c28177a6054b2 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -273,7 +273,7 @@ fn main() { .allowlist_function("rb_yjit_sendish_sp_pops") .allowlist_function("rb_yjit_invokeblock_sp_pops") .allowlist_function("rb_yjit_set_exception_return") - .allowlist_function("rb_yjit_str_concat_codepoint") + .allowlist_function("rb_jit_str_concat_codepoint") .allowlist_type("rstring_offsets") .allowlist_function("rb_assert_holding_vm_lock") .allowlist_function("rb_jit_shape_too_complex_p") diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index a426ad07737496..50762c64d3eceb 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -6244,7 +6244,7 @@ fn jit_rb_str_concat_codepoint( guard_object_is_fixnum(jit, asm, codepoint, StackOpnd(0)); - asm.ccall(rb_yjit_str_concat_codepoint as *const u8, vec![recv, codepoint]); + asm.ccall(rb_jit_str_concat_codepoint as *const u8, vec![recv, codepoint]); // The receiver is the return value, so we only need to pop the codepoint argument off the stack. // We can reuse the receiver slot in the stack as the return value. diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index cfaf48c3f0b68e..5562f73be26dab 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -123,7 +123,6 @@ extern "C" { pub fn rb_float_new(d: f64) -> VALUE; pub fn rb_hash_empty_p(hash: VALUE) -> VALUE; - pub fn rb_yjit_str_concat_codepoint(str: VALUE, codepoint: VALUE); pub fn rb_str_setbyte(str: VALUE, index: VALUE, value: VALUE) -> VALUE; pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE; pub fn rb_vm_concat_array(ary1: VALUE, ary2st: VALUE) -> VALUE; diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 66d4e5111d7bbd..04ca3494acf5a7 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -1277,4 +1277,5 @@ extern "C" { ); pub fn rb_jit_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE; pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE; + pub fn rb_jit_str_concat_codepoint(str_: VALUE, codepoint: VALUE); } diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index fe082504b82302..6659049242983b 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -278,6 +278,7 @@ fn main() { .allowlist_function("rb_jit_get_page_size") .allowlist_function("rb_jit_array_len") .allowlist_function("rb_jit_iseq_builtin_attrs") + .allowlist_function("rb_jit_str_concat_codepoint") .allowlist_function("rb_zjit_iseq_inspect") .allowlist_function("rb_zjit_iseq_insn_set") .allowlist_function("rb_zjit_local_id") diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index f77b8cc4bf2ca6..6fc85664693fa3 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -368,6 +368,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::StringGetbyteFixnum { string, index } => gen_string_getbyte_fixnum(asm, opnd!(string), opnd!(index)), Insn::StringSetbyteFixnum { string, index, value } => gen_string_setbyte_fixnum(asm, opnd!(string), opnd!(index), opnd!(value)), Insn::StringAppend { recv, other, state } => gen_string_append(jit, asm, opnd!(recv), opnd!(other), &function.frame_state(*state)), + Insn::StringAppendCodepoint { recv, other, state } => gen_string_append_codepoint(jit, asm, opnd!(recv), opnd!(other), &function.frame_state(*state)), Insn::StringIntern { val, state } => gen_intern(asm, opnd!(val), &function.frame_state(*state)), Insn::ToRegexp { opt, values, state } => gen_toregexp(jit, asm, *opt, opnds!(values), &function.frame_state(*state)), Insn::Param => unreachable!("block.insns should not have Insn::Param"), @@ -2495,6 +2496,11 @@ fn gen_string_append(jit: &mut JITState, asm: &mut Assembler, string: Opnd, val: asm_ccall!(asm, rb_str_buf_append, string, val) } +fn gen_string_append_codepoint(jit: &mut JITState, asm: &mut Assembler, string: Opnd, val: Opnd, state: &FrameState) -> Opnd { + gen_prepare_non_leaf_call(jit, asm, state); + asm_ccall!(asm, rb_jit_str_concat_codepoint, string, val) +} + /// Generate a JIT entry that just increments exit_compilation_failure and exits fn gen_compile_error_counter(cb: &mut CodeBlock, compile_error: &CompileError) -> Result { let mut asm = Assembler::new(); diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 9070cb38be5f42..72c44ccc6e412f 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -130,7 +130,6 @@ unsafe extern "C" { pub fn rb_float_new(d: f64) -> VALUE; pub fn rb_hash_empty_p(hash: VALUE) -> VALUE; - pub fn rb_yjit_str_concat_codepoint(str: VALUE, codepoint: VALUE); pub fn rb_str_setbyte(str: VALUE, index: VALUE, value: VALUE) -> VALUE; pub fn rb_str_getbyte(str: VALUE, index: VALUE) -> VALUE; pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE; diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index aaecfa2f89769a..2256b7e32d3979 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -2224,4 +2224,5 @@ unsafe extern "C" { end: *mut ::std::os::raw::c_void, ); pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE; + pub fn rb_jit_str_concat_codepoint(str_: VALUE, codepoint: VALUE); } diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs index f7bf92b31aa6ba..f86d383876fbfc 100644 --- a/zjit/src/cruby_methods.rs +++ b/zjit/src/cruby_methods.rs @@ -425,10 +425,15 @@ fn inline_string_append(fun: &mut hir::Function, block: hir::BlockId, recv: hir: let recv = fun.coerce_to(block, recv, types::StringExact, state); let other = fun.coerce_to(block, other, types::String, state); let _ = fun.push_insn(block, hir::Insn::StringAppend { recv, other, state }); - Some(recv) - } else { - None + return Some(recv); } + if fun.likely_a(recv, types::StringExact, state) && fun.likely_a(other, types::Fixnum, state) { + let recv = fun.coerce_to(block, recv, types::StringExact, state); + let other = fun.coerce_to(block, other, types::Fixnum, state); + let _ = fun.push_insn(block, hir::Insn::StringAppendCodepoint { recv, other, state }); + return Some(recv); + } + None } fn inline_string_eq(fun: &mut hir::Function, block: hir::BlockId, recv: hir::InsnId, args: &[hir::InsnId], state: hir::InsnId) -> Option { diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 4bbbbd150e444b..6604c52a8276e6 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -652,6 +652,7 @@ pub enum Insn { StringGetbyteFixnum { string: InsnId, index: InsnId }, StringSetbyteFixnum { string: InsnId, index: InsnId, value: InsnId }, StringAppend { recv: InsnId, other: InsnId, state: InsnId }, + StringAppendCodepoint { recv: InsnId, other: InsnId, state: InsnId }, /// Combine count stack values into a regexp ToRegexp { opt: usize, values: Vec, state: InsnId }, @@ -1124,6 +1125,9 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::StringAppend { recv, other, .. } => { write!(f, "StringAppend {recv}, {other}") } + Insn::StringAppendCodepoint { recv, other, .. } => { + write!(f, "StringAppendCodepoint {recv}, {other}") + } Insn::ToRegexp { values, opt, .. } => { write!(f, "ToRegexp")?; let mut prefix = " "; @@ -1814,6 +1818,7 @@ impl Function { &StringGetbyteFixnum { string, index } => StringGetbyteFixnum { string: find!(string), index: find!(index) }, &StringSetbyteFixnum { string, index, value } => StringSetbyteFixnum { string: find!(string), index: find!(index), value: find!(value) }, &StringAppend { recv, other, state } => StringAppend { recv: find!(recv), other: find!(other), state: find!(state) }, + &StringAppendCodepoint { recv, other, state } => StringAppendCodepoint { recv: find!(recv), other: find!(other), state: find!(state) }, &ToRegexp { opt, ref values, state } => ToRegexp { opt, values: find_vec!(values), state }, &Test { val } => Test { val: find!(val) }, &IsNil { val } => IsNil { val: find!(val) }, @@ -2032,6 +2037,7 @@ impl Function { Insn::StringGetbyteFixnum { .. } => types::Fixnum.union(types::NilClass), Insn::StringSetbyteFixnum { .. } => types::Fixnum, Insn::StringAppend { .. } => types::StringExact, + Insn::StringAppendCodepoint { .. } => types::StringExact, Insn::ToRegexp { .. } => types::RegexpExact, Insn::NewArray { .. } => types::ArrayExact, Insn::ArrayDup { .. } => types::ArrayExact, @@ -3564,7 +3570,9 @@ impl Function { worklist.push_back(index); worklist.push_back(value); } - &Insn::StringAppend { recv, other, state } => { + &Insn::StringAppend { recv, other, state } + | &Insn::StringAppendCodepoint { recv, other, state } + => { worklist.push_back(recv); worklist.push_back(other); worklist.push_back(state); @@ -4328,6 +4336,10 @@ impl Function { self.assert_subtype(insn_id, recv, types::StringExact)?; self.assert_subtype(insn_id, other, types::String) } + Insn::StringAppendCodepoint { recv, other, .. } => { + self.assert_subtype(insn_id, recv, types::StringExact)?; + self.assert_subtype(insn_id, other, types::Fixnum) + } // Instructions with Array operands Insn::ArrayDup { val, .. } => self.assert_subtype(insn_id, val, types::ArrayExact), Insn::ArrayExtend { left, right, .. } => { diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 90675965c2f67a..60f5814973c482 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -5969,7 +5969,7 @@ mod hir_opt_tests { v13:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) v26:Fixnum = GuardType v9, Fixnum PatchPoint MethodRedefined(Integer@0x1008, to_s@0x1010, cme:0x1018) - v30:StringExact = CCallVariadic Integer#to_s@0x1040, v26 + v30:StringExact = CCallVariadic v26, :Integer#to_s@0x1040 v21:StringExact = StringConcat v13, v30 CheckInterrupts Return v21 @@ -6854,9 +6854,8 @@ mod hir_opt_tests { "); } - // TODO: This should be inlined just as in the interpreter #[test] - fn test_optimize_string_append_non_string() { + fn test_optimize_string_append_codepoint() { eval(r#" def test(x, y) = x << y test("iron", 4) @@ -6876,9 +6875,11 @@ mod hir_opt_tests { PatchPoint MethodRedefined(String@0x1000, <<@0x1008, cme:0x1010) PatchPoint NoSingletonClass(String@0x1000) v27:StringExact = GuardType v11, StringExact - v28:BasicObject = CCallWithFrame v27, :String#<<@0x1038, v12 + v28:Fixnum = GuardType v12, Fixnum + v29:StringExact = StringAppendCodepoint v27, v28 + IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - Return v28 + Return v27 "); } @@ -6942,6 +6943,32 @@ mod hir_opt_tests { "); } + #[test] + fn test_dont_optimize_string_append_non_string() { + eval(r#" + def test = "iron" << :a + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v10:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) + v11:StringExact = StringCopy v10 + v13:StaticSymbol[:a] = Const Value(VALUE(0x1008)) + PatchPoint MethodRedefined(String@0x1010, <<@0x1018, cme:0x1020) + PatchPoint NoSingletonClass(String@0x1010) + v24:BasicObject = CCallWithFrame v11, :String#<<@0x1048, v13 + CheckInterrupts + Return v24 + "); + } + #[test] fn test_dont_optimize_when_passing_too_many_args() { eval(r#" From a25196395e7502e4d6faad0856c697690d8a202e Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 24 Nov 2025 22:44:15 -0500 Subject: [PATCH 08/15] Add BOP_GTGT This will help JITs (and maybe later the interpreter) optimize Integer#>>. --- internal/basic_operators.h | 1 + vm.c | 1 + yjit/src/cruby_bindings.inc.rs | 33 +++++++++++++++++---------------- zjit/src/cruby_bindings.inc.rs | 33 +++++++++++++++++---------------- 4 files changed, 36 insertions(+), 32 deletions(-) diff --git a/internal/basic_operators.h b/internal/basic_operators.h index 5dc8d7fe8d69fb..493d2fa7f733d7 100644 --- a/internal/basic_operators.h +++ b/internal/basic_operators.h @@ -24,6 +24,7 @@ enum ruby_basic_operators { BOP_SUCC, BOP_GT, BOP_GE, + BOP_GTGT, BOP_NOT, BOP_NEQ, BOP_MATCH, diff --git a/vm.c b/vm.c index 1a8a6d059b569b..b15ee0ba236b57 100644 --- a/vm.c +++ b/vm.c @@ -2444,6 +2444,7 @@ vm_init_redefined_flag(void) OP(GT, GT), (C(Integer), C(Float)); OP(GE, GE), (C(Integer), C(Float)); OP(LTLT, LTLT), (C(String), C(Array)); + OP(GTGT, GTGT), (C(Integer)); OP(AREF, AREF), (C(Array), C(Hash), C(Integer)); OP(ASET, ASET), (C(Array), C(Hash)); OP(Length, LENGTH), (C(Array), C(String), C(Hash)); diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 04ca3494acf5a7..6efef9c55c39c4 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -330,22 +330,23 @@ pub const BOP_NIL_P: ruby_basic_operators = 15; pub const BOP_SUCC: ruby_basic_operators = 16; pub const BOP_GT: ruby_basic_operators = 17; pub const BOP_GE: ruby_basic_operators = 18; -pub const BOP_NOT: ruby_basic_operators = 19; -pub const BOP_NEQ: ruby_basic_operators = 20; -pub const BOP_MATCH: ruby_basic_operators = 21; -pub const BOP_FREEZE: ruby_basic_operators = 22; -pub const BOP_UMINUS: ruby_basic_operators = 23; -pub const BOP_MAX: ruby_basic_operators = 24; -pub const BOP_MIN: ruby_basic_operators = 25; -pub const BOP_HASH: ruby_basic_operators = 26; -pub const BOP_CALL: ruby_basic_operators = 27; -pub const BOP_AND: ruby_basic_operators = 28; -pub const BOP_OR: ruby_basic_operators = 29; -pub const BOP_CMP: ruby_basic_operators = 30; -pub const BOP_DEFAULT: ruby_basic_operators = 31; -pub const BOP_PACK: ruby_basic_operators = 32; -pub const BOP_INCLUDE_P: ruby_basic_operators = 33; -pub const BOP_LAST_: ruby_basic_operators = 34; +pub const BOP_GTGT: ruby_basic_operators = 19; +pub const BOP_NOT: ruby_basic_operators = 20; +pub const BOP_NEQ: ruby_basic_operators = 21; +pub const BOP_MATCH: ruby_basic_operators = 22; +pub const BOP_FREEZE: ruby_basic_operators = 23; +pub const BOP_UMINUS: ruby_basic_operators = 24; +pub const BOP_MAX: ruby_basic_operators = 25; +pub const BOP_MIN: ruby_basic_operators = 26; +pub const BOP_HASH: ruby_basic_operators = 27; +pub const BOP_CALL: ruby_basic_operators = 28; +pub const BOP_AND: ruby_basic_operators = 29; +pub const BOP_OR: ruby_basic_operators = 30; +pub const BOP_CMP: ruby_basic_operators = 31; +pub const BOP_DEFAULT: ruby_basic_operators = 32; +pub const BOP_PACK: ruby_basic_operators = 33; +pub const BOP_INCLUDE_P: ruby_basic_operators = 34; +pub const BOP_LAST_: ruby_basic_operators = 35; pub type ruby_basic_operators = u32; pub type rb_serial_t = ::std::os::raw::c_ulonglong; pub const imemo_env: imemo_type = 0; diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 2256b7e32d3979..fb329a07166914 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -392,22 +392,23 @@ pub const BOP_NIL_P: ruby_basic_operators = 15; pub const BOP_SUCC: ruby_basic_operators = 16; pub const BOP_GT: ruby_basic_operators = 17; pub const BOP_GE: ruby_basic_operators = 18; -pub const BOP_NOT: ruby_basic_operators = 19; -pub const BOP_NEQ: ruby_basic_operators = 20; -pub const BOP_MATCH: ruby_basic_operators = 21; -pub const BOP_FREEZE: ruby_basic_operators = 22; -pub const BOP_UMINUS: ruby_basic_operators = 23; -pub const BOP_MAX: ruby_basic_operators = 24; -pub const BOP_MIN: ruby_basic_operators = 25; -pub const BOP_HASH: ruby_basic_operators = 26; -pub const BOP_CALL: ruby_basic_operators = 27; -pub const BOP_AND: ruby_basic_operators = 28; -pub const BOP_OR: ruby_basic_operators = 29; -pub const BOP_CMP: ruby_basic_operators = 30; -pub const BOP_DEFAULT: ruby_basic_operators = 31; -pub const BOP_PACK: ruby_basic_operators = 32; -pub const BOP_INCLUDE_P: ruby_basic_operators = 33; -pub const BOP_LAST_: ruby_basic_operators = 34; +pub const BOP_GTGT: ruby_basic_operators = 19; +pub const BOP_NOT: ruby_basic_operators = 20; +pub const BOP_NEQ: ruby_basic_operators = 21; +pub const BOP_MATCH: ruby_basic_operators = 22; +pub const BOP_FREEZE: ruby_basic_operators = 23; +pub const BOP_UMINUS: ruby_basic_operators = 24; +pub const BOP_MAX: ruby_basic_operators = 25; +pub const BOP_MIN: ruby_basic_operators = 26; +pub const BOP_HASH: ruby_basic_operators = 27; +pub const BOP_CALL: ruby_basic_operators = 28; +pub const BOP_AND: ruby_basic_operators = 29; +pub const BOP_OR: ruby_basic_operators = 30; +pub const BOP_CMP: ruby_basic_operators = 31; +pub const BOP_DEFAULT: ruby_basic_operators = 32; +pub const BOP_PACK: ruby_basic_operators = 33; +pub const BOP_INCLUDE_P: ruby_basic_operators = 34; +pub const BOP_LAST_: ruby_basic_operators = 35; pub type ruby_basic_operators = u32; pub type rb_serial_t = ::std::os::raw::c_ulonglong; #[repr(C)] From 6db83a00a4272eb1089d67da83e1cd9d4e10227b Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 24 Nov 2025 22:44:53 -0500 Subject: [PATCH 09/15] ZJIT: Specialize Integer#>> Same as Integer#>>. Also add more strict type checks for both Integer#>> and Integer#<<. --- zjit/src/codegen.rs | 15 ++++++ zjit/src/cruby_methods.rs | 11 ++++ zjit/src/hir.rs | 22 +++++++- zjit/src/hir/opt_tests.rs | 105 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 152 insertions(+), 1 deletion(-) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 6fc85664693fa3..df9a9299cf2d80 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -409,6 +409,12 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio let shift_amount = function.type_of(right).fixnum_value().unwrap() as u64; gen_fixnum_lshift(jit, asm, opnd!(left), shift_amount, &function.frame_state(state)) } + &Insn::FixnumRShift { left, right } => { + // We only create FixnumRShift when we know the shift amount statically and it's in [0, + // 63]. + let shift_amount = function.type_of(right).fixnum_value().unwrap() as u64; + gen_fixnum_rshift(asm, opnd!(left), shift_amount) + } &Insn::FixnumMod { left, right, state } => gen_fixnum_mod(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)), Insn::IsNil { val } => gen_isnil(asm, opnd!(val)), &Insn::IsMethodCfunc { val, cd, cfunc, state: _ } => gen_is_method_cfunc(jit, asm, opnd!(val), cd, cfunc), @@ -1754,6 +1760,15 @@ fn gen_fixnum_lshift(jit: &mut JITState, asm: &mut Assembler, left: lir::Opnd, s out_val } +/// Compile Fixnum >> Fixnum +fn gen_fixnum_rshift(asm: &mut Assembler, left: lir::Opnd, shift_amount: u64) -> lir::Opnd { + // Shift amount is known statically to be in the range [0, 63] + assert!(shift_amount < 64); + let result = asm.rshift(left, shift_amount.into()); + // Re-tag the output value + asm.or(result, 1.into()) +} + fn gen_fixnum_mod(jit: &mut JITState, asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd, state: &FrameState) -> lir::Opnd { // Check for left % 0, which raises ZeroDivisionError asm.cmp(right, Opnd::from(VALUE::fixnum_from_usize(0))); diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs index f86d383876fbfc..f84882101cc271 100644 --- a/zjit/src/cruby_methods.rs +++ b/zjit/src/cruby_methods.rs @@ -242,6 +242,7 @@ pub fn init() -> Annotations { annotate!(rb_cInteger, "<", inline_integer_lt); annotate!(rb_cInteger, "<=", inline_integer_le); annotate!(rb_cInteger, "<<", inline_integer_lshift); + annotate!(rb_cInteger, ">>", inline_integer_rshift); annotate!(rb_cInteger, "to_s", types::StringExact); annotate!(rb_cString, "to_s", inline_string_to_s, types::StringExact); let thread_singleton = unsafe { rb_singleton_class(rb_cThread) }; @@ -575,6 +576,16 @@ fn inline_integer_lshift(fun: &mut hir::Function, block: hir::BlockId, recv: hir try_inline_fixnum_op(fun, block, &|left, right| hir::Insn::FixnumLShift { left, right, state }, BOP_LTLT, recv, other, state) } +fn inline_integer_rshift(fun: &mut hir::Function, block: hir::BlockId, recv: hir::InsnId, args: &[hir::InsnId], state: hir::InsnId) -> Option { + let &[other] = args else { return None; }; + // Only convert to FixnumLShift if we know the shift amount is known at compile-time and could + // plausibly create a fixnum. + let Some(other_value) = fun.type_of(other).fixnum_value() else { return None; }; + // TODO(max): If other_value > 63, rewrite to constant zero. + if other_value < 0 || other_value > 63 { return None; } + try_inline_fixnum_op(fun, block, &|left, right| hir::Insn::FixnumRShift { left, right }, BOP_GTGT, recv, other, state) +} + fn inline_basic_object_eq(fun: &mut hir::Function, block: hir::BlockId, recv: hir::InsnId, args: &[hir::InsnId], _state: hir::InsnId) -> Option { let &[other] = args else { return None; }; let c_result = fun.push_insn(block, hir::Insn::IsBitEqual { left: recv, right: other }); diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 6604c52a8276e6..a69628b8690d67 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -891,6 +891,7 @@ pub enum Insn { FixnumOr { left: InsnId, right: InsnId }, FixnumXor { left: InsnId, right: InsnId }, FixnumLShift { left: InsnId, right: InsnId, state: InsnId }, + FixnumRShift { left: InsnId, right: InsnId }, // Distinct from `SendWithoutBlock` with `mid:to_s` because does not have a patch point for String to_s being redefined ObjToString { val: InsnId, cd: *const rb_call_data, state: InsnId }, @@ -989,6 +990,7 @@ impl Insn { Insn::FixnumOr { .. } => false, Insn::FixnumXor { .. } => false, Insn::FixnumLShift { .. } => false, + Insn::FixnumRShift { .. } => false, Insn::GetLocal { .. } => false, Insn::IsNil { .. } => false, Insn::LoadPC => false, @@ -1233,6 +1235,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::FixnumOr { left, right, .. } => { write!(f, "FixnumOr {left}, {right}") }, Insn::FixnumXor { left, right, .. } => { write!(f, "FixnumXor {left}, {right}") }, Insn::FixnumLShift { left, right, .. } => { write!(f, "FixnumLShift {left}, {right}") }, + Insn::FixnumRShift { left, right, .. } => { write!(f, "FixnumRShift {left}, {right}") }, Insn::GuardType { val, guard_type, .. } => { write!(f, "GuardType {val}, {}", guard_type.print(self.ptr_map)) }, Insn::GuardTypeNot { val, guard_type, .. } => { write!(f, "GuardTypeNot {val}, {}", guard_type.print(self.ptr_map)) }, Insn::GuardBitEquals { val, expected, .. } => { write!(f, "GuardBitEquals {val}, {}", expected.print(self.ptr_map)) }, @@ -1854,6 +1857,7 @@ impl Function { &FixnumOr { left, right } => FixnumOr { left: find!(left), right: find!(right) }, &FixnumXor { left, right } => FixnumXor { left: find!(left), right: find!(right) }, &FixnumLShift { left, right, state } => FixnumLShift { left: find!(left), right: find!(right), state }, + &FixnumRShift { left, right } => FixnumRShift { left: find!(left), right: find!(right) }, &ObjToString { val, cd, state } => ObjToString { val: find!(val), cd, @@ -2076,6 +2080,7 @@ impl Function { Insn::FixnumOr { .. } => types::Fixnum, Insn::FixnumXor { .. } => types::Fixnum, Insn::FixnumLShift { .. } => types::Fixnum, + Insn::FixnumRShift { .. } => types::Fixnum, Insn::PutSpecialObject { .. } => types::BasicObject, Insn::SendWithoutBlock { .. } => types::BasicObject, Insn::SendWithoutBlockDirect { .. } => types::BasicObject, @@ -3639,6 +3644,7 @@ impl Function { | &Insn::FixnumAnd { left, right } | &Insn::FixnumOr { left, right } | &Insn::FixnumXor { left, right } + | &Insn::FixnumRShift { left, right } | &Insn::IsBitEqual { left, right } | &Insn::IsBitNotEqual { left, right } => { @@ -4403,12 +4409,26 @@ impl Function { | Insn::FixnumAnd { left, right } | Insn::FixnumOr { left, right } | Insn::FixnumXor { left, right } - | Insn::FixnumLShift { left, right, .. } | Insn::NewRangeFixnum { low: left, high: right, .. } => { self.assert_subtype(insn_id, left, types::Fixnum)?; self.assert_subtype(insn_id, right, types::Fixnum) } + Insn::FixnumLShift { left, right, .. } + | Insn::FixnumRShift { left, right, .. } => { + self.assert_subtype(insn_id, left, types::Fixnum)?; + self.assert_subtype(insn_id, right, types::Fixnum)?; + let Some(obj) = self.type_of(right).fixnum_value() else { + return Err(ValidationError::MismatchedOperandType(insn_id, right, "".into(), "".into())); + }; + if obj < 0 { + return Err(ValidationError::MismatchedOperandType(insn_id, right, "".into(), format!("{obj}"))); + } + if obj > 63 { + return Err(ValidationError::MismatchedOperandType(insn_id, right, "".into(), format!("{obj}"))); + } + Ok(()) + } Insn::GuardBitEquals { val, expected, .. } => { match expected { Const::Value(_) => self.assert_subtype(insn_id, val, types::RubyValue), diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 60f5814973c482..610986d62fe07f 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -6825,6 +6825,111 @@ mod hir_opt_tests { "); } + #[test] + fn test_inline_integer_gtgt_with_known_fixnum() { + eval(" + def test(x) = x >> 5 + test(4) + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:Fixnum[5] = Const Value(5) + PatchPoint MethodRedefined(Integer@0x1000, >>@0x1008, cme:0x1010) + v23:Fixnum = GuardType v9, Fixnum + v24:Fixnum = FixnumRShift v23, v14 + IncrCounter inline_cfunc_optimized_send_count + CheckInterrupts + Return v24 + "); + } + + #[test] + fn test_dont_inline_integer_gtgt_with_negative() { + eval(" + def test(x) = x >> -5 + test(4) + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:Fixnum[-5] = Const Value(-5) + PatchPoint MethodRedefined(Integer@0x1000, >>@0x1008, cme:0x1010) + v23:Fixnum = GuardType v9, Fixnum + v24:BasicObject = CCallWithFrame v23, :Integer#>>@0x1038, v14 + CheckInterrupts + Return v24 + "); + } + + #[test] + fn test_dont_inline_integer_gtgt_with_out_of_range() { + eval(" + def test(x) = x >> 64 + test(4) + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:Fixnum[64] = Const Value(64) + PatchPoint MethodRedefined(Integer@0x1000, >>@0x1008, cme:0x1010) + v23:Fixnum = GuardType v9, Fixnum + v24:BasicObject = CCallWithFrame v23, :Integer#>>@0x1038, v14 + CheckInterrupts + Return v24 + "); + } + + #[test] + fn test_dont_inline_integer_gtgt_with_unknown_fixnum() { + eval(" + def test(x, y) = x >> y + test(4, 5) + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@5 + v3:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2, v3) + bb1(v6:BasicObject, v7:BasicObject, v8:BasicObject): + EntryPoint JIT(0) + Jump bb2(v6, v7, v8) + bb2(v10:BasicObject, v11:BasicObject, v12:BasicObject): + PatchPoint MethodRedefined(Integer@0x1000, >>@0x1008, cme:0x1010) + v25:Fixnum = GuardType v11, Fixnum + v26:BasicObject = CCallWithFrame v25, :Integer#>>@0x1038, v12 + CheckInterrupts + Return v26 + "); + } + #[test] fn test_optimize_string_append() { eval(r#" From 74e9f717200106be553e954e8ac354b54acf1c60 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 24 Nov 2025 23:18:13 -0500 Subject: [PATCH 10/15] ZJIT: Mark String#ascii_only? as leaf --- zjit/src/cruby_methods.rs | 4 ++++ zjit/src/hir/opt_tests.rs | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs index f84882101cc271..155979d105ae16 100644 --- a/zjit/src/cruby_methods.rs +++ b/zjit/src/cruby_methods.rs @@ -206,6 +206,10 @@ pub fn init() -> Annotations { annotate!(rb_cString, "empty?", inline_string_empty_p, types::BoolExact, no_gc, leaf, elidable); annotate!(rb_cString, "<<", inline_string_append); annotate!(rb_cString, "==", inline_string_eq); + // Not elidable; has a side effect of setting the encoding if ENC_CODERANGE_UNKNOWN. + // TOOD(max): Turn this into a load/compare. Will need to side-exit or do the full call if + // ENC_CODERANGE_UNKNOWN. + annotate!(rb_cString, "ascii_only?", types::BoolExact, no_gc, leaf); annotate!(rb_cModule, "name", types::StringExact.union(types::NilClass), no_gc, leaf, elidable); annotate!(rb_cModule, "===", inline_module_eqq, types::BoolExact, no_gc, leaf); annotate!(rb_cArray, "length", types::Fixnum, no_gc, leaf, elidable); diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 610986d62fe07f..60135b6ac9e969 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -7100,6 +7100,33 @@ mod hir_opt_tests { "); } + #[test] + fn test_optimize_string_ascii_only_p() { + eval(r#" + def test(x) = x.ascii_only? + test("iron") + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + PatchPoint MethodRedefined(String@0x1000, ascii_only?@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(String@0x1000) + v22:StringExact = GuardType v9, StringExact + IncrCounter inline_cfunc_optimized_send_count + v24:BoolExact = CCall v22, :String#ascii_only?@0x1038 + CheckInterrupts + Return v24 + "); + } + #[test] fn test_dont_optimize_when_passing_too_few_args() { eval(r#" From 985a4d977f1c1a5051c88b0c2f8af8913bd93008 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 24 Nov 2025 23:30:05 -0500 Subject: [PATCH 11/15] ZJIT: Open-code String#getbyte Don't call a C function. --- zjit/src/codegen.rs | 33 +++++++++++++++++++++++++++++---- zjit/src/cruby_methods.rs | 16 +++++++++++++++- zjit/src/hir.rs | 18 +++++++++--------- zjit/src/hir/opt_tests.rs | 14 ++++++++++++-- 4 files changed, 65 insertions(+), 16 deletions(-) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index df9a9299cf2d80..ac1c65b26e1c13 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -365,7 +365,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio // If it happens we abort the compilation for now Insn::StringConcat { strings, state, .. } if strings.is_empty() => return Err(*state), Insn::StringConcat { strings, state } => gen_string_concat(jit, asm, opnds!(strings), &function.frame_state(*state)), - &Insn::StringGetbyteFixnum { string, index } => gen_string_getbyte_fixnum(asm, opnd!(string), opnd!(index)), + &Insn::StringGetbyte { string, index } => gen_string_getbyte(asm, opnd!(string), opnd!(index)), Insn::StringSetbyteFixnum { string, index, value } => gen_string_setbyte_fixnum(asm, opnd!(string), opnd!(index), opnd!(value)), Insn::StringAppend { recv, other, state } => gen_string_append(jit, asm, opnd!(recv), opnd!(other), &function.frame_state(*state)), Insn::StringAppendCodepoint { recv, other, state } => gen_string_append_codepoint(jit, asm, opnd!(recv), opnd!(other), &function.frame_state(*state)), @@ -2496,9 +2496,34 @@ fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec result } -fn gen_string_getbyte_fixnum(asm: &mut Assembler, string: Opnd, index: Opnd) -> Opnd { - // TODO(max): Open-code rb_str_getbyte to avoid a call - asm_ccall!(asm, rb_str_getbyte, string, index) +// Generate RSTRING_PTR +fn get_string_ptr(asm: &mut Assembler, string: Opnd) -> Opnd { + asm_comment!(asm, "get string pointer for embedded or heap"); + let string = asm.load(string); + let flags = Opnd::mem(VALUE_BITS, string, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags, (RSTRING_NOEMBED as u64).into()); + let heap_ptr = asm.load(Opnd::mem( + usize::BITS as u8, + string, + RUBY_OFFSET_RSTRING_AS_HEAP_PTR, + )); + // Load the address of the embedded array + // (struct RString *)(obj)->as.ary + let ary = asm.lea(Opnd::mem(VALUE_BITS, string, RUBY_OFFSET_RSTRING_AS_ARY)); + asm.csel_nz(heap_ptr, ary) +} + +fn gen_string_getbyte(asm: &mut Assembler, string: Opnd, index: Opnd) -> Opnd { + let string_ptr = get_string_ptr(asm, string); + // TODO(max): Use SIB indexing here once the backend supports it + let string_ptr = asm.add(string_ptr, index); + let byte = asm.load(Opnd::mem(8, string_ptr, 0)); + // Zero-extend the byte to 64 bits + let byte = byte.with_num_bits(64); + let byte = asm.and(byte, 0xFF.into()); + // Tag the byte + let byte = asm.lshift(byte, Opnd::UImm(1)); + asm.or(byte, Opnd::UImm(1)) } fn gen_string_setbyte_fixnum(asm: &mut Assembler, string: Opnd, index: Opnd, value: Opnd) -> Opnd { diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs index 155979d105ae16..0f5c992b88f892 100644 --- a/zjit/src/cruby_methods.rs +++ b/zjit/src/cruby_methods.rs @@ -376,7 +376,21 @@ fn inline_string_getbyte(fun: &mut hir::Function, block: hir::BlockId, recv: hir // String#getbyte with a Fixnum is leaf and nogc; otherwise it may run arbitrary Ruby code // when converting the index to a C integer. let index = fun.coerce_to(block, index, types::Fixnum, state); - let result = fun.push_insn(block, hir::Insn::StringGetbyteFixnum { string: recv, index }); + let unboxed_index = fun.push_insn(block, hir::Insn::UnboxFixnum { val: index }); + let len = fun.push_insn(block, hir::Insn::LoadField { + recv, + id: ID!(len), + offset: RUBY_OFFSET_RSTRING_LEN as i32, + return_type: types::CInt64, + }); + // TODO(max): Find a way to mark these guards as not needed for correctness... as in, once + // the data dependency is gone (say, the StringGetbyte is elided), they can also be elided. + // + // This is unlike most other guards. + let unboxed_index = fun.push_insn(block, hir::Insn::GuardLess { left: unboxed_index, right: len, state }); + let zero = fun.push_insn(block, hir::Insn::Const { val: hir::Const::CInt64(0) }); + let _ = fun.push_insn(block, hir::Insn::GuardGreaterEq { left: unboxed_index, right: zero, state }); + let result = fun.push_insn(block, hir::Insn::StringGetbyte { string: recv, index: unboxed_index }); return Some(result); } None diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index a69628b8690d67..49f139b45e76a7 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -649,7 +649,7 @@ pub enum Insn { StringIntern { val: InsnId, state: InsnId }, StringConcat { strings: Vec, state: InsnId }, /// Call rb_str_getbyte with known-Fixnum index - StringGetbyteFixnum { string: InsnId, index: InsnId }, + StringGetbyte { string: InsnId, index: InsnId }, StringSetbyteFixnum { string: InsnId, index: InsnId, value: InsnId }, StringAppend { recv: InsnId, other: InsnId, state: InsnId }, StringAppendCodepoint { recv: InsnId, other: InsnId, state: InsnId }, @@ -1004,7 +1004,7 @@ impl Insn { // but we don't have type information here in `impl Insn`. See rb_range_new(). Insn::NewRange { .. } => true, Insn::NewRangeFixnum { .. } => false, - Insn::StringGetbyteFixnum { .. } => false, + Insn::StringGetbyte { .. } => false, Insn::IsBlockGiven => false, Insn::BoxFixnum { .. } => false, Insn::BoxBool { .. } => false, @@ -1118,8 +1118,8 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Ok(()) } - Insn::StringGetbyteFixnum { string, index, .. } => { - write!(f, "StringGetbyteFixnum {string}, {index}") + Insn::StringGetbyte { string, index, .. } => { + write!(f, "StringGetbyte {string}, {index}") } Insn::StringSetbyteFixnum { string, index, value, .. } => { write!(f, "StringSetbyteFixnum {string}, {index}, {value}") @@ -1818,7 +1818,7 @@ impl Function { &StringCopy { val, chilled, state } => StringCopy { val: find!(val), chilled, state }, &StringIntern { val, state } => StringIntern { val: find!(val), state: find!(state) }, &StringConcat { ref strings, state } => StringConcat { strings: find_vec!(strings), state: find!(state) }, - &StringGetbyteFixnum { string, index } => StringGetbyteFixnum { string: find!(string), index: find!(index) }, + &StringGetbyte { string, index } => StringGetbyte { string: find!(string), index: find!(index) }, &StringSetbyteFixnum { string, index, value } => StringSetbyteFixnum { string: find!(string), index: find!(index), value: find!(value) }, &StringAppend { recv, other, state } => StringAppend { recv: find!(recv), other: find!(other), state: find!(state) }, &StringAppendCodepoint { recv, other, state } => StringAppendCodepoint { recv: find!(recv), other: find!(other), state: find!(state) }, @@ -2038,7 +2038,7 @@ impl Function { Insn::StringCopy { .. } => types::StringExact, Insn::StringIntern { .. } => types::Symbol, Insn::StringConcat { .. } => types::StringExact, - Insn::StringGetbyteFixnum { .. } => types::Fixnum.union(types::NilClass), + Insn::StringGetbyte { .. } => types::Fixnum, Insn::StringSetbyteFixnum { .. } => types::Fixnum, Insn::StringAppend { .. } => types::StringExact, Insn::StringAppendCodepoint { .. } => types::StringExact, @@ -3566,7 +3566,7 @@ impl Function { worklist.extend(strings); worklist.push_back(state); } - &Insn::StringGetbyteFixnum { string, index } => { + &Insn::StringGetbyte { string, index } => { worklist.push_back(string); worklist.push_back(index); } @@ -4450,9 +4450,9 @@ impl Function { self.assert_subtype(insn_id, left, types::CInt64)?; self.assert_subtype(insn_id, right, types::CInt64) }, - Insn::StringGetbyteFixnum { string, index } => { + Insn::StringGetbyte { string, index } => { self.assert_subtype(insn_id, string, types::String)?; - self.assert_subtype(insn_id, index, types::Fixnum) + self.assert_subtype(insn_id, index, types::CInt64) }, Insn::StringSetbyteFixnum { string, index, value } => { self.assert_subtype(insn_id, string, types::String)?; diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 60135b6ac9e969..bdc26f758e107e 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -6451,10 +6451,15 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(String@0x1000) v26:StringExact = GuardType v11, StringExact v27:Fixnum = GuardType v12, Fixnum - v28:NilClass|Fixnum = StringGetbyteFixnum v26, v27 + v28:CInt64 = UnboxFixnum v27 + v29:CInt64 = LoadField v26, :len@0x1038 + v30:CInt64 = GuardLess v28, v29 + v31:CInt64[0] = Const CInt64(0) + v32:CInt64 = GuardGreaterEq v30, v31 + v33:Fixnum = StringGetbyte v26, v30 IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - Return v28 + Return v33 "); } @@ -6483,6 +6488,11 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(String@0x1000) v30:StringExact = GuardType v11, StringExact v31:Fixnum = GuardType v12, Fixnum + v32:CInt64 = UnboxFixnum v31 + v33:CInt64 = LoadField v30, :len@0x1038 + v34:CInt64 = GuardLess v32, v33 + v35:CInt64[0] = Const CInt64(0) + v36:CInt64 = GuardGreaterEq v34, v35 IncrCounter inline_cfunc_optimized_send_count v22:Fixnum[5] = Const Value(5) CheckInterrupts From b9f1976f2334b4d9f55be5607de408eb7973c188 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 24 Nov 2025 23:37:24 -0500 Subject: [PATCH 12/15] ZJIT: Add HIR test for VM_OPT_NEWARRAY_SEND_PACK_BUFFER --- zjit/src/hir/tests.rs | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/zjit/src/hir/tests.rs b/zjit/src/hir/tests.rs index 314eb7777cbf6c..79ba1d30c53443 100644 --- a/zjit/src/hir/tests.rs +++ b/zjit/src/hir/tests.rs @@ -2121,7 +2121,42 @@ pub mod hir_build_tests { "); } - // TODO(max): Add a test for VM_OPT_NEWARRAY_SEND_PACK_BUFFER + #[test] + fn test_opt_newarray_send_pack_buffer() { + eval(r#" + def test(a,b) + sum = a+b + buf = "" + [a,b].pack 'C', buffer: buf + buf + end + "#); + assert_contains_opcode("test", YARVINSN_opt_newarray_send); + assert_snapshot!(hir_string("test"), @r" + fn test@:3: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@7 + v3:BasicObject = GetLocal l0, SP@6 + v4:NilClass = Const Value(nil) + v5:NilClass = Const Value(nil) + Jump bb2(v1, v2, v3, v4, v5) + bb1(v8:BasicObject, v9:BasicObject, v10:BasicObject): + EntryPoint JIT(0) + v11:NilClass = Const Value(nil) + v12:NilClass = Const Value(nil) + Jump bb2(v8, v9, v10, v11, v12) + bb2(v14:BasicObject, v15:BasicObject, v16:BasicObject, v17:NilClass, v18:NilClass): + v25:BasicObject = SendWithoutBlock v15, :+, v16 + v29:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) + v30:StringExact = StringCopy v29 + v36:StringExact[VALUE(0x1008)] = Const Value(VALUE(0x1008)) + v37:StringExact = StringCopy v36 + v39:BasicObject = GetLocal l0, EP@3 + SideExit UnhandledNewarraySend(PACK_BUFFER) + "); + } #[test] fn test_opt_newarray_send_include_p() { From d0bb505a04bcd6a3d86c01d7c402c1f6205e69b4 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Mon, 1 Dec 2025 11:57:11 -0800 Subject: [PATCH 13/15] ZJIT: Split Lea memory reads on x86_64 --- zjit/src/backend/x86_64/mod.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index d17286f51fda25..a9bd57f368c59b 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -559,7 +559,8 @@ impl Assembler { asm.store(mem_out, SCRATCH0_OPND); } } - Insn::Lea { out, .. } => { + Insn::Lea { opnd, out } => { + *opnd = split_stack_membase(asm, *opnd, SCRATCH0_OPND, &stack_state); let mem_out = split_memory_write(out, SCRATCH0_OPND); asm.push_insn(insn); if let Some(mem_out) = mem_out { @@ -1804,4 +1805,20 @@ mod tests { "); assert_snapshot!(cb.hexdump(), @"4c8b55f84c8b5df04d8b5b024d0f441a4c895df8"); } + + #[test] + fn test_lea_split_memory_read() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = Opnd::Mem(Mem { base: MemBase::Stack { stack_idx: 0, num_bits: 64 }, disp: 0, num_bits: 64 }); + let _ = asm.lea(opnd); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm_snapshot!(cb.disasm(), @" + 0x0: mov r11, qword ptr [rbp - 8] + 0x4: lea r11, [r11] + 0x7: mov qword ptr [rbp - 8], r11 + "); + assert_snapshot!(cb.hexdump(), @"4c8b5df84d8d1b4c895df8"); + } } From 2dfb8149d58694cd578dbe2222640499ac021231 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 11 Jan 2025 18:26:09 +0900 Subject: [PATCH 14/15] Clean generated transcoders --- enc/Makefile.in | 1 + enc/depend | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/enc/Makefile.in b/enc/Makefile.in index 17888f8f9b5e10..2a3c45169fbb22 100644 --- a/enc/Makefile.in +++ b/enc/Makefile.in @@ -31,6 +31,7 @@ BUILTIN_ENCS = enc/ascii.c enc/us_ascii.c\ enc/unicode.c enc/utf_8.c BUILTIN_TRANSES = enc/trans/newline.trans +BUILTIN_TRANS_CSRCS = $(BUILTIN_TRANSES:.trans=.c) RUBY_SO_NAME = @RUBY_SO_NAME@ LIBRUBY = @LIBRUBY@ diff --git a/enc/depend b/enc/depend index a458b63887e858..4bf97dc880f195 100644 --- a/enc/depend +++ b/enc/depend @@ -157,15 +157,15 @@ clean: % end % unless inplace $(Q)$(RM) enc/unicode/*/casefold.h enc/unicode/*/name2ctype.h - $(Q)$(RM) enc/jis/props.h - -$(Q)$(RMDIR) enc/unicode<%=ignore_error%> + $(Q)$(RM) enc/trans/newline.c enc/jis/props.h + -$(Q)$(RMDIR) enc/trnas enc/unicode<%=ignore_error%> % end % workdirs.reverse_each do|d| -$(Q)$(RMDIR) <%=pathrep[d]%><%=ignore_error%> % end clean-srcs: - $(Q)$(RM) <%=pathrep['$(TRANSCSRCS)']%> + $(Q)$(RM) <%=pathrep['$(TRANSCSRCS)']%> <%=pathrep['$(BUILTIN_TRANS_CSRCS)']%> -$(Q)$(RMDIR) <%=pathrep['enc/trans']%><%=ignore_error%> $(Q)$(RM) enc/unicode/*/casefold.h enc/unicode/*/name2ctype.h $(Q)$(RM) enc/jis/props.h From 07ea9a38097c088efd6c2872f2a563dbc69a544a Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Tue, 25 Nov 2025 23:26:02 +0100 Subject: [PATCH 15/15] ZJIT: Optimize GetIvar for non-T_OBJECT * All Invariant::SingleRactorMode PatchPoint are replaced by assume_single_ractor_mode() to fix https://github.com/Shopify/ruby/issues/875 for SingleRactorMode patchpoints. --- test/ruby/test_zjit.rb | 30 ++++++- zjit/bindgen/src/main.rs | 1 + zjit/src/codegen.rs | 5 ++ zjit/src/cruby.rs | 1 + zjit/src/cruby_bindings.inc.rs | 1 + zjit/src/hir.rs | 87 +++++++++++++------- zjit/src/hir/opt_tests.rs | 142 +++++++++++++++++++++++++++++++++ 7 files changed, 236 insertions(+), 31 deletions(-) diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index d821d8ad5cca69..6609bb2461c31d 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -3017,7 +3017,35 @@ def test test Ractor.new { test }.value - } + }, call_threshold: 2 + end + + def test_ivar_get_with_already_multi_ractor_mode + assert_compiles '42', %q{ + class Foo + def self.set_bar + @bar = [] # needs to be a ractor unshareable object + end + + def self.bar + @bar + rescue Ractor::IsolationError + 42 + end + end + + Foo.set_bar + r = Ractor.new { + Ractor.receive + Foo.bar + } + + Foo.bar + Foo.bar + + r << :go + r.value + }, call_threshold: 2 end def test_ivar_set_with_multi_ractor_mode diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index 6659049242983b..0860c073cd17ed 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -326,6 +326,7 @@ fn main() { .allowlist_function("rb_attr_get") .allowlist_function("rb_ivar_defined") .allowlist_function("rb_ivar_get") + .allowlist_function("rb_ivar_get_at_no_ractor_check") .allowlist_function("rb_ivar_set") .allowlist_function("rb_mod_name") .allowlist_var("rb_vm_insn_count") diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index ac1c65b26e1c13..bb19d7d8209ff8 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -349,6 +349,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::Const { val: Const::Value(val) } => gen_const_value(val), &Insn::Const { val: Const::CPtr(val) } => gen_const_cptr(val), &Insn::Const { val: Const::CInt64(val) } => gen_const_long(val), + &Insn::Const { val: Const::CUInt16(val) } => gen_const_uint16(val), Insn::Const { .. } => panic!("Unexpected Const in gen_insn: {insn}"), Insn::NewArray { elements, state } => gen_new_array(asm, opnds!(elements), &function.frame_state(*state)), Insn::NewHash { elements, state } => gen_new_hash(jit, asm, opnds!(elements), &function.frame_state(*state)), @@ -1154,6 +1155,10 @@ fn gen_const_long(val: i64) -> lir::Opnd { Opnd::Imm(val) } +fn gen_const_uint16(val: u16) -> lir::Opnd { + Opnd::UImm(val as u64) +} + /// Compile a basic block argument fn gen_param(asm: &mut Assembler, idx: usize) -> lir::Opnd { // Allocate a register or a stack slot diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 72c44ccc6e412f..9ed3a1abf79ca2 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -1381,6 +1381,7 @@ pub(crate) mod ids { name: _as_heap name: thread_ptr name: self_ content: b"self" + name: rb_ivar_get_at_no_ractor_check } /// Get an CRuby `ID` to an interned string, e.g. a particular method name. diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index fb329a07166914..7bd392563987a3 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -2034,6 +2034,7 @@ unsafe extern "C" { pub fn rb_obj_shape_id(obj: VALUE) -> shape_id_t; pub fn rb_shape_get_iv_index(shape_id: shape_id_t, id: ID, value: *mut attr_index_t) -> bool; pub fn rb_shape_transition_add_ivar_no_warnings(obj: VALUE, id: ID) -> shape_id_t; + pub fn rb_ivar_get_at_no_ractor_check(obj: VALUE, index: attr_index_t) -> VALUE; pub fn rb_gvar_get(arg1: ID) -> VALUE; pub fn rb_gvar_set(arg1: ID, arg2: VALUE) -> VALUE; pub fn rb_ensure_iv_list_size(obj: VALUE, current_len: u32, newsize: u32); diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 49f139b45e76a7..5ede64d42c2d80 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -1742,6 +1742,15 @@ impl Function { self.blocks.len() } + pub fn assume_single_ractor_mode(&mut self, block: BlockId, state: InsnId) -> bool { + if unsafe { rb_jit_multi_ractor_p() } { + false + } else { + self.push_insn(block, Insn::PatchPoint { invariant: Invariant::SingleRactorMode, state }); + true + } + } + /// Return a copy of the instruction where the instruction and its operands have been read from /// the union-find table (to find the current most-optimized version of this instruction). See /// [`UnionFind`] for more. @@ -2377,6 +2386,17 @@ impl Function { } } + fn is_metaclass(&self, object: VALUE) -> bool { + unsafe { + if RB_TYPE_P(object, RUBY_T_CLASS) && rb_zjit_singleton_class_p(object) { + let attached = rb_class_attached_object(object); + RB_TYPE_P(attached, RUBY_T_CLASS) || RB_TYPE_P(attached, RUBY_T_MODULE) + } else { + false + } + } + } + /// Rewrite SendWithoutBlock opcodes into SendWithoutBlockDirect opcodes if we know the target /// ISEQ statically. This removes run-time method lookups and opens the door for inlining. /// Also try and inline constant caches, specialize object allocations, and more. @@ -2483,9 +2503,9 @@ impl Function { // Patch points: // Check for "defined with an un-shareable Proc in a different Ractor" - if !procv.shareable_p() { + if !procv.shareable_p() && !self.assume_single_ractor_mode(block, state) { // TODO(alan): Turn this into a ractor belonging guard to work better in multi ractor mode. - self.push_insn(block, Insn::PatchPoint { invariant: Invariant::SingleRactorMode, state }); + self.push_insn_id(block, insn_id); continue; } self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state }); if klass.instance_can_have_singleton_class() { @@ -2498,6 +2518,12 @@ impl Function { let send_direct = self.push_insn(block, Insn::SendWithoutBlockDirect { recv, cd, cme, iseq, args, state }); self.make_equal_to(insn_id, send_direct); } else if def_type == VM_METHOD_TYPE_IVAR && args.is_empty() { + // Check if we're accessing ivars of a Class or Module object as they require single-ractor mode. + // We omit gen_prepare_non_leaf_call on gen_getivar, so it's unsafe to raise for multi-ractor mode. + if self.is_metaclass(klass) && !self.assume_single_ractor_mode(block, state) { + self.push_insn_id(block, insn_id); continue; + } + self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state }); if klass.instance_can_have_singleton_class() { self.push_insn(block, Insn::PatchPoint { invariant: Invariant::NoSingletonClass { klass }, state }); @@ -2507,31 +2533,21 @@ impl Function { } let id = unsafe { get_cme_def_body_attr_id(cme) }; - // Check if we're accessing ivars of a Class or Module object as they require single-ractor mode. - // We omit gen_prepare_non_leaf_call on gen_getivar, so it's unsafe to raise for multi-ractor mode. - if unsafe { rb_zjit_singleton_class_p(klass) } { - let attached = unsafe { rb_class_attached_object(klass) }; - if unsafe { RB_TYPE_P(attached, RUBY_T_CLASS) || RB_TYPE_P(attached, RUBY_T_MODULE) } { - self.push_insn(block, Insn::PatchPoint { invariant: Invariant::SingleRactorMode, state }); - } - } let getivar = self.push_insn(block, Insn::GetIvar { self_val: recv, id, ic: std::ptr::null(), state }); self.make_equal_to(insn_id, getivar); } else if let (VM_METHOD_TYPE_ATTRSET, &[val]) = (def_type, args.as_slice()) { + // Check if we're accessing ivars of a Class or Module object as they require single-ractor mode. + // We omit gen_prepare_non_leaf_call on gen_getivar, so it's unsafe to raise for multi-ractor mode. + if self.is_metaclass(klass) && !self.assume_single_ractor_mode(block, state) { + self.push_insn_id(block, insn_id); continue; + } + self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state }); if let Some(profiled_type) = profiled_type { recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state }); } let id = unsafe { get_cme_def_body_attr_id(cme) }; - // Check if we're accessing ivars of a Class or Module object as they require single-ractor mode. - // We omit gen_prepare_non_leaf_call on gen_setivar, so it's unsafe to raise for multi-ractor mode. - if unsafe { rb_zjit_singleton_class_p(klass) } { - let attached = unsafe { rb_class_attached_object(klass) }; - if unsafe { RB_TYPE_P(attached, RUBY_T_CLASS) || RB_TYPE_P(attached, RUBY_T_MODULE) } { - self.push_insn(block, Insn::PatchPoint { invariant: Invariant::SingleRactorMode, state }); - } - } self.push_insn(block, Insn::SetIvar { self_val: recv, id, ic: std::ptr::null(), val, state }); self.make_equal_to(insn_id, val); } else if def_type == VM_METHOD_TYPE_OPTIMIZED { @@ -2657,12 +2673,9 @@ impl Function { self.push_insn_id(block, insn_id); continue; } let cref_sensitive = !unsafe { (*ice).ic_cref }.is_null(); - let multi_ractor_mode = unsafe { rb_jit_multi_ractor_p() }; - if cref_sensitive || multi_ractor_mode { + if cref_sensitive || !self.assume_single_ractor_mode(block, state) { self.push_insn_id(block, insn_id); continue; } - // Assume single-ractor mode. - self.push_insn(block, Insn::PatchPoint { invariant: Invariant::SingleRactorMode, state }); // Invalidate output code on any constant writes associated with constants // referenced after the PatchPoint. self.push_insn(block, Insn::PatchPoint { invariant: Invariant::StableConstantNames { idlist }, state }); @@ -2829,11 +2842,6 @@ impl Function { self.push_insn_id(block, insn_id); continue; } assert!(recv_type.shape().is_valid()); - if !recv_type.flags().is_t_object() { - // Check if the receiver is a T_OBJECT - self.push_insn(block, Insn::IncrCounter(Counter::getivar_fallback_not_t_object)); - self.push_insn_id(block, insn_id); continue; - } if recv_type.shape().is_too_complex() { // too-complex shapes can't use index access self.push_insn(block, Insn::IncrCounter(Counter::getivar_fallback_too_complex)); @@ -2847,6 +2855,17 @@ impl Function { // entered the compiler. That means we can just return nil for this // shape + iv name self.push_insn(block, Insn::Const { val: Const::Value(Qnil) }) + } else if !recv_type.flags().is_t_object() { + // NOTE: it's fine to use rb_ivar_get_at_no_ractor_check because + // getinstancevariable does assume_single_ractor_mode() + let ivar_index_insn: InsnId = self.push_insn(block, Insn::Const { val: Const::CUInt16(ivar_index as u16) }); + self.push_insn(block, Insn::CCall { + cfunc: rb_ivar_get_at_no_ractor_check as *const u8, + recv: self_val, + args: vec![ivar_index_insn], + name: ID!(rb_ivar_get_at_no_ractor_check), + return_type: types::BasicObject, + elidable: true }) } else if recv_type.flags().is_embedded() { // See ROBJECT_FIELDS() from include/ruby/internal/core/robject.h let offset = ROBJECT_OFFSET_AS_ARY as i32 + (SIZEOF_VALUE * ivar_index.to_usize()) as i32; @@ -4277,6 +4296,7 @@ impl Function { | Insn::ObjectAlloc { val, .. } | Insn::DupArrayInclude { target: val, .. } | Insn::GetIvar { self_val: val, .. } + | Insn::CCall { recv: val, .. } | Insn::FixnumBitCheck { val, .. } // TODO (https://github.com/Shopify/ruby/issues/859) this should check Fixnum, but then test_checkkeyword_tests_fixnum_bit fails | Insn::DefinedIvar { self_val: val, .. } => { self.assert_subtype(insn_id, val, types::BasicObject) @@ -4298,7 +4318,6 @@ impl Function { | Insn::Send { recv, ref args, .. } | Insn::SendForward { recv, ref args, .. } | Insn::InvokeSuper { recv, ref args, .. } - | Insn::CCall { recv, ref args, .. } | Insn::CCallWithFrame { recv, ref args, .. } | Insn::CCallVariadic { recv, ref args, .. } | Insn::ArrayInclude { target: recv, elements: ref args, .. } => { @@ -5693,7 +5712,11 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { // ic is in arg 1 // Assume single-Ractor mode to omit gen_prepare_non_leaf_call on gen_getivar // TODO: We only really need this if self_val is a class/module - fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::SingleRactorMode, state: exit_id }); + if !fun.assume_single_ractor_mode(block, exit_id) { + // gen_getivar assumes single Ractor; side-exit into the interpreter + fun.push_insn(block, Insn::SideExit { state: exit_id, reason: SideExitReason::UnhandledYARVInsn(opcode) }); + break; // End the block + } let result = fun.push_insn(block, Insn::GetIvar { self_val: self_param, id, ic, state: exit_id }); state.stack_push(result); } @@ -5702,7 +5725,11 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let ic = get_arg(pc, 1).as_ptr(); // Assume single-Ractor mode to omit gen_prepare_non_leaf_call on gen_setivar // TODO: We only really need this if self_val is a class/module - fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::SingleRactorMode, state: exit_id }); + if !fun.assume_single_ractor_mode(block, exit_id) { + // gen_setivar assumes single Ractor; side-exit into the interpreter + fun.push_insn(block, Insn::SideExit { state: exit_id, reason: SideExitReason::UnhandledYARVInsn(opcode) }); + break; // End the block + } let val = state.stack_pop()?; fun.push_insn(block, Insn::SetIvar { self_val: self_param, id, ic, val, state: exit_id }); } diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index bdc26f758e107e..9809c8bffbee01 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -5231,6 +5231,148 @@ mod hir_opt_tests { "); } + #[test] + fn test_optimize_getivar_on_module() { + eval(" + module M + @foo = 42 + def self.test = @foo + end + M.test + "); + assert_snapshot!(hir_string_proc("M.method(:test)"), @r" + fn test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + PatchPoint SingleRactorMode + v16:HeapBasicObject = GuardType v6, HeapBasicObject + v17:HeapBasicObject = GuardShape v16, 0x1000 + v18:CUInt16[0] = Const CUInt16(0) + v19:BasicObject = CCall v17, :rb_ivar_get_at_no_ractor_check@0x1008, v18 + CheckInterrupts + Return v19 + "); + } + + #[test] + fn test_optimize_getivar_on_class() { + eval(" + class C + @foo = 42 + def self.test = @foo + end + C.test + "); + assert_snapshot!(hir_string_proc("C.method(:test)"), @r" + fn test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + PatchPoint SingleRactorMode + v16:HeapBasicObject = GuardType v6, HeapBasicObject + v17:HeapBasicObject = GuardShape v16, 0x1000 + v18:CUInt16[0] = Const CUInt16(0) + v19:BasicObject = CCall v17, :rb_ivar_get_at_no_ractor_check@0x1008, v18 + CheckInterrupts + Return v19 + "); + } + + #[test] + fn test_optimize_getivar_on_t_data() { + eval(" + class C < Range + def test = @a + end + obj = C.new 0, 1 + obj.instance_variable_set(:@a, 1) + obj.test + TEST = C.instance_method(:test) + "); + assert_snapshot!(hir_string_proc("TEST"), @r" + fn test@:3: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + PatchPoint SingleRactorMode + v16:HeapBasicObject = GuardType v6, HeapBasicObject + v17:HeapBasicObject = GuardShape v16, 0x1000 + v18:CUInt16[0] = Const CUInt16(0) + v19:BasicObject = CCall v17, :rb_ivar_get_at_no_ractor_check@0x1008, v18 + CheckInterrupts + Return v19 + "); + } + + #[test] + fn test_optimize_getivar_on_module_multi_ractor() { + eval(" + module M + @foo = 42 + def self.test = @foo + end + Ractor.new {}.value + M.test + "); + assert_snapshot!(hir_string_proc("M.method(:test)"), @r" + fn test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + SideExit UnhandledYARVInsn(getinstancevariable) + "); + } + + #[test] + fn test_optimize_attr_reader_on_module_multi_ractor() { + eval(" + module M + @foo = 42 + class << self + attr_reader :foo + end + def self.test = foo + end + Ractor.new {}.value + M.test + "); + assert_snapshot!(hir_string_proc("M.method(:test)"), @r" + fn test@:7: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v11:BasicObject = SendWithoutBlock v6, :foo + CheckInterrupts + Return v11 + "); + } + #[test] fn test_dont_optimize_getivar_polymorphic() { set_call_threshold(3);