From 79f52020e2fcc167415e06f16e5d9868571908b3 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Wed, 13 Aug 2025 15:08:18 -0400 Subject: [PATCH 01/10] Don't change Encoding.default_internal in assert_raise_with_message For most tests (except two), we don't need to change Encoding.default_internal in assert_raise_with_message. We're trying to run the test suite across multiple Ractors and modifying Encoding.default_internal can cause other concurrently running tests to fail. --- test/ruby/test_numeric.rb | 10 ++++++---- test/ruby/test_regexp.rb | 10 ++++++---- tool/lib/core_assertions.rb | 9 +++------ 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/test/ruby/test_numeric.rb b/test/ruby/test_numeric.rb index ab492743f6ec34..cf408fac0af411 100644 --- a/test/ruby/test_numeric.rb +++ b/test/ruby/test_numeric.rb @@ -22,10 +22,12 @@ def test_coerce assert_raise_with_message(TypeError, /:\u{3042}/) {1&:"\u{3042}"} assert_raise_with_message(TypeError, /:\u{3042}/) {1|:"\u{3042}"} assert_raise_with_message(TypeError, /:\u{3042}/) {1^:"\u{3042}"} - assert_raise_with_message(TypeError, /:"\\u3042"/) {1+:"\u{3042}"} - assert_raise_with_message(TypeError, /:"\\u3042"/) {1&:"\u{3042}"} - assert_raise_with_message(TypeError, /:"\\u3042"/) {1|:"\u{3042}"} - assert_raise_with_message(TypeError, /:"\\u3042"/) {1^:"\u{3042}"} + EnvUtil.with_default_internal(Encoding::US_ASCII) do + assert_raise_with_message(TypeError, /:"\\u3042"/) {1+:"\u{3042}"} + assert_raise_with_message(TypeError, /:"\\u3042"/) {1&:"\u{3042}"} + assert_raise_with_message(TypeError, /:"\\u3042"/) {1|:"\u{3042}"} + assert_raise_with_message(TypeError, /:"\\u3042"/) {1^:"\u{3042}"} + end assert_raise_with_message(TypeError, /:\u{3044}/) {1+"\u{3044}".to_sym} assert_raise_with_message(TypeError, /:\u{3044}/) {1&"\u{3044}".to_sym} assert_raise_with_message(TypeError, /:\u{3044}/) {1|"\u{3044}".to_sym} diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 7885acc87e6112..6cc0ac672f165f 
100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -1036,10 +1036,12 @@ def test_match_without_regexp [Encoding::UTF_8, Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc| idx = key.encode(enc) pat = /#{idx}/ - test.call {|m| assert_raise_with_message(IndexError, pat, bug10877) {m[idx]} } - test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.offset(idx)} } - test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.begin(idx)} } - test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.end(idx)} } + EnvUtil.with_default_internal(enc) do + test.call {|m| assert_raise_with_message(IndexError, pat, bug10877) {m[idx]} } + test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.offset(idx)} } + test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.begin(idx)} } + test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.end(idx)} } + end end test.call {|m| assert_equal(/a/, m.regexp) } test.call {|m| assert_equal("abc", m.string) } diff --git a/tool/lib/core_assertions.rb b/tool/lib/core_assertions.rb index 00d180fa8ce088..47cc6574c878d1 100644 --- a/tool/lib/core_assertions.rb +++ b/tool/lib/core_assertions.rb @@ -501,13 +501,10 @@ def assert_raise_with_message(exception, expected, msg = nil, &block) assert = :assert_match end - ex = m = nil - EnvUtil.with_default_internal(of: expected) do - ex = assert_raise(exception, msg || proc {"Exception(#{exception}) with message matches to #{expected.inspect}"}) do - yield - end - m = ex.message + ex = assert_raise(exception, msg || proc {"Exception(#{exception}) with message matches to #{expected.inspect}"}) do + yield end + m = ex.message msg = message(msg, "") {"Expected Exception(#{exception}) was raised, but the message doesn't match"} if assert == :assert_equal From 8f7e7bfca23ff5e402ed9c22a8f684fe59de72e4 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 15 Aug 2025 09:05:36 -0700 Subject: [PATCH 
02/10] ZJIT: Set PC before function_stub_hit_body (#14234) --- zjit/src/codegen.rs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 43fde7db7fd629..995d8ba6072a08 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -1396,13 +1396,12 @@ c_callable! { /// instructions, so this should be used primarily for cb.has_dropped_bytes() situations. fn function_stub_hit(iseq: IseqPtr, branch_ptr: *const c_void, ec: EcPtr, sp: *mut VALUE) -> *const u8 { with_vm_lock(src_loc!(), || { - /// gen_push_frame() doesn't set PC and SP, so we need to set them before exit - fn set_pc_and_sp(iseq: IseqPtr, ec: EcPtr, sp: *mut VALUE) { - let cfp = unsafe { get_ec_cfp(ec) }; - let pc = unsafe { rb_iseq_pc_at_idx(iseq, 0) }; // TODO: handle opt_pc once supported - unsafe { rb_set_cfp_pc(cfp, pc) }; - unsafe { rb_set_cfp_sp(cfp, sp) }; - } + // gen_push_frame() doesn't set PC and SP, so we need to set them before exit. + // function_stub_hit_body() may allocate and call gc_validate_pc(), so we always set PC. + let cfp = unsafe { get_ec_cfp(ec) }; + let pc = unsafe { rb_iseq_pc_at_idx(iseq, 0) }; // TODO: handle opt_pc once supported + unsafe { rb_set_cfp_pc(cfp, pc) }; + unsafe { rb_set_cfp_sp(cfp, sp) }; // If we already know we can't compile the ISEQ, fail early without cb.mark_all_executable(). // TODO: Alan thinks the payload status part of this check can happen without the VM lock, since the whole @@ -1411,7 +1410,6 @@ c_callable! { let payload = get_or_create_iseq_payload(iseq); if cb.has_dropped_bytes() || payload.status == IseqStatus::CantCompile { // Exit to the interpreter - set_pc_and_sp(iseq, ec, sp); return ZJITState::get_exit_code().raw_ptr(cb); } @@ -1421,7 +1419,6 @@ c_callable! 
{ code_ptr } else { // Exit to the interpreter - set_pc_and_sp(iseq, ec, sp); ZJITState::get_exit_code() }; cb.mark_all_executable(); From 7c67060dad5ae1582e91a88841cc0cf5c6edeea3 Mon Sep 17 00:00:00 2001 From: Luke Gruber Date: Fri, 15 Aug 2025 11:15:58 -0400 Subject: [PATCH 03/10] Fix enc_list across ractors Calling rb_ary_replace(copy, orig) can modify orig, which is not safe across ractors because orig is shared (it's the global encoding list). Hoping to address CI failures such as https://ci.rvm.jp/results/trunk-gc-asserts@ruby-sp2-noble-docker/5890058 --- encoding.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/encoding.c b/encoding.c index 78964476c001d3..0a5d61ee4a1692 100644 --- a/encoding.c +++ b/encoding.c @@ -1394,10 +1394,8 @@ enc_names(VALUE self) static VALUE enc_list(VALUE klass) { - VALUE ary = rb_ary_new2(ENCODING_LIST_CAPA); VALUE list = RUBY_ATOMIC_VALUE_LOAD(rb_encoding_list); - rb_ary_replace(ary, list); - return ary; + return rb_ary_dup(list); } /* From 0d0c051b19548eeb65417a20e52fb460e330f0ce Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Fri, 15 Aug 2025 10:41:39 -0700 Subject: [PATCH 04/10] Don't check for stack overflow when ec is NULL (#14236) Co-authored-by: Nobuyoshi Nakada --- signal.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/signal.c b/signal.c index 9edac5a7896fb7..f6b62b30147d87 100644 --- a/signal.c +++ b/signal.c @@ -877,7 +877,9 @@ static void check_stack_overflow(int sig, const void *addr) { int ruby_stack_overflowed_p(const rb_thread_t *, const void *); - rb_thread_t *th = GET_THREAD(); + rb_execution_context_t *ec = rb_current_execution_context(false); + if (!ec) return; + rb_thread_t *th = rb_ec_thread_ptr(ec); if (ruby_stack_overflowed_p(th, addr)) { reset_sigmask(sig); rb_ec_stack_overflow(th->ec, 1); From 9deaf68c1a6bd45b2104b028d868d80c9a69105e Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 15 Aug 2025 09:54:45 -0700 Subject: [PATCH 05/10] Increase timeout 
for a flaky test https://github.com/ruby/ruby/actions/runs/16983250181/job/48147338699 --- test/ruby/test_regexp.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 6cc0ac672f165f..9f1e03e6499782 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -1990,7 +1990,7 @@ def test_bug_20886 end def per_instance_redos_test(global_timeout, per_instance_timeout, expected_timeout) - assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") + assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}", timeout: 60) global_timeout = #{ EnvUtil.apply_timeout_scale(global_timeout).inspect } per_instance_timeout = #{ (per_instance_timeout ? EnvUtil.apply_timeout_scale(per_instance_timeout) : nil).inspect } expected_timeout = #{ EnvUtil.apply_timeout_scale(expected_timeout).inspect } From 12ef01b52f69dfdad2941e1b79c826b74ae0ed24 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 15 Aug 2025 09:56:08 -0700 Subject: [PATCH 06/10] Skip an unstable test for macOS https://github.com/ruby/ruby/actions/runs/16983250181/job/48147338735 https://github.com/ruby/ruby/actions/runs/16994020689/job/48180211762 --- test/socket/test_unix.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/test/socket/test_unix.rb b/test/socket/test_unix.rb index 3e7d85befcc1ad..0d284b29264618 100644 --- a/test/socket/test_unix.rb +++ b/test/socket/test_unix.rb @@ -146,6 +146,7 @@ def test_fd_passing_n2 end def test_fd_passing_race_condition + omit 'randomly crashes on macOS' if RUBY_PLATFORM =~ /darwin/ r1, w = IO.pipe s1, s2 = UNIXSocket.pair s1.nonblock = s2.nonblock = true From 97cffb95e35c8e5b6fe6d76a80dc1d4d775d8094 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 15 Aug 2025 10:59:51 -0700 Subject: [PATCH 07/10] Skip an unstable test for macOS (#14245) --- test/ruby/test_file_exhaustive.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/test/ruby/test_file_exhaustive.rb b/test/ruby/test_file_exhaustive.rb 
index bb64483a795203..b20b597256a125 100644 --- a/test/ruby/test_file_exhaustive.rb +++ b/test/ruby/test_file_exhaustive.rb @@ -1477,6 +1477,7 @@ def test_flock_shared end def test_test + omit 'timestamp check is unstable on macOS' if RUBY_PLATFORM =~ /darwin/ fn1 = regular_file hardlinkfile sleep(1.1) From 2f2e2bd7cd82e209dbb54a36dc4465045dd0b434 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 15 Aug 2025 11:13:11 -0700 Subject: [PATCH 08/10] Relax a delta for an unstable test (#14246) https://github.com/ruby/ruby/actions/runs/16995978143/job/48186652826?pr=14244 --- test/-ext-/gvl/test_last_thread.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/-ext-/gvl/test_last_thread.rb b/test/-ext-/gvl/test_last_thread.rb index f63d98aab16f02..bcda0e33856c51 100644 --- a/test/-ext-/gvl/test_last_thread.rb +++ b/test/-ext-/gvl/test_last_thread.rb @@ -15,7 +15,7 @@ def test_last_thread t1 = Time.now t = t1 - t0 - assert_in_delta(1.0, t, 0.18) + assert_in_delta(1.0, t, 0.8) end; end end From 1d7ed95604d7f9b9847c0054d1c48704a0d1bded Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 15 Aug 2025 11:26:22 -0700 Subject: [PATCH 09/10] ZJIT: Make --zjit-dump-hir work with HIR opt disabled (#14242) --- zjit/src/codegen.rs | 1 + zjit/src/hir.rs | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 995d8ba6072a08..ba7555485a3cd8 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -1313,6 +1313,7 @@ fn compile_iseq(iseq: IseqPtr) -> Option { if !get_option!(disable_hir_opt) { function.optimize(); } + function.dump_hir(); #[cfg(debug_assertions)] if let Err(err) = function.validate() { debug!("ZJIT: compile_iseq: {err:?}"); diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index c93a6858f18fab..b6e18e73562857 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -2143,7 +2143,10 @@ impl Function { #[cfg(debug_assertions)] self.assert_validates(); self.eliminate_dead_code(); 
#[cfg(debug_assertions)] self.assert_validates(); + } + /// Dump HIR passed to codegen if specified by options. + pub fn dump_hir(&self) { // Dump HIR after optimization match get_option!(dump_hir_opt) { Some(DumpHIR::WithoutSnapshot) => println!("Optimized HIR:\n{}", FunctionPrinter::without_snapshot(&self)), @@ -2157,7 +2160,6 @@ impl Function { } } - /// Validates the following: /// 1. Basic block jump args match parameter arity. /// 2. Every terminator must be in the last position. From 2a1210f7cce284e07217e620313085e12e4d575d Mon Sep 17 00:00:00 2001 From: Eileen Date: Fri, 15 Aug 2025 14:31:25 -0400 Subject: [PATCH 10/10] ZJIT: Implement getspecial (#13642) ZJIT: Implement getspecial in ZJIT Adds support for the getspecial instruction in zjit. We split getspecial into two instructions, one for special symbols (`$&`, `$'`, etc) and one for special backrefs (`$1`, `$2`, etc). Co-authored-by: Aaron Patterson --- test/ruby/test_zjit.rb | 100 +++++++++++++++++++++++++++++++++++++++++ zjit/src/codegen.rs | 35 ++++++++++++++- zjit/src/hir.rs | 57 +++++++++++++++++++++++ 3 files changed, 191 insertions(+), 1 deletion(-) diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index e3880a2828cf2e..d30af737c3bde8 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -1221,6 +1221,106 @@ def test = 1.nil? 
}, insns: [:opt_nil_p] end + def test_getspecial_last_match + assert_compiles '"hello"', %q{ + def test(str) + str =~ /hello/ + $& + end + test("hello world") + }, insns: [:getspecial] + end + + def test_getspecial_match_pre + assert_compiles '"hello "', %q{ + def test(str) + str =~ /world/ + $` + end + test("hello world") + }, insns: [:getspecial] + end + + def test_getspecial_match_post + assert_compiles '" world"', %q{ + def test(str) + str =~ /hello/ + $' + end + test("hello world") + }, insns: [:getspecial] + end + + def test_getspecial_match_last_group + assert_compiles '"world"', %q{ + def test(str) + str =~ /(hello) (world)/ + $+ + end + test("hello world") + }, insns: [:getspecial] + end + + def test_getspecial_numbered_match_1 + assert_compiles '"hello"', %q{ + def test(str) + str =~ /(hello) (world)/ + $1 + end + test("hello world") + }, insns: [:getspecial] + end + + def test_getspecial_numbered_match_2 + assert_compiles '"world"', %q{ + def test(str) + str =~ /(hello) (world)/ + $2 + end + test("hello world") + }, insns: [:getspecial] + end + + def test_getspecial_numbered_match_nonexistent + assert_compiles 'nil', %q{ + def test(str) + str =~ /(hello)/ + $2 + end + test("hello world") + }, insns: [:getspecial] + end + + def test_getspecial_no_match + assert_compiles 'nil', %q{ + def test(str) + str =~ /xyz/ + $& + end + test("hello world") + }, insns: [:getspecial] + end + + def test_getspecial_complex_pattern + assert_compiles '"123"', %q{ + def test(str) + str =~ /(\d+)/ + $1 + end + test("abc123def") + }, insns: [:getspecial] + end + + def test_getspecial_multiple_groups + assert_compiles '"456"', %q{ + def test(str) + str =~ /(\d+)-(\d+)/ + $2 + end + test("123-456") + }, insns: [:getspecial] + end + # tool/ruby_vm/views/*.erb relies on the zjit instructions a) being contiguous and # b) being reliably ordered after all the other instructions. 
def test_instruction_order diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index ba7555485a3cd8..a096f3fad69069 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -10,7 +10,7 @@ use crate::state::ZJITState; use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::compile_time_ns}; use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr}; use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SP}; -use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SELF_PARAM_IDX}; +use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SpecialBackrefSymbol, SELF_PARAM_IDX}; use crate::hir::{Const, FrameState, Function, Insn, InsnId}; use crate::hir_type::{types, Type}; use crate::options::get_option; @@ -378,6 +378,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::PutSpecialObject { value_type } => gen_putspecialobject(asm, *value_type), Insn::AnyToString { val, str, state } => gen_anytostring(asm, opnd!(val), opnd!(str), &function.frame_state(*state))?, Insn::Defined { op_type, obj, pushval, v, state } => gen_defined(jit, asm, *op_type, *obj, *pushval, opnd!(v), &function.frame_state(*state))?, + Insn::GetSpecialSymbol { symbol_type, state: _ } => gen_getspecial_symbol(asm, *symbol_type), + Insn::GetSpecialNumber { nth, state } => gen_getspecial_number(asm, *nth, &function.frame_state(*state)), &Insn::IncrCounter(counter) => return Some(gen_incr_counter(asm, counter)), Insn::ObjToString { val, cd, state, .. } => gen_objtostring(jit, asm, opnd!(val), *cd, &function.frame_state(*state))?, Insn::ArrayExtend { .. 
} @@ -640,6 +642,37 @@ fn gen_putspecialobject(asm: &mut Assembler, value_type: SpecialObjectType) -> O asm_ccall!(asm, rb_vm_get_special_object, ep_reg, Opnd::UImm(u64::from(value_type))) } +fn gen_getspecial_symbol(asm: &mut Assembler, symbol_type: SpecialBackrefSymbol) -> Opnd { + // Fetch a "special" backref based on the symbol type + + let backref = asm_ccall!(asm, rb_backref_get,); + + match symbol_type { + SpecialBackrefSymbol::LastMatch => { + asm_ccall!(asm, rb_reg_last_match, backref) + } + SpecialBackrefSymbol::PreMatch => { + asm_ccall!(asm, rb_reg_match_pre, backref) + } + SpecialBackrefSymbol::PostMatch => { + asm_ccall!(asm, rb_reg_match_post, backref) + } + SpecialBackrefSymbol::LastGroup => { + asm_ccall!(asm, rb_reg_match_last, backref) + } + } +} + +fn gen_getspecial_number(asm: &mut Assembler, nth: u64, state: &FrameState) -> Opnd { + // Fetch the N-th match from the last backref based on type shifted by 1 + + let backref = asm_ccall!(asm, rb_backref_get,); + + gen_prepare_call_with_gc(asm, state); + + asm_ccall!(asm, rb_reg_nth_match, Opnd::Imm((nth >> 1).try_into().unwrap()), backref) +} + /// Compile an interpreter entry block to be inserted into an ISEQ fn gen_entry_prologue(asm: &mut Assembler, iseq: IseqPtr) { asm_comment!(asm, "ZJIT entry point: {}", iseq_get_location(iseq, 0)); diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index b6e18e73562857..c88965f891ac59 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -321,6 +321,29 @@ impl From for u32 { } } +/// Special regex backref symbol types +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum SpecialBackrefSymbol { + LastMatch, // $& + PreMatch, // $` + PostMatch, // $' + LastGroup, // $+ +} + +impl TryFrom for SpecialBackrefSymbol { + type Error = String; + + fn try_from(value: u8) -> Result { + match value as char { + '&' => Ok(SpecialBackrefSymbol::LastMatch), + '`' => Ok(SpecialBackrefSymbol::PreMatch), + '\'' => Ok(SpecialBackrefSymbol::PostMatch), + '+' => 
Ok(SpecialBackrefSymbol::LastGroup), + c => Err(format!("invalid backref symbol: '{}'", c)), + } + } +} + /// Print adaptor for [`Const`]. See [`PtrPrintMap`]. struct ConstPrinter<'a> { inner: &'a Const, @@ -415,6 +438,7 @@ pub enum SideExitReason { PatchPoint(Invariant), CalleeSideExit, ObjToStringFallback, + UnknownSpecialVariable(u64), } impl std::fmt::Display for SideExitReason { @@ -494,6 +518,8 @@ pub enum Insn { GetLocal { level: u32, ep_offset: u32 }, /// Set a local variable in a higher scope or the heap SetLocal { level: u32, ep_offset: u32, val: InsnId }, + GetSpecialSymbol { symbol_type: SpecialBackrefSymbol, state: InsnId }, + GetSpecialNumber { nth: u64, state: InsnId }, /// Own a FrameState so that instructions can look up their dominating FrameState when /// generating deopt side-exits and frame reconstruction metadata. Does not directly generate @@ -774,6 +800,8 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::SetGlobal { id, val, .. } => write!(f, "SetGlobal :{}, {val}", id.contents_lossy()), Insn::GetLocal { level, ep_offset } => write!(f, "GetLocal l{level}, EP@{ep_offset}"), Insn::SetLocal { val, level, ep_offset } => write!(f, "SetLocal l{level}, EP@{ep_offset}, {val}"), + Insn::GetSpecialSymbol { symbol_type, .. } => write!(f, "GetSpecialSymbol {symbol_type:?}"), + Insn::GetSpecialNumber { nth, .. } => write!(f, "GetSpecialNumber {nth}"), Insn::ToArray { val, .. } => write!(f, "ToArray {val}"), Insn::ToNewArray { val, .. } => write!(f, "ToNewArray {val}"), Insn::ArrayExtend { left, right, .. 
} => write!(f, "ArrayExtend {left}, {right}"), @@ -1221,6 +1249,8 @@ impl Function { &GetIvar { self_val, id, state } => GetIvar { self_val: find!(self_val), id, state }, &SetIvar { self_val, id, val, state } => SetIvar { self_val: find!(self_val), id, val: find!(val), state }, &SetLocal { val, ep_offset, level } => SetLocal { val: find!(val), ep_offset, level }, + &GetSpecialSymbol { symbol_type, state } => GetSpecialSymbol { symbol_type, state }, + &GetSpecialNumber { nth, state } => GetSpecialNumber { nth, state }, &ToArray { val, state } => ToArray { val: find!(val), state }, &ToNewArray { val, state } => ToNewArray { val: find!(val), state }, &ArrayExtend { left, right, state } => ArrayExtend { left: find!(left), right: find!(right), state }, @@ -1306,6 +1336,8 @@ impl Function { Insn::ArrayMax { .. } => types::BasicObject, Insn::GetGlobal { .. } => types::BasicObject, Insn::GetIvar { .. } => types::BasicObject, + Insn::GetSpecialSymbol { .. } => types::BasicObject, + Insn::GetSpecialNumber { .. } => types::BasicObject, Insn::ToNewArray { .. } => types::ArrayExact, Insn::ToArray { .. } => types::ArrayExact, Insn::ObjToString { .. } => types::BasicObject, @@ -1995,6 +2027,8 @@ impl Function { worklist.push_back(state); } &Insn::GetGlobal { state, .. } | + &Insn::GetSpecialSymbol { state, .. } | + &Insn::GetSpecialNumber { state, .. } | &Insn::SideExit { state, .. 
} => worklist.push_back(state), } } @@ -3325,6 +3359,29 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let anytostring = fun.push_insn(block, Insn::AnyToString { val, str, state: exit_id }); state.stack_push(anytostring); } + YARVINSN_getspecial => { + let key = get_arg(pc, 0).as_u64(); + let svar = get_arg(pc, 1).as_u64(); + + let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); + + if svar == 0 { + // TODO: Handle non-backref + fun.push_insn(block, Insn::SideExit { state: exit_id, reason: SideExitReason::UnknownSpecialVariable(key) }); + // End the block + break; + } else if svar & 0x01 != 0 { + // Handle symbol backrefs like $&, $`, $', $+ + let shifted_svar: u8 = (svar >> 1).try_into().unwrap(); + let symbol_type = SpecialBackrefSymbol::try_from(shifted_svar).expect("invalid backref symbol"); + let result = fun.push_insn(block, Insn::GetSpecialSymbol { symbol_type, state: exit_id }); + state.stack_push(result); + } else { + // Handle number backrefs like $1, $2, $3 + let result = fun.push_insn(block, Insn::GetSpecialNumber { nth: svar, state: exit_id }); + state.stack_push(result); + } + } _ => { // Unknown opcode; side-exit into the interpreter let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });