diff --git a/encoding.c b/encoding.c index 78964476c001d3..0a5d61ee4a1692 100644 --- a/encoding.c +++ b/encoding.c @@ -1394,10 +1394,8 @@ enc_names(VALUE self) static VALUE enc_list(VALUE klass) { - VALUE ary = rb_ary_new2(ENCODING_LIST_CAPA); VALUE list = RUBY_ATOMIC_VALUE_LOAD(rb_encoding_list); - rb_ary_replace(ary, list); - return ary; + return rb_ary_dup(list); } /* diff --git a/signal.c b/signal.c index 9edac5a7896fb7..f6b62b30147d87 100644 --- a/signal.c +++ b/signal.c @@ -877,7 +877,9 @@ static void check_stack_overflow(int sig, const void *addr) { int ruby_stack_overflowed_p(const rb_thread_t *, const void *); - rb_thread_t *th = GET_THREAD(); + rb_execution_context_t *ec = rb_current_execution_context(false); + if (!ec) return; + rb_thread_t *th = rb_ec_thread_ptr(ec); if (ruby_stack_overflowed_p(th, addr)) { reset_sigmask(sig); rb_ec_stack_overflow(th->ec, 1); diff --git a/test/-ext-/gvl/test_last_thread.rb b/test/-ext-/gvl/test_last_thread.rb index f63d98aab16f02..bcda0e33856c51 100644 --- a/test/-ext-/gvl/test_last_thread.rb +++ b/test/-ext-/gvl/test_last_thread.rb @@ -15,7 +15,7 @@ def test_last_thread t1 = Time.now t = t1 - t0 - assert_in_delta(1.0, t, 0.18) + assert_in_delta(1.0, t, 0.8) end; end end diff --git a/test/ruby/test_file_exhaustive.rb b/test/ruby/test_file_exhaustive.rb index bb64483a795203..b20b597256a125 100644 --- a/test/ruby/test_file_exhaustive.rb +++ b/test/ruby/test_file_exhaustive.rb @@ -1477,6 +1477,7 @@ def test_flock_shared end def test_test + omit 'timestamp check is unstable on macOS' if RUBY_PLATFORM =~ /darwin/ fn1 = regular_file hardlinkfile sleep(1.1) diff --git a/test/ruby/test_numeric.rb b/test/ruby/test_numeric.rb index ab492743f6ec34..cf408fac0af411 100644 --- a/test/ruby/test_numeric.rb +++ b/test/ruby/test_numeric.rb @@ -22,10 +22,12 @@ def test_coerce assert_raise_with_message(TypeError, /:\u{3042}/) {1&:"\u{3042}"} assert_raise_with_message(TypeError, /:\u{3042}/) {1|:"\u{3042}"} assert_raise_with_message(TypeError, /:\u{3042}/) {1^:"\u{3042}"} - assert_raise_with_message(TypeError, /:"\\u3042"/) {1+:"\u{3042}"} - assert_raise_with_message(TypeError, /:"\\u3042"/) {1&:"\u{3042}"} - assert_raise_with_message(TypeError, /:"\\u3042"/) {1|:"\u{3042}"} - assert_raise_with_message(TypeError, /:"\\u3042"/) {1^:"\u{3042}"} + EnvUtil.with_default_internal(Encoding::US_ASCII) do + assert_raise_with_message(TypeError, /:"\\u3042"/) {1+:"\u{3042}"} + assert_raise_with_message(TypeError, /:"\\u3042"/) {1&:"\u{3042}"} + assert_raise_with_message(TypeError, /:"\\u3042"/) {1|:"\u{3042}"} + assert_raise_with_message(TypeError, /:"\\u3042"/) {1^:"\u{3042}"} + end assert_raise_with_message(TypeError, /:\u{3044}/) {1+"\u{3044}".to_sym} assert_raise_with_message(TypeError, /:\u{3044}/) {1&"\u{3044}".to_sym} assert_raise_with_message(TypeError, /:\u{3044}/) {1|"\u{3044}".to_sym} diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 7885acc87e6112..9f1e03e6499782 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -1036,10 +1036,12 @@ def test_match_without_regexp [Encoding::UTF_8, Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc| idx = key.encode(enc) pat = /#{idx}/ - test.call {|m| assert_raise_with_message(IndexError, pat, bug10877) {m[idx]} } - test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.offset(idx)} } - test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.begin(idx)} } - test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.end(idx)} } + EnvUtil.with_default_internal(enc) do + test.call {|m| assert_raise_with_message(IndexError, pat, bug10877) {m[idx]} } + test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.offset(idx)} } + test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.begin(idx)} } + test.call {|m| assert_raise_with_message(IndexError, pat, bug18160) {m.end(idx)} } + end end test.call {|m| assert_equal(/a/, m.regexp) } test.call {|m| assert_equal("abc", m.string) } @@ -1988,7 +1990,7 @@ def test_bug_20886 end def per_instance_redos_test(global_timeout, per_instance_timeout, expected_timeout) - assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}") + assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}", timeout: 60) global_timeout = #{ EnvUtil.apply_timeout_scale(global_timeout).inspect } per_instance_timeout = #{ (per_instance_timeout ? EnvUtil.apply_timeout_scale(per_instance_timeout) : nil).inspect } expected_timeout = #{ EnvUtil.apply_timeout_scale(expected_timeout).inspect } diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index e3880a2828cf2e..d30af737c3bde8 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -1221,6 +1221,106 @@ def test = 1.nil? }, insns: [:opt_nil_p] end + def test_getspecial_last_match + assert_compiles '"hello"', %q{ + def test(str) + str =~ /hello/ + $& + end + test("hello world") + }, insns: [:getspecial] + end + + def test_getspecial_match_pre + assert_compiles '"hello "', %q{ + def test(str) + str =~ /world/ + $` + end + test("hello world") + }, insns: [:getspecial] + end + + def test_getspecial_match_post + assert_compiles '" world"', %q{ + def test(str) + str =~ /hello/ + $' + end + test("hello world") + }, insns: [:getspecial] + end + + def test_getspecial_match_last_group + assert_compiles '"world"', %q{ + def test(str) + str =~ /(hello) (world)/ + $+ + end + test("hello world") + }, insns: [:getspecial] + end + + def test_getspecial_numbered_match_1 + assert_compiles '"hello"', %q{ + def test(str) + str =~ /(hello) (world)/ + $1 + end + test("hello world") + }, insns: [:getspecial] + end + + def test_getspecial_numbered_match_2 + assert_compiles '"world"', %q{ + def test(str) + str =~ /(hello) (world)/ + $2 + end + test("hello world") + }, insns: [:getspecial] + end + + def test_getspecial_numbered_match_nonexistent + assert_compiles 'nil', %q{ + def test(str) + str =~ /(hello)/ + $2 + end + test("hello world") + }, insns: [:getspecial] + end + + def test_getspecial_no_match + assert_compiles 'nil', %q{ + def test(str) + str =~ /xyz/ + $& + end + test("hello world") + }, insns: [:getspecial] + end + + def test_getspecial_complex_pattern + assert_compiles '"123"', %q{ + def test(str) + str =~ /(\d+)/ + $1 + end + test("abc123def") + }, insns: [:getspecial] + end + + def test_getspecial_multiple_groups + assert_compiles '"456"', %q{ + def test(str) + str =~ /(\d+)-(\d+)/ + $2 + end + test("123-456") + }, insns: [:getspecial] + end + # tool/ruby_vm/views/*.erb relies on the zjit instructions a) being contiguous and # b) being reliably ordered after all the other instructions. def test_instruction_order diff --git a/test/socket/test_unix.rb b/test/socket/test_unix.rb index 3e7d85befcc1ad..0d284b29264618 100644 --- a/test/socket/test_unix.rb +++ b/test/socket/test_unix.rb @@ -146,6 +146,7 @@ def test_fd_passing_n2 end def test_fd_passing_race_condition + omit 'randomly crashes on macOS' if RUBY_PLATFORM =~ /darwin/ r1, w = IO.pipe s1, s2 = UNIXSocket.pair s1.nonblock = s2.nonblock = true diff --git a/tool/lib/core_assertions.rb b/tool/lib/core_assertions.rb index 00d180fa8ce088..47cc6574c878d1 100644 --- a/tool/lib/core_assertions.rb +++ b/tool/lib/core_assertions.rb @@ -501,13 +501,10 @@ def assert_raise_with_message(exception, expected, msg = nil, &block) assert = :assert_match end - ex = m = nil - EnvUtil.with_default_internal(of: expected) do - ex = assert_raise(exception, msg || proc {"Exception(#{exception}) with message matches to #{expected.inspect}"}) do - yield - end - m = ex.message + ex = assert_raise(exception, msg || proc {"Exception(#{exception}) with message matches to #{expected.inspect}"}) do + yield end + m = ex.message msg = message(msg, "") {"Expected Exception(#{exception}) was raised, but the message doesn't match"} if assert == :assert_equal diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 43fde7db7fd629..a096f3fad69069 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -10,7 +10,7 @@ use crate::state::ZJITState; use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::compile_time_ns}; use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr}; use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SP}; -use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SELF_PARAM_IDX}; +use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SpecialBackrefSymbol, SELF_PARAM_IDX}; use crate::hir::{Const, FrameState, Function, Insn, InsnId}; use crate::hir_type::{types, Type}; use crate::options::get_option; @@ -378,6 +378,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::PutSpecialObject { value_type } => gen_putspecialobject(asm, *value_type), Insn::AnyToString { val, str, state } => gen_anytostring(asm, opnd!(val), opnd!(str), &function.frame_state(*state))?, Insn::Defined { op_type, obj, pushval, v, state } => gen_defined(jit, asm, *op_type, *obj, *pushval, opnd!(v), &function.frame_state(*state))?, + Insn::GetSpecialSymbol { symbol_type, state: _ } => gen_getspecial_symbol(asm, *symbol_type), + Insn::GetSpecialNumber { nth, state } => gen_getspecial_number(asm, *nth, &function.frame_state(*state)), &Insn::IncrCounter(counter) => return Some(gen_incr_counter(asm, counter)), Insn::ObjToString { val, cd, state, .. } => gen_objtostring(jit, asm, opnd!(val), *cd, &function.frame_state(*state))?, Insn::ArrayExtend { .. } @@ -640,6 +642,37 @@ fn gen_putspecialobject(asm: &mut Assembler, value_type: SpecialObjectType) -> O asm_ccall!(asm, rb_vm_get_special_object, ep_reg, Opnd::UImm(u64::from(value_type))) } +fn gen_getspecial_symbol(asm: &mut Assembler, symbol_type: SpecialBackrefSymbol) -> Opnd { + // Fetch a "special" backref based on the symbol type + + let backref = asm_ccall!(asm, rb_backref_get,); + + match symbol_type { + SpecialBackrefSymbol::LastMatch => { + asm_ccall!(asm, rb_reg_last_match, backref) + } + SpecialBackrefSymbol::PreMatch => { + asm_ccall!(asm, rb_reg_match_pre, backref) + } + SpecialBackrefSymbol::PostMatch => { + asm_ccall!(asm, rb_reg_match_post, backref) + } + SpecialBackrefSymbol::LastGroup => { + asm_ccall!(asm, rb_reg_match_last, backref) + } + } +} + +fn gen_getspecial_number(asm: &mut Assembler, nth: u64, state: &FrameState) -> Opnd { + // Fetch the N-th match from the last backref based on type shifted by 1 + + let backref = asm_ccall!(asm, rb_backref_get,); + + gen_prepare_call_with_gc(asm, state); + + asm_ccall!(asm, rb_reg_nth_match, Opnd::Imm((nth >> 1).try_into().unwrap()), backref) +} + /// Compile an interpreter entry block to be inserted into an ISEQ fn gen_entry_prologue(asm: &mut Assembler, iseq: IseqPtr) { asm_comment!(asm, "ZJIT entry point: {}", iseq_get_location(iseq, 0)); @@ -1313,6 +1346,7 @@ fn compile_iseq(iseq: IseqPtr) -> Option { if !get_option!(disable_hir_opt) { function.optimize(); } + function.dump_hir(); #[cfg(debug_assertions)] if let Err(err) = function.validate() { debug!("ZJIT: compile_iseq: {err:?}"); @@ -1396,13 +1430,12 @@ c_callable! { /// instructions, so this should be used primarily for cb.has_dropped_bytes() situations. fn function_stub_hit(iseq: IseqPtr, branch_ptr: *const c_void, ec: EcPtr, sp: *mut VALUE) -> *const u8 { with_vm_lock(src_loc!(), || { - /// gen_push_frame() doesn't set PC and SP, so we need to set them before exit - fn set_pc_and_sp(iseq: IseqPtr, ec: EcPtr, sp: *mut VALUE) { - let cfp = unsafe { get_ec_cfp(ec) }; - let pc = unsafe { rb_iseq_pc_at_idx(iseq, 0) }; // TODO: handle opt_pc once supported - unsafe { rb_set_cfp_pc(cfp, pc) }; - unsafe { rb_set_cfp_sp(cfp, sp) }; - } + // gen_push_frame() doesn't set PC and SP, so we need to set them before exit. + // function_stub_hit_body() may allocate and call gc_validate_pc(), so we always set PC. + let cfp = unsafe { get_ec_cfp(ec) }; + let pc = unsafe { rb_iseq_pc_at_idx(iseq, 0) }; // TODO: handle opt_pc once supported + unsafe { rb_set_cfp_pc(cfp, pc) }; + unsafe { rb_set_cfp_sp(cfp, sp) }; // If we already know we can't compile the ISEQ, fail early without cb.mark_all_executable(). // TODO: Alan thinks the payload status part of this check can happen without the VM lock, since the whole @@ -1411,7 +1444,6 @@ c_callable! { let payload = get_or_create_iseq_payload(iseq); if cb.has_dropped_bytes() || payload.status == IseqStatus::CantCompile { // Exit to the interpreter - set_pc_and_sp(iseq, ec, sp); return ZJITState::get_exit_code().raw_ptr(cb); } @@ -1421,7 +1453,6 @@ c_callable! { code_ptr } else { // Exit to the interpreter - set_pc_and_sp(iseq, ec, sp); ZJITState::get_exit_code() }; cb.mark_all_executable(); diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index c93a6858f18fab..c88965f891ac59 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -321,6 +321,29 @@ impl From for u32 { } } +/// Special regex backref symbol types +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum SpecialBackrefSymbol { + LastMatch, // $& + PreMatch, // $` + PostMatch, // $' + LastGroup, // $+ +} + +impl TryFrom for SpecialBackrefSymbol { + type Error = String; + + fn try_from(value: u8) -> Result { + match value as char { + '&' => Ok(SpecialBackrefSymbol::LastMatch), + '`' => Ok(SpecialBackrefSymbol::PreMatch), + '\'' => Ok(SpecialBackrefSymbol::PostMatch), + '+' => Ok(SpecialBackrefSymbol::LastGroup), + c => Err(format!("invalid backref symbol: '{}'", c)), + } + } +} + /// Print adaptor for [`Const`]. See [`PtrPrintMap`]. struct ConstPrinter<'a> { inner: &'a Const, @@ -415,6 +438,7 @@ pub enum SideExitReason { PatchPoint(Invariant), CalleeSideExit, ObjToStringFallback, + UnknownSpecialVariable(u64), } impl std::fmt::Display for SideExitReason { @@ -494,6 +518,8 @@ pub enum Insn { GetLocal { level: u32, ep_offset: u32 }, /// Set a local variable in a higher scope or the heap SetLocal { level: u32, ep_offset: u32, val: InsnId }, + GetSpecialSymbol { symbol_type: SpecialBackrefSymbol, state: InsnId }, + GetSpecialNumber { nth: u64, state: InsnId }, /// Own a FrameState so that instructions can look up their dominating FrameState when /// generating deopt side-exits and frame reconstruction metadata. Does not directly generate @@ -774,6 +800,8 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::SetGlobal { id, val, .. } => write!(f, "SetGlobal :{}, {val}", id.contents_lossy()), Insn::GetLocal { level, ep_offset } => write!(f, "GetLocal l{level}, EP@{ep_offset}"), Insn::SetLocal { val, level, ep_offset } => write!(f, "SetLocal l{level}, EP@{ep_offset}, {val}"), + Insn::GetSpecialSymbol { symbol_type, .. } => write!(f, "GetSpecialSymbol {symbol_type:?}"), + Insn::GetSpecialNumber { nth, .. } => write!(f, "GetSpecialNumber {nth}"), Insn::ToArray { val, .. } => write!(f, "ToArray {val}"), Insn::ToNewArray { val, .. } => write!(f, "ToNewArray {val}"), Insn::ArrayExtend { left, right, .. } => write!(f, "ArrayExtend {left}, {right}"), @@ -1221,6 +1249,8 @@ impl Function { &GetIvar { self_val, id, state } => GetIvar { self_val: find!(self_val), id, state }, &SetIvar { self_val, id, val, state } => SetIvar { self_val: find!(self_val), id, val: find!(val), state }, &SetLocal { val, ep_offset, level } => SetLocal { val: find!(val), ep_offset, level }, + &GetSpecialSymbol { symbol_type, state } => GetSpecialSymbol { symbol_type, state }, + &GetSpecialNumber { nth, state } => GetSpecialNumber { nth, state }, &ToArray { val, state } => ToArray { val: find!(val), state }, &ToNewArray { val, state } => ToNewArray { val: find!(val), state }, &ArrayExtend { left, right, state } => ArrayExtend { left: find!(left), right: find!(right), state }, @@ -1306,6 +1336,8 @@ impl Function { Insn::ArrayMax { .. } => types::BasicObject, Insn::GetGlobal { .. } => types::BasicObject, Insn::GetIvar { .. } => types::BasicObject, + Insn::GetSpecialSymbol { .. } => types::BasicObject, + Insn::GetSpecialNumber { .. } => types::BasicObject, Insn::ToNewArray { .. } => types::ArrayExact, Insn::ToArray { .. } => types::ArrayExact, Insn::ObjToString { .. } => types::BasicObject, @@ -1995,6 +2027,8 @@ impl Function { worklist.push_back(state); } &Insn::GetGlobal { state, .. } | + &Insn::GetSpecialSymbol { state, .. } | + &Insn::GetSpecialNumber { state, .. } | &Insn::SideExit { state, .. } => worklist.push_back(state), } } @@ -2143,7 +2177,10 @@ impl Function { #[cfg(debug_assertions)] self.assert_validates(); self.eliminate_dead_code(); #[cfg(debug_assertions)] self.assert_validates(); + } + /// Dump HIR passed to codegen if specified by options. + pub fn dump_hir(&self) { // Dump HIR after optimization match get_option!(dump_hir_opt) { Some(DumpHIR::WithoutSnapshot) => println!("Optimized HIR:\n{}", FunctionPrinter::without_snapshot(&self)), @@ -2157,7 +2194,6 @@ impl Function { } } - /// Validates the following: /// 1. Basic block jump args match parameter arity. /// 2. Every terminator must be in the last position. @@ -3323,6 +3359,29 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let anytostring = fun.push_insn(block, Insn::AnyToString { val, str, state: exit_id }); state.stack_push(anytostring); } + YARVINSN_getspecial => { + let key = get_arg(pc, 0).as_u64(); + let svar = get_arg(pc, 1).as_u64(); + + let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); + + if svar == 0 { + // TODO: Handle non-backref + fun.push_insn(block, Insn::SideExit { state: exit_id, reason: SideExitReason::UnknownSpecialVariable(key) }); + // End the block + break; + } else if svar & 0x01 != 0 { + // Handle symbol backrefs like $&, $`, $', $+ + let shifted_svar: u8 = (svar >> 1).try_into().unwrap(); + let symbol_type = SpecialBackrefSymbol::try_from(shifted_svar).expect("invalid backref symbol"); + let result = fun.push_insn(block, Insn::GetSpecialSymbol { symbol_type, state: exit_id }); + state.stack_push(result); + } else { + // Handle number backrefs like $1, $2, $3 + let result = fun.push_insn(block, Insn::GetSpecialNumber { nth: svar, state: exit_id }); + state.stack_push(result); + } + } _ => { // Unknown opcode; side-exit into the interpreter let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });