diff --git a/doc/strscan/strscan.md b/doc/strscan/strscan.md index c0bf5413623b07..c4ab2034624947 100644 --- a/doc/strscan/strscan.md +++ b/doc/strscan/strscan.md @@ -37,7 +37,7 @@ Some examples here assume that certain helper methods are defined: - `match_values_cleared?(scanner)`: Returns whether the scanner's [match values][9] are cleared. -See examples at [helper methods](doc/strscan/helper_methods.md). +See examples at [helper methods](helper_methods.md). ## The `StringScanner` \Object diff --git a/enc/Makefile.in b/enc/Makefile.in index 17888f8f9b5e10..2a3c45169fbb22 100644 --- a/enc/Makefile.in +++ b/enc/Makefile.in @@ -31,6 +31,7 @@ BUILTIN_ENCS = enc/ascii.c enc/us_ascii.c\ enc/unicode.c enc/utf_8.c BUILTIN_TRANSES = enc/trans/newline.trans +BUILTIN_TRANS_CSRCS = $(BUILTIN_TRANSES:.trans=.c) RUBY_SO_NAME = @RUBY_SO_NAME@ LIBRUBY = @LIBRUBY@ diff --git a/enc/depend b/enc/depend index a458b63887e858..4bf97dc880f195 100644 --- a/enc/depend +++ b/enc/depend @@ -157,15 +157,15 @@ clean: % end % unless inplace $(Q)$(RM) enc/unicode/*/casefold.h enc/unicode/*/name2ctype.h - $(Q)$(RM) enc/jis/props.h - -$(Q)$(RMDIR) enc/unicode<%=ignore_error%> + $(Q)$(RM) enc/trans/newline.c enc/jis/props.h + -$(Q)$(RMDIR) enc/trnas enc/unicode<%=ignore_error%> % end % workdirs.reverse_each do|d| -$(Q)$(RMDIR) <%=pathrep[d]%><%=ignore_error%> % end clean-srcs: - $(Q)$(RM) <%=pathrep['$(TRANSCSRCS)']%> + $(Q)$(RM) <%=pathrep['$(TRANSCSRCS)']%> <%=pathrep['$(BUILTIN_TRANS_CSRCS)']%> -$(Q)$(RMDIR) <%=pathrep['enc/trans']%><%=ignore_error%> $(Q)$(RM) enc/unicode/*/casefold.h enc/unicode/*/name2ctype.h $(Q)$(RM) enc/jis/props.h diff --git a/gc/default/default.c b/gc/default/default.c index 2d862b0b212489..54b93dc8d07cf6 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -6197,7 +6197,7 @@ rb_gc_impl_writebarrier_remember(void *objspace_ptr, VALUE obj) struct rb_gc_object_metadata_names { // Must be ID only ID ID_wb_protected, ID_age, ID_old, ID_uncollectible, ID_marking, - ID_marked, ID_pinned, ID_object_id, ID_shareable; + ID_marked, ID_pinned, ID_remembered, ID_object_id, ID_shareable; }; #define RB_GC_OBJECT_METADATA_ENTRY_COUNT (sizeof(struct rb_gc_object_metadata_names) / sizeof(ID)) @@ -6219,6 +6219,7 @@ rb_gc_impl_object_metadata(void *objspace_ptr, VALUE obj) I(marking); I(marked); I(pinned); + I(remembered); I(object_id); I(shareable); #undef I @@ -6238,6 +6239,7 @@ rb_gc_impl_object_metadata(void *objspace_ptr, VALUE obj) if (RVALUE_MARKING(objspace, obj)) SET_ENTRY(marking, Qtrue); if (RVALUE_MARKED(objspace, obj)) SET_ENTRY(marked, Qtrue); if (RVALUE_PINNED(objspace, obj)) SET_ENTRY(pinned, Qtrue); + if (RVALUE_REMEMBERED(objspace, obj)) SET_ENTRY(remembered, Qtrue); if (rb_obj_id_p(obj)) SET_ENTRY(object_id, rb_obj_id(obj)); if (FL_TEST(obj, FL_SHAREABLE)) SET_ENTRY(shareable, Qtrue); diff --git a/internal/basic_operators.h b/internal/basic_operators.h index 5dc8d7fe8d69fb..493d2fa7f733d7 100644 --- a/internal/basic_operators.h +++ b/internal/basic_operators.h @@ -24,6 +24,7 @@ enum ruby_basic_operators { BOP_SUCC, BOP_GT, BOP_GE, + BOP_GTGT, BOP_NOT, BOP_NEQ, BOP_MATCH, diff --git a/jit.c b/jit.c index 43c932e5a00ffa..dd28bd6ad02aef 100644 --- a/jit.c +++ b/jit.c @@ -765,3 +765,5 @@ rb_yarv_str_eql_internal(VALUE str1, VALUE str2) // We wrap this since it's static inline return rb_str_eql_internal(str1, str2); } + +void rb_jit_str_concat_codepoint(VALUE str, VALUE codepoint); diff --git a/string.c b/string.c index 2dec3a11e61246..c794b36748e6e6 100644 --- a/string.c +++ b/string.c @@ -12580,9 +12580,9 @@ rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc) return rb_enc_interned_str(ptr, strlen(ptr), enc); } -#if USE_YJIT +#if USE_YJIT || USE_ZJIT void -rb_yjit_str_concat_codepoint(VALUE str, VALUE codepoint) +rb_jit_str_concat_codepoint(VALUE str, VALUE codepoint) { if (RB_LIKELY(ENCODING_GET_INLINED(str) == rb_ascii8bit_encindex())) { ssize_t code = RB_NUM2SSIZE(codepoint); diff --git a/test/ruby/test_class.rb b/test/ruby/test_class.rb index f40817e7a1ef54..10b7655e9a5562 100644 --- a/test/ruby/test_class.rb +++ b/test/ruby/test_class.rb @@ -887,4 +887,34 @@ def test_method_table_assignment_just_after_class_init class C; end end; end + + def test_singleton_cc_invalidation + assert_separately([], "#{<<~"begin;"}\n#{<<~"end;"}") + begin; + class T + def hi + "hi" + end + end + + t = T.new + t.singleton_class + + def hello(t) + t.hi + end + + 5.times do + hello(t) # populate inline cache on `t.singleton_class`. + end + + class T + remove_method :hi # invalidate `t.singleton_class` ccs for `hi` + end + + assert_raise NoMethodError do + hello(t) + end + end; + end end diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index d821d8ad5cca69..6609bb2461c31d 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -3017,7 +3017,35 @@ def test test Ractor.new { test }.value - } + }, call_threshold: 2 + end + + def test_ivar_get_with_already_multi_ractor_mode + assert_compiles '42', %q{ + class Foo + def self.set_bar + @bar = [] # needs to be a ractor unshareable object + end + + def self.bar + @bar + rescue Ractor::IsolationError + 42 + end + end + + Foo.set_bar + r = Ractor.new { + Ractor.receive + Foo.bar + } + + Foo.bar + Foo.bar + + r << :go + r.value + }, call_threshold: 2 end def test_ivar_set_with_multi_ractor_mode diff --git a/vm.c b/vm.c index 1a8a6d059b569b..b15ee0ba236b57 100644 --- a/vm.c +++ b/vm.c @@ -2444,6 +2444,7 @@ vm_init_redefined_flag(void) OP(GT, GT), (C(Integer), C(Float)); OP(GE, GE), (C(Integer), C(Float)); OP(LTLT, LTLT), (C(String), C(Array)); + OP(GTGT, GTGT), (C(Integer)); OP(AREF, AREF), (C(Array), C(Hash), C(Integer)); OP(ASET, ASET), (C(Array), C(Hash)); OP(Length, LENGTH), (C(Array), C(String), C(Hash)); diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index 67a461cd16d95c..9c28177a6054b2 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -273,7 +273,7 @@ fn main() { .allowlist_function("rb_yjit_sendish_sp_pops") .allowlist_function("rb_yjit_invokeblock_sp_pops") .allowlist_function("rb_yjit_set_exception_return") - .allowlist_function("rb_yjit_str_concat_codepoint") + .allowlist_function("rb_jit_str_concat_codepoint") .allowlist_type("rstring_offsets") .allowlist_function("rb_assert_holding_vm_lock") .allowlist_function("rb_jit_shape_too_complex_p") diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index a426ad07737496..50762c64d3eceb 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -6244,7 +6244,7 @@ fn jit_rb_str_concat_codepoint( guard_object_is_fixnum(jit, asm, codepoint, StackOpnd(0)); - asm.ccall(rb_yjit_str_concat_codepoint as *const u8, vec![recv, codepoint]); + asm.ccall(rb_jit_str_concat_codepoint as *const u8, vec![recv, codepoint]); // The receiver is the return value, so we only need to pop the codepoint argument off the stack. // We can reuse the receiver slot in the stack as the return value. diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index cfaf48c3f0b68e..5562f73be26dab 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -123,7 +123,6 @@ extern "C" { pub fn rb_float_new(d: f64) -> VALUE; pub fn rb_hash_empty_p(hash: VALUE) -> VALUE; - pub fn rb_yjit_str_concat_codepoint(str: VALUE, codepoint: VALUE); pub fn rb_str_setbyte(str: VALUE, index: VALUE, value: VALUE) -> VALUE; pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE; pub fn rb_vm_concat_array(ary1: VALUE, ary2st: VALUE) -> VALUE; diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 66d4e5111d7bbd..6efef9c55c39c4 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -330,22 +330,23 @@ pub const BOP_NIL_P: ruby_basic_operators = 15; pub const BOP_SUCC: ruby_basic_operators = 16; pub const BOP_GT: ruby_basic_operators = 17; pub const BOP_GE: ruby_basic_operators = 18; -pub const BOP_NOT: ruby_basic_operators = 19; -pub const BOP_NEQ: ruby_basic_operators = 20; -pub const BOP_MATCH: ruby_basic_operators = 21; -pub const BOP_FREEZE: ruby_basic_operators = 22; -pub const BOP_UMINUS: ruby_basic_operators = 23; -pub const BOP_MAX: ruby_basic_operators = 24; -pub const BOP_MIN: ruby_basic_operators = 25; -pub const BOP_HASH: ruby_basic_operators = 26; -pub const BOP_CALL: ruby_basic_operators = 27; -pub const BOP_AND: ruby_basic_operators = 28; -pub const BOP_OR: ruby_basic_operators = 29; -pub const BOP_CMP: ruby_basic_operators = 30; -pub const BOP_DEFAULT: ruby_basic_operators = 31; -pub const BOP_PACK: ruby_basic_operators = 32; -pub const BOP_INCLUDE_P: ruby_basic_operators = 33; -pub const BOP_LAST_: ruby_basic_operators = 34; +pub const BOP_GTGT: ruby_basic_operators = 19; +pub const BOP_NOT: ruby_basic_operators = 20; +pub const BOP_NEQ: ruby_basic_operators = 21; +pub const BOP_MATCH: ruby_basic_operators = 22; +pub const BOP_FREEZE: ruby_basic_operators = 23; +pub const BOP_UMINUS: ruby_basic_operators = 24; +pub const BOP_MAX: ruby_basic_operators = 25; +pub const BOP_MIN: ruby_basic_operators = 26; +pub const BOP_HASH: ruby_basic_operators = 27; +pub const BOP_CALL: ruby_basic_operators = 28; +pub const BOP_AND: ruby_basic_operators = 29; +pub const BOP_OR: ruby_basic_operators = 30; +pub const BOP_CMP: ruby_basic_operators = 31; +pub const BOP_DEFAULT: ruby_basic_operators = 32; +pub const BOP_PACK: ruby_basic_operators = 33; +pub const BOP_INCLUDE_P: ruby_basic_operators = 34; +pub const BOP_LAST_: ruby_basic_operators = 35; pub type ruby_basic_operators = u32; pub type rb_serial_t = ::std::os::raw::c_ulonglong; pub const imemo_env: imemo_type = 0; @@ -1277,4 +1278,5 @@ extern "C" { ); pub fn rb_jit_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE; pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE; + pub fn rb_jit_str_concat_codepoint(str_: VALUE, codepoint: VALUE); } diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index fe082504b82302..0860c073cd17ed 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -278,6 +278,7 @@ fn main() { .allowlist_function("rb_jit_get_page_size") .allowlist_function("rb_jit_array_len") .allowlist_function("rb_jit_iseq_builtin_attrs") + .allowlist_function("rb_jit_str_concat_codepoint") .allowlist_function("rb_zjit_iseq_inspect") .allowlist_function("rb_zjit_iseq_insn_set") .allowlist_function("rb_zjit_local_id") @@ -325,6 +326,7 @@ fn main() { .allowlist_function("rb_attr_get") .allowlist_function("rb_ivar_defined") .allowlist_function("rb_ivar_get") + .allowlist_function("rb_ivar_get_at_no_ractor_check") .allowlist_function("rb_ivar_set") .allowlist_function("rb_mod_name") .allowlist_var("rb_vm_insn_count") diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index d17286f51fda25..a9bd57f368c59b 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -559,7 +559,8 @@ impl Assembler { asm.store(mem_out, SCRATCH0_OPND); } } - Insn::Lea { out, .. } => { + Insn::Lea { opnd, out } => { + *opnd = split_stack_membase(asm, *opnd, SCRATCH0_OPND, &stack_state); let mem_out = split_memory_write(out, SCRATCH0_OPND); asm.push_insn(insn); if let Some(mem_out) = mem_out { @@ -1804,4 +1805,20 @@ mod tests { "); assert_snapshot!(cb.hexdump(), @"4c8b55f84c8b5df04d8b5b024d0f441a4c895df8"); } + + #[test] + fn test_lea_split_memory_read() { + let (mut asm, mut cb) = setup_asm(); + + let opnd = Opnd::Mem(Mem { base: MemBase::Stack { stack_idx: 0, num_bits: 64 }, disp: 0, num_bits: 64 }); + let _ = asm.lea(opnd); + asm.compile_with_num_regs(&mut cb, 0); + + assert_disasm_snapshot!(cb.disasm(), @" + 0x0: mov r11, qword ptr [rbp - 8] + 0x4: lea r11, [r11] + 0x7: mov qword ptr [rbp - 8], r11 + "); + assert_snapshot!(cb.hexdump(), @"4c8b5df84d8d1b4c895df8"); + } } diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index f77b8cc4bf2ca6..bb19d7d8209ff8 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -349,6 +349,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::Const { val: Const::Value(val) } => gen_const_value(val), &Insn::Const { val: Const::CPtr(val) } => gen_const_cptr(val), &Insn::Const { val: Const::CInt64(val) } => gen_const_long(val), + &Insn::Const { val: Const::CUInt16(val) } => gen_const_uint16(val), Insn::Const { .. } => panic!("Unexpected Const in gen_insn: {insn}"), Insn::NewArray { elements, state } => gen_new_array(asm, opnds!(elements), &function.frame_state(*state)), Insn::NewHash { elements, state } => gen_new_hash(jit, asm, opnds!(elements), &function.frame_state(*state)), @@ -365,9 +366,10 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio // If it happens we abort the compilation for now Insn::StringConcat { strings, state, .. } if strings.is_empty() => return Err(*state), Insn::StringConcat { strings, state } => gen_string_concat(jit, asm, opnds!(strings), &function.frame_state(*state)), - &Insn::StringGetbyteFixnum { string, index } => gen_string_getbyte_fixnum(asm, opnd!(string), opnd!(index)), + &Insn::StringGetbyte { string, index } => gen_string_getbyte(asm, opnd!(string), opnd!(index)), Insn::StringSetbyteFixnum { string, index, value } => gen_string_setbyte_fixnum(asm, opnd!(string), opnd!(index), opnd!(value)), Insn::StringAppend { recv, other, state } => gen_string_append(jit, asm, opnd!(recv), opnd!(other), &function.frame_state(*state)), + Insn::StringAppendCodepoint { recv, other, state } => gen_string_append_codepoint(jit, asm, opnd!(recv), opnd!(other), &function.frame_state(*state)), Insn::StringIntern { val, state } => gen_intern(asm, opnd!(val), &function.frame_state(*state)), Insn::ToRegexp { opt, values, state } => gen_toregexp(jit, asm, *opt, opnds!(values), &function.frame_state(*state)), Insn::Param => unreachable!("block.insns should not have Insn::Param"), @@ -408,6 +410,12 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio let shift_amount = function.type_of(right).fixnum_value().unwrap() as u64; gen_fixnum_lshift(jit, asm, opnd!(left), shift_amount, &function.frame_state(state)) } + &Insn::FixnumRShift { left, right } => { + // We only create FixnumRShift when we know the shift amount statically and it's in [0, + // 63]. + let shift_amount = function.type_of(right).fixnum_value().unwrap() as u64; + gen_fixnum_rshift(asm, opnd!(left), shift_amount) + } &Insn::FixnumMod { left, right, state } => gen_fixnum_mod(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)), Insn::IsNil { val } => gen_isnil(asm, opnd!(val)), &Insn::IsMethodCfunc { val, cd, cfunc, state: _ } => gen_is_method_cfunc(jit, asm, opnd!(val), cd, cfunc), @@ -1147,6 +1155,10 @@ fn gen_const_long(val: i64) -> lir::Opnd { Opnd::Imm(val) } +fn gen_const_uint16(val: u16) -> lir::Opnd { + Opnd::UImm(val as u64) +} + /// Compile a basic block argument fn gen_param(asm: &mut Assembler, idx: usize) -> lir::Opnd { // Allocate a register or a stack slot @@ -1753,6 +1765,15 @@ fn gen_fixnum_lshift(jit: &mut JITState, asm: &mut Assembler, left: lir::Opnd, s out_val } +/// Compile Fixnum >> Fixnum +fn gen_fixnum_rshift(asm: &mut Assembler, left: lir::Opnd, shift_amount: u64) -> lir::Opnd { + // Shift amount is known statically to be in the range [0, 63] + assert!(shift_amount < 64); + let result = asm.rshift(left, shift_amount.into()); + // Re-tag the output value + asm.or(result, 1.into()) +} + fn gen_fixnum_mod(jit: &mut JITState, asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd, state: &FrameState) -> lir::Opnd { // Check for left % 0, which raises ZeroDivisionError asm.cmp(right, Opnd::from(VALUE::fixnum_from_usize(0))); @@ -2480,9 +2501,34 @@ fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec result } -fn gen_string_getbyte_fixnum(asm: &mut Assembler, string: Opnd, index: Opnd) -> Opnd { - // TODO(max): Open-code rb_str_getbyte to avoid a call - asm_ccall!(asm, rb_str_getbyte, string, index) +// Generate RSTRING_PTR +fn get_string_ptr(asm: &mut Assembler, string: Opnd) -> Opnd { + asm_comment!(asm, "get string pointer for embedded or heap"); + let string = asm.load(string); + let flags = Opnd::mem(VALUE_BITS, string, RUBY_OFFSET_RBASIC_FLAGS); + asm.test(flags, (RSTRING_NOEMBED as u64).into()); + let heap_ptr = asm.load(Opnd::mem( + usize::BITS as u8, + string, + RUBY_OFFSET_RSTRING_AS_HEAP_PTR, + )); + // Load the address of the embedded array + // (struct RString *)(obj)->as.ary + let ary = asm.lea(Opnd::mem(VALUE_BITS, string, RUBY_OFFSET_RSTRING_AS_ARY)); + asm.csel_nz(heap_ptr, ary) +} + +fn gen_string_getbyte(asm: &mut Assembler, string: Opnd, index: Opnd) -> Opnd { + let string_ptr = get_string_ptr(asm, string); + // TODO(max): Use SIB indexing here once the backend supports it + let string_ptr = asm.add(string_ptr, index); + let byte = asm.load(Opnd::mem(8, string_ptr, 0)); + // Zero-extend the byte to 64 bits + let byte = byte.with_num_bits(64); + let byte = asm.and(byte, 0xFF.into()); + // Tag the byte + let byte = asm.lshift(byte, Opnd::UImm(1)); + asm.or(byte, Opnd::UImm(1)) } fn gen_string_setbyte_fixnum(asm: &mut Assembler, string: Opnd, index: Opnd, value: Opnd) -> Opnd { @@ -2495,6 +2541,11 @@ fn gen_string_append(jit: &mut JITState, asm: &mut Assembler, string: Opnd, val: asm_ccall!(asm, rb_str_buf_append, string, val) } +fn gen_string_append_codepoint(jit: &mut JITState, asm: &mut Assembler, string: Opnd, val: Opnd, state: &FrameState) -> Opnd { + gen_prepare_non_leaf_call(jit, asm, state); + asm_ccall!(asm, rb_jit_str_concat_codepoint, string, val) +} + /// Generate a JIT entry that just increments exit_compilation_failure and exits fn gen_compile_error_counter(cb: &mut CodeBlock, compile_error: &CompileError) -> Result { let mut asm = Assembler::new(); diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 9070cb38be5f42..9ed3a1abf79ca2 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -130,7 +130,6 @@ unsafe extern "C" { pub fn rb_float_new(d: f64) -> VALUE; pub fn rb_hash_empty_p(hash: VALUE) -> VALUE; - pub fn rb_yjit_str_concat_codepoint(str: VALUE, codepoint: VALUE); pub fn rb_str_setbyte(str: VALUE, index: VALUE, value: VALUE) -> VALUE; pub fn rb_str_getbyte(str: VALUE, index: VALUE) -> VALUE; pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE; @@ -1382,6 +1381,7 @@ pub(crate) mod ids { name: _as_heap name: thread_ptr name: self_ content: b"self" + name: rb_ivar_get_at_no_ractor_check } /// Get an CRuby `ID` to an interned string, e.g. a particular method name. diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index aaecfa2f89769a..7bd392563987a3 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -392,22 +392,23 @@ pub const BOP_NIL_P: ruby_basic_operators = 15; pub const BOP_SUCC: ruby_basic_operators = 16; pub const BOP_GT: ruby_basic_operators = 17; pub const BOP_GE: ruby_basic_operators = 18; -pub const BOP_NOT: ruby_basic_operators = 19; -pub const BOP_NEQ: ruby_basic_operators = 20; -pub const BOP_MATCH: ruby_basic_operators = 21; -pub const BOP_FREEZE: ruby_basic_operators = 22; -pub const BOP_UMINUS: ruby_basic_operators = 23; -pub const BOP_MAX: ruby_basic_operators = 24; -pub const BOP_MIN: ruby_basic_operators = 25; -pub const BOP_HASH: ruby_basic_operators = 26; -pub const BOP_CALL: ruby_basic_operators = 27; -pub const BOP_AND: ruby_basic_operators = 28; -pub const BOP_OR: ruby_basic_operators = 29; -pub const BOP_CMP: ruby_basic_operators = 30; -pub const BOP_DEFAULT: ruby_basic_operators = 31; -pub const BOP_PACK: ruby_basic_operators = 32; -pub const BOP_INCLUDE_P: ruby_basic_operators = 33; -pub const BOP_LAST_: ruby_basic_operators = 34; +pub const BOP_GTGT: ruby_basic_operators = 19; +pub const BOP_NOT: ruby_basic_operators = 20; +pub const BOP_NEQ: ruby_basic_operators = 21; +pub const BOP_MATCH: ruby_basic_operators = 22; +pub const BOP_FREEZE: ruby_basic_operators = 23; +pub const BOP_UMINUS: ruby_basic_operators = 24; +pub const BOP_MAX: ruby_basic_operators = 25; +pub const BOP_MIN: ruby_basic_operators = 26; +pub const BOP_HASH: ruby_basic_operators = 27; +pub const BOP_CALL: ruby_basic_operators = 28; +pub const BOP_AND: ruby_basic_operators = 29; +pub const BOP_OR: ruby_basic_operators = 30; +pub const BOP_CMP: ruby_basic_operators = 31; +pub const BOP_DEFAULT: ruby_basic_operators = 32; +pub const BOP_PACK: ruby_basic_operators = 33; +pub const BOP_INCLUDE_P: ruby_basic_operators = 34; +pub const BOP_LAST_: ruby_basic_operators = 35; pub type ruby_basic_operators = u32; pub type rb_serial_t = ::std::os::raw::c_ulonglong; #[repr(C)] @@ -2033,6 +2034,7 @@ unsafe extern "C" { pub fn rb_obj_shape_id(obj: VALUE) -> shape_id_t; pub fn rb_shape_get_iv_index(shape_id: shape_id_t, id: ID, value: *mut attr_index_t) -> bool; pub fn rb_shape_transition_add_ivar_no_warnings(obj: VALUE, id: ID) -> shape_id_t; + pub fn rb_ivar_get_at_no_ractor_check(obj: VALUE, index: attr_index_t) -> VALUE; pub fn rb_gvar_get(arg1: ID) -> VALUE; pub fn rb_gvar_set(arg1: ID, arg2: VALUE) -> VALUE; pub fn rb_ensure_iv_list_size(obj: VALUE, current_len: u32, newsize: u32); @@ -2224,4 +2226,5 @@ unsafe extern "C" { end: *mut ::std::os::raw::c_void, ); pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE; + pub fn rb_jit_str_concat_codepoint(str_: VALUE, codepoint: VALUE); } diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs index 56d4de280bff71..0f5c992b88f892 100644 --- a/zjit/src/cruby_methods.rs +++ b/zjit/src/cruby_methods.rs @@ -206,6 +206,10 @@ pub fn init() -> Annotations { annotate!(rb_cString, "empty?", inline_string_empty_p, types::BoolExact, no_gc, leaf, elidable); annotate!(rb_cString, "<<", inline_string_append); annotate!(rb_cString, "==", inline_string_eq); + // Not elidable; has a side effect of setting the encoding if ENC_CODERANGE_UNKNOWN. + // TOOD(max): Turn this into a load/compare. Will need to side-exit or do the full call if + // ENC_CODERANGE_UNKNOWN. + annotate!(rb_cString, "ascii_only?", types::BoolExact, no_gc, leaf); annotate!(rb_cModule, "name", types::StringExact.union(types::NilClass), no_gc, leaf, elidable); annotate!(rb_cModule, "===", inline_module_eqq, types::BoolExact, no_gc, leaf); annotate!(rb_cArray, "length", types::Fixnum, no_gc, leaf, elidable); @@ -242,6 +246,8 @@ pub fn init() -> Annotations { annotate!(rb_cInteger, "<", inline_integer_lt); annotate!(rb_cInteger, "<=", inline_integer_le); annotate!(rb_cInteger, "<<", inline_integer_lshift); + annotate!(rb_cInteger, ">>", inline_integer_rshift); + annotate!(rb_cInteger, "to_s", types::StringExact); annotate!(rb_cString, "to_s", inline_string_to_s, types::StringExact); let thread_singleton = unsafe { rb_singleton_class(rb_cThread) }; annotate!(thread_singleton, "current", inline_thread_current, types::BasicObject, no_gc, leaf); @@ -370,7 +376,21 @@ fn inline_string_getbyte(fun: &mut hir::Function, block: hir::BlockId, recv: hir // String#getbyte with a Fixnum is leaf and nogc; otherwise it may run arbitrary Ruby code // when converting the index to a C integer. let index = fun.coerce_to(block, index, types::Fixnum, state); - let result = fun.push_insn(block, hir::Insn::StringGetbyteFixnum { string: recv, index }); + let unboxed_index = fun.push_insn(block, hir::Insn::UnboxFixnum { val: index }); + let len = fun.push_insn(block, hir::Insn::LoadField { + recv, + id: ID!(len), + offset: RUBY_OFFSET_RSTRING_LEN as i32, + return_type: types::CInt64, + }); + // TODO(max): Find a way to mark these guards as not needed for correctness... as in, once + // the data dependency is gone (say, the StringGetbyte is elided), they can also be elided. + // + // This is unlike most other guards. + let unboxed_index = fun.push_insn(block, hir::Insn::GuardLess { left: unboxed_index, right: len, state }); + let zero = fun.push_insn(block, hir::Insn::Const { val: hir::Const::CInt64(0) }); + let _ = fun.push_insn(block, hir::Insn::GuardGreaterEq { left: unboxed_index, right: zero, state }); + let result = fun.push_insn(block, hir::Insn::StringGetbyte { string: recv, index: unboxed_index }); return Some(result); } None @@ -424,10 +444,15 @@ fn inline_string_append(fun: &mut hir::Function, block: hir::BlockId, recv: hir: let recv = fun.coerce_to(block, recv, types::StringExact, state); let other = fun.coerce_to(block, other, types::String, state); let _ = fun.push_insn(block, hir::Insn::StringAppend { recv, other, state }); - Some(recv) - } else { - None + return Some(recv); + } + if fun.likely_a(recv, types::StringExact, state) && fun.likely_a(other, types::Fixnum, state) { + let recv = fun.coerce_to(block, recv, types::StringExact, state); + let other = fun.coerce_to(block, other, types::Fixnum, state); + let _ = fun.push_insn(block, hir::Insn::StringAppendCodepoint { recv, other, state }); + return Some(recv); } + None } fn inline_string_eq(fun: &mut hir::Function, block: hir::BlockId, recv: hir::InsnId, args: &[hir::InsnId], state: hir::InsnId) -> Option { @@ -569,6 +594,16 @@ fn inline_integer_lshift(fun: &mut hir::Function, block: hir::BlockId, recv: hir try_inline_fixnum_op(fun, block, &|left, right| hir::Insn::FixnumLShift { left, right, state }, BOP_LTLT, recv, other, state) } +fn inline_integer_rshift(fun: &mut hir::Function, block: hir::BlockId, recv: hir::InsnId, args: &[hir::InsnId], state: hir::InsnId) -> Option { + let &[other] = args else { return None; }; + // Only convert to FixnumLShift if we know the shift amount is known at compile-time and could + // plausibly create a fixnum. + let Some(other_value) = fun.type_of(other).fixnum_value() else { return None; }; + // TODO(max): If other_value > 63, rewrite to constant zero. + if other_value < 0 || other_value > 63 { return None; } + try_inline_fixnum_op(fun, block, &|left, right| hir::Insn::FixnumRShift { left, right }, BOP_GTGT, recv, other, state) +} + fn inline_basic_object_eq(fun: &mut hir::Function, block: hir::BlockId, recv: hir::InsnId, args: &[hir::InsnId], _state: hir::InsnId) -> Option { let &[other] = args else { return None; }; let c_result = fun.push_insn(block, hir::Insn::IsBitEqual { left: recv, right: other }); diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index e5c1f7a54293da..5ede64d42c2d80 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -649,9 +649,10 @@ pub enum Insn { StringIntern { val: InsnId, state: InsnId }, StringConcat { strings: Vec, state: InsnId }, /// Call rb_str_getbyte with known-Fixnum index - StringGetbyteFixnum { string: InsnId, index: InsnId }, + StringGetbyte { string: InsnId, index: InsnId }, StringSetbyteFixnum { string: InsnId, index: InsnId, value: InsnId }, StringAppend { recv: InsnId, other: InsnId, state: InsnId }, + StringAppendCodepoint { recv: InsnId, other: InsnId, state: InsnId }, /// Combine count stack values into a regexp ToRegexp { opt: usize, values: Vec, state: InsnId }, @@ -890,6 +891,7 @@ pub enum Insn { FixnumOr { left: InsnId, right: InsnId }, FixnumXor { left: InsnId, right: InsnId }, FixnumLShift { left: InsnId, right: InsnId, state: InsnId }, + FixnumRShift { left: InsnId, right: InsnId }, // Distinct from `SendWithoutBlock` with `mid:to_s` because does not have a patch point for String to_s being redefined ObjToString { val: InsnId, cd: *const rb_call_data, state: InsnId }, @@ -988,6 +990,7 @@ impl Insn { Insn::FixnumOr { .. } => false, Insn::FixnumXor { .. } => false, Insn::FixnumLShift { .. } => false, + Insn::FixnumRShift { .. } => false, Insn::GetLocal { .. } => false, Insn::IsNil { .. } => false, Insn::LoadPC => false, @@ -1001,7 +1004,7 @@ impl Insn { // but we don't have type information here in `impl Insn`. See rb_range_new(). Insn::NewRange { .. } => true, Insn::NewRangeFixnum { .. } => false, - Insn::StringGetbyteFixnum { .. } => false, + Insn::StringGetbyte { .. } => false, Insn::IsBlockGiven => false, Insn::BoxFixnum { .. } => false, Insn::BoxBool { .. } => false, @@ -1115,8 +1118,8 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Ok(()) } - Insn::StringGetbyteFixnum { string, index, .. } => { - write!(f, "StringGetbyteFixnum {string}, {index}") + Insn::StringGetbyte { string, index, .. } => { + write!(f, "StringGetbyte {string}, {index}") } Insn::StringSetbyteFixnum { string, index, value, .. } => { write!(f, "StringSetbyteFixnum {string}, {index}, {value}") @@ -1124,6 +1127,9 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::StringAppend { recv, other, .. } => { write!(f, "StringAppend {recv}, {other}") } + Insn::StringAppendCodepoint { recv, other, .. } => { + write!(f, "StringAppendCodepoint {recv}, {other}") + } Insn::ToRegexp { values, opt, .. } => { write!(f, "ToRegexp")?; let mut prefix = " "; @@ -1229,6 +1235,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::FixnumOr { left, right, .. } => { write!(f, "FixnumOr {left}, {right}") }, Insn::FixnumXor { left, right, .. } => { write!(f, "FixnumXor {left}, {right}") }, Insn::FixnumLShift { left, right, .. } => { write!(f, "FixnumLShift {left}, {right}") }, + Insn::FixnumRShift { left, right, .. } => { write!(f, "FixnumRShift {left}, {right}") }, Insn::GuardType { val, guard_type, .. } => { write!(f, "GuardType {val}, {}", guard_type.print(self.ptr_map)) }, Insn::GuardTypeNot { val, guard_type, .. } => { write!(f, "GuardTypeNot {val}, {}", guard_type.print(self.ptr_map)) }, Insn::GuardBitEquals { val, expected, .. } => { write!(f, "GuardBitEquals {val}, {}", expected.print(self.ptr_map)) }, @@ -1735,6 +1742,15 @@ impl Function { self.blocks.len() } + pub fn assume_single_ractor_mode(&mut self, block: BlockId, state: InsnId) -> bool { + if unsafe { rb_jit_multi_ractor_p() } { + false + } else { + self.push_insn(block, Insn::PatchPoint { invariant: Invariant::SingleRactorMode, state }); + true + } + } + /// Return a copy of the instruction where the instruction and its operands have been read from /// the union-find table (to find the current most-optimized version of this instruction). See /// [`UnionFind`] for more. @@ -1811,9 +1827,10 @@ impl Function { &StringCopy { val, chilled, state } => StringCopy { val: find!(val), chilled, state }, &StringIntern { val, state } => StringIntern { val: find!(val), state: find!(state) }, &StringConcat { ref strings, state } => StringConcat { strings: find_vec!(strings), state: find!(state) }, - &StringGetbyteFixnum { string, index } => StringGetbyteFixnum { string: find!(string), index: find!(index) }, + &StringGetbyte { string, index } => StringGetbyte { string: find!(string), index: find!(index) }, &StringSetbyteFixnum { string, index, value } => StringSetbyteFixnum { string: find!(string), index: find!(index), value: find!(value) }, &StringAppend { recv, other, state } => StringAppend { recv: find!(recv), other: find!(other), state: find!(state) }, + &StringAppendCodepoint { recv, other, state } => StringAppendCodepoint { recv: find!(recv), other: find!(other), state: find!(state) }, &ToRegexp { opt, ref values, state } => ToRegexp { opt, values: find_vec!(values), state }, &Test { val } => Test { val: find!(val) }, &IsNil { val } => IsNil { val: find!(val) }, @@ -1849,6 +1866,7 @@ impl Function { &FixnumOr { left, right } => FixnumOr { left: find!(left), right: find!(right) }, &FixnumXor { left, right } => FixnumXor { left: find!(left), right: find!(right) }, &FixnumLShift { left, right, state } => FixnumLShift { left: find!(left), right: find!(right), state }, + &FixnumRShift { left, right } => FixnumRShift { left: find!(left), right: find!(right) }, &ObjToString { val, cd, state } => ObjToString { val: find!(val), cd, @@ -2029,9 +2047,10 @@ impl Function { Insn::StringCopy { .. } => types::StringExact, Insn::StringIntern { .. } => types::Symbol, Insn::StringConcat { .. } => types::StringExact, - Insn::StringGetbyteFixnum { .. } => types::Fixnum.union(types::NilClass), + Insn::StringGetbyte { .. } => types::Fixnum, Insn::StringSetbyteFixnum { .. } => types::Fixnum, Insn::StringAppend { .. } => types::StringExact, + Insn::StringAppendCodepoint { .. } => types::StringExact, Insn::ToRegexp { .. } => types::RegexpExact, Insn::NewArray { .. } => types::ArrayExact, Insn::ArrayDup { .. } => types::ArrayExact, @@ -2070,6 +2089,7 @@ impl Function { Insn::FixnumOr { .. } => types::Fixnum, Insn::FixnumXor { .. } => types::Fixnum, Insn::FixnumLShift { .. } => types::Fixnum, + Insn::FixnumRShift { .. } => types::Fixnum, Insn::PutSpecialObject { .. } => types::BasicObject, Insn::SendWithoutBlock { .. } => types::BasicObject, Insn::SendWithoutBlockDirect { .. } => types::BasicObject, @@ -2366,6 +2386,17 @@ impl Function { } } + fn is_metaclass(&self, object: VALUE) -> bool { + unsafe { + if RB_TYPE_P(object, RUBY_T_CLASS) && rb_zjit_singleton_class_p(object) { + let attached = rb_class_attached_object(object); + RB_TYPE_P(attached, RUBY_T_CLASS) || RB_TYPE_P(attached, RUBY_T_MODULE) + } else { + false + } + } + } + /// Rewrite SendWithoutBlock opcodes into SendWithoutBlockDirect opcodes if we know the target /// ISEQ statically. This removes run-time method lookups and opens the door for inlining. /// Also try and inline constant caches, specialize object allocations, and more. @@ -2472,9 +2503,9 @@ impl Function { // Patch points: // Check for "defined with an un-shareable Proc in a different Ractor" - if !procv.shareable_p() { + if !procv.shareable_p() && !self.assume_single_ractor_mode(block, state) { // TODO(alan): Turn this into a ractor belonging guard to work better in multi ractor mode. - self.push_insn(block, Insn::PatchPoint { invariant: Invariant::SingleRactorMode, state }); + self.push_insn_id(block, insn_id); continue; } self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state }); if klass.instance_can_have_singleton_class() { @@ -2487,6 +2518,12 @@ impl Function { let send_direct = self.push_insn(block, Insn::SendWithoutBlockDirect { recv, cd, cme, iseq, args, state }); self.make_equal_to(insn_id, send_direct); } else if def_type == VM_METHOD_TYPE_IVAR && args.is_empty() { + // Check if we're accessing ivars of a Class or Module object as they require single-ractor mode. + // We omit gen_prepare_non_leaf_call on gen_getivar, so it's unsafe to raise for multi-ractor mode. + if self.is_metaclass(klass) && !self.assume_single_ractor_mode(block, state) { + self.push_insn_id(block, insn_id); continue; + } + self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state }); if klass.instance_can_have_singleton_class() { self.push_insn(block, Insn::PatchPoint { invariant: Invariant::NoSingletonClass { klass }, state }); @@ -2496,31 +2533,21 @@ impl Function { } let id = unsafe { get_cme_def_body_attr_id(cme) }; - // Check if we're accessing ivars of a Class or Module object as they require single-ractor mode. - // We omit gen_prepare_non_leaf_call on gen_getivar, so it's unsafe to raise for multi-ractor mode. - if unsafe { rb_zjit_singleton_class_p(klass) } { - let attached = unsafe { rb_class_attached_object(klass) }; - if unsafe { RB_TYPE_P(attached, RUBY_T_CLASS) || RB_TYPE_P(attached, RUBY_T_MODULE) } { - self.push_insn(block, Insn::PatchPoint { invariant: Invariant::SingleRactorMode, state }); - } - } let getivar = self.push_insn(block, Insn::GetIvar { self_val: recv, id, ic: std::ptr::null(), state }); self.make_equal_to(insn_id, getivar); } else if let (VM_METHOD_TYPE_ATTRSET, &[val]) = (def_type, args.as_slice()) { + // Check if we're accessing ivars of a Class or Module object as they require single-ractor mode. + // We omit gen_prepare_non_leaf_call on gen_getivar, so it's unsafe to raise for multi-ractor mode. + if self.is_metaclass(klass) && !self.assume_single_ractor_mode(block, state) { + self.push_insn_id(block, insn_id); continue; + } + self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state }); if let Some(profiled_type) = profiled_type { recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state }); } let id = unsafe { get_cme_def_body_attr_id(cme) }; - // Check if we're accessing ivars of a Class or Module object as they require single-ractor mode. - // We omit gen_prepare_non_leaf_call on gen_setivar, so it's unsafe to raise for multi-ractor mode. - if unsafe { rb_zjit_singleton_class_p(klass) } { - let attached = unsafe { rb_class_attached_object(klass) }; - if unsafe { RB_TYPE_P(attached, RUBY_T_CLASS) || RB_TYPE_P(attached, RUBY_T_MODULE) } { - self.push_insn(block, Insn::PatchPoint { invariant: Invariant::SingleRactorMode, state }); - } - } self.push_insn(block, Insn::SetIvar { self_val: recv, id, ic: std::ptr::null(), val, state }); self.make_equal_to(insn_id, val); } else if def_type == VM_METHOD_TYPE_OPTIMIZED { @@ -2646,12 +2673,9 @@ impl Function { self.push_insn_id(block, insn_id); continue; } let cref_sensitive = !unsafe { (*ice).ic_cref }.is_null(); - let multi_ractor_mode = unsafe { rb_jit_multi_ractor_p() }; - if cref_sensitive || multi_ractor_mode { + if cref_sensitive || !self.assume_single_ractor_mode(block, state) { self.push_insn_id(block, insn_id); continue; } - // Assume single-ractor mode. - self.push_insn(block, Insn::PatchPoint { invariant: Invariant::SingleRactorMode, state }); // Invalidate output code on any constant writes associated with constants // referenced after the PatchPoint. self.push_insn(block, Insn::PatchPoint { invariant: Invariant::StableConstantNames { idlist }, state }); @@ -2677,8 +2701,8 @@ impl Function { self.insn_types[guard.0] = self.infer_type(guard); self.make_equal_to(insn_id, guard); } else { - self.push_insn(block, Insn::GuardTypeNot { val, guard_type: types::String, state}); - let send_to_s = self.push_insn(block, Insn::SendWithoutBlock { recv: val, cd, args: vec![], state, reason: ObjToStringNotString }); + let recv = self.push_insn(block, Insn::GuardType { val, guard_type: Type::from_profiled_type(recv_type), state}); + let send_to_s = self.push_insn(block, Insn::SendWithoutBlock { recv, cd, args: vec![], state, reason: ObjToStringNotString }); self.make_equal_to(insn_id, send_to_s); } } @@ -2818,11 +2842,6 @@ impl Function { self.push_insn_id(block, insn_id); continue; } assert!(recv_type.shape().is_valid()); - if !recv_type.flags().is_t_object() { - // Check if the receiver is a T_OBJECT - self.push_insn(block, Insn::IncrCounter(Counter::getivar_fallback_not_t_object)); - self.push_insn_id(block, insn_id); continue; - } if recv_type.shape().is_too_complex() { // too-complex shapes can't use index access self.push_insn(block, Insn::IncrCounter(Counter::getivar_fallback_too_complex)); @@ -2836,6 +2855,17 @@ impl Function { // entered the compiler. That means we can just return nil for this // shape + iv name self.push_insn(block, Insn::Const { val: Const::Value(Qnil) }) + } else if !recv_type.flags().is_t_object() { + // NOTE: it's fine to use rb_ivar_get_at_no_ractor_check because + // getinstancevariable does assume_single_ractor_mode() + let ivar_index_insn: InsnId = self.push_insn(block, Insn::Const { val: Const::CUInt16(ivar_index as u16) }); + self.push_insn(block, Insn::CCall { + cfunc: rb_ivar_get_at_no_ractor_check as *const u8, + recv: self_val, + args: vec![ivar_index_insn], + name: ID!(rb_ivar_get_at_no_ractor_check), + return_type: types::BasicObject, + elidable: true }) } else if recv_type.flags().is_embedded() { // See ROBJECT_FIELDS() from include/ruby/internal/core/robject.h let offset = ROBJECT_OFFSET_AS_ARY as i32 + (SIZEOF_VALUE * ivar_index.to_usize()) as i32; @@ -3399,6 +3429,11 @@ impl Function { // Don't bother re-inferring the type of val; we already know it. continue; } + Insn::AnyToString { str, .. } if self.is_a(str, types::String) => { + self.make_equal_to(insn_id, str); + // Don't bother re-inferring the type of str; we already know it. + continue; + } Insn::FixnumAdd { left, right, .. } => { self.fold_fixnum_bop(insn_id, left, right, |l, r| match (l, r) { (Some(l), Some(r)) => l.checked_add(r), @@ -3550,7 +3585,7 @@ impl Function { worklist.extend(strings); worklist.push_back(state); } - &Insn::StringGetbyteFixnum { string, index } => { + &Insn::StringGetbyte { string, index } => { worklist.push_back(string); worklist.push_back(index); } @@ -3559,7 +3594,9 @@ impl Function { worklist.push_back(index); worklist.push_back(value); } - &Insn::StringAppend { recv, other, state } => { + &Insn::StringAppend { recv, other, state } + | &Insn::StringAppendCodepoint { recv, other, state } + => { worklist.push_back(recv); worklist.push_back(other); worklist.push_back(state); @@ -3626,6 +3663,7 @@ impl Function { | &Insn::FixnumAnd { left, right } | &Insn::FixnumOr { left, right } | &Insn::FixnumXor { left, right } + | &Insn::FixnumRShift { left, right } | &Insn::IsBitEqual { left, right } | &Insn::IsBitNotEqual { left, right } => { @@ -4258,6 +4296,7 @@ impl Function { | Insn::ObjectAlloc { val, .. } | Insn::DupArrayInclude { target: val, .. } | Insn::GetIvar { self_val: val, .. } + | Insn::CCall { recv: val, .. } | Insn::FixnumBitCheck { val, .. } // TODO (https://github.com/Shopify/ruby/issues/859) this should check Fixnum, but then test_checkkeyword_tests_fixnum_bit fails | Insn::DefinedIvar { self_val: val, .. } => { self.assert_subtype(insn_id, val, types::BasicObject) @@ -4279,7 +4318,6 @@ impl Function { | Insn::Send { recv, ref args, .. } | Insn::SendForward { recv, ref args, .. } | Insn::InvokeSuper { recv, ref args, .. } - | Insn::CCall { recv, ref args, .. } | Insn::CCallWithFrame { recv, ref args, .. } | Insn::CCallVariadic { recv, ref args, .. } | Insn::ArrayInclude { target: recv, elements: ref args, .. } => { @@ -4323,6 +4361,10 @@ impl Function { self.assert_subtype(insn_id, recv, types::StringExact)?; self.assert_subtype(insn_id, other, types::String) } + Insn::StringAppendCodepoint { recv, other, .. } => { + self.assert_subtype(insn_id, recv, types::StringExact)?; + self.assert_subtype(insn_id, other, types::Fixnum) + } // Instructions with Array operands Insn::ArrayDup { val, .. } => self.assert_subtype(insn_id, val, types::ArrayExact), Insn::ArrayExtend { left, right, .. } => { @@ -4386,12 +4428,26 @@ impl Function { | Insn::FixnumAnd { left, right } | Insn::FixnumOr { left, right } | Insn::FixnumXor { left, right } - | Insn::FixnumLShift { left, right, .. } | Insn::NewRangeFixnum { low: left, high: right, .. } => { self.assert_subtype(insn_id, left, types::Fixnum)?; self.assert_subtype(insn_id, right, types::Fixnum) } + Insn::FixnumLShift { left, right, .. } + | Insn::FixnumRShift { left, right, .. } => { + self.assert_subtype(insn_id, left, types::Fixnum)?; + self.assert_subtype(insn_id, right, types::Fixnum)?; + let Some(obj) = self.type_of(right).fixnum_value() else { + return Err(ValidationError::MismatchedOperandType(insn_id, right, "".into(), "".into())); + }; + if obj < 0 { + return Err(ValidationError::MismatchedOperandType(insn_id, right, "".into(), format!("{obj}"))); + } + if obj > 63 { + return Err(ValidationError::MismatchedOperandType(insn_id, right, "".into(), format!("{obj}"))); + } + Ok(()) + } Insn::GuardBitEquals { val, expected, .. } => { match expected { Const::Value(_) => self.assert_subtype(insn_id, val, types::RubyValue), @@ -4413,9 +4469,9 @@ impl Function { self.assert_subtype(insn_id, left, types::CInt64)?; self.assert_subtype(insn_id, right, types::CInt64) }, - Insn::StringGetbyteFixnum { string, index } => { + Insn::StringGetbyte { string, index } => { self.assert_subtype(insn_id, string, types::String)?; - self.assert_subtype(insn_id, index, types::Fixnum) + self.assert_subtype(insn_id, index, types::CInt64) }, Insn::StringSetbyteFixnum { string, index, value } => { self.assert_subtype(insn_id, string, types::String)?; @@ -5656,7 +5712,11 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { // ic is in arg 1 // Assume single-Ractor mode to omit gen_prepare_non_leaf_call on gen_getivar // TODO: We only really need this if self_val is a class/module - fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::SingleRactorMode, state: exit_id }); + if !fun.assume_single_ractor_mode(block, exit_id) { + // gen_getivar assumes single Ractor; side-exit into the interpreter + fun.push_insn(block, Insn::SideExit { state: exit_id, reason: SideExitReason::UnhandledYARVInsn(opcode) }); + break; // End the block + } let result = fun.push_insn(block, Insn::GetIvar { self_val: self_param, id, ic, state: exit_id }); state.stack_push(result); } @@ -5665,7 +5725,11 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let ic = get_arg(pc, 1).as_ptr(); // Assume single-Ractor mode to omit gen_prepare_non_leaf_call on gen_setivar // TODO: We only really need this if self_val is a class/module - fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::SingleRactorMode, state: exit_id }); + if !fun.assume_single_ractor_mode(block, exit_id) { + // gen_setivar assumes single Ractor; side-exit into the interpreter + fun.push_insn(block, Insn::SideExit { state: exit_id, reason: SideExitReason::UnhandledYARVInsn(opcode) }); + break; // End the block + } let val = state.stack_pop()?; fun.push_insn(block, Insn::SetIvar { self_val: self_param, id, ic, val, state: exit_id }); } diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index e8c8d50dbe9256..9809c8bffbee01 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -4137,12 +4137,11 @@ mod hir_opt_tests { Jump bb2(v5, v6) bb2(v8:BasicObject, v9:BasicObject): v13:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) - v26:BasicObject = GuardTypeNot v9, String + v26:ArrayExact = GuardType v9, ArrayExact PatchPoint MethodRedefined(Array@0x1008, to_s@0x1010, cme:0x1018) PatchPoint NoSingletonClass(Array@0x1008) - v31:ArrayExact = GuardType v9, ArrayExact - v32:BasicObject = CCallWithFrame v31, :Array#to_s@0x1040 - v19:String = AnyToString v9, str: v32 + v31:BasicObject = CCallWithFrame v26, :Array#to_s@0x1040 + v19:String = AnyToString v9, str: v31 v21:StringExact = StringConcat v13, v19 CheckInterrupts Return v21 @@ -5232,6 +5231,148 @@ mod hir_opt_tests { "); } + #[test] + fn test_optimize_getivar_on_module() { + eval(" + module M + @foo = 42 + def self.test = @foo + end + M.test + "); + assert_snapshot!(hir_string_proc("M.method(:test)"), @r" + fn test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + PatchPoint SingleRactorMode + v16:HeapBasicObject = GuardType v6, HeapBasicObject + v17:HeapBasicObject = GuardShape v16, 0x1000 + v18:CUInt16[0] = Const CUInt16(0) + v19:BasicObject = CCall v17, :rb_ivar_get_at_no_ractor_check@0x1008, v18 + CheckInterrupts + Return v19 + "); + } + + #[test] + fn test_optimize_getivar_on_class() { + eval(" + class C + @foo = 42 + def self.test = @foo + end + C.test + "); + assert_snapshot!(hir_string_proc("C.method(:test)"), @r" + fn test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + PatchPoint SingleRactorMode + v16:HeapBasicObject = GuardType v6, HeapBasicObject + v17:HeapBasicObject = GuardShape v16, 0x1000 + v18:CUInt16[0] = Const CUInt16(0) + v19:BasicObject = CCall v17, :rb_ivar_get_at_no_ractor_check@0x1008, v18 + CheckInterrupts + Return v19 + "); + } + + #[test] + fn test_optimize_getivar_on_t_data() { + eval(" + class C < Range + def test = @a + end + obj = C.new 0, 1 + obj.instance_variable_set(:@a, 1) + obj.test + TEST = C.instance_method(:test) + "); + assert_snapshot!(hir_string_proc("TEST"), @r" + fn test@:3: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + PatchPoint SingleRactorMode + v16:HeapBasicObject = GuardType v6, HeapBasicObject + v17:HeapBasicObject = GuardShape v16, 0x1000 + v18:CUInt16[0] = Const CUInt16(0) + v19:BasicObject = CCall v17, :rb_ivar_get_at_no_ractor_check@0x1008, v18 + CheckInterrupts + Return v19 + "); + } + + #[test] + fn test_optimize_getivar_on_module_multi_ractor() { + eval(" + module M + @foo = 42 + def self.test = @foo + end + Ractor.new {}.value + M.test + "); + assert_snapshot!(hir_string_proc("M.method(:test)"), @r" + fn test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + SideExit UnhandledYARVInsn(getinstancevariable) + "); + } + + #[test] + fn test_optimize_attr_reader_on_module_multi_ractor() { + eval(" + module M + @foo = 42 + class << self + attr_reader :foo + end + def self.test = foo + end + Ractor.new {}.value + M.test + "); + assert_snapshot!(hir_string_proc("M.method(:test)"), @r" + fn test@:7: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v11:BasicObject = SendWithoutBlock v6, :foo + CheckInterrupts + Return v11 + "); + } + #[test] fn test_dont_optimize_getivar_polymorphic() { set_call_threshold(3); @@ -5900,6 +6041,83 @@ mod hir_opt_tests { "); } + #[test] + fn test_fixnum_to_s_returns_string() { + eval(r#" + def test(x) = x.to_s + test 5 + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + PatchPoint MethodRedefined(Integer@0x1000, to_s@0x1008, cme:0x1010) + v21:Fixnum = GuardType v9, Fixnum + v22:StringExact = CCallVariadic v21, :Integer#to_s@0x1038 + CheckInterrupts + Return v22 + "); + } + + #[test] + fn test_bignum_to_s_returns_string() { + eval(r#" + def test(x) = x.to_s + test (2**65) + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + PatchPoint MethodRedefined(Integer@0x1000, to_s@0x1008, cme:0x1010) + v21:Integer = GuardType v9, Integer + v22:StringExact = CCallVariadic v21, :Integer#to_s@0x1038 + CheckInterrupts + Return v22 + "); + } + + #[test] + fn test_fold_any_to_string_with_known_string_exact() { + eval(r##" + def test(x) = "#{x}" + test 123 + "##); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v13:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) + v26:Fixnum = GuardType v9, Fixnum + PatchPoint MethodRedefined(Integer@0x1008, to_s@0x1010, cme:0x1018) + v30:StringExact = CCallVariadic v26, :Integer#to_s@0x1040 + v21:StringExact = StringConcat v13, v30 + CheckInterrupts + Return v21 + "); + } + #[test] fn test_array_aref_fixnum_literal() { eval(" @@ -6375,10 +6593,15 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(String@0x1000) v26:StringExact = GuardType v11, StringExact v27:Fixnum = GuardType v12, Fixnum - v28:NilClass|Fixnum = StringGetbyteFixnum v26, v27 + v28:CInt64 = UnboxFixnum v27 + v29:CInt64 = LoadField v26, :len@0x1038 + v30:CInt64 = GuardLess v28, v29 + v31:CInt64[0] = Const CInt64(0) + v32:CInt64 = GuardGreaterEq v30, v31 + v33:Fixnum = StringGetbyte v26, v30 IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - Return v28 + Return v33 "); } @@ -6407,6 +6630,11 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(String@0x1000) v30:StringExact = GuardType v11, StringExact v31:Fixnum = GuardType v12, Fixnum + v32:CInt64 = UnboxFixnum v31 + v33:CInt64 = LoadField v30, :len@0x1038 + v34:CInt64 = GuardLess v32, v33 + v35:CInt64[0] = Const CInt64(0) + v36:CInt64 = GuardGreaterEq v34, v35 IncrCounter inline_cfunc_optimized_send_count v22:Fixnum[5] = Const Value(5) CheckInterrupts @@ -6749,6 +6977,111 @@ mod hir_opt_tests { "); } + #[test] + fn test_inline_integer_gtgt_with_known_fixnum() { + eval(" + def test(x) = x >> 5 + test(4) + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:Fixnum[5] = Const Value(5) + PatchPoint MethodRedefined(Integer@0x1000, >>@0x1008, cme:0x1010) + v23:Fixnum = GuardType v9, Fixnum + v24:Fixnum = FixnumRShift v23, v14 + IncrCounter inline_cfunc_optimized_send_count + CheckInterrupts + Return v24 + "); + } + + #[test] + fn test_dont_inline_integer_gtgt_with_negative() { + eval(" + def test(x) = x >> -5 + test(4) + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:Fixnum[-5] = Const Value(-5) + PatchPoint MethodRedefined(Integer@0x1000, >>@0x1008, cme:0x1010) + v23:Fixnum = GuardType v9, Fixnum + v24:BasicObject = CCallWithFrame v23, :Integer#>>@0x1038, v14 + CheckInterrupts + Return v24 + "); + } + + #[test] + fn test_dont_inline_integer_gtgt_with_out_of_range() { + eval(" + def test(x) = x >> 64 + test(4) + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:Fixnum[64] = Const Value(64) + PatchPoint MethodRedefined(Integer@0x1000, >>@0x1008, cme:0x1010) + v23:Fixnum = GuardType v9, Fixnum + v24:BasicObject = CCallWithFrame v23, :Integer#>>@0x1038, v14 + CheckInterrupts + Return v24 + "); + } + + #[test] + fn test_dont_inline_integer_gtgt_with_unknown_fixnum() { + eval(" + def test(x, y) = x >> y + test(4, 5) + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@5 + v3:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2, v3) + bb1(v6:BasicObject, v7:BasicObject, v8:BasicObject): + EntryPoint JIT(0) + Jump bb2(v6, v7, v8) + bb2(v10:BasicObject, v11:BasicObject, v12:BasicObject): + PatchPoint MethodRedefined(Integer@0x1000, >>@0x1008, cme:0x1010) + v25:Fixnum = GuardType v11, Fixnum + v26:BasicObject = CCallWithFrame v25, :Integer#>>@0x1038, v12 + CheckInterrupts + Return v26 + "); + } + #[test] fn test_optimize_string_append() { eval(r#" @@ -6778,9 +7111,8 @@ mod hir_opt_tests { "); } - // TODO: This should be inlined just as in the interpreter #[test] - fn test_optimize_string_append_non_string() { + fn test_optimize_string_append_codepoint() { eval(r#" def test(x, y) = x << y test("iron", 4) @@ -6800,9 +7132,11 @@ mod hir_opt_tests { PatchPoint MethodRedefined(String@0x1000, <<@0x1008, cme:0x1010) PatchPoint NoSingletonClass(String@0x1000) v27:StringExact = GuardType v11, StringExact - v28:BasicObject = CCallWithFrame v27, :String#<<@0x1038, v12 + v28:Fixnum = GuardType v12, Fixnum + v29:StringExact = StringAppendCodepoint v27, v28 + IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - Return v28 + Return v27 "); } @@ -6866,6 +7200,32 @@ mod hir_opt_tests { "); } + #[test] + fn test_dont_optimize_string_append_non_string() { + eval(r#" + def test = "iron" << :a + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v10:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) + v11:StringExact = StringCopy v10 + v13:StaticSymbol[:a] = Const Value(VALUE(0x1008)) + PatchPoint MethodRedefined(String@0x1010, <<@0x1018, cme:0x1020) + PatchPoint NoSingletonClass(String@0x1010) + v24:BasicObject = CCallWithFrame v11, :String#<<@0x1048, v13 + CheckInterrupts + Return v24 + "); + } + #[test] fn test_dont_optimize_when_passing_too_many_args() { eval(r#" @@ -6892,6 +7252,33 @@ mod hir_opt_tests { "); } + #[test] + fn test_optimize_string_ascii_only_p() { + eval(r#" + def test(x) = x.ascii_only? + test("iron") + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + PatchPoint MethodRedefined(String@0x1000, ascii_only?@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(String@0x1000) + v22:StringExact = GuardType v9, StringExact + IncrCounter inline_cfunc_optimized_send_count + v24:BoolExact = CCall v22, :String#ascii_only?@0x1038 + CheckInterrupts + Return v24 + "); + } + #[test] fn test_dont_optimize_when_passing_too_few_args() { eval(r#" diff --git a/zjit/src/hir/tests.rs b/zjit/src/hir/tests.rs index 314eb7777cbf6c..79ba1d30c53443 100644 --- a/zjit/src/hir/tests.rs +++ b/zjit/src/hir/tests.rs @@ -2121,7 +2121,42 @@ pub mod hir_build_tests { "); } - // TODO(max): Add a test for VM_OPT_NEWARRAY_SEND_PACK_BUFFER + #[test] + fn test_opt_newarray_send_pack_buffer() { + eval(r#" + def test(a,b) + sum = a+b + buf = "" + [a,b].pack 'C', buffer: buf + buf + end + "#); + assert_contains_opcode("test", YARVINSN_opt_newarray_send); + assert_snapshot!(hir_string("test"), @r" + fn test@:3: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@7 + v3:BasicObject = GetLocal l0, SP@6 + v4:NilClass = Const Value(nil) + v5:NilClass = Const Value(nil) + Jump bb2(v1, v2, v3, v4, v5) + bb1(v8:BasicObject, v9:BasicObject, v10:BasicObject): + EntryPoint JIT(0) + v11:NilClass = Const Value(nil) + v12:NilClass = Const Value(nil) + Jump bb2(v8, v9, v10, v11, v12) + bb2(v14:BasicObject, v15:BasicObject, v16:BasicObject, v17:NilClass, v18:NilClass): + v25:BasicObject = SendWithoutBlock v15, :+, v16 + v29:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) + v30:StringExact = StringCopy v29 + v36:StringExact[VALUE(0x1008)] = Const Value(VALUE(0x1008)) + v37:StringExact = StringCopy v36 + v39:BasicObject = GetLocal l0, EP@3 + SideExit UnhandledNewarraySend(PACK_BUFFER) + "); + } #[test] fn test_opt_newarray_send_include_p() {