diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb index bd44f3ac3ece0b..fc592520e80b06 100644 --- a/bootstraptest/test_yjit.rb +++ b/bootstraptest/test_yjit.rb @@ -5416,3 +5416,22 @@ def foo 10.times.map { foo.dup }.last } + +# regression test for [Bug #21772] +# local variable type tracking desync +assert_normal_exit %q{ + def some_method = 0 + + def test_body(key) + some_method + key = key.to_s # this local assignment is relevant to the bug + + key == "symbol" + end + + def jit_caller = test_body("session_id") + + jit_caller # first iteration, non-escaped environment + alias some_method binding # induce environment escape + test_body(:symbol) +} diff --git a/encoding.c b/encoding.c index d14d371b04c319..749cbd586d00be 100644 --- a/encoding.c +++ b/encoding.c @@ -56,7 +56,7 @@ int rb_encdb_alias(const char *alias, const char *orig); #pragma GCC visibility pop #endif -static ID id_encoding; +static ID id_encoding, id_i_name; VALUE rb_cEncoding; #define ENCODING_LIST_CAPA 256 @@ -97,6 +97,8 @@ static rb_encoding *global_enc_ascii, *global_enc_utf_8, *global_enc_us_ascii; +static int filesystem_encindex = ENCINDEX_ASCII_8BIT; + #define GLOBAL_ENC_TABLE_LOCKING(tbl) \ for (struct enc_table *tbl = &global_enc_table, **locking = &tbl; \ locking; \ @@ -136,6 +138,7 @@ static VALUE enc_new(rb_encoding *encoding) { VALUE enc = TypedData_Wrap_Struct(rb_cEncoding, &encoding_data_type, (void *)encoding); + rb_ivar_set(enc, id_i_name, rb_fstring_cstr(encoding->name)); RB_OBJ_SET_FROZEN_SHAREABLE(enc); return enc; } @@ -1356,20 +1359,6 @@ enc_inspect(VALUE self) rb_enc_autoload_p(enc) ? " (autoload)" : ""); } -/* - * call-seq: - * enc.name -> string - * enc.to_s -> string - * - * Returns the name of the encoding. - * - * Encoding::UTF_8.name #=> "UTF-8" - */ -static VALUE -enc_name(VALUE self) -{ - return rb_fstring_cstr(rb_enc_name((rb_encoding*)RTYPEDDATA_GET_DATA(self))); -} static int enc_names_i(st_data_t name, st_data_t idx, st_data_t args) @@ -1513,7 +1502,7 @@ static VALUE enc_dump(int argc, VALUE *argv, VALUE self) { rb_check_arity(argc, 0, 1); - return enc_name(self); + return rb_attr_get(self, id_i_name); } /* :nodoc: */ @@ -1602,12 +1591,7 @@ rb_locale_encoding(void) int rb_filesystem_encindex(void) { - int idx; - GLOBAL_ENC_TABLE_LOCKING(enc_table) { - idx = enc_registered(enc_table, "filesystem"); - } - if (idx < 0) idx = ENCINDEX_ASCII_8BIT; - return idx; + return filesystem_encindex; } rb_encoding * @@ -1659,7 +1643,9 @@ enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const cha } if (def == &default_external) { - enc_alias_internal(enc_table, "filesystem", Init_enc_set_filesystem_encoding()); + int fs_idx = Init_enc_set_filesystem_encoding(); + enc_alias_internal(enc_table, "filesystem", fs_idx); + filesystem_encindex = fs_idx; } } @@ -2002,12 +1988,19 @@ Init_Encoding(void) VALUE list; int i; + id_i_name = rb_intern_const("@name"); rb_cEncoding = rb_define_class("Encoding", rb_cObject); rb_define_alloc_func(rb_cEncoding, enc_s_alloc); rb_undef_method(CLASS_OF(rb_cEncoding), "new"); - rb_define_method(rb_cEncoding, "to_s", enc_name, 0); + + /* The name of the encoding. 
+ * + * Encoding::UTF_8.name #=> "UTF-8" + */ + rb_attr(rb_cEncoding, rb_intern("name"), TRUE, FALSE, Qfalse); + rb_define_alias(rb_cEncoding, "to_s", "name"); + rb_define_method(rb_cEncoding, "inspect", enc_inspect, 0); - rb_define_method(rb_cEncoding, "name", enc_name, 0); rb_define_method(rb_cEncoding, "names", enc_names, 0); rb_define_method(rb_cEncoding, "dummy?", enc_dummy_p, 0); rb_define_method(rb_cEncoding, "ascii_compatible?", enc_ascii_compatible_p, 0); diff --git a/include/ruby/fiber/scheduler.h b/include/ruby/fiber/scheduler.h index 537a3a7bb27a5c..4d764f68ae06ed 100644 --- a/include/ruby/fiber/scheduler.h +++ b/include/ruby/fiber/scheduler.h @@ -27,6 +27,7 @@ RBIMPL_SYMBOL_EXPORT_BEGIN() #define RUBY_FIBER_SCHEDULER_VERSION 3 struct timeval; +struct rb_thread_struct; /** * Wrap a `ssize_t` and `int errno` into a single `VALUE`. This interface should @@ -118,7 +119,7 @@ VALUE rb_fiber_scheduler_current(void); /** * Identical to rb_fiber_scheduler_current(), except it queries for that of the - * passed thread instead of the implicit current one. + * passed thread value instead of the implicit current one. * * @param[in] thread Target thread. * @exception rb_eTypeError `thread` is not a thread. @@ -127,6 +128,17 @@ VALUE rb_fiber_scheduler_current(void); */ VALUE rb_fiber_scheduler_current_for_thread(VALUE thread); +/** + * Identical to rb_fiber_scheduler_current_for_thread(), except it expects + * a threadptr instead of a thread value. + * + * @param[in] thread Target thread. + * @exception rb_eTypeError `thread` is not a thread. + * @retval RUBY_Qnil No scheduler is in effect in `thread`. + * @retval otherwise The scheduler that is in effect in `thread`. + */ +VALUE rb_fiber_scheduler_current_for_threadptr(struct rb_thread_struct *thread); + /** * Converts the passed timeout to an expression that rb_fiber_scheduler_block() * etc. expects. diff --git a/include/ruby/internal/intern/select.h b/include/ruby/internal/intern/select.h index 6ba84c6e63464e..ba75213618ca74 100644 --- a/include/ruby/internal/intern/select.h +++ b/include/ruby/internal/intern/select.h @@ -72,6 +72,8 @@ struct timeval; * someone else, vastly varies among operating systems. You would better avoid * touching an fd from more than one threads. * + * NOTE: this function is used in native extensions, so change its API with care. 
+ * * @internal * * Although any file descriptors are possible here, it makes completely no diff --git a/internal/thread.h b/internal/thread.h index 21efeeebc085d7..ea891b4372f8f4 100644 --- a/internal/thread.h +++ b/internal/thread.h @@ -56,8 +56,8 @@ VALUE rb_mutex_owned_p(VALUE self); VALUE rb_exec_recursive_outer_mid(VALUE (*f)(VALUE g, VALUE h, int r), VALUE g, VALUE h, ID mid); void ruby_mn_threads_params(void); -int rb_thread_io_wait(struct rb_io *io, int events, struct timeval * timeout); -int rb_thread_wait_for_single_fd(int fd, int events, struct timeval * timeout); +int rb_thread_io_wait(struct rb_thread_struct *th, struct rb_io *io, int events, struct timeval * timeout); +int rb_thread_wait_for_single_fd(struct rb_thread_struct *th, int fd, int events, struct timeval * timeout); size_t rb_thread_io_close_interrupt(struct rb_io *); void rb_thread_io_close_wait(struct rb_io *); diff --git a/io.c b/io.c index e0e7d40548ee69..e739cee3795eec 100644 --- a/io.c +++ b/io.c @@ -1291,7 +1291,8 @@ internal_writev_func(void *ptr) static ssize_t rb_io_read_memory(rb_io_t *fptr, void *buf, size_t count) { - VALUE scheduler = rb_fiber_scheduler_current(); + rb_thread_t *th = GET_THREAD(); + VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th); if (scheduler != Qnil) { VALUE result = rb_fiber_scheduler_io_read_memory(scheduler, fptr->self, buf, count, 0); @@ -1301,7 +1302,7 @@ rb_io_read_memory(rb_io_t *fptr, void *buf, size_t count) } struct io_internal_read_struct iis = { - .th = rb_thread_current(), + .th = th->self, .fptr = fptr, .nonblock = 0, .fd = fptr->fd, @@ -1324,7 +1325,8 @@ rb_io_read_memory(rb_io_t *fptr, void *buf, size_t count) static ssize_t rb_io_write_memory(rb_io_t *fptr, const void *buf, size_t count) { - VALUE scheduler = rb_fiber_scheduler_current(); + rb_thread_t *th = GET_THREAD(); + VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th); if (scheduler != Qnil) { VALUE result = rb_fiber_scheduler_io_write_memory(scheduler, fptr->self, buf, count, 0); @@ -1334,7 +1336,7 @@ rb_io_write_memory(rb_io_t *fptr, const void *buf, size_t count) } struct io_internal_write_struct iis = { - .th = rb_thread_current(), + .th = th->self, .fptr = fptr, .nonblock = 0, .fd = fptr->fd, @@ -1360,7 +1362,9 @@ rb_writev_internal(rb_io_t *fptr, const struct iovec *iov, int iovcnt) { if (!iovcnt) return 0; - VALUE scheduler = rb_fiber_scheduler_current(); + rb_thread_t *th = GET_THREAD(); + + VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th); if (scheduler != Qnil) { // This path assumes at least one `iov`: VALUE result = rb_fiber_scheduler_io_write_memory(scheduler, fptr->self, iov[0].iov_base, iov[0].iov_len, 0); @@ -1371,7 +1375,7 @@ rb_writev_internal(rb_io_t *fptr, const struct iovec *iov, int iovcnt) } struct io_internal_writev_struct iis = { - .th = rb_thread_current(), + .th = th->self, .fptr = fptr, .nonblock = 0, .fd = fptr->fd, @@ -1453,7 +1457,8 @@ io_fflush(rb_io_t *fptr) VALUE rb_io_wait(VALUE io, VALUE events, VALUE timeout) { - VALUE scheduler = rb_fiber_scheduler_current(); + rb_thread_t *th = GET_THREAD(); + VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th); if (scheduler != Qnil) { return rb_fiber_scheduler_io_wait(scheduler, io, events, timeout); @@ -1474,7 +1479,7 @@ rb_io_wait(VALUE io, VALUE events, VALUE timeout) tv = &tv_storage; } - int ready = rb_thread_io_wait(fptr, RB_NUM2INT(events), tv); + int ready = rb_thread_io_wait(th, fptr, RB_NUM2INT(events), tv); if (ready < 0) { rb_sys_fail(0); @@ -1498,17 +1503,15 @@ 
io_from_fd(int fd) } static int -io_wait_for_single_fd(int fd, int events, struct timeval *timeout) +io_wait_for_single_fd(int fd, int events, struct timeval *timeout, rb_thread_t *th, VALUE scheduler) { - VALUE scheduler = rb_fiber_scheduler_current(); - if (scheduler != Qnil) { return RTEST( rb_fiber_scheduler_io_wait(scheduler, io_from_fd(fd), RB_INT2NUM(events), rb_fiber_scheduler_make_timeout(timeout)) ); } - return rb_thread_wait_for_single_fd(fd, events, timeout); + return rb_thread_wait_for_single_fd(th, fd, events, timeout); } int @@ -1516,7 +1519,8 @@ rb_io_wait_readable(int f) { io_fd_check_closed(f); - VALUE scheduler = rb_fiber_scheduler_current(); + rb_thread_t *th = GET_THREAD(); + VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th); switch (errno) { case EINTR: @@ -1536,7 +1540,7 @@ rb_io_wait_readable(int f) ); } else { - io_wait_for_single_fd(f, RUBY_IO_READABLE, NULL); + io_wait_for_single_fd(f, RUBY_IO_READABLE, NULL, th, scheduler); } return TRUE; @@ -1550,7 +1554,8 @@ rb_io_wait_writable(int f) { io_fd_check_closed(f); - VALUE scheduler = rb_fiber_scheduler_current(); + rb_thread_t *th = GET_THREAD(); + VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th); switch (errno) { case EINTR: @@ -1579,7 +1584,7 @@ rb_io_wait_writable(int f) ); } else { - io_wait_for_single_fd(f, RUBY_IO_WRITABLE, NULL); + io_wait_for_single_fd(f, RUBY_IO_WRITABLE, NULL, th, scheduler); } return TRUE; @@ -1591,7 +1596,9 @@ rb_io_wait_writable(int f) int rb_wait_for_single_fd(int fd, int events, struct timeval *timeout) { - return io_wait_for_single_fd(fd, events, timeout); + rb_thread_t *th = GET_THREAD(); + VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th); + return io_wait_for_single_fd(fd, events, timeout, th, scheduler); } int diff --git a/scheduler.c b/scheduler.c index 63c22b55aaa8d5..592bdcd1ef45ef 100644 --- a/scheduler.c +++ b/scheduler.c @@ -451,7 +451,7 @@ rb_fiber_scheduler_set(VALUE scheduler) } static VALUE -rb_fiber_scheduler_current_for_threadptr(rb_thread_t *thread) +fiber_scheduler_current_for_threadptr(rb_thread_t *thread) { RUBY_ASSERT(thread); @@ -467,13 +467,18 @@ VALUE rb_fiber_scheduler_current(void) { RUBY_ASSERT(ruby_thread_has_gvl_p()); - return rb_fiber_scheduler_current_for_threadptr(GET_THREAD()); + return fiber_scheduler_current_for_threadptr(GET_THREAD()); } // This function is allowed to be called without holding the GVL. 
VALUE rb_fiber_scheduler_current_for_thread(VALUE thread) { - return rb_fiber_scheduler_current_for_threadptr(rb_thread_ptr(thread)); + return fiber_scheduler_current_for_threadptr(rb_thread_ptr(thread)); +} + +VALUE rb_fiber_scheduler_current_for_threadptr(rb_thread_t *thread) +{ + return fiber_scheduler_current_for_threadptr(thread); } /* diff --git a/test/ruby/test_ractor.rb b/test/ruby/test_ractor.rb index 09c470911a5aaa..6ae511217aca09 100644 --- a/test/ruby/test_ractor.rb +++ b/test/ruby/test_ractor.rb @@ -201,6 +201,18 @@ def test_max_cpu_1 RUBY end + def test_symbol_proc_is_shareable + pr = :symbol.to_proc + assert_make_shareable(pr) + end + + # [Bug #21775] + def test_ifunc_proc_not_shareable + h = Hash.new { self } + pr = h.to_proc + assert_unshareable(pr, /not supported yet/, exception: RuntimeError) + end + def assert_make_shareable(obj) refute Ractor.shareable?(obj), "object was already shareable" Ractor.make_shareable(obj) diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 50ffb6138c34bd..df7d2cf2234631 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -340,6 +340,21 @@ def test(n) } end + def test_return_nonparam_local + # Use dead code (if false) to create a local without initialization instructions. + assert_compiles 'nil', %q{ + def foo(a) + if false + x = nil + end + x + end + def test = foo(1) + test + test + }, call_threshold: 2 + end + def test_setlocal_on_eval assert_compiles '1', %q{ @b = binding diff --git a/thread.c b/thread.c index 0beb84a5b4575b..e9d36cb1f69bc0 100644 --- a/thread.c +++ b/thread.c @@ -226,7 +226,7 @@ vm_check_ints_blocking(rb_execution_context_t *ec) // When a signal is received, we yield to the scheduler as soon as possible: if (result || RUBY_VM_INTERRUPTED(ec)) { - VALUE scheduler = rb_fiber_scheduler_current(); + VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th); if (scheduler != Qnil) { rb_fiber_scheduler_yield(scheduler); } @@ -1075,7 +1075,7 @@ thread_join_sleep(VALUE arg) } while (!thread_finished(target_th)) { - VALUE scheduler = rb_fiber_scheduler_current(); + VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th); if (!limit) { if (scheduler != Qnil) { @@ -1424,17 +1424,18 @@ rb_thread_sleep_deadly(void) static void rb_thread_sleep_deadly_allow_spurious_wakeup(VALUE blocker, VALUE timeout, rb_hrtime_t end) { - VALUE scheduler = rb_fiber_scheduler_current(); + rb_thread_t *th = GET_THREAD(); + VALUE scheduler = rb_fiber_scheduler_current_for_threadptr(th); if (scheduler != Qnil) { rb_fiber_scheduler_block(scheduler, blocker, timeout); } else { RUBY_DEBUG_LOG("..."); if (end) { - sleep_hrtime_until(GET_THREAD(), end, SLEEP_SPURIOUS_CHECK); + sleep_hrtime_until(th, end, SLEEP_SPURIOUS_CHECK); } else { - sleep_forever(GET_THREAD(), SLEEP_DEADLOCKABLE); + sleep_forever(th, SLEEP_DEADLOCKABLE); } } } @@ -4601,7 +4602,7 @@ wait_for_single_fd_blocking_region(rb_thread_t *th, struct pollfd *fds, nfds_t n * returns a mask of events */ static int -thread_io_wait(struct rb_io *io, int fd, int events, struct timeval *timeout) +thread_io_wait(rb_thread_t *th, struct rb_io *io, int fd, int events, struct timeval *timeout) { struct pollfd fds[1] = {{ .fd = fd, @@ -4614,8 +4615,8 @@ thread_io_wait(struct rb_io *io, int fd, int events, struct timeval *timeout) enum ruby_tag_type state; volatile int lerrno; - rb_execution_context_t *ec = GET_EC(); - rb_thread_t *th = rb_ec_thread_ptr(ec); + RUBY_ASSERT(th); + rb_execution_context_t *ec = th->ec; if (io) { blocking_operation.ec = ec; @@ -4749,7 
+4750,7 @@ init_set_fd(int fd, rb_fdset_t *fds) } static int -thread_io_wait(struct rb_io *io, int fd, int events, struct timeval *timeout) +thread_io_wait(rb_thread_t *th, struct rb_io *io, int fd, int events, struct timeval *timeout) { rb_fdset_t rfds, wfds, efds; struct select_args args; @@ -4758,7 +4759,7 @@ thread_io_wait(struct rb_io *io, int fd, int events, struct timeval *timeout) struct rb_io_blocking_operation blocking_operation; if (io) { args.io = io; - blocking_operation.ec = GET_EC(); + blocking_operation.ec = th->ec; rb_io_blocking_operation_enter(io, &blocking_operation); args.blocking_operation = &blocking_operation; } @@ -4783,15 +4784,15 @@ thread_io_wait(struct rb_io *io, int fd, int events, struct timeval *timeout) #endif /* ! USE_POLL */ int -rb_thread_wait_for_single_fd(int fd, int events, struct timeval *timeout) +rb_thread_wait_for_single_fd(rb_thread_t *th, int fd, int events, struct timeval *timeout) { - return thread_io_wait(NULL, fd, events, timeout); + return thread_io_wait(th, NULL, fd, events, timeout); } int -rb_thread_io_wait(struct rb_io *io, int events, struct timeval * timeout) +rb_thread_io_wait(rb_thread_t *th, struct rb_io *io, int events, struct timeval * timeout) { - return thread_io_wait(io, io->fd, events, timeout); + return thread_io_wait(th, io, io->fd, events, timeout); } /* diff --git a/thread_pthread.c b/thread_pthread.c index 66323598607398..a2e42da13e70b1 100644 --- a/thread_pthread.c +++ b/thread_pthread.c @@ -2988,7 +2988,7 @@ timer_thread_deq_wakeup(rb_vm_t *vm, rb_hrtime_t now, uint32_t *event_serial) static void timer_thread_wakeup_thread_locked(struct rb_thread_sched *sched, rb_thread_t *th, uint32_t event_serial) { - if (sched->running != th && th->event_serial == event_serial) { + if (sched->running != th && th->sched.event_serial == event_serial) { thread_sched_to_ready_common(sched, th, true, false); } } diff --git a/thread_pthread.h b/thread_pthread.h index f579e53781a74c..992e9fb0803d67 100644 --- a/thread_pthread.h +++ b/thread_pthread.h @@ -48,7 +48,7 @@ struct rb_thread_sched_waiting { struct ccan_list_node node; }; -// per-Thead scheduler helper data +// per-Thread scheduler helper data struct rb_thread_sched_item { struct { struct ccan_list_node ubf; @@ -70,6 +70,7 @@ struct rb_thread_sched_item { } node; struct rb_thread_sched_waiting waiting_reason; + uint32_t event_serial; bool finished; bool malloc_stack; diff --git a/thread_pthread_mn.c b/thread_pthread_mn.c index ad4d0915e790b2..5c21f212e4ab37 100644 --- a/thread_pthread_mn.c +++ b/thread_pthread_mn.c @@ -65,7 +65,7 @@ thread_sched_wait_events(struct rb_thread_sched *sched, rb_thread_t *th, int fd, volatile bool timedout = false, need_cancel = false; - uint32_t event_serial = ++th->event_serial; // overflow is okay + uint32_t event_serial = ++th->sched.event_serial; // overflow is okay if (ubf_set(th, ubf_event_waiting, (void *)th)) { return false; @@ -81,11 +81,11 @@ thread_sched_wait_events(struct rb_thread_sched *sched, rb_thread_t *th, int fd, RB_INTERNAL_THREAD_HOOK(RUBY_INTERNAL_THREAD_EVENT_SUSPENDED, th); if (th->sched.waiting_reason.flags == thread_sched_waiting_none) { - th->event_serial++; + th->sched.event_serial++; // timer thread has dequeued us already, but it won't try to wake us because we bumped our serial } else if (RUBY_VM_INTERRUPTED(th->ec)) { - th->event_serial++; // make sure timer thread doesn't try to wake us + th->sched.event_serial++; // make sure timer thread doesn't try to wake us need_cancel = true; } else { diff --git a/vm.c b/vm.c 
index f832fe401f124b..99f96ff7a0dbc2 100644 --- a/vm.c +++ b/vm.c @@ -1594,7 +1594,10 @@ rb_proc_ractor_make_shareable(VALUE self, VALUE replace_self) proc->is_isolated = TRUE; } else { - VALUE proc_self = vm_block_self(vm_proc_block(self)); + const struct rb_block *block = vm_proc_block(self); + if (block->type != block_type_symbol) rb_raise(rb_eRuntimeError, "not supported yet"); + + VALUE proc_self = vm_block_self(block); if (!rb_ractor_shareable_p(proc_self)) { rb_raise(rb_eRactorIsolationError, "Proc's self is not shareable: %" PRIsVALUE, diff --git a/vm_core.h b/vm_core.h index 4195ea5e59c9a4..d391c4258a4337 100644 --- a/vm_core.h +++ b/vm_core.h @@ -1130,7 +1130,6 @@ typedef struct rb_thread_struct { struct rb_thread_sched_item sched; bool mn_schedulable; rb_atomic_t serial; // only for RUBY_DEBUG_LOG() - uint32_t event_serial; VALUE last_status; /* $? */ diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 865f80ca4f0cab..4a53e8834c5fb7 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -2518,6 +2518,7 @@ fn gen_setlocal_generic( ep_offset: u32, level: u32, ) -> Option<CodegenStatus> { + // Post condition: The type of the set local is updated in the Context. let value_type = asm.ctx.get_opnd_type(StackOpnd(0)); // Fallback because of write barrier @@ -2539,6 +2540,11 @@ fn gen_setlocal_generic( ); asm.stack_pop(1); + // Set local type in the context + if level == 0 { + let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset).as_usize(); + asm.ctx.set_local_type(local_idx, value_type); + } return Some(KeepCompiling); } @@ -2591,6 +2597,7 @@ fn gen_setlocal_generic( ); } + // Set local type in the context if level == 0 { let local_idx = ep_offset_to_local_idx(jit.get_iseq(), ep_offset).as_usize(); asm.ctx.set_local_type(local_idx, value_type); diff --git a/zjit/src/asm/arm64/opnd.rs b/zjit/src/asm/arm64/opnd.rs index 667533ab938e0e..d7185ac23b38ff 100644 --- a/zjit/src/asm/arm64/opnd.rs +++ b/zjit/src/asm/arm64/opnd.rs @@ -98,6 +98,8 @@ pub const X2_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 2 }; pub const X3_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 3 }; pub const X4_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 4 }; pub const X5_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 5 }; +pub const X6_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 6 }; +pub const X7_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 7 }; // caller-save registers pub const X9_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 9 }; diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index 55a65e3ea6161e..e00ea270d52dea 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -24,13 +24,15 @@ pub const EC: Opnd = Opnd::Reg(X20_REG); pub const SP: Opnd = Opnd::Reg(X21_REG); // C argument registers on this platform -pub const C_ARG_OPNDS: [Opnd; 6] = [ +pub const C_ARG_OPNDS: [Opnd; 8] = [ Opnd::Reg(X0_REG), Opnd::Reg(X1_REG), Opnd::Reg(X2_REG), Opnd::Reg(X3_REG), Opnd::Reg(X4_REG), - Opnd::Reg(X5_REG) + Opnd::Reg(X5_REG), + Opnd::Reg(X6_REG), + Opnd::Reg(X7_REG) ]; // C return value register on this platform @@ -199,6 +201,8 @@ pub const ALLOC_REGS: &[Reg] = &[ X3_REG, X4_REG, X5_REG, + X6_REG, + X7_REG, X11_REG, X12_REG, ]; @@ -231,7 +235,7 @@ impl Assembler { /// Get a list of all of the caller-saved registers pub fn get_caller_save_regs() -> Vec<Reg> { - vec![X1_REG, X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG] + vec![X1_REG, X6_REG, X7_REG, X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG] } /// How many bytes a call and a 
[Self::frame_setup] would change native SP @@ -488,31 +492,13 @@ impl Assembler { } */ Insn::CCall { opnds, .. } => { - assert!(opnds.len() <= C_ARG_OPNDS.len()); - - // Load each operand into the corresponding argument - // register. - // Note: the iteration order is reversed to avoid corrupting x0, - // which is both the return value and first argument register - if !opnds.is_empty() { - let mut args: Vec<(Opnd, Opnd)> = vec![]; - for (idx, opnd) in opnds.iter_mut().enumerate().rev() { - // If the value that we're sending is 0, then we can use - // the zero register, so in this case we'll just send - // a UImm of 0 along as the argument to the move. - let value = match opnd { - Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0), - Opnd::Mem(_) => split_memory_address(asm, *opnd), - _ => *opnd - }; - args.push((C_ARG_OPNDS[idx], value)); - } - asm.parallel_mov(args); - } - - // Now we push the CCall without any arguments so that it - // just performs the call. - *opnds = vec![]; + opnds.iter_mut().for_each(|opnd| { + *opnd = match opnd { + Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0), + Opnd::Mem(_) => split_memory_address(asm, *opnd), + _ => *opnd + }; + }); asm.push_insn(insn); }, Insn::Cmp { left, right } => { @@ -1857,17 +1843,19 @@ mod tests { assert_disasm_snapshot!(cb.disasm(), @r" 0x0: str x1, [sp, #-0x10]! - 0x4: str x9, [sp, #-0x10]! - 0x8: str x10, [sp, #-0x10]! - 0xc: str x11, [sp, #-0x10]! - 0x10: str x12, [sp, #-0x10]! - 0x14: str x13, [sp, #-0x10]! - 0x18: str x14, [sp, #-0x10]! - 0x1c: str x15, [sp, #-0x10]! - 0x20: mrs x16, nzcv - 0x24: str x16, [sp, #-0x10]! + 0x4: str x6, [sp, #-0x10]! + 0x8: str x7, [sp, #-0x10]! + 0xc: str x9, [sp, #-0x10]! + 0x10: str x10, [sp, #-0x10]! + 0x14: str x11, [sp, #-0x10]! + 0x18: str x12, [sp, #-0x10]! + 0x1c: str x13, [sp, #-0x10]! + 0x20: str x14, [sp, #-0x10]! + 0x24: str x15, [sp, #-0x10]! + 0x28: mrs x16, nzcv + 0x2c: str x16, [sp, #-0x10]! 
"); - assert_snapshot!(cb.hexdump(), @"e10f1ff8e90f1ff8ea0f1ff8eb0f1ff8ec0f1ff8ed0f1ff8ee0f1ff8ef0f1ff810423bd5f00f1ff8"); + assert_snapshot!(cb.hexdump(), @"e10f1ff8e60f1ff8e70f1ff8e90f1ff8ea0f1ff8eb0f1ff8ec0f1ff8ed0f1ff8ee0f1ff8ef0f1ff810423bd5f00f1ff8"); } #[test] @@ -1887,9 +1875,11 @@ mod tests { 0x18: ldr x11, [sp], #0x10 0x1c: ldr x10, [sp], #0x10 0x20: ldr x9, [sp], #0x10 - 0x24: ldr x1, [sp], #0x10 + 0x24: ldr x7, [sp], #0x10 + 0x28: ldr x6, [sp], #0x10 + 0x2c: ldr x1, [sp], #0x10 "); - assert_snapshot!(cb.hexdump(), @"10421bd5f00741f8ef0741f8ee0741f8ed0741f8ec0741f8eb0741f8ea0741f8e90741f8e10741f8"); + assert_snapshot!(cb.hexdump(), @"10421bd5f00741f8ef0741f8ee0741f8ed0741f8ec0741f8eb0741f8ea0741f8e90741f8e70741f8e60741f8e10741f8"); } #[test] @@ -2658,14 +2648,14 @@ mod tests { ]); asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len()); - assert_disasm_snapshot!(cb.disasm(), @" - 0x0: mov x15, x0 - 0x4: mov x0, x1 - 0x8: mov x1, x15 - 0xc: mov x16, #0 - 0x10: blr x16 + assert_disasm_snapshot!(cb.disasm(), @r" + 0x0: mov x15, x1 + 0x4: mov x1, x0 + 0x8: mov x0, x15 + 0xc: mov x16, #0 + 0x10: blr x16 "); - assert_snapshot!(cb.hexdump(), @"ef0300aae00301aae1030faa100080d200023fd6"); + assert_snapshot!(cb.hexdump(), @"ef0301aae10300aae0030faa100080d200023fd6"); } #[test] @@ -2681,17 +2671,17 @@ mod tests { ]); asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len()); - assert_disasm_snapshot!(cb.disasm(), @" - 0x0: mov x15, x2 - 0x4: mov x2, x3 - 0x8: mov x3, x15 - 0xc: mov x15, x0 - 0x10: mov x0, x1 - 0x14: mov x1, x15 - 0x18: mov x16, #0 - 0x1c: blr x16 + assert_disasm_snapshot!(cb.disasm(), @r" + 0x0: mov x15, x1 + 0x4: mov x1, x0 + 0x8: mov x0, x15 + 0xc: mov x15, x3 + 0x10: mov x3, x2 + 0x14: mov x2, x15 + 0x18: mov x16, #0 + 0x1c: blr x16 "); - assert_snapshot!(cb.hexdump(), @"ef0302aae20303aae3030faaef0300aae00301aae1030faa100080d200023fd6"); + assert_snapshot!(cb.hexdump(), @"ef0301aae10300aae0030faaef0303aae30302aae2030faa100080d200023fd6"); } #[test] @@ -2706,15 +2696,15 @@ mod tests { ]); asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len()); - assert_disasm_snapshot!(cb.disasm(), @" - 0x0: mov x15, x0 - 0x4: mov x0, x1 - 0x8: mov x1, x2 - 0xc: mov x2, x15 - 0x10: mov x16, #0 - 0x14: blr x16 + assert_disasm_snapshot!(cb.disasm(), @r" + 0x0: mov x15, x1 + 0x4: mov x1, x2 + 0x8: mov x2, x0 + 0xc: mov x0, x15 + 0x10: mov x16, #0 + 0x14: blr x16 "); - assert_snapshot!(cb.hexdump(), @"ef0300aae00301aae10302aae2030faa100080d200023fd6"); + assert_snapshot!(cb.hexdump(), @"ef0301aae10302aae20300aae0030faa100080d200023fd6"); } #[test] diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index f4ed3d7cb78ce6..af19f056d339ef 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -1559,9 +1559,8 @@ impl Assembler frame_setup_idxs.push(asm.insns.len()); } - let before_ccall = match (&insn, iterator.peek().map(|(_, insn)| insn)) { - (Insn::ParallelMov { .. }, Some(Insn::CCall { .. })) | - (Insn::CCall { .. }, _) if !pool.is_empty() => { + let before_ccall = match &insn { + Insn::CCall { .. } if !pool.is_empty() => { // If C_RET_REG is in use, move it to another register. // This must happen before last-use registers are deallocated. 
if let Some(vreg_idx) = pool.vreg_for(&C_RET_REG) { @@ -1612,6 +1611,25 @@ impl Assembler if cfg!(target_arch = "x86_64") && saved_regs.len() % 2 == 1 { asm.cpush(Opnd::Reg(saved_regs.last().unwrap().0)); } + if let Insn::ParallelMov { moves } = &insn { + if moves.len() > C_ARG_OPNDS.len() { + let difference = moves.len().saturating_sub(C_ARG_OPNDS.len()); + + #[cfg(target_arch = "x86_64")] + let offset = { + // double quadword alignment + ((difference + 3) / 4) * 4 + }; + + #[cfg(target_arch = "aarch64")] + let offset = { + // quadword alignment + if difference % 2 == 0 { difference } else { difference + 1 } + }; + + asm.sub_into(NATIVE_STACK_PTR, (offset * 8).into()); + } + } } // Allocate a register for the output operand if it exists @@ -1703,7 +1721,23 @@ impl Assembler // Push instruction(s) let is_ccall = matches!(insn, Insn::CCall { .. }); match insn { - Insn::ParallelMov { moves } => { + Insn::CCall { opnds, fptr, start_marker, end_marker, out } => { + let mut moves: Vec<(Opnd, Opnd)> = vec![]; + let num_reg_args = opnds.len().min(C_ARG_OPNDS.len()); + let num_stack_args = opnds.len().saturating_sub(C_ARG_OPNDS.len()); + + if num_stack_args > 0 { + for (i, opnd) in opnds.iter().skip(num_reg_args).enumerate() { + moves.push((Opnd::mem(64, NATIVE_STACK_PTR, 8 * i as i32), *opnd)); + } + } + + if num_reg_args > 0 { + for (i, opnd) in opnds.iter().take(num_reg_args).enumerate() { + moves.push((C_ARG_OPNDS[i], *opnd)); + } + } + // For trampolines that use scratch registers, attempt to lower ParallelMov without scratch_reg. if let Some(moves) = Self::resolve_parallel_moves(&moves, None) { for (dst, src) in moves { @@ -1713,13 +1747,12 @@ impl Assembler // If it needs a scratch_reg, leave it to *_split_with_scratch_regs to handle it. asm.push_insn(Insn::ParallelMov { moves }); } - } - Insn::CCall { opnds, fptr, start_marker, end_marker, out } => { + // Split start_marker and end_marker here to avoid inserting push/pop between them. if let Some(start_marker) = start_marker { asm.push_insn(Insn::PosMarker(start_marker)); } - asm.push_insn(Insn::CCall { opnds, fptr, start_marker: None, end_marker: None, out }); + asm.push_insn(Insn::CCall { opnds: vec![], fptr, start_marker: None, end_marker: None, out }); if let Some(end_marker) = end_marker { asm.push_insn(Insn::PosMarker(end_marker)); } diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index 9f780617cc4ac5..4af704644910f3 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -351,21 +351,7 @@ impl Assembler { }; asm.push_insn(insn); }, - Insn::CCall { opnds, .. } => { - assert!(opnds.len() <= C_ARG_OPNDS.len()); - - // Load each operand into the corresponding argument register. - if !opnds.is_empty() { - let mut args: Vec<(Opnd, Opnd)> = vec![]; - for (idx, opnd) in opnds.iter_mut().enumerate() { - args.push((C_ARG_OPNDS[idx], *opnd)); - } - asm.parallel_mov(args); - } - - // Now we push the CCall without any arguments so that it - // just performs the call. - *opnds = vec![]; + Insn::CCall { .. } => { asm.push_insn(insn); }, Insn::Lea { .. } => { diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 67f53a3477c099..07f519db1c0e7d 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -398,15 +398,12 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::Send { cd, blockiseq, state, reason, .. } => gen_send(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::SendForward { cd, blockiseq, state, reason, .. 
} => gen_send_forward(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::SendWithoutBlock { cd, state, reason, .. } => gen_send_without_block(jit, asm, cd, &function.frame_state(state), reason), - // Give up SendWithoutBlockDirect for 6+ args since asm.ccall() doesn't support it. + // Give up SendWithoutBlockDirect for 6+ args since the callee doesn't know how to read arguments from the stack. Insn::SendWithoutBlockDirect { cd, state, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() => // +1 for self gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), SendFallbackReason::TooManyArgsForLir), Insn::SendWithoutBlockDirect { cme, iseq, recv, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(recv), opnds!(args), &function.frame_state(*state)), &Insn::InvokeSuper { cd, blockiseq, state, reason, .. } => gen_invokesuper(jit, asm, cd, blockiseq, &function.frame_state(state), reason), &Insn::InvokeBlock { cd, state, reason, .. } => gen_invokeblock(jit, asm, cd, &function.frame_state(state), reason), - // Ensure we have enough room fit ec, self, and arguments - // TODO remove this check when we have stack args (we can use Time.new to test it) - Insn::InvokeBuiltin { bf, state, .. } if bf.argc + 2 > (C_ARG_OPNDS.len() as i32) => return Err(*state), Insn::InvokeBuiltin { bf, leaf, args, state, .. } => gen_invokebuiltin(jit, asm, &function.frame_state(*state), bf, *leaf, opnds!(args)), &Insn::EntryPoint { jit_entry_idx } => no_output!(gen_entry_point(jit, asm, jit_entry_idx)), Insn::Return { val } => no_output!(gen_return(asm, opnd!(val))), @@ -453,10 +450,6 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::GuardGreaterEq { left, right, state } => gen_guard_greater_eq(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)), Insn::PatchPoint { invariant, state } => no_output!(gen_patch_point(jit, asm, invariant, &function.frame_state(*state))), Insn::CCall { cfunc, recv, args, name, return_type: _, elidable: _ } => gen_ccall(asm, *cfunc, *name, opnd!(recv), opnds!(args)), - // Give up CCallWithFrame for 7+ args since asm.ccall() supports at most 6 args (recv + args). - // There's no test case for this because no core cfuncs have this many parameters. But C extensions could have such methods. - Insn::CCallWithFrame { cd, state, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() => - gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), SendFallbackReason::CCallWithFrameTooManyArgs), Insn::CCallWithFrame { cfunc, recv, name, args, cme, state, blockiseq, .. 
} => gen_ccall_with_frame(jit, asm, *cfunc, *name, opnd!(recv), opnds!(args), *cme, *blockiseq, &function.frame_state(*state)), Insn::CCallVariadic { cfunc, recv, name, args, cme, state, blockiseq, return_type: _, elidable: _ } => { @@ -722,10 +715,6 @@ fn gen_fixnum_bit_check(asm: &mut Assembler, val: Opnd, index: u8) -> Opnd { } fn gen_invokebuiltin(jit: &JITState, asm: &mut Assembler, state: &FrameState, bf: &rb_builtin_function, leaf: bool, args: Vec<Opnd>) -> lir::Opnd { - assert!(bf.argc + 2 <= C_ARG_OPNDS.len() as i32, - "gen_invokebuiltin should not be called for builtin function {} with too many arguments: {}", - unsafe { std::ffi::CStr::from_ptr(bf.name).to_str().unwrap() }, - bf.argc); if leaf { gen_prepare_leaf_call_with_gc(asm, state); } else { diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 77bd172396fdda..dfb8bbaecb55b4 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -1731,6 +1731,12 @@ fn iseq_get_return_value(iseq: IseqPtr, captured_opnd: Option, ci_flags: let ep_offset = unsafe { *rb_iseq_pc_at_idx(iseq, 1) }.as_u32(); let local_idx = ep_offset_to_local_idx(iseq, ep_offset); + // Only inline if the local is a parameter (not a method-defined local) as we are indexing args. + let param_size = unsafe { rb_get_iseq_body_param_size(iseq) } as usize; + if local_idx >= param_size { + return None; + } + if unsafe { rb_simple_iseq_p(iseq) } { return Some(IseqReturn::LocalVariable(local_idx.try_into().unwrap())); } diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 4c62971c1eacb1..e3c74c86364007 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -761,6 +761,44 @@ mod hir_opt_tests { "); } + #[test] + fn test_no_inline_nonparam_local_return() { + // Methods that return non-parameter local variables should NOT be inlined, + // because the local variable index will be out of bounds for args. + // The method must have a parameter so param_size > 0, and return a local + // that's not a parameter so local_idx >= param_size. + // Use dead code (if false) to create a local without initialization instructions, + // resulting in just getlocal + leave which enters the inlining code path. + eval(" + def foo(a) + if false + x = nil + end + x + end + def test = foo(1) + test; test + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:8: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v11:Fixnum[1] = Const Value(1) + PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(Object@0x1000) + v20:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] + v21:BasicObject = SendWithoutBlockDirect v20, :foo (0x1038), v11 + CheckInterrupts + Return v21 + "); + } + #[test] fn test_optimize_send_to_aliased_cfunc() { eval("