diff --git a/.github/workflows/zjit-macos.yml b/.github/workflows/zjit-macos.yml index 8e5ca954cb8a43..30024a6d0c9d1b 100644 --- a/.github/workflows/zjit-macos.yml +++ b/.github/workflows/zjit-macos.yml @@ -40,6 +40,7 @@ jobs: - test_task: 'zjit-test-all' configure: '--enable-zjit=dev' + testopts: '--seed=11831' - test_task: 'btest' configure: '--enable-zjit=dev' @@ -48,6 +49,7 @@ jobs: GITPULLOPTIONS: --no-tags origin ${{ github.ref }} RUN_OPTS: ${{ matrix.zjit_opts }} SPECOPTS: ${{ matrix.specopts }} + TESTOPTS: ${{ matrix.testopts }} runs-on: macos-14 @@ -146,6 +148,7 @@ jobs: make -s ${{ matrix.test_task }} ${TESTS:+TESTS="$TESTS"} RUN_OPTS="$RUN_OPTS" SPECOPTS="$SPECOPTS" + TESTOPTS="$TESTOPTS" timeout-minutes: 60 env: RUBY_TESTOPTS: '-q --tty=no' diff --git a/.github/workflows/zjit-ubuntu.yml b/.github/workflows/zjit-ubuntu.yml index ecfd0ddeded21f..3e2db58f72b959 100644 --- a/.github/workflows/zjit-ubuntu.yml +++ b/.github/workflows/zjit-ubuntu.yml @@ -42,6 +42,7 @@ jobs: - test_task: 'zjit-test-all' configure: '--enable-zjit=dev' + testopts: '--seed=11831' - test_task: 'btest' configure: '--enable-zjit=dev' @@ -51,6 +52,7 @@ jobs: RUN_OPTS: ${{ matrix.zjit_opts }} YJIT_BENCH_OPTS: ${{ matrix.yjit_bench_opts }} SPECOPTS: ${{ matrix.specopts }} + TESTOPTS: ${{ matrix.testopts }} RUBY_DEBUG: ci BUNDLE_JOBS: 8 # for yjit-bench RUST_BACKTRACE: 1 @@ -167,6 +169,7 @@ jobs: run: >- make -s ${{ matrix.test_task }} ${TESTS:+TESTS="$TESTS"} RUN_OPTS="$RUN_OPTS" MSPECOPT=--debug SPECOPTS="$SPECOPTS" + TESTOPTS="$TESTOPTS" ZJIT_BINDGEN_DIFF_OPTS="$ZJIT_BINDGEN_DIFF_OPTS" timeout-minutes: 90 env: diff --git a/Cargo.toml b/Cargo.toml index 6ab7e3a45f7c7d..6f61e5f95cfd35 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,8 @@ path = "jit.rs" [features] disasm = ["yjit?/disasm", "zjit?/disasm"] -runtime_checks = [] +# TODO(GH-642) Turning this on trips a btest failure. +runtime_checks = [] # ["yjit?/runtime_checks", "zjit?/runtime_checks"] yjit = [ "dep:yjit" ] zjit = [ "dep:zjit" ] diff --git a/configure.ac b/configure.ac index 4766b8813cae21..8e08b91edbbc32 100644 --- a/configure.ac +++ b/configure.ac @@ -4014,10 +4014,16 @@ AS_IF([test x"$JIT_CARGO_SUPPORT" != "xno" -o \( x"$YJIT_SUPPORT" != "xno" -a x" YJIT_LIBS= ZJIT_LIBS= + # There's more processing below to get the feature set for the + # top-level crate, so capture at this point for feature set of + # just the zjit crate. + ZJIT_TEST_FEATURES="${rb_cargo_features}" + AS_IF([test x"${YJIT_SUPPORT}" != x"no"], [ rb_cargo_features="$rb_cargo_features,yjit" ]) AS_IF([test x"${ZJIT_SUPPORT}" != x"no"], [ + AC_SUBST(ZJIT_TEST_FEATURES) rb_cargo_features="$rb_cargo_features,zjit" ]) # if YJIT and ZJIT release mode diff --git a/prism/config.yml b/prism/config.yml index cb154500b3fa09..257bd389ed02ab 100644 --- a/prism/config.yml +++ b/prism/config.yml @@ -101,6 +101,8 @@ errors: - EXPECT_FOR_DELIMITER - EXPECT_IDENT_REQ_PARAMETER - EXPECT_IN_DELIMITER + - EXPECT_LPAREN_AFTER_NOT_LPAREN + - EXPECT_LPAREN_AFTER_NOT_OTHER - EXPECT_LPAREN_REQ_PARAMETER - EXPECT_MESSAGE - EXPECT_RBRACKET diff --git a/prism/prism.c b/prism/prism.c index ffe874fe08b475..85647020d832cb 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -13142,14 +13142,6 @@ match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8); } -/** - * Returns true if the current token is any of the nine given types. - */ -static inline bool -match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) { - return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9); -} - /** * If the current token is of the specified type, lex forward by one token and * return true. Otherwise, return false. For example: @@ -17412,6 +17404,14 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm // If we found a label, we need to immediately return to the caller. if (pm_symbol_node_label_p(node)) return node; + // Call nodes (arithmetic operations) are not allowed in patterns + if (PM_NODE_TYPE(node) == PM_CALL_NODE) { + pm_parser_err_node(parser, node, diag_id); + pm_missing_node_t *missing_node = pm_missing_node_create(parser, node->location.start, node->location.end); + pm_node_destroy(parser, node); + return (pm_node_t *) missing_node; + } + // Now that we have a primitive, we need to check if it's part of a range. if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) { pm_token_t operator = parser->previous; @@ -17694,7 +17694,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag // Gather up all of the patterns into the list. while (accept1(parser, PM_TOKEN_COMMA)) { // Break early here in case we have a trailing comma. - if (match9(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF,PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) { + if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) { node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous); pm_node_list_append(&nodes, node); trailing_rest = true; @@ -19775,6 +19775,20 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_arguments_t arguments = { 0 }; pm_node_t *receiver = NULL; + // If we do not accept a command call, then we also do not accept a + // not without parentheses. In this case we need to reject this + // syntax. + if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) { + if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) { + pm_parser_err(parser, parser->previous.end, parser->previous.end + 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN); + } else { + accept1(parser, PM_TOKEN_NEWLINE); + pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER); + } + + return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end); + } + accept1(parser, PM_TOKEN_NEWLINE); if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) { diff --git a/prism/templates/src/diagnostic.c.erb b/prism/templates/src/diagnostic.c.erb index ce98dc5acd5163..9a30a57e3b0eca 100644 --- a/prism/templates/src/diagnostic.c.erb +++ b/prism/templates/src/diagnostic.c.erb @@ -184,6 +184,8 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = { [PM_ERR_EXPECT_FOR_DELIMITER] = { "unexpected %s; expected a 'do', newline, or ';' after the 'for' loop collection", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_IDENT_REQ_PARAMETER] = { "expected an identifier for the required parameter", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_IN_DELIMITER] = { "expected a delimiter after the patterns of an `in` clause", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN] = { "expected a `(` immediately after `not`", PM_ERROR_LEVEL_SYNTAX }, + [PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER] = { "expected a `(` after `not`", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_LPAREN_REQ_PARAMETER] = { "expected a `(` to start a required parameter", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_MESSAGE] = { "unexpected %s; expecting a message to send to the receiver", PM_ERROR_LEVEL_SYNTAX }, [PM_ERR_EXPECT_RBRACKET] = { "expected a matching `]`", PM_ERROR_LEVEL_SYNTAX }, diff --git a/template/Makefile.in b/template/Makefile.in index 96c8d8031bcea0..1e6d55c435e95b 100644 --- a/template/Makefile.in +++ b/template/Makefile.in @@ -113,6 +113,7 @@ ZJIT_OBJ=@ZJIT_OBJ@ JIT_CARGO_SUPPORT=@JIT_CARGO_SUPPORT@ CARGO_TARGET_DIR=@abs_top_builddir@/target CARGO_BUILD_ARGS=@CARGO_BUILD_ARGS@ +ZJIT_TEST_FEATURES=@ZJIT_TEST_FEATURES@ RUST_LIB=@RUST_LIB@ RUST_LIBOBJ = $(RUST_LIB:.a=.@OBJEXT@) LDFLAGS = @STATIC@ $(CFLAGS) @LDFLAGS@ diff --git a/test/prism/errors/command_calls_31.txt b/test/prism/errors/command_calls_31.txt new file mode 100644 index 00000000000000..e662b254444821 --- /dev/null +++ b/test/prism/errors/command_calls_31.txt @@ -0,0 +1,17 @@ +true && not true + ^~~~ expected a `(` after `not` + ^~~~ unexpected 'true', expecting end-of-input + +true || not true + ^~~~ expected a `(` after `not` + ^~~~ unexpected 'true', expecting end-of-input + +true && not (true) + ^ expected a `(` immediately after `not` + ^ unexpected '(', expecting end-of-input + +true && not +true +^~~~ expected a `(` after `not` +^~~~ unexpected 'true', expecting end-of-input + diff --git a/test/prism/errors/pattern_arithmetic_expressions.txt b/test/prism/errors/pattern_arithmetic_expressions.txt new file mode 100644 index 00000000000000..cfb36505312584 --- /dev/null +++ b/test/prism/errors/pattern_arithmetic_expressions.txt @@ -0,0 +1,3 @@ +case 1; in -1**2; end + ^~~~~ expected a pattern expression after the `in` keyword + diff --git a/test/prism/errors/pattern_match_implicit_rest.txt b/test/prism/errors/pattern_match_implicit_rest.txt new file mode 100644 index 00000000000000..8602c0add06ce5 --- /dev/null +++ b/test/prism/errors/pattern_match_implicit_rest.txt @@ -0,0 +1,3 @@ +a=>b, *, + ^ expected a pattern expression after `,` + diff --git a/test/ruby/test_process.rb b/test/ruby/test_process.rb index 5497b182f76b8a..f1894ab0c30f8a 100644 --- a/test/ruby/test_process.rb +++ b/test/ruby/test_process.rb @@ -1682,9 +1682,10 @@ def test_uid_from_name if u = Etc.getpwuid(Process.uid) assert_equal(Process.uid, Process::UID.from_name(u.name), u.name) end - assert_raise_with_message(ArgumentError, /\u{4e0d 5b58 5728}/) { + exc = assert_raise_kind_of(ArgumentError, SystemCallError) { Process::UID.from_name("\u{4e0d 5b58 5728}") } + assert_match(/\u{4e0d 5b58 5728}/, exc.message) if exc.is_a?(ArgumentError) end end diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index fdcee3cafd858e..299600eef95d53 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -999,6 +999,26 @@ def test_require_rubygems_with_auto_compact }, call_threshold: 2 end + def test_profile_under_nested_jit_call + assert_compiles '[nil, nil, 3]', %q{ + def profile + 1 + 2 + end + + def jit_call(flag) + if flag + profile + end + end + + def entry(flag) + jit_call(flag) + end + + [entry(false), entry(false), entry(true)] + }, call_threshold: 2 + end + def test_bop_redefinition assert_runs '[3, :+, 100]', %q{ def test diff --git a/zjit.c b/zjit.c index 0c0c85d3a62bee..61c17d32c30751 100644 --- a/zjit.c +++ b/zjit.c @@ -295,6 +295,18 @@ rb_zjit_profile_disable(const rb_iseq_t *iseq) } } +// Update a YARV instruction to a given opcode (to disable ZJIT profiling). +void +rb_zjit_iseq_insn_set(const rb_iseq_t *iseq, unsigned int insn_idx, enum ruby_vminsn_type bare_insn) +{ +#if RUBY_DEBUG + int insn = rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[insn_idx]); + RUBY_ASSERT(vm_zjit_insn_to_bare_insn(insn) == (int)bare_insn); +#endif + const void *const *insn_table = rb_vm_get_insns_address_table(); + iseq->body->iseq_encoded[insn_idx] = (VALUE)insn_table[bare_insn]; +} + // Get profiling information for ISEQ void * rb_iseq_get_zjit_payload(const rb_iseq_t *iseq) diff --git a/zjit/Cargo.toml b/zjit/Cargo.toml index a86117d6e2f08e..a1da8e7cc0990a 100644 --- a/zjit/Cargo.toml +++ b/zjit/Cargo.toml @@ -18,3 +18,4 @@ expect-test = "1.5.1" [features] # Support --yjit-dump-disasm and RubyVM::YJIT.disasm using libcapstone. disasm = ["capstone"] +runtime_checks = [] diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index 91de6dcd8d4567..dd167e9eb07ac3 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -335,6 +335,7 @@ fn main() { .allowlist_function("rb_zjit_get_page_size") .allowlist_function("rb_zjit_iseq_builtin_attrs") .allowlist_function("rb_zjit_iseq_inspect") + .allowlist_function("rb_zjit_iseq_insn_set") .allowlist_function("rb_set_cfp_(pc|sp)") .allowlist_function("rb_c_method_tracing_currently_enabled") .allowlist_function("rb_full_cfunc_return") diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index ac5c9908f55c43..149d09813b2018 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -415,26 +415,36 @@ impl Assembler // being used. It is okay not to use their output here. #[allow(unused_must_use)] match &mut insn { - Insn::Sub { left, right, out } | Insn::Add { left, right, out } => { match (*left, *right) { - (Opnd::Reg(_) | Opnd::VReg { .. }, Opnd::Reg(_) | Opnd::VReg { .. }) => { - merge_three_reg_mov(&live_ranges, &mut iterator, left, right, out); - asm.push_insn(insn); - }, + // When one operand is a register, legalize the other operand + // into possibly an immdiate and swap the order if necessary. + // Only the rhs of ADD can be an immediate, but addition is commutative. (reg_opnd @ (Opnd::Reg(_) | Opnd::VReg { .. }), other_opnd) | (other_opnd, reg_opnd @ (Opnd::Reg(_) | Opnd::VReg { .. })) => { *left = reg_opnd; *right = split_shifted_immediate(asm, other_opnd); + // Now `right` is either a register or an immediate, both can try to + // merge with a subsequent mov. + merge_three_reg_mov(&live_ranges, &mut iterator, left, left, out); asm.push_insn(insn); - }, + } _ => { *left = split_load_operand(asm, *left); *right = split_shifted_immediate(asm, *right); + merge_three_reg_mov(&live_ranges, &mut iterator, left, right, out); asm.push_insn(insn); } } - }, + } + Insn::Sub { left, right, out } => { + *left = split_load_operand(asm, *left); + *right = split_shifted_immediate(asm, *right); + // Now `right` is either a register or an immediate, + // both can try to merge with a subsequent mov. + merge_three_reg_mov(&live_ranges, &mut iterator, left, left, out); + asm.push_insn(insn); + } Insn::And { left, right, out } | Insn::Or { left, right, out } | Insn::Xor { left, right, out } => { @@ -919,10 +929,28 @@ impl Assembler ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16)); }, Insn::Add { left, right, out } => { - adds(cb, out.into(), left.into(), right.into()); + // Usually, we issue ADDS, so you could branch on overflow, but ADDS with + // out=31 refers to out=XZR, which discards the sum. So, instead of ADDS + // (aliased to CMN in this case) we issue ADD instead which writes the sum + // to the stack pointer; we assume you got x31 from NATIVE_STACK_POINTER. + let out: A64Opnd = out.into(); + if let A64Opnd::Reg(A64Reg { reg_no: 31, .. }) = out { + add(cb, out, left.into(), right.into()); + } else { + adds(cb, out, left.into(), right.into()); + } }, Insn::Sub { left, right, out } => { - subs(cb, out.into(), left.into(), right.into()); + // Usually, we issue SUBS, so you could branch on overflow, but SUBS with + // out=31 refers to out=XZR, which discards the result. So, instead of SUBS + // (aliased to CMP in this case) we issue SUB instead which writes the diff + // to the stack pointer; we assume you got x31 from NATIVE_STACK_POINTER. + let out: A64Opnd = out.into(); + if let A64Opnd::Reg(A64Reg { reg_no: 31, .. }) = out { + sub(cb, out, left.into(), right.into()); + } else { + subs(cb, out, left.into(), right.into()); + } }, Insn::Mul { left, right, out } => { // If the next instruction is jo (jump on overflow) @@ -1363,12 +1391,42 @@ mod tests { asm.mov(sp, new_sp); let new_sp = asm.sub(sp, 0x20.into()); asm.mov(sp, new_sp); - asm.compile_with_num_regs(&mut cb, 2); - assert_disasm!(cb, "e08300b11f000091e08300f11f000091", {" + asm.compile_with_num_regs(&mut cb, 2); + assert_disasm!(cb, "ff830091ff8300d1", " 0x0: add sp, sp, #0x20 0x4: sub sp, sp, #0x20 - "}); + "); + } + + #[test] + fn add_into() { + let (mut asm, mut cb) = setup_asm(); + + let sp = Opnd::Reg(XZR_REG); + asm.add_into(sp, 8.into()); + asm.add_into(Opnd::Reg(X20_REG), 0x20.into()); + + asm.compile_with_num_regs(&mut cb, 0); + assert_disasm!(cb, "ff230091948200b1", " + 0x0: add sp, sp, #8 + 0x4: adds x20, x20, #0x20 + "); + } + + #[test] + fn sub_imm_reg() { + let (mut asm, mut cb) = setup_asm(); + + let difference = asm.sub(0x8.into(), Opnd::Reg(X5_REG)); + asm.load_into(Opnd::Reg(X1_REG), difference); + + asm.compile_with_num_regs(&mut cb, 1); + assert_disasm!(cb, "000180d2000005ebe10300aa", " + 0x0: mov x0, #8 + 0x4: subs x0, x0, x5 + 0x8: mov x1, x0 + "); } #[test] diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index e5453e4d554b67..94c53569b4e95c 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -270,6 +270,14 @@ impl From for Opnd { } } +/// Set of things we need to restore for side exits. +#[derive(Clone, Debug)] +pub struct SideExitContext { + pub pc: *const VALUE, + pub stack: Vec, + pub locals: Vec, +} + /// Branch target (something that we can jump to) /// for branch instructions #[derive(Clone, Debug)] @@ -281,12 +289,14 @@ pub enum Target Label(Label), /// Side exit to the interpreter SideExit { - pc: *const VALUE, - stack: Vec, - locals: Vec, - c_stack_bytes: usize, + /// Context to restore on regular side exits. None for side exits right + /// after JIT-to-JIT calls because we restore them before the JIT call. + context: Option, + /// We use this to enrich asm comments. reason: SideExitReason, - // Some if the side exit should write this label. We use it for patch points. + /// The number of bytes we need to adjust the C stack pointer by. + c_stack_bytes: usize, + /// Some if the side exit should write this label. We use it for patch points. label: Option