From aaf9b95d4bb31fdfb1195fd484647c879d3f44cc Mon Sep 17 00:00:00 2001
From: Yang Liu
Date: Tue, 23 Dec 2025 15:23:53 +0800
Subject: [PATCH] [LA64_DYNAREC] Optimized RCR opcodes

---
 src/dynarec/la64/dynarec_la64_00.c         | 36 +++++++++++----------
 src/dynarec/la64/dynarec_la64_emit_shift.c | 37 ++++++++++++++++++----
 src/dynarec/la64/dynarec_la64_helper.h     |  2 ++
 3 files changed, 53 insertions(+), 22 deletions(-)

diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index 33549cfee..8fc89a75e 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -2334,15 +2334,23 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     break;
                 case 3:
                     INST_NAME("RCR Ed, Ib");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    READFLAGS(X_CF);
-                    SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
-                    GETEDW(x4, x1, 0);
-                    u8 = (F8) & (rex.w ? 0x3f : 0x1f);
-                    MOV32w(x2, u8);
-                    CALL_(rex.w ? (const_rcr64) : (const_rcr32), ed, x4, x1, x2);
-                    WBACK;
-                    if (!wback && !rex.w) ZEROUP(ed);
+                    u8 = geted_ib(dyn, addr, ninst, nextop) & (0x1f + (rex.w * 0x20)); // count masked with 0x1f, or 0x3f when rex.w is 1
+                    if (u8) {
+                        READFLAGS(X_CF);
+                        SETFLAGS(X_CF | X_OF, SF_SUBSET, NAT_FLAGS_FUSION); // removed PENDING on purpose
+                        GETED(1);
+                        u8 = (F8) & (rex.w ? 0x3f : 0x1f); // same mask as above, applied to the value yielded by F8
+                        emit_rcr32c(dyn, ninst, rex, ed, u8, x3, x4, x5);
+                        WBACK;
+                    } else { // masked count is 0: no rotate is emitted and READFLAGS/SETFLAGS are not invoked
+                        if (MODREG && !rex.w && !rex.is32bits) {
+                            GETED(1);
+                            ZEROUP2(ed, ed); // NOTE(review): presumably clears the upper 32 bits of the register operand - confirm against ZEROUP2
+                        } else {
+                            FAKEED;
+                        }
+                        F8;
+                    }
                     break;
                 case 4:
                 case 6:
@@ -2867,14 +2875,11 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     break;
                 case 3:
                     INST_NAME("RCR Ed, 1");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
                     READFLAGS(X_CF);
-                    SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
-                    MOV32w(x2, 1);
-                    GETEDW(x4, x1, 0);
-                    CALL_(rex.w ? const_rcr64 : const_rcr32, ed, x4, x1, x2);
+                    SETFLAGS(X_OF | X_CF, SF_SUBSET, NAT_FLAGS_FUSION); // removed PENDING on purpose
+                    GETED(0);
+                    emit_rcr32c(dyn, ninst, rex, ed, 1, x3, x4, x5); // constant count of 1
                     WBACK;
-                    if (!wback && !rex.w) ZEROUP(ed);
                     break;
                 case 4:
                 case 6:
@@ -2951,7 +2956,6 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 case 3:
                     INST_NAME("RCR Ed, CL");
                     MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    READFLAGS(X_CF);
                     if (BOX64DRENV(dynarec_safeflags) > 1) {
                         READFLAGS(X_OF | X_CF);
                     } else {
diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c
index 35c3b9a74..28988d4e7 100644
--- a/src/dynarec/la64/dynarec_la64_emit_shift.c
+++ b/src/dynarec/la64/dynarec_la64_emit_shift.c
@@ -1269,8 +1269,6 @@ void emit_rol16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
 void emit_rol32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
 {
     int64_t j64;
-    if (!rex.w) ZEROUP(s1);
-    BEQ_NEXT(s2, xZR);
     IFX (X_PEND) {
         SDxw(s2, xEmu, offsetof(x64emu_t, op2));
         SDxw(s1, xEmu, offsetof(x64emu_t, op1));
@@ -1331,10 +1329,6 @@ void emit_rol32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 void emit_ror32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
 {
     int64_t j64;
-    if (!rex.w) {
-        ZEROUP(s1);
-    }
-    BEQ_NEXT(s2, xZR);
     IFX (X_PEND) {
         SDxw(s2, xEmu, offsetof(x64emu_t, op2));
         SDxw(s1, xEmu, offsetof(x64emu_t, op1));
@@ -1731,6 +1725,37 @@ void emit_rcr16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
     if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
 }
 
+// emit RCR32 instruction (operand is 64 bits wide when rex.w is set, 32 bits otherwise), from s1, constant c, store result in s1 using s3, s4 and s5 as scratch; nothing is emitted when c is 0
+void emit_rcr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5)
+{
+    if (!c) return; // zero count: emit nothing
+
+    RESTORE_EFLAGS(s3); // NOTE(review): presumably makes the emulated flags readable in xFlags before they are used below - confirm against the macro
+    IFX (X_OF) {
+        SRLI_D(s3, s1, rex.w ? 63 : 31); // bit 0 of s3 = top bit (63 or 31) of the operand before rotation
+        XOR(s3, s3, xFlags); // bit 0 of s3 ^= bit 0 of xFlags (assumed to be CF, as in x86 EFLAGS - confirm F_CF)
+        BSTRINS_D(xFlags, s3, F_OF, F_OF); // bit 0 of s3 is stored at the F_OF position of xFlags
+    }
+    IFX (X_CF) BSTRPICK_D(s5, s1, c - 1, c - 1); // s5 = bit c-1 of the original operand, written to F_CF at the end
+    if (c == 1) {
+        SRLIxw(s1, s1, 1);
+        BSTRINS_D(s1, xFlags, rex.w ? 63 : 31, rex.w ? 63 : 31); // bit 0 of xFlags becomes the top bit of the result
+    } else {
+        SRLIxw(s4, s1, c); // s4 = operand shifted right by c
+        BSTRINS_D(s4, xFlags, (rex.w ? 64 : 32) - c, (rex.w ? 64 : 32) - c); // bit 0 of xFlags lands at bit (width - c), just above the shifted-down bits
+        SLLI_D(s3, s1, (rex.w ? 65 : 33) - c); // low c-1 bits of the operand wrap around to the top of the result
+        OR(s1, s3, s4);
+        if (!rex.w) ZEROUP(s1); // the 64-bit SLLI_D can set bits above bit 31; ZEROUP presumably clears them (confirm)
+    }
+
+    IFX (X_CF) {
+        BSTRINS_D(xFlags, s5, F_CF, F_CF); // bit saved in s5 (taken before s1 was overwritten) is stored at F_CF
+    }
+
+    IFXA (X_ALL, cpuext.lbt) SPILL_EFLAGS();
+    if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR, s3, xZR);
+}
+
 // emit ROR16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_ror16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
 {
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index 0d713ddfb..dd5053e8e 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -1248,6 +1248,7 @@
 #define emit_shrd16c STEPNAME(emit_shrd16c)
 #define emit_shrd16 STEPNAME(emit_shrd16)
 #define emit_shld16 STEPNAME(emit_shld16)
+#define emit_rcr32c STEPNAME(emit_rcr32c)
 
 #define emit_pf STEPNAME(emit_pf)
 
@@ -1404,6 +1405,7 @@ void emit_shrd16c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
 void emit_shld16c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);
 void emit_shld16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6);
 void emit_shrd16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6);
+void emit_rcr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5);
 
 void emit_pf(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4);
 