Skip to content
23 changes: 17 additions & 6 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2474,7 +2474,9 @@

;; Helper for emitting `MInst.FpuCSel16` / `MInst.FpuCSel32` / `MInst.FpuCSel64`
;; instructions.
(decl fpu_csel (Type Cond Reg Reg) ConsumesFlags)
;;
;; Recursion: may recurse once to downgrade from F16 to F32 when FP16 is not enabled.
(decl rec fpu_csel (Type Cond Reg Reg) ConsumesFlags)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In each case where we have a rec, could we have a comment describing why it's bounded?

In particular I'm thinking these should be modeled after something like Rust safety comments -- so e.g. // Recursion: may recurse once to reuse implementation for F32 in the case of F16 or // Recursion: bounded by explicit depth parameter 'depth' or // Recursion: each iteration of count_the_bits_in_isle removes one bit from value and completes when zero or something like that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added ; Recursion: ... comments for all (decl rec ...) terms.

(rule (fpu_csel $F16 cond if_true if_false)
(fpu_csel $F32 cond if_true if_false))

Expand Down Expand Up @@ -2524,9 +2526,11 @@
dst))

;; Helper for emitting `MInst.MovToFpu` instructions.
;;
;; Recursion: may recurse once to downgrade from F16 to F32 when FP16 is not enabled.
(spec (mov_to_fpu x s)
(provide (= result (zero_ext 64 (conv_to s x)))))
(decl mov_to_fpu (Reg ScalarSize) Reg)
(decl rec mov_to_fpu (Reg ScalarSize) Reg)
(rule (mov_to_fpu x size)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.MovToFpu dst x size))))
Expand Down Expand Up @@ -4017,7 +4021,9 @@
;; Note that we must make sure that all bits outside the lowest 16 are set to 0
;; because this function is also used to load wider constants (that have zeros
;; in their most significant bits).
(decl constant_f16 (u16) Reg)
;;
;; Recursion: forms cycle with `constant_f32`. Invokes 32-bit case when FP16 is not supported.
(decl rec constant_f16 (u16) Reg)
(rule 3 (constant_f16 n)
(if-let false (use_fp16))
(constant_f32 n))
Expand All @@ -4036,7 +4042,9 @@
;; Note that we must make sure that all bits outside the lowest 32 are set to 0
;; because this function is also used to load wider constants (that have zeros
;; in their most significant bits).
(decl constant_f32 (u32) Reg)
;;
;; Recursion: forms cycle with `constant_f16`. Invokes 16-bit case when FP16 is supported.
(decl rec constant_f32 (u32) Reg)
(rule 3 (constant_f32 0)
(vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size32))
false
Expand Down Expand Up @@ -4099,7 +4107,9 @@
;;
;; The 64-bit input here only uses the low bits for the lane size in
;; `VectorSize` and all other bits are ignored.
(decl splat_const (u64 VectorSize) Reg)
;;
;; Recursion: bounded since the recursive call always reduces lane size.
(decl rec splat_const (u64 VectorSize) Reg)

;; If the splat'd constant can itself be reduced in size then attempt to do so
;; as it will make it easier to create the immediates in the instructions below.
Expand Down Expand Up @@ -4956,7 +4966,8 @@
(MInst.CSel dst (Cond.Eq) tmp1 tmp2)
(value_reg dst))))

(decl lower_bmask (Type Type ValueRegs) ValueRegs)
; Recursion: bounded since recursive calls reduce type width (128-bit to 64-bit).
(decl rec lower_bmask (Type Type ValueRegs) ValueRegs)


;; For conversions that exactly fit a register, we can use csetm.
Expand Down
21 changes: 16 additions & 5 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
;; needs to handle situations such as when the `Value` is 64-bits an explicit
;; comparison must be made. Additionally if `Value` is smaller than 32-bits
;; then it must be sign-extended up to at least 32 bits.
(decl lower_cond (Value) Cond)
;;
;; Recursion: peeling away `uextend` operations must be bounded since each
;; extend must be on a strictly smaller type.
(decl rec lower_cond (Value) Cond)
(rule 0 (lower_cond val @ (value_type (fits_in_32 _))) (Cond.If32 (zext32 val)))
(rule 1 (lower_cond val @ (value_type $I64))
(Cond.IfXneq64I32 val 0))
Expand Down Expand Up @@ -737,7 +740,9 @@
(rule (lower (icmp cc a b @ (value_type (ty_int ty))))
(lower_icmp ty cc a b))

(decl lower_icmp (Type IntCC Value Value) XReg)
; Recursion: bounded since the only recursive rules swap the condition code
; from a greater-than form into a less-than form, which can only apply once.
(decl rec lower_icmp (Type IntCC Value Value) XReg)

(rule (lower_icmp $I64 (IntCC.Equal) a b)
(pulley_xeq64 a b))
Expand Down Expand Up @@ -846,7 +851,9 @@
(rule 1 (lower (icmp cc a @ (value_type (ty_vec128 ty)) b))
(lower_vcmp ty cc a b))

(decl lower_vcmp (Type IntCC Value Value) VReg)
; Recursion: bounded since the only recursive rules swap the condition code
; from a greater-than form into a less-than form, which can only apply once.
(decl rec lower_vcmp (Type IntCC Value Value) VReg)
(rule (lower_vcmp $I8X16 (IntCC.Equal) a b) (pulley_veq8x16 a b))
(rule (lower_vcmp $I8X16 (IntCC.NotEqual) a b) (pulley_vneq8x16 a b))
(rule (lower_vcmp $I8X16 (IntCC.SignedLessThan) a b) (pulley_vslt8x16 a b))
Expand Down Expand Up @@ -890,7 +897,9 @@
(rule 1 (lower (fcmp cc a b @ (value_type (ty_vec128 ty))))
(lower_vfcmp ty cc a b))

(decl lower_fcmp (Type FloatCC Value Value) XReg)
; Recursion: bounded since recursive rules only implement certain condition
; codes in terms of a smaller canonical set, to which recursive rules don't apply.
(decl rec lower_fcmp (Type FloatCC Value Value) XReg)

(rule (lower_fcmp $F32 (FloatCC.Equal) a b) (pulley_feq32 a b))
(rule (lower_fcmp $F64 (FloatCC.Equal) a b) (pulley_feq64 a b))
Expand Down Expand Up @@ -921,7 +930,9 @@
(if-let true (floatcc_unordered cc))
(pulley_xbxor32_s8 (lower_fcmp ty (floatcc_complement cc) a b) 1))

(decl lower_vfcmp (Type FloatCC Value Value) VReg)
; Recursion: bounded since recursive rules only implement certain condition
; codes in terms of a smaller canonical set, to which recursive rules don't apply.
(decl rec lower_vfcmp (Type FloatCC Value Value) VReg)

(rule (lower_vfcmp $F32X4 (FloatCC.Equal) a b) (pulley_veqf32x4 a b))
(rule (lower_vfcmp $F64X2 (FloatCC.Equal) a b) (pulley_veqf64x2 a b))
Expand Down
23 changes: 18 additions & 5 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1873,7 +1873,10 @@
;; Immediate Loading rules
;; TODO: Loading the zero reg directly causes a bunch of regalloc errors, we should look into it.
;; TODO: Load floats using `fld` instead of `ld`
(decl imm (Type u64) Reg)
;;
;; Recursion: bounded since either float cases are reduced to integers, or the
;; shift case reduces to a smaller constant.
(decl rec imm (Type u64) Reg)

;; Special-case 0.0 for floats to use the `(zero_reg)` directly.
;; See #7162 for why this doesn't fall out of the rules below.
Expand Down Expand Up @@ -2470,7 +2473,10 @@
(rule 0 (load_op_reg_type _) $I64)

;; Helper constructor to build a load instruction.
(decl gen_load (AMode LoadOP MemFlags) Reg)
;;
;; Recursion: recursive rule can only match once, since it matches on
;; `LoadOP.Flh` and emits `LoadOP.Lh`.
(decl rec gen_load (AMode LoadOP MemFlags) Reg)
(rule (gen_load amode op flags)
(let ((dst WritableReg (temp_writable_reg (load_op_reg_type op)))
(_ Unit (emit (MInst.Load dst op flags amode))))
Expand Down Expand Up @@ -2661,7 +2667,9 @@
(decl gen_stack_addr (StackSlot Offset32) Reg)
(extern constructor gen_stack_addr gen_stack_addr)

(decl gen_select_xreg (IntegerCompare XReg XReg) XReg)
; Recursion: bounded by only matching when one of the inputs is a zero register,
; but not both.
(decl rec gen_select_xreg (IntegerCompare XReg XReg) XReg)

(rule 6 (gen_select_xreg (int_compare_decompose cc x y) x y)
(if-let (IntCC.UnsignedLessThan) (intcc_without_eq cc))
Expand Down Expand Up @@ -2994,7 +3002,10 @@

;; Generates a bitcast instruction.
;; Args are: src, src_ty, dst_ty
(decl gen_bitcast (Reg Type Type) Reg)
;;
;; Recursion: only recursive rule matches on vec-to-float, and emits vec-to-int
;; and int-to-float bitcasts, so this can only recurse once.
(decl rec gen_bitcast (Reg Type Type) Reg)

(rule 9 (gen_bitcast r (ty_supported_float_size $F16) (ty_supported_vec _)) (if-let false (has_zvfh)) (rv_vfmv_sf r (vstate_from_type $F32)))
(rule 8 (gen_bitcast r (ty_supported_vec ty) (ty_supported_float_size $F16)) (if-let false (has_zvfh)) (gen_bitcast (gen_bitcast r ty $I16) $I16 $F16))
Expand Down Expand Up @@ -3214,7 +3225,9 @@
(convert FloatCompare IntegerCompare float_to_int_compare)

;; Compare two floating point numbers and return a zero/non-zero result.
(decl fcmp_to_float_compare (FloatCC Type FReg FReg) FloatCompare)
;;
;; Recursion: at most once to convert unordered comparisons into ordered comparisons.
(decl rec fcmp_to_float_compare (FloatCC Type FReg FReg) FloatCompare)

;; Direct codegen for unordered comparisons is not that efficient, so invert
;; the comparison to get an ordered comparison and generate that. Then invert
Expand Down
9 changes: 7 additions & 2 deletions cranelift/codegen/src/isa/riscv64/inst_vector.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1501,7 +1501,9 @@

;;;; Multi-Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(decl gen_extractlane (Type VReg u8) Reg)
; Recursion: recursive rules reduce to the index zero case, which is handled
; by higher-priority rules.
(decl rec gen_extractlane (Type VReg u8) Reg)

;; When extracting lane 0 for floats, we can use `vfmv.f.s` directly.
(rule 3 (gen_extractlane (ty_vec_fits_in_register ty) src 0)
Expand Down Expand Up @@ -1731,7 +1733,10 @@


;; Builds a vector mask corresponding to the FloatCC operation.
(decl gen_fcmp_mask (Type FloatCC Value Value) VReg)
;;
;; Recursion: recursive rules implement some condition codes in terms of a
;; smaller set of primitives, which recursive rules would not apply to twice.
(decl rec gen_fcmp_mask (Type FloatCC Value Value) VReg)

;; FloatCC.Equal

Expand Down
14 changes: 10 additions & 4 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1044,7 +1044,9 @@

;; Constructs a sequence of instructions that reverse all bits in `x` up to
;; the given type width.
(decl gen_bitrev (Type XReg) XReg)
;;
;; Recursion: at most once to implement 16- and 32-bit cases in terms of 64-bit.
(decl rec gen_bitrev (Type XReg) XReg)

(rule 0 (gen_bitrev (ty_16_or_32 (ty_int ty)) x)
(if-let shift_amt (u64_to_imm12 (u64_wrapping_sub 64 (ty_bits ty))))
Expand All @@ -1069,7 +1071,9 @@

;; Builds a sequence of instructions that swaps the bytes in `x` up to the given
;; type width.
(decl gen_bswap (Type XReg) XReg)
;;
;; Recursion: bounded depth since each step halves the type width.
(decl rec gen_bswap (Type XReg) XReg)

;; This is only here to make the rule below work. bswap.i8 isn't valid
(rule 0 (gen_bswap $I8 x) x)
Expand Down Expand Up @@ -2263,7 +2267,8 @@
(rule 0 (lower (icmp cc x @ (value_type (fits_in_64 ty)) y))
(lower_icmp cc x y))

(decl lower_icmp (IntCC Value Value) XReg)
; Recursion: at most once to implement >= in terms of <.
(decl rec lower_icmp (IntCC Value Value) XReg)
(rule 0 (lower_icmp cc x y)
(lower_int_compare (icmp_to_int_compare cc x y)))

Expand Down Expand Up @@ -2352,7 +2357,8 @@
(rule 20 (lower (icmp cc x @ (value_type $I128) y))
(lower_icmp_i128 cc x y))

(decl lower_icmp_i128 (IntCC ValueRegs ValueRegs) XReg)
; Recursion: at most once to implement some conditions in terms of a smaller primitive set.
(decl rec lower_icmp_i128 (IntCC ValueRegs ValueRegs) XReg)
(rule 0 (lower_icmp_i128 (IntCC.Equal) x y)
(let ((lo XReg (rv_xor (value_regs_get x 0) (value_regs_get y 0)))
(hi XReg (rv_xor (value_regs_get x 1) (value_regs_get y 1))))
Expand Down
10 changes: 7 additions & 3 deletions cranelift/codegen/src/isa/s390x/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2982,7 +2982,7 @@
;; Helpers for generating immediate values ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Allocate a temporary register, initialized with an immediate.
(decl imm (Type u64) Reg)
;; Recursion: TODO(review): document why recursion in `imm` is bounded; this
;; `rec` declaration currently carries no termination justification.
(decl rec imm (Type u64) Reg)

;; 16-bit (or smaller) result type, any value
(rule 7 (imm (fits_in_16 (ty_int ty)) n)
Expand Down Expand Up @@ -3084,7 +3084,9 @@
(vec_load ty (memarg_const (emit_u128_be_const n))))

;; Variant with replicated immediate.
(decl vec_imm_splat (Type u64) Reg)
;;
;; Recursion: bounded since recursive rules reduce number of lanes.
(decl rec vec_imm_splat (Type u64) Reg)
(rule 1 (vec_imm_splat (ty_vec128 ty) 0)
(vec_imm_byte_mask ty 0))
(rule 2 (vec_imm_splat ty @ (multi_lane 8 _) n)
Expand Down Expand Up @@ -3387,7 +3389,9 @@
(rule (lower_bool $I8 cond) (select_bool_imm $I8 cond 1 0))

;; Lower a boolean condition to the values -1/0.
(decl lower_bool_to_mask (Type ProducesBool) Reg)
;;
;; Recursion: at most once to reduce 128-bit to 64-bit case.
(decl rec lower_bool_to_mask (Type ProducesBool) Reg)
(rule 0 (lower_bool_to_mask (fits_in_64 ty) producer)
(select_bool_imm ty producer -1 0))

Expand Down
11 changes: 8 additions & 3 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1929,7 +1929,9 @@
;;
;; Note that if `Type` is less than 64-bits then the upper bits of the `imm`
;; argument will be set to zero and lost.
(decl imm (Type u64) Reg)
;;
;; Recursion: at most once to implement floats with integer bit patterns.
(decl rec imm (Type u64) Reg)

;; Base case: integers of up to at most 32-bits.
;;
Expand Down Expand Up @@ -3346,7 +3348,9 @@
(ConsumesFlags.ConsumesFlagsSideEffect (MInst.JmpCondOr cc1 cc2 taken not_taken)))

;; Conditional jump based on a `CondResult`
(decl jmp_cond_result (CondResult MachLabel MachLabel) SideEffectNoResult)
;;
;; Recursion: at most once to convert `And` into `Or`.
(decl rec jmp_cond_result (CondResult MachLabel MachLabel) SideEffectNoResult)
(rule (jmp_cond_result (CondResult.CC producer cc) taken not_taken)
(with_flags_side_effect producer (jmp_cond cc taken not_taken)))
(rule (jmp_cond_result cond @ (CondResult.And _ _ _) taken not_taken)
Expand Down Expand Up @@ -3549,7 +3553,8 @@
(rule 5 (emit_cmp (IntCC.NotEqual) a (u64_from_iconst 0)) (is_nonzero a))
(rule 6 (emit_cmp (IntCC.NotEqual) (u64_from_iconst 0) a) (is_nonzero a))

(decl emit_cmp_i128 (CC Gpr Gpr Gpr Gpr) CondResult)
; Recursion: at most once to eliminate "or equal" cases.
(decl rec emit_cmp_i128 (CC Gpr Gpr Gpr Gpr) CondResult)
;; Eliminate cases which compare something "or equal" by swapping arguments.
(rule 2 (emit_cmp_i128 (CC.NLE) a_hi a_lo b_hi b_lo)
(emit_cmp_i128 (CC.L) b_hi b_lo a_hi a_lo))
Expand Down
22 changes: 16 additions & 6 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,9 @@

;; Get the address of the mask to use when fixing up the lanes that weren't
;; correctly generated by the 16x8 shift.
(decl ishl_i8x16_mask (RegMemImm) SyntheticAmode)
;;
;; Recursion: at most once to convert memory case into register case.
(decl rec ishl_i8x16_mask (RegMemImm) SyntheticAmode)

;; When the shift amount is known, we can statically (i.e. at compile time)
;; determine the mask to use and only emit that.
Expand Down Expand Up @@ -732,7 +734,9 @@

;; Get the address of the mask to use when fixing up the lanes that weren't
;; correctly generated by the 16x8 shift.
(decl ushr_i8x16_mask (RegMemImm) SyntheticAmode)
;;
;; Recursion: at most once to convert memory case into register case.
(decl rec ushr_i8x16_mask (RegMemImm) SyntheticAmode)

;; When the shift amount is known, we can statically (i.e. at compile time)
;; determine the mask to use and only emit that.
Expand Down Expand Up @@ -1422,7 +1426,8 @@

;;;; Rules for `bmask` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(decl lower_bmask (Type Type ValueRegs) ValueRegs)
; Recursion: reduces 128-bit cases to 64-bit.
(decl rec lower_bmask (Type Type ValueRegs) ValueRegs)

;; Values that fit in a register
;;
Expand Down Expand Up @@ -2178,7 +2183,8 @@

(rule (lower (select cond x y)) (lower_select (is_nonzero_cmp cond) x y))

(decl lower_select (CondResult Value Value) InstOutput)
; Recursion: at most once to swap the And case for an Or.
(decl rec lower_select (CondResult Value Value) InstOutput)
(rule 0 (lower_select cond a @ (value_type (ty_int (fits_in_64 ty))) b)
(lower_select_gpr ty cond a b))
(rule 1 (lower_select cond a @ (value_type (is_xmm_type ty)) b)
Expand Down Expand Up @@ -4276,7 +4282,9 @@

;; Emits either a `round{ss,sd,ps,pd}` instruction, as appropriate, or generates
;; the appropriate libcall and sequence to call that.
(decl x64_round (Type RegMem RoundImm) Xmm)
;;
;; Recursion: at most once to convert memory case into register case.
(decl rec x64_round (Type RegMem RoundImm) Xmm)
(rule 1 (x64_round $F32 a imm)
(if-let true (has_sse41))
(x64_roundss a imm))
Expand Down Expand Up @@ -4683,7 +4691,9 @@
;; performant thing in the world so this is primarily here for completeness
;; of lowerings on all x86 cpus but if rules are ideally gated on the presence
;; of SSSE3 to use the `pshufb` instruction itself.
(decl lower_pshufb (Xmm RegMem) Xmm)
;;
;; Recursion: at most once to implement the memory load case.
(decl rec lower_pshufb (Xmm RegMem) Xmm)
(rule 1 (lower_pshufb src mask)
(if-let true (has_ssse3))
(x64_pshufb src mask))
Expand Down
8 changes: 6 additions & 2 deletions cranelift/codegen/src/prelude_opt.isle
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,9 @@
;; so that `iconst.i8 255` will give you a `-1_i64`.
;; When constructing, the rule will fail if the value cannot be represented in
;; the target type. If it fits, it'll be masked accordingly in the constant.
(decl iconst_s (Type i64) Value)
;;
;; Recursion: may recurse at most once to reduce 128-bit to 64-bit.
(decl rec iconst_s (Type i64) Value)
(extractor (iconst_s ty c) (inst_data_value_tupled (iconst_sextend_etor ty c)))
(rule 0 (iconst_s ty c)
(if-let c_masked (u64_and (i64_cast_unsigned c)
Expand All @@ -147,7 +149,9 @@
;; so that `iconst.i8 255` will give you a `255_u64`.
;; When constructing, the rule will fail if the value cannot be represented in
;; the target type.
(decl iconst_u (Type u64) Value)
;;
;; Recursion: may recurse at most once to reduce 128-bit to 64-bit.
(decl rec iconst_u (Type u64) Value)
(extractor (iconst_u ty c) (iconst ty (u64_from_imm64 c)))
(rule 0 (iconst_u ty c)
(if-let true (u64_lt_eq c (ty_umax ty)))
Expand Down
Loading
Loading