From 4f59819c26c72f86baa98a2805d411cd5b665759 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Fri, 16 Jan 2026 17:46:34 +0100 Subject: [PATCH 1/2] Regenerate intrinsics --- src/intrinsic/archs.rs | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/src/intrinsic/archs.rs b/src/intrinsic/archs.rs index 89a6cf7f3d6..3c1698df6de 100644 --- a/src/intrinsic/archs.rs +++ b/src/intrinsic/archs.rs @@ -24,6 +24,7 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "gcsss" => "__builtin_arm_gcsss", "isb" => "__builtin_arm_isb", "prefetch" => "__builtin_arm_prefetch", + "range.prefetch" => "__builtin_arm_range_prefetch", "sme.in.streaming.mode" => "__builtin_arm_in_streaming_mode", "sve.aesd" => "__builtin_sve_svaesd_u8", "sve.aese" => "__builtin_sve_svaese_u8", @@ -414,6 +415,7 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "s.wait.event.export.ready" => "__builtin_amdgcn_s_wait_event_export_ready", "s.wait.tensorcnt" => "__builtin_amdgcn_s_wait_tensorcnt", "s.waitcnt" => "__builtin_amdgcn_s_waitcnt", + "s.wakeup.barrier" => "__builtin_amdgcn_s_wakeup_barrier", "sad.hi.u8" => "__builtin_amdgcn_sad_hi_u8", "sad.u16" => "__builtin_amdgcn_sad_u16", "sad.u8" => "__builtin_amdgcn_sad_u8", @@ -4836,19 +4838,24 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "add.rm.d" => "__nvvm_add_rm_d", "add.rm.f" => "__nvvm_add_rm_f", "add.rm.ftz.f" => "__nvvm_add_rm_ftz_f", + "add.rm.ftz.sat.f" => "__nvvm_add_rm_ftz_sat_f", + "add.rm.sat.f" => "__nvvm_add_rm_sat_f", "add.rn.d" => "__nvvm_add_rn_d", "add.rn.f" => "__nvvm_add_rn_f", "add.rn.ftz.f" => "__nvvm_add_rn_ftz_f", + "add.rn.ftz.sat.f" => "__nvvm_add_rn_ftz_sat_f", + "add.rn.sat.f" => "__nvvm_add_rn_sat_f", "add.rp.d" => "__nvvm_add_rp_d", "add.rp.f" => "__nvvm_add_rp_f", "add.rp.ftz.f" => "__nvvm_add_rp_ftz_f", + "add.rp.ftz.sat.f" => "__nvvm_add_rp_ftz_sat_f", + "add.rp.sat.f" => "__nvvm_add_rp_sat_f", "add.rz.d" => "__nvvm_add_rz_d", "add.rz.f" => "__nvvm_add_rz_f", "add.rz.ftz.f" => "__nvvm_add_rz_ftz_f", + "add.rz.ftz.sat.f" => "__nvvm_add_rz_ftz_sat_f", + "add.rz.sat.f" => "__nvvm_add_rz_sat_f", "bar.warp.sync" => "__nvvm_bar_warp_sync", - "barrier0.and" => "__nvvm_bar0_and", - "barrier0.or" => "__nvvm_bar0_or", - "barrier0.popc" => "__nvvm_bar0_popc", "bf16x2.to.ue8m0x2.rp" => "__nvvm_bf16x2_to_ue8m0x2_rp", "bf16x2.to.ue8m0x2.rp.satfinite" => "__nvvm_bf16x2_to_ue8m0x2_rp_satfinite", "bf16x2.to.ue8m0x2.rz" => "__nvvm_bf16x2_to_ue8m0x2_rz", @@ -5050,6 +5057,8 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "fma.rm.d" => "__nvvm_fma_rm_d", "fma.rm.f" => "__nvvm_fma_rm_f", "fma.rm.ftz.f" => "__nvvm_fma_rm_ftz_f", + "fma.rm.ftz.sat.f" => "__nvvm_fma_rm_ftz_sat_f", + "fma.rm.sat.f" => "__nvvm_fma_rm_sat_f", "fma.rn.bf16" => "__nvvm_fma_rn_bf16", "fma.rn.bf16x2" => "__nvvm_fma_rn_bf16x2", "fma.rn.d" => "__nvvm_fma_rn_d", @@ -5061,16 +5070,22 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "fma.rn.ftz.relu.bf16x2" => "__nvvm_fma_rn_ftz_relu_bf16x2", "fma.rn.ftz.sat.bf16" => "__nvvm_fma_rn_ftz_sat_bf16", "fma.rn.ftz.sat.bf16x2" => "__nvvm_fma_rn_ftz_sat_bf16x2", + "fma.rn.ftz.sat.f" => "__nvvm_fma_rn_ftz_sat_f", "fma.rn.relu.bf16" => "__nvvm_fma_rn_relu_bf16", "fma.rn.relu.bf16x2" => "__nvvm_fma_rn_relu_bf16x2", "fma.rn.sat.bf16" => "__nvvm_fma_rn_sat_bf16", "fma.rn.sat.bf16x2" => "__nvvm_fma_rn_sat_bf16x2", + "fma.rn.sat.f" => "__nvvm_fma_rn_sat_f", "fma.rp.d" => "__nvvm_fma_rp_d", "fma.rp.f" => "__nvvm_fma_rp_f", "fma.rp.ftz.f" => "__nvvm_fma_rp_ftz_f", + "fma.rp.ftz.sat.f" => "__nvvm_fma_rp_ftz_sat_f", + "fma.rp.sat.f" => "__nvvm_fma_rp_sat_f", "fma.rz.d" => "__nvvm_fma_rz_d", "fma.rz.f" => "__nvvm_fma_rz_f", "fma.rz.ftz.f" => "__nvvm_fma_rz_ftz_f", + "fma.rz.ftz.sat.f" => "__nvvm_fma_rz_ftz_sat_f", + "fma.rz.sat.f" => "__nvvm_fma_rz_sat_f", "fmax.bf16" => "__nvvm_fmax_bf16", "fmax.bf16x2" => "__nvvm_fmax_bf16x2", "fmax.d" => "__nvvm_fmax_d", @@ -5274,6 +5289,7 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "read.ptx.sreg.pm1" => "__nvvm_read_ptx_sreg_pm1", "read.ptx.sreg.pm2" => "__nvvm_read_ptx_sreg_pm2", "read.ptx.sreg.pm3" => "__nvvm_read_ptx_sreg_pm3", + "read.ptx.sreg.pm4" => "__nvvm_read_ptx_sreg_pm4", "read.ptx.sreg.smid" => "__nvvm_read_ptx_sreg_smid", "read.ptx.sreg.tid.w" => "__nvvm_read_ptx_sreg_tid_w", "read.ptx.sreg.tid.x" => "__nvvm_read_ptx_sreg_tid_x", @@ -6370,6 +6386,7 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { fn spv(name: &str, full_name: &str) -> &'static str { match name { // spv + "group.memory.barrier.with.group.sync" => "__builtin_spirv_group_barrier", "num.subgroups" => "__builtin_spirv_num_subgroups", "subgroup.id" => "__builtin_spirv_subgroup_id", "subgroup.local.invocation.id" => { @@ -6377,6 +6394,7 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { } "subgroup.max.size" => "__builtin_spirv_subgroup_max_size", "subgroup.size" => "__builtin_spirv_subgroup_size", + "wave.ballot" => "__builtin_spirv_subgroup_ballot", _ => unimplemented!("***** unsupported LLVM intrinsic {full_name}"), } } @@ -7711,8 +7729,6 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "avx.ptestnzc.256" => "__builtin_ia32_ptestnzc256", "avx.ptestz.256" => "__builtin_ia32_ptestz256", "avx.rcp.ps.256" => "__builtin_ia32_rcpps256", - "avx.round.pd.256" => "__builtin_ia32_roundpd256", - "avx.round.ps.256" => "__builtin_ia32_roundps256", "avx.rsqrt.ps.256" => "__builtin_ia32_rsqrtps256", "avx.vpermilvar.pd" => "__builtin_ia32_vpermilvarpd", "avx.vpermilvar.pd.256" => "__builtin_ia32_vpermilvarpd256", @@ -8829,10 +8845,6 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "sse41.ptestc" => "__builtin_ia32_ptestc128", "sse41.ptestnzc" => "__builtin_ia32_ptestnzc128", "sse41.ptestz" => "__builtin_ia32_ptestz128", - "sse41.round.pd" => "__builtin_ia32_roundpd", - "sse41.round.ps" => "__builtin_ia32_roundps", - "sse41.round.sd" => "__builtin_ia32_roundsd", - "sse41.round.ss" => "__builtin_ia32_roundss", "sse42.crc32.32.16" => "__builtin_ia32_crc32hi", "sse42.crc32.32.32" => "__builtin_ia32_crc32si", "sse42.crc32.32.8" => "__builtin_ia32_crc32qi", @@ -8869,10 +8881,6 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "ssse3.psign.w.128" => "__builtin_ia32_psignw128", "sttilecfg" => "__builtin_ia32_tile_storeconfig", "stui" => "__builtin_ia32_stui", - "t2rpntlvwz0rs" => "__builtin_ia32_t2rpntlvwz0rs", - "t2rpntlvwz0rst1" => "__builtin_ia32_t2rpntlvwz0rst1", - "t2rpntlvwz1rs" => "__builtin_ia32_t2rpntlvwz1rs", - "t2rpntlvwz1rst1" => "__builtin_ia32_t2rpntlvwz1rst1", "tbm.bextri.u32" => "__builtin_ia32_bextri_u32", "tbm.bextri.u64" => "__builtin_ia32_bextri_u64", "tcmmimfp16ps" => "__builtin_ia32_tcmmimfp16ps", @@ -8881,14 +8889,19 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "tcmmrlfp16ps.internal" => "__builtin_ia32_tcmmrlfp16ps_internal", "tcvtrowd2ps" => "__builtin_ia32_tcvtrowd2ps", "tcvtrowd2ps.internal" => "__builtin_ia32_tcvtrowd2ps_internal", + "tcvtrowd2psi" => "__builtin_ia32_tcvtrowd2psi", "tcvtrowps2bf16h" => "__builtin_ia32_tcvtrowps2bf16h", "tcvtrowps2bf16h.internal" => "__builtin_ia32_tcvtrowps2bf16h_internal", + "tcvtrowps2bf16hi" => "__builtin_ia32_tcvtrowps2bf16hi", "tcvtrowps2bf16l" => "__builtin_ia32_tcvtrowps2bf16l", "tcvtrowps2bf16l.internal" => "__builtin_ia32_tcvtrowps2bf16l_internal", + "tcvtrowps2bf16li" => "__builtin_ia32_tcvtrowps2bf16li", "tcvtrowps2phh" => "__builtin_ia32_tcvtrowps2phh", "tcvtrowps2phh.internal" => "__builtin_ia32_tcvtrowps2phh_internal", + "tcvtrowps2phhi" => "__builtin_ia32_tcvtrowps2phhi", "tcvtrowps2phl" => "__builtin_ia32_tcvtrowps2phl", "tcvtrowps2phl.internal" => "__builtin_ia32_tcvtrowps2phl_internal", + "tcvtrowps2phli" => "__builtin_ia32_tcvtrowps2phli", "tdpbf16ps" => "__builtin_ia32_tdpbf16ps", "tdpbf16ps.internal" => "__builtin_ia32_tdpbf16ps_internal", "tdpbf8ps" => "__builtin_ia32_tdpbf8ps", @@ -8920,6 +8933,7 @@ fn map_arch_intrinsic(full_name: &str) -> &'static str { "tileloaddt164.internal" => "__builtin_ia32_tileloaddt164_internal", "tilemovrow" => "__builtin_ia32_tilemovrow", "tilemovrow.internal" => "__builtin_ia32_tilemovrow_internal", + "tilemovrowi" => "__builtin_ia32_tilemovrowi", "tilerelease" => "__builtin_ia32_tilerelease", "tilestored64" => "__builtin_ia32_tilestored64", "tilestored64.internal" => "__builtin_ia32_tilestored64_internal", From 2177aa9ac736b0c16c8ad73230508fcb273b980e Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Fri, 16 Jan 2026 18:09:37 +0100 Subject: [PATCH 2/2] Manually include intrinsic conversion that is not present in LLVM files --- src/intrinsic/old_archs.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/intrinsic/old_archs.rs b/src/intrinsic/old_archs.rs index 3a59707b2eb..8d3e3487b5c 100644 --- a/src/intrinsic/old_archs.rs +++ b/src/intrinsic/old_archs.rs @@ -242,6 +242,8 @@ pub(crate) fn old_archs(arch: &str, name: &str) -> ArchCheckResult { "avx.cvt.ps2.pd.256" => "__builtin_ia32_cvtps2pd256", "avx.cvtdq2.pd.256" => "__builtin_ia32_cvtdq2pd256", "avx.cvtdq2.ps.256" => "__builtin_ia32_cvtdq2ps256", + "avx.round.pd.256" => "__builtin_ia32_roundpd256", + "avx.round.ps.256" => "__builtin_ia32_roundps256", "avx.sqrt.pd.256" => "__builtin_ia32_sqrtpd256", "avx.sqrt.ps.256" => "__builtin_ia32_sqrtps256", "avx.storeu.dq.256" => "__builtin_ia32_storedqu256", @@ -1352,6 +1354,10 @@ pub(crate) fn old_archs(arch: &str, name: &str) -> ArchCheckResult { "sse41.pmovzxwd" => "__builtin_ia32_pmovzxwd128", "sse41.pmovzxwq" => "__builtin_ia32_pmovzxwq128", "sse41.pmuldq" => "__builtin_ia32_pmuldq128", + "sse41.round.pd" => "__builtin_ia32_roundpd", + "sse41.round.ps" => "__builtin_ia32_roundps", + "sse41.round.sd" => "__builtin_ia32_roundsd", + "sse41.round.ss" => "__builtin_ia32_roundss", "sse4a.movnt.sd" => "__builtin_ia32_movntsd", "sse4a.movnt.ss" => "__builtin_ia32_movntss", "ssse3.pabs.b.128" => "__builtin_ia32_pabsb128",