File tree Expand file tree Collapse file tree 2 files changed +12
-6
lines changed
Expand file tree Collapse file tree 2 files changed +12
-6
lines changed Original file line number Diff line number Diff line change @@ -263,11 +263,11 @@ void CodeGen_PTX_Dev::visit(const Call *op) {
263263 internal_assert (fence_type_ptr) << " gpu_thread_barrier() parameter is not a constant integer.\n " ;
264264
265265 llvm::Function *barrier;
266- if ((barrier = module ->getFunction (" llvm.nvvm.barrier.cta.sync.aligned.all" ))) {
267- // LLVM 20 and above: https://github.com/llvm/llvm-project/pull/140615
266+ if ((barrier = module ->getFunction (" llvm.nvvm.barrier.cta.sync.aligned.all" )) && barrier-> getIntrinsicID () != 0 ) {
267+ // LLVM 20.1.6 and above: https://github.com/llvm/llvm-project/pull/140615
268268 builder->CreateCall (barrier, builder->getInt32 (0 ));
269- } else if ((barrier = module ->getFunction (" llvm.nvvm.barrier0" ))) {
270- // LLVM 19 : Testing for llvm.nvvm.barrier0 can be removed once we drop support for LLVM 19
269+ } else if ((barrier = module ->getFunction (" llvm.nvvm.barrier0" )) && barrier-> getIntrinsicID () != 0 ) {
270+ // LLVM 20.1.5 and below : Testing for llvm.nvvm.barrier0 can be removed once we drop support for LLVM 20
271271 builder->CreateCall (barrier);
272272 } else {
273273 internal_error << " Could not find PTX barrier intrinsic llvm.nvvm.barrier0 nor llvm.nvvm.barrier.cta.sync.aligned.all\n " ;
Original file line number Diff line number Diff line change 1- declare void @llvm.nvvm.barrier0 ()
2- declare void @llvm.nvvm.barrier.cta.sync.aligned.all (i32 )
1+ ; The two forward declared intrinsics below refer to the same thing.
2+ ; LLVM 20.1.6 introduced a new naming scheme for these intrinsics.
3+ ; We have to declare both so that we can look them up through the Module's
4+ ; getFunction(), but only one of them will map to an actual intrinsic; we
5+ ; use that to determine which intrinsic is supported by LLVM.
6+ declare void @llvm.nvvm.barrier0 () ; LLVM <=20.1.5
7+ declare void @llvm.nvvm.barrier.cta.sync.aligned.all (i32 ) ; LLVM >=20.1.6
8+
39declare i32 @llvm.nvvm.read.ptx.sreg.tid.x ()
410declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x ()
511declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x ()
You can’t perform that action at this time.
0 commit comments