Skip to content

Commit 3daccd7

Browse files
committed
Use getIntrinsicID to test which one is actually the working one.
1 parent 9499444 commit 3daccd7

File tree

2 files changed

+12
-6
lines changed

2 files changed

+12
-6
lines changed

src/CodeGen_PTX_Dev.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -263,11 +263,11 @@ void CodeGen_PTX_Dev::visit(const Call *op) {
263263
internal_assert(fence_type_ptr) << "gpu_thread_barrier() parameter is not a constant integer.\n";
264264

265265
llvm::Function *barrier;
266-
if ((barrier = module->getFunction("llvm.nvvm.barrier.cta.sync.aligned.all"))) {
267-
// LLVM 20 and above: https://github.com/llvm/llvm-project/pull/140615
266+
if ((barrier = module->getFunction("llvm.nvvm.barrier.cta.sync.aligned.all")) && barrier->getIntrinsicID() != 0) {
267+
// LLVM 20.1.6 and above: https://github.com/llvm/llvm-project/pull/140615
268268
builder->CreateCall(barrier, builder->getInt32(0));
269-
} else if ((barrier = module->getFunction("llvm.nvvm.barrier0"))) {
270-
// LLVM 19: Testing for llvm.nvvm.barrier0 can be removed once we drop support for LLVM 19
269+
} else if ((barrier = module->getFunction("llvm.nvvm.barrier0")) && barrier->getIntrinsicID() != 0) {
270+
// LLVM 20.1.5 and below: Testing for llvm.nvvm.barrier0 can be removed once we drop support for LLVM 20
271271
builder->CreateCall(barrier);
272272
} else {
273273
internal_error << "Could not find PTX barrier intrinsic llvm.nvvm.barrier0 nor llvm.nvvm.barrier.cta.sync.aligned.all\n";

src/runtime/ptx_dev.ll

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
1-
declare void @llvm.nvvm.barrier0()
2-
declare void @llvm.nvvm.barrier.cta.sync.aligned.all(i32)
1+
; The two forward declared intrinsics below refer to the same thing.
2+
; LLVM 20.1.6 introduced a new naming scheme for these intrinsics.
3+
; We have to declare both, such that we can access them from the Module's
4+
; getFunction(), but one of those will map to an intrinsic, which we
5+
; will use to determine which intrinsic is supported by LLVM.
6+
declare void @llvm.nvvm.barrier0() ; LLVM <=20.1.5
7+
declare void @llvm.nvvm.barrier.cta.sync.aligned.all(i32) ; LLVM >=20.1.6
8+
39
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
410
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
511
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()

0 commit comments

Comments
 (0)