Skip to content

Commit 261840a

Browse files
committed
Filter out maxclusterrank for < SM_90
1 parent 3c17966 commit 261840a

File tree

2 files changed

+31
-1
lines changed

2 files changed

+31
-1
lines changed

llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -571,8 +571,12 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
571571
if (getMaxNReg(F, Maxnreg))
572572
O << ".maxnreg " << Maxnreg << "\n";
573573

574+
// .maxclusterrank directive requires SM_90 or higher; make sure that we
575+
// filter it out for lower SM versions, as it causes a hard ptxas crash.
576+
const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
577+
const auto *STI = static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
574578
unsigned Maxclusterrank = 0;
575-
if (getMaxClusterRank(F, Maxclusterrank))
579+
if (getMaxClusterRank(F, Maxclusterrank) && STI->getSmVersion() >= 90)
576580
O << ".maxclusterrank " << Maxclusterrank << "\n";
577581
}
578582

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck %s --check-prefixes=CHECK,CHECK_SM_90
2+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck %s --check-prefixes=CHECK,CHECK_SM_80
3+
4+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
5+
target triple = "nvptx64-unknown-unknown"
6+
7+
; CHECK: .maxntid 128, 1, 1
8+
; CHECK: .minnctapersm 2
9+
; CHECK_SM_90: .maxclusterrank 8
10+
; CHECK_SM_80-NOT: .maxclusterrank 8
11+
12+
; Make sure that for SM versions prior to 90 the `.maxclusterrank` directive is
13+
; silently ignored.
14+
define dso_local void @_Z18TestMaxClusterRankv() {
15+
entry:
16+
%a = alloca i32, align 4
17+
store volatile i32 1, ptr %a, align 4
18+
ret void
19+
}
20+
21+
!nvvm.annotations = !{!0, !1, !2, !3}
22+
23+
!0 = !{ptr @_Z18TestMaxClusterRankv, !"kernel", i32 1}
24+
!1 = !{ptr @_Z18TestMaxClusterRankv, !"maxntidx", i32 128}
25+
!2 = !{ptr @_Z18TestMaxClusterRankv, !"minctasm", i32 2}
26+
!3 = !{ptr @_Z18TestMaxClusterRankv, !"maxclusterrank", i32 8}

0 commit comments

Comments
 (0)