Skip to content

Commit 2e58d92

Browse files
authored
[NVPTX] Check before inserting AddrSpaceCastInst in NVPTXLowerAlloca (#106127)
If `allocaInst` is already in `ADDRESS_SPACE_LOCAL`, there is no need for an explicit cast, which would in fact trigger an assertion failure in `AddrSpaceCastInst`. Only insert the cast when needed.
1 parent 08d294d commit 2e58d92

File tree

2 files changed

+58
-14
lines changed

2 files changed

+58
-14
lines changed

llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@
2424
//
2525
//===----------------------------------------------------------------------===//
2626

27+
#include "MCTargetDesc/NVPTXBaseInfo.h"
2728
#include "NVPTX.h"
2829
#include "NVPTXUtilities.h"
29-
#include "MCTargetDesc/NVPTXBaseInfo.h"
3030
#include "llvm/IR/Function.h"
3131
#include "llvm/IR/Instructions.h"
3232
#include "llvm/IR/IntrinsicInst.h"
@@ -55,8 +55,8 @@ class NVPTXLowerAlloca : public FunctionPass {
5555

5656
char NVPTXLowerAlloca::ID = 1;
5757

58-
INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca",
59-
"Lower Alloca", false, false)
58+
INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca", "Lower Alloca", false,
59+
false)
6060

6161
// =============================================================================
6262
// Main function for this pass.
@@ -70,14 +70,38 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) {
7070
for (auto &I : BB) {
7171
if (auto allocaInst = dyn_cast<AllocaInst>(&I)) {
7272
Changed = true;
73+
74+
PointerType *AllocInstPtrTy =
75+
cast<PointerType>(allocaInst->getType()->getScalarType());
76+
unsigned AllocAddrSpace = AllocInstPtrTy->getAddressSpace();
77+
assert((AllocAddrSpace == ADDRESS_SPACE_GENERIC ||
78+
AllocAddrSpace == ADDRESS_SPACE_LOCAL) &&
79+
"AllocaInst can only be in Generic or Local address space for "
80+
"NVPTX.");
81+
82+
Instruction *AllocaInLocalAS = allocaInst;
7383
auto ETy = allocaInst->getAllocatedType();
74-
auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL);
75-
auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, "");
76-
auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC);
77-
auto NewASCToGeneric =
78-
new AddrSpaceCastInst(NewASCToLocal, GenericAddrTy, "");
79-
NewASCToLocal->insertAfter(allocaInst);
80-
NewASCToGeneric->insertAfter(NewASCToLocal);
84+
85+
// We need to make sure that LLVM has info that alloca needs to go to
86+
// ADDRESS_SPACE_LOCAL for InferAddressSpace pass.
87+
//
88+
// For allocas in ADDRESS_SPACE_GENERIC, we add addrspacecast to
89+
// ADDRESS_SPACE_LOCAL and back to ADDRESS_SPACE_GENERIC, so that
90+
// the alloca's users still use a generic pointer to operate on.
91+
//
92+
// For allocas already in ADDRESS_SPACE_LOCAL, we just need
93+
// addrspacecast to ADDRESS_SPACE_GENERIC.
94+
if (AllocAddrSpace == ADDRESS_SPACE_GENERIC) {
95+
auto ASCastToLocalAS = new AddrSpaceCastInst(
96+
allocaInst, PointerType::get(ETy, ADDRESS_SPACE_LOCAL), "");
97+
ASCastToLocalAS->insertAfter(allocaInst);
98+
AllocaInLocalAS = ASCastToLocalAS;
99+
}
100+
101+
auto AllocaInGenericAS = new AddrSpaceCastInst(
102+
AllocaInLocalAS, PointerType::get(ETy, ADDRESS_SPACE_GENERIC), "");
103+
AllocaInGenericAS->insertAfter(AllocaInLocalAS);
104+
81105
for (Use &AllocaUse : llvm::make_early_inc_range(allocaInst->uses())) {
82106
// Check Load, Store, GEP, and BitCast Uses on alloca and make them
83107
// use the converted generic address, in order to expose non-generic
@@ -87,23 +111,23 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) {
87111
auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
88112
if (LI && LI->getPointerOperand() == allocaInst &&
89113
!LI->isVolatile()) {
90-
LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric);
114+
LI->setOperand(LI->getPointerOperandIndex(), AllocaInGenericAS);
91115
continue;
92116
}
93117
auto SI = dyn_cast<StoreInst>(AllocaUse.getUser());
94118
if (SI && SI->getPointerOperand() == allocaInst &&
95119
!SI->isVolatile()) {
96-
SI->setOperand(SI->getPointerOperandIndex(), NewASCToGeneric);
120+
SI->setOperand(SI->getPointerOperandIndex(), AllocaInGenericAS);
97121
continue;
98122
}
99123
auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser());
100124
if (GI && GI->getPointerOperand() == allocaInst) {
101-
GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric);
125+
GI->setOperand(GI->getPointerOperandIndex(), AllocaInGenericAS);
102126
continue;
103127
}
104128
auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser());
105129
if (BI && BI->getOperand(0) == allocaInst) {
106-
BI->setOperand(0, NewASCToGeneric);
130+
BI->setOperand(0, AllocaInGenericAS);
107131
continue;
108132
}
109133
}

llvm/test/CodeGen/NVPTX/lower-alloca.ll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: opt < %s -S -nvptx-lower-alloca -infer-address-spaces | FileCheck %s
2+
; RUN: opt < %s -S -nvptx-lower-alloca | FileCheck %s --check-prefix LOWERALLOCAONLY
23
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix PTX
34
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
45

@@ -11,13 +12,32 @@ define void @kernel() {
1112
%A = alloca i32
1213
; CHECK: addrspacecast ptr %A to ptr addrspace(5)
1314
; CHECK: store i32 0, ptr addrspace(5) {{%.+}}
15+
; LOWERALLOCAONLY: [[V1:%.*]] = addrspacecast ptr %A to ptr addrspace(5)
16+
; LOWERALLOCAONLY: [[V2:%.*]] = addrspacecast ptr addrspace(5) [[V1]] to ptr
17+
; LOWERALLOCAONLY: store i32 0, ptr [[V2]], align 4
1418
; PTX: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}}
1519
store i32 0, ptr %A
1620
call void @callee(ptr %A)
1721
ret void
1822
}
1923

24+
define void @alloca_in_explicit_local_as() {
25+
; LABEL: @alloca_in_explicit_local_as
26+
; PTX-LABEL: .visible .func alloca_in_explicit_local_as(
27+
%A = alloca i32, addrspace(5)
28+
; CHECK: store i32 0, ptr addrspace(5) {{%.+}}
29+
; PTX: st.local.u32 [%SP+0], {{%r[0-9]+}}
30+
; LOWERALLOCAONLY: [[V1:%.*]] = addrspacecast ptr addrspace(5) %A to ptr
31+
; LOWERALLOCAONLY: store i32 0, ptr [[V1]], align 4
32+
store i32 0, ptr addrspace(5) %A
33+
call void @callee(ptr addrspace(5) %A)
34+
ret void
35+
}
36+
2037
declare void @callee(ptr)
38+
declare void @callee_addrspace5(ptr addrspace(5))
2139

2240
!nvvm.annotations = !{!0}
41+
!nvvm.annotations = !{!1}
2342
!0 = !{ptr @kernel, !"kernel", i32 1}
43+
!1 = !{ptr @alloca_in_explicit_local_as, !"alloca_in_explicit_local_as", i32 1}

0 commit comments

Comments
 (0)