Skip to content

Commit efe87fb

Browse files
authored
AMDGPU: Improve vector of pointer handling in amdgpu-promote-alloca (#114144)
1 parent 57ab62a commit efe87fb

File tree

2 files changed

+240
-46
lines changed

2 files changed

+240
-46
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 34 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,9 +1115,10 @@ bool AMDGPUPromoteAllocaImpl::binaryOpIsDerivedFromSameAlloca(
11151115
if (Val == OtherOp)
11161116
OtherOp = Inst->getOperand(OpIdx1);
11171117

1118-
if (isa<ConstantPointerNull>(OtherOp))
1118+
if (isa<ConstantPointerNull, ConstantAggregateZero>(OtherOp))
11191119
return true;
11201120

1121+
// TODO: getUnderlyingObject will not work on a vector getelementptr
11211122
Value *OtherObj = getUnderlyingObject(OtherOp);
11221123
if (!isa<AllocaInst>(OtherObj))
11231124
return false;
@@ -1195,36 +1196,19 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
11951196
continue;
11961197
}
11971198

1198-
// TODO: If we know the address is only observed through flat pointers, we
1199-
// could still promote.
1200-
if (UseInst->getOpcode() == Instruction::AddrSpaceCast)
1201-
return false;
1202-
1203-
// Do not promote vector/aggregate type instructions. It is hard to track
1204-
// their users.
1205-
if (isa<InsertValueInst>(User) || isa<InsertElementInst>(User))
1206-
return false;
1207-
1208-
// TODO: Handle vectors of pointers.
1209-
if (!User->getType()->isPointerTy())
1210-
return false;
1211-
12121199
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UseInst)) {
12131200
// Be conservative if an address could be computed outside the bounds of
12141201
// the alloca.
12151202
if (!GEP->isInBounds())
12161203
return false;
1217-
}
1218-
1219-
// Only promote a select if we know that the other select operand is from
1220-
// another pointer that will also be promoted.
1221-
if (SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
1204+
} else if (SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
1205+
// Only promote a select if we know that the other select operand is from
1206+
// another pointer that will also be promoted.
12221207
if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
12231208
return false;
1224-
}
1209+
} else if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
1210+
// Repeat for phis.
12251211

1226-
// Repeat for phis.
1227-
if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
12281212
// TODO: Handle more complex cases. We should be able to replace loops
12291213
// over arrays.
12301214
switch (Phi->getNumIncomingValues()) {
@@ -1237,6 +1221,15 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
12371221
default:
12381222
return false;
12391223
}
1224+
} else if (!isa<ExtractElementInst>(User)) {
1225+
// Do not promote vector/aggregate type instructions. It is hard to track
1226+
// their users.
1227+
1228+
// Do not promote addrspacecast.
1229+
//
1230+
// TODO: If we know the address is only observed through flat pointers, we
1231+
// could still promote.
1232+
return false;
12401233
}
12411234

12421235
WorkList.push_back(User);
@@ -1490,17 +1483,21 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I,
14901483

14911484
SmallVector<IntrinsicInst *> DeferredIntrs;
14921485

1486+
PointerType *NewPtrTy = PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS);
1487+
14931488
for (Value *V : WorkList) {
14941489
CallInst *Call = dyn_cast<CallInst>(V);
14951490
if (!Call) {
14961491
if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
1497-
PointerType *NewTy = PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS);
1492+
Value *LHS = CI->getOperand(0);
1493+
Value *RHS = CI->getOperand(1);
14981494

1499-
if (isa<ConstantPointerNull>(CI->getOperand(0)))
1500-
CI->setOperand(0, ConstantPointerNull::get(NewTy));
1495+
Type *NewTy = LHS->getType()->getWithNewType(NewPtrTy);
1496+
if (isa<ConstantPointerNull, ConstantAggregateZero>(LHS))
1497+
CI->setOperand(0, Constant::getNullValue(NewTy));
15011498

1502-
if (isa<ConstantPointerNull>(CI->getOperand(1)))
1503-
CI->setOperand(1, ConstantPointerNull::get(NewTy));
1499+
if (isa<ConstantPointerNull, ConstantAggregateZero>(RHS))
1500+
CI->setOperand(1, Constant::getNullValue(NewTy));
15041501

15051502
continue;
15061503
}
@@ -1510,25 +1507,23 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I,
15101507
if (isa<AddrSpaceCastInst>(V))
15111508
continue;
15121509

1513-
PointerType *NewTy = PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS);
1514-
1515-
assert(isa<PointerType>(V->getType()));
1510+
assert(V->getType()->isPtrOrPtrVectorTy());
15161511

1517-
// FIXME: It doesn't really make sense to try to do this for all
1518-
// instructions.
1512+
Type *NewTy = V->getType()->getWithNewType(NewPtrTy);
15191513
V->mutateType(NewTy);
15201514

15211515
// Adjust the types of any constant operands.
15221516
if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
1523-
if (isa<ConstantPointerNull>(SI->getOperand(1)))
1524-
SI->setOperand(1, ConstantPointerNull::get(NewTy));
1517+
if (isa<ConstantPointerNull, ConstantAggregateZero>(SI->getOperand(1)))
1518+
SI->setOperand(1, Constant::getNullValue(NewTy));
15251519

1526-
if (isa<ConstantPointerNull>(SI->getOperand(2)))
1527-
SI->setOperand(2, ConstantPointerNull::get(NewTy));
1520+
if (isa<ConstantPointerNull, ConstantAggregateZero>(SI->getOperand(2)))
1521+
SI->setOperand(2, Constant::getNullValue(NewTy));
15281522
} else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
15291523
for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
1530-
if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
1531-
Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
1524+
if (isa<ConstantPointerNull, ConstantAggregateZero>(
1525+
Phi->getIncomingValue(I)))
1526+
Phi->setIncomingValue(I, Constant::getNullValue(NewTy));
15321527
}
15331528
}
15341529

0 commit comments

Comments
 (0)