Skip to content

Commit 79e06bf

Browse files
authored
[AMDGPU] Extended vector promotion to aggregate types. (#143784)
Extends the `amdgpu-promote-alloca-to-vector` pass so that it also promotes allocas of homogeneous aggregate types (aggregates whose elements all reduce to the same underlying type) to vector registers. The motivation for this extension is IR generated by the frontend that contains several singleton struct types wrapping vectors or vector-like elements, though the implementation is strictly more general.
1 parent 24bbc82 commit 79e06bf

File tree

2 files changed

+351
-41
lines changed

2 files changed

+351
-41
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 65 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -818,6 +818,39 @@ static BasicBlock::iterator skipToNonAllocaInsertPt(BasicBlock &BB,
818818
return I;
819819
}
820820

821+
/// Get the underlying type of a homogeneous aggregate type, or nullptr if the
822+
/// type is non-homogeneous.
823+
static Type *getHomogeneousType(Type *Ty) {
824+
Type *ElemTy = nullptr;
825+
SmallVector<Type *> WorkList;
826+
WorkList.push_back(Ty);
827+
while (!WorkList.empty()) {
828+
Type *CurTy = WorkList.pop_back_val();
829+
830+
// Check if the current type is an aggregate type.
831+
if (auto *VectorTy = dyn_cast<FixedVectorType>(CurTy)) {
832+
WorkList.push_back(VectorTy->getElementType());
833+
continue;
834+
}
835+
if (auto *ArrayTy = dyn_cast<ArrayType>(CurTy)) {
836+
WorkList.push_back(ArrayTy->getElementType());
837+
continue;
838+
}
839+
if (auto *StructTy = dyn_cast<StructType>(CurTy)) {
840+
WorkList.append(StructTy->element_begin(), StructTy->element_end());
841+
continue;
842+
}
843+
844+
// If not, it must be the same as all other non-aggregate types.
845+
if (!ElemTy)
846+
ElemTy = CurTy;
847+
else if (ElemTy != CurTy)
848+
return nullptr;
849+
}
850+
851+
return ElemTy;
852+
}
853+
821854
// FIXME: Should try to pick the most likely to be profitable allocas first.
822855
bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
823856
LLVM_DEBUG(dbgs() << "Trying to promote to vector: " << Alloca << '\n');
@@ -828,42 +861,42 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
828861
}
829862

830863
Type *AllocaTy = Alloca.getAllocatedType();
831-
auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
832-
if (auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
833-
uint64_t NumElems = 1;
834-
Type *ElemTy;
835-
do {
836-
NumElems *= ArrayTy->getNumElements();
837-
ElemTy = ArrayTy->getElementType();
838-
} while ((ArrayTy = dyn_cast<ArrayType>(ElemTy)));
839-
840-
// Check for array of vectors
841-
auto *InnerVectorTy = dyn_cast<FixedVectorType>(ElemTy);
842-
if (InnerVectorTy) {
843-
NumElems *= InnerVectorTy->getNumElements();
844-
ElemTy = InnerVectorTy->getElementType();
845-
}
864+
Type *ElemTy = getHomogeneousType(AllocaTy);
846865

847-
if (VectorType::isValidElementType(ElemTy) && NumElems > 0) {
848-
unsigned ElementSize = DL->getTypeSizeInBits(ElemTy) / 8;
849-
if (ElementSize > 0) {
850-
unsigned AllocaSize = DL->getTypeStoreSize(AllocaTy);
851-
// Expand vector if required to match padding of inner type,
852-
// i.e. odd size subvectors.
853-
// Storage size of new vector must match that of alloca for correct
854-
// behaviour of byte offsets and GEP computation.
855-
if (NumElems * ElementSize != AllocaSize)
856-
NumElems = AllocaSize / ElementSize;
857-
if (NumElems > 0 && (AllocaSize % ElementSize) == 0)
858-
VectorTy = FixedVectorType::get(ElemTy, NumElems);
859-
}
860-
}
866+
if (!ElemTy || !VectorType::isValidElementType(ElemTy)) {
867+
LLVM_DEBUG(dbgs() << " Cannot convert type to vector\n");
868+
return false;
861869
}
862870

863-
if (!VectorTy) {
864-
LLVM_DEBUG(dbgs() << " Cannot convert type to vector\n");
871+
unsigned ElementSizeInBits = DL->getTypeSizeInBits(ElemTy);
872+
if (ElementSizeInBits != DL->getTypeAllocSizeInBits(ElemTy)) {
873+
LLVM_DEBUG(dbgs() << " Cannot convert to vector if the allocation size "
874+
"does not match the type's size\n");
875+
return false;
876+
}
877+
unsigned ElementSize = ElementSizeInBits / 8;
878+
if (ElementSize == 0) {
879+
LLVM_DEBUG(dbgs() << " Cannot create vector of zero-sized elements\n");
880+
return false;
881+
}
882+
883+
// Calculate the size of the corresponding vector, accounting for padding of
884+
// inner types, e.g., odd-sized subvectors. Storage size of new vector must
885+
// match that of alloca for correct behaviour of byte offsets and GEP
886+
// computation.
887+
unsigned AllocaSize = DL->getTypeStoreSize(AllocaTy);
888+
unsigned NumElems = AllocaSize / ElementSize;
889+
if (NumElems == 0) {
890+
LLVM_DEBUG(dbgs() << " Cannot vectorize an empty aggregate type\n");
865891
return false;
866892
}
893+
if (NumElems * ElementSize != AllocaSize) {
894+
LLVM_DEBUG(
895+
dbgs() << " Cannot convert type into vector of the same size\n");
896+
return false;
897+
}
898+
auto *VectorTy = FixedVectorType::get(ElemTy, NumElems);
899+
assert(VectorTy && "Failed to create vector type.");
867900

868901
const unsigned MaxElements =
869902
(MaxVectorRegs * 32) / DL->getTypeSizeInBits(VectorTy->getElementType());
@@ -895,15 +928,6 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
895928

896929
LLVM_DEBUG(dbgs() << " Attempting promotion to: " << *VectorTy << "\n");
897930

898-
Type *VecEltTy = VectorTy->getElementType();
899-
unsigned ElementSizeInBits = DL->getTypeSizeInBits(VecEltTy);
900-
if (ElementSizeInBits != DL->getTypeAllocSizeInBits(VecEltTy)) {
901-
LLVM_DEBUG(dbgs() << " Cannot convert to vector if the allocation size "
902-
"does not match the type's size\n");
903-
return false;
904-
}
905-
unsigned ElementSize = ElementSizeInBits / 8;
906-
assert(ElementSize > 0);
907931
for (auto *U : Uses) {
908932
Instruction *Inst = cast<Instruction>(U->getUser());
909933

@@ -943,7 +967,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
943967
if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
944968
// If we can't compute a vector index from this GEP, then we can't
945969
// promote this alloca to vector.
946-
Value *Index = GEPToVectorIndex(GEP, &Alloca, VecEltTy, *DL, NewGEPInsts);
970+
Value *Index = GEPToVectorIndex(GEP, &Alloca, ElemTy, *DL, NewGEPInsts);
947971
if (!Index)
948972
return RejectUser(Inst, "cannot compute vector index for GEP");
949973

0 commit comments

Comments
 (0)