Skip to content

Commit 4692f0d

Browse files
authored
Revert "[AMDGPU] Extended vector promotion to aggregate types." (llvm#144366)
Reverts llvm#143784. The patch fails some internal tests; will investigate more thoroughly before attempting to remerge.
1 parent f2734aa commit 4692f0d

File tree

2 files changed

+41
-351
lines changed

2 files changed

+41
-351
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 41 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -818,39 +818,6 @@ static BasicBlock::iterator skipToNonAllocaInsertPt(BasicBlock &BB,
818818
return I;
819819
}
820820

821-
/// Get the underlying type of a homogeneous aggregate type, or nullptr if the
822-
/// type is non-homogeneous.
823-
static Type *getHomogeneousType(Type *Ty) {
824-
Type *ElemTy = nullptr;
825-
SmallVector<Type *> WorkList;
826-
WorkList.push_back(Ty);
827-
while (!WorkList.empty()) {
828-
Type *CurTy = WorkList.pop_back_val();
829-
830-
// Check if the current type is an aggregate type.
831-
if (auto *VectorTy = dyn_cast<FixedVectorType>(CurTy)) {
832-
WorkList.push_back(VectorTy->getElementType());
833-
continue;
834-
}
835-
if (auto *ArrayTy = dyn_cast<ArrayType>(CurTy)) {
836-
WorkList.push_back(ArrayTy->getElementType());
837-
continue;
838-
}
839-
if (auto *StructTy = dyn_cast<StructType>(CurTy)) {
840-
WorkList.append(StructTy->element_begin(), StructTy->element_end());
841-
continue;
842-
}
843-
844-
// If not, it must be the same as all other non-aggregate types.
845-
if (!ElemTy)
846-
ElemTy = CurTy;
847-
else if (ElemTy != CurTy)
848-
return nullptr;
849-
}
850-
851-
return ElemTy;
852-
}
853-
854821
// FIXME: Should try to pick the most likely to be profitable allocas first.
855822
bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
856823
LLVM_DEBUG(dbgs() << "Trying to promote to vector: " << Alloca << '\n');
@@ -861,42 +828,42 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
861828
}
862829

863830
Type *AllocaTy = Alloca.getAllocatedType();
864-
Type *ElemTy = getHomogeneousType(AllocaTy);
865-
866-
if (!ElemTy || !VectorType::isValidElementType(ElemTy)) {
867-
LLVM_DEBUG(dbgs() << " Cannot convert type to vector\n");
868-
return false;
869-
}
831+
auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
832+
if (auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
833+
uint64_t NumElems = 1;
834+
Type *ElemTy;
835+
do {
836+
NumElems *= ArrayTy->getNumElements();
837+
ElemTy = ArrayTy->getElementType();
838+
} while ((ArrayTy = dyn_cast<ArrayType>(ElemTy)));
839+
840+
// Check for array of vectors
841+
auto *InnerVectorTy = dyn_cast<FixedVectorType>(ElemTy);
842+
if (InnerVectorTy) {
843+
NumElems *= InnerVectorTy->getNumElements();
844+
ElemTy = InnerVectorTy->getElementType();
845+
}
870846

871-
unsigned ElementSizeInBits = DL->getTypeSizeInBits(ElemTy);
872-
if (ElementSizeInBits != DL->getTypeAllocSizeInBits(ElemTy)) {
873-
LLVM_DEBUG(dbgs() << " Cannot convert to vector if the allocation size "
874-
"does not match the type's size\n");
875-
return false;
876-
}
877-
unsigned ElementSize = ElementSizeInBits / 8;
878-
if (ElementSize == 0) {
879-
LLVM_DEBUG(dbgs() << " Cannot create vector of zero-sized elements\n");
880-
return false;
847+
if (VectorType::isValidElementType(ElemTy) && NumElems > 0) {
848+
unsigned ElementSize = DL->getTypeSizeInBits(ElemTy) / 8;
849+
if (ElementSize > 0) {
850+
unsigned AllocaSize = DL->getTypeStoreSize(AllocaTy);
851+
// Expand vector if required to match padding of inner type,
852+
// i.e. odd size subvectors.
853+
// Storage size of new vector must match that of alloca for correct
854+
// behaviour of byte offsets and GEP computation.
855+
if (NumElems * ElementSize != AllocaSize)
856+
NumElems = AllocaSize / ElementSize;
857+
if (NumElems > 0 && (AllocaSize % ElementSize) == 0)
858+
VectorTy = FixedVectorType::get(ElemTy, NumElems);
859+
}
860+
}
881861
}
882862

883-
// Calculate the size of the corresponding vector, accounting for padding of
884-
// inner types, e.g., odd-sized subvectors. Storage size of new vector must
885-
// match that of alloca for correct behaviour of byte offsets and GEP
886-
// computation.
887-
unsigned AllocaSize = DL->getTypeStoreSize(AllocaTy);
888-
unsigned NumElems = AllocaSize / ElementSize;
889-
if (NumElems == 0) {
890-
LLVM_DEBUG(dbgs() << " Cannot vectorize an empty aggregate type\n");
891-
return false;
892-
}
893-
if (NumElems * ElementSize != AllocaSize) {
894-
LLVM_DEBUG(
895-
dbgs() << " Cannot convert type into vector of the same size\n");
863+
if (!VectorTy) {
864+
LLVM_DEBUG(dbgs() << " Cannot convert type to vector\n");
896865
return false;
897866
}
898-
auto *VectorTy = FixedVectorType::get(ElemTy, NumElems);
899-
assert(VectorTy && "Failed to create vector type.");
900867

901868
const unsigned MaxElements =
902869
(MaxVectorRegs * 32) / DL->getTypeSizeInBits(VectorTy->getElementType());
@@ -928,6 +895,15 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
928895

929896
LLVM_DEBUG(dbgs() << " Attempting promotion to: " << *VectorTy << "\n");
930897

898+
Type *VecEltTy = VectorTy->getElementType();
899+
unsigned ElementSizeInBits = DL->getTypeSizeInBits(VecEltTy);
900+
if (ElementSizeInBits != DL->getTypeAllocSizeInBits(VecEltTy)) {
901+
LLVM_DEBUG(dbgs() << " Cannot convert to vector if the allocation size "
902+
"does not match the type's size\n");
903+
return false;
904+
}
905+
unsigned ElementSize = ElementSizeInBits / 8;
906+
assert(ElementSize > 0);
931907
for (auto *U : Uses) {
932908
Instruction *Inst = cast<Instruction>(U->getUser());
933909

@@ -967,7 +943,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
967943
if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
968944
// If we can't compute a vector index from this GEP, then we can't
969945
// promote this alloca to vector.
970-
Value *Index = GEPToVectorIndex(GEP, &Alloca, ElemTy, *DL, NewGEPInsts);
946+
Value *Index = GEPToVectorIndex(GEP, &Alloca, VecEltTy, *DL, NewGEPInsts);
971947
if (!Index)
972948
return RejectUser(Inst, "cannot compute vector index for GEP");
973949

0 commit comments

Comments
 (0)