@@ -818,6 +818,39 @@ static BasicBlock::iterator skipToNonAllocaInsertPt(BasicBlock &BB,
818
818
return I;
819
819
}
820
820
821
+ /// Get the underlying type of a homogeneous aggregate type, or nullptr if the
822
+ /// type is non-homogeneous. Nested fixed vectors, arrays and structs are flattened; every other type is treated as a leaf, and all leaves must be the same Type pointer. Also returns nullptr when no leaf is ever reached (ElemTy is never assigned).
823
+ static Type *getHomogeneousType (Type *Ty) {
824
+ Type *ElemTy = nullptr ; // The single leaf type seen so far; stays nullptr until the first leaf is visited.
825
+ SmallVector<Type *> WorkList; // Explicit stack of types still to inspect, used instead of recursion.
826
+ WorkList.push_back (Ty);
827
+ while (!WorkList.empty ()) {
828
+ Type *CurTy = WorkList.pop_back_val ();
829
+
830
+ // Check if the current type is an aggregate type. If so, queue its element type(s) and move on; element counts are not examined here.
831
+ if (auto *VectorTy = dyn_cast<FixedVectorType>(CurTy)) {
832
+ WorkList.push_back (VectorTy->getElementType ()); // One entry is enough: all lanes share this element type.
833
+ continue ;
834
+ }
835
+ if (auto *ArrayTy = dyn_cast<ArrayType>(CurTy)) {
836
+ WorkList.push_back (ArrayTy->getElementType ()); // One entry is enough: all array elements share this type.
837
+ continue ;
838
+ }
839
+ if (auto *StructTy = dyn_cast<StructType>(CurTy)) {
840
+ WorkList.append (StructTy->element_begin (), StructTy->element_end ()); // Members may differ, so every member type is queued.
841
+ continue ;
842
+ }
843
+
844
+ // If not, it must be the same as all other non-aggregate types. The comparison below is on Type pointers.
845
+ if (!ElemTy)
846
+ ElemTy = CurTy;
847
+ else if (ElemTy != CurTy)
848
+ return nullptr ; // Two different leaf types were found: the aggregate is not homogeneous.
849
+ }
850
+
851
+ return ElemTy; // Common leaf type, or nullptr if the worklist never produced a leaf.
852
+ }
853
+
821
854
// FIXME: Should try to pick the most likely to be profitable allocas first.
822
855
bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector (AllocaInst &Alloca) {
823
856
LLVM_DEBUG (dbgs () << " Trying to promote to vector: " << Alloca << ' \n ' );
@@ -828,42 +861,42 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
828
861
}
829
862
830
863
Type *AllocaTy = Alloca.getAllocatedType ();
831
- auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
832
- if (auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
833
- uint64_t NumElems = 1 ;
834
- Type *ElemTy;
835
- do {
836
- NumElems *= ArrayTy->getNumElements ();
837
- ElemTy = ArrayTy->getElementType ();
838
- } while ((ArrayTy = dyn_cast<ArrayType>(ElemTy)));
839
-
840
- // Check for array of vectors
841
- auto *InnerVectorTy = dyn_cast<FixedVectorType>(ElemTy);
842
- if (InnerVectorTy) {
843
- NumElems *= InnerVectorTy->getNumElements ();
844
- ElemTy = InnerVectorTy->getElementType ();
845
- }
864
+ Type *ElemTy = getHomogeneousType (AllocaTy);
846
865
847
- if (VectorType::isValidElementType (ElemTy) && NumElems > 0 ) {
848
- unsigned ElementSize = DL->getTypeSizeInBits (ElemTy) / 8 ;
849
- if (ElementSize > 0 ) {
850
- unsigned AllocaSize = DL->getTypeStoreSize (AllocaTy);
851
- // Expand vector if required to match padding of inner type,
852
- // i.e. odd size subvectors.
853
- // Storage size of new vector must match that of alloca for correct
854
- // behaviour of byte offsets and GEP computation.
855
- if (NumElems * ElementSize != AllocaSize)
856
- NumElems = AllocaSize / ElementSize;
857
- if (NumElems > 0 && (AllocaSize % ElementSize) == 0 )
858
- VectorTy = FixedVectorType::get (ElemTy, NumElems);
859
- }
860
- }
866
+ if (!ElemTy || !VectorType::isValidElementType (ElemTy)) {
867
+ LLVM_DEBUG (dbgs () << " Cannot convert type to vector\n " );
868
+ return false ;
861
869
}
862
870
863
- if (!VectorTy) {
864
- LLVM_DEBUG (dbgs () << " Cannot convert type to vector\n " );
871
+ unsigned ElementSizeInBits = DL->getTypeSizeInBits (ElemTy);
872
+ if (ElementSizeInBits != DL->getTypeAllocSizeInBits (ElemTy)) {
873
+ LLVM_DEBUG (dbgs () << " Cannot convert to vector if the allocation size "
874
+ " does not match the type's size\n " );
875
+ return false ;
876
+ }
877
+ unsigned ElementSize = ElementSizeInBits / 8 ;
878
+ if (ElementSize == 0 ) {
879
+ LLVM_DEBUG (dbgs () << " Cannot create vector of zero-sized elements\n " );
880
+ return false ;
881
+ }
882
+
883
+ // Calculate the size of the corresponding vector, accounting for padding of
884
+ // inner types, e.g., odd-sized subvectors. Storage size of new vector must
885
+ // match that of alloca for correct behaviour of byte offsets and GEP
886
+ // computation.
887
+ unsigned AllocaSize = DL->getTypeStoreSize (AllocaTy);
888
+ unsigned NumElems = AllocaSize / ElementSize;
889
+ if (NumElems == 0 ) {
890
+ LLVM_DEBUG (dbgs () << " Cannot vectorize an empty aggregate type\n " );
865
891
return false ;
866
892
}
893
+ if (NumElems * ElementSize != AllocaSize) {
894
+ LLVM_DEBUG (
895
+ dbgs () << " Cannot convert type into vector of the same size\n " );
896
+ return false ;
897
+ }
898
+ auto *VectorTy = FixedVectorType::get (ElemTy, NumElems);
899
+ assert (VectorTy && " Failed to create vector type." );
867
900
868
901
const unsigned MaxElements =
869
902
(MaxVectorRegs * 32 ) / DL->getTypeSizeInBits (VectorTy->getElementType ());
@@ -895,15 +928,6 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
895
928
896
929
LLVM_DEBUG (dbgs () << " Attempting promotion to: " << *VectorTy << " \n " );
897
930
898
- Type *VecEltTy = VectorTy->getElementType ();
899
- unsigned ElementSizeInBits = DL->getTypeSizeInBits (VecEltTy);
900
- if (ElementSizeInBits != DL->getTypeAllocSizeInBits (VecEltTy)) {
901
- LLVM_DEBUG (dbgs () << " Cannot convert to vector if the allocation size "
902
- " does not match the type's size\n " );
903
- return false ;
904
- }
905
- unsigned ElementSize = ElementSizeInBits / 8 ;
906
- assert (ElementSize > 0 );
907
931
for (auto *U : Uses) {
908
932
Instruction *Inst = cast<Instruction>(U->getUser ());
909
933
@@ -943,7 +967,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
943
967
if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
944
968
// If we can't compute a vector index from this GEP, then we can't
945
969
// promote this alloca to vector.
946
- Value *Index = GEPToVectorIndex (GEP, &Alloca, VecEltTy , *DL, NewGEPInsts);
970
+ Value *Index = GEPToVectorIndex (GEP, &Alloca, ElemTy , *DL, NewGEPInsts);
947
971
if (!Index)
948
972
return RejectUser (Inst, " cannot compute vector index for GEP" );
949
973
0 commit comments