@@ -818,39 +818,6 @@ static BasicBlock::iterator skipToNonAllocaInsertPt(BasicBlock &BB,
818
818
return I;
819
819
}
820
820
821
- // / Get the underlying type of a homogeneous aggregate type, or nullptr if the
822
- // / type is non-homogeneous.
823
- static Type *getHomogeneousType (Type *Ty) {
824
- Type *ElemTy = nullptr ;
825
- SmallVector<Type *> WorkList;
826
- WorkList.push_back (Ty);
827
- while (!WorkList.empty ()) {
828
- Type *CurTy = WorkList.pop_back_val ();
829
-
830
- // Check if the current type is an aggregate type.
831
- if (auto *VectorTy = dyn_cast<FixedVectorType>(CurTy)) {
832
- WorkList.push_back (VectorTy->getElementType ());
833
- continue ;
834
- }
835
- if (auto *ArrayTy = dyn_cast<ArrayType>(CurTy)) {
836
- WorkList.push_back (ArrayTy->getElementType ());
837
- continue ;
838
- }
839
- if (auto *StructTy = dyn_cast<StructType>(CurTy)) {
840
- WorkList.append (StructTy->element_begin (), StructTy->element_end ());
841
- continue ;
842
- }
843
-
844
- // If not, it must be the same as all other non-aggregate types.
845
- if (!ElemTy)
846
- ElemTy = CurTy;
847
- else if (ElemTy != CurTy)
848
- return nullptr ;
849
- }
850
-
851
- return ElemTy;
852
- }
853
-
854
821
// FIXME: Should try to pick the most likely to be profitable allocas first.
855
822
bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector (AllocaInst &Alloca) {
856
823
LLVM_DEBUG (dbgs () << " Trying to promote to vector: " << Alloca << ' \n ' );
@@ -861,42 +828,42 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
861
828
}
862
829
863
830
Type *AllocaTy = Alloca.getAllocatedType ();
864
- Type *ElemTy = getHomogeneousType (AllocaTy);
865
-
866
- if (!ElemTy || !VectorType::isValidElementType (ElemTy)) {
867
- LLVM_DEBUG (dbgs () << " Cannot convert type to vector\n " );
868
- return false ;
869
- }
831
+ auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
832
+ if (auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
833
+ uint64_t NumElems = 1 ;
834
+ Type *ElemTy;
835
+ do {
836
+ NumElems *= ArrayTy->getNumElements ();
837
+ ElemTy = ArrayTy->getElementType ();
838
+ } while ((ArrayTy = dyn_cast<ArrayType>(ElemTy)));
839
+
840
+ // Check for array of vectors
841
+ auto *InnerVectorTy = dyn_cast<FixedVectorType>(ElemTy);
842
+ if (InnerVectorTy) {
843
+ NumElems *= InnerVectorTy->getNumElements ();
844
+ ElemTy = InnerVectorTy->getElementType ();
845
+ }
870
846
871
- unsigned ElementSizeInBits = DL->getTypeSizeInBits (ElemTy);
872
- if (ElementSizeInBits != DL->getTypeAllocSizeInBits (ElemTy)) {
873
- LLVM_DEBUG (dbgs () << " Cannot convert to vector if the allocation size "
874
- " does not match the type's size\n " );
875
- return false ;
876
- }
877
- unsigned ElementSize = ElementSizeInBits / 8 ;
878
- if (ElementSize == 0 ) {
879
- LLVM_DEBUG (dbgs () << " Cannot create vector of zero-sized elements\n " );
880
- return false ;
847
+ if (VectorType::isValidElementType (ElemTy) && NumElems > 0 ) {
848
+ unsigned ElementSize = DL->getTypeSizeInBits (ElemTy) / 8 ;
849
+ if (ElementSize > 0 ) {
850
+ unsigned AllocaSize = DL->getTypeStoreSize (AllocaTy);
851
+ // Expand vector if required to match padding of inner type,
852
+ // i.e. odd size subvectors.
853
+ // Storage size of new vector must match that of alloca for correct
854
+ // behaviour of byte offsets and GEP computation.
855
+ if (NumElems * ElementSize != AllocaSize)
856
+ NumElems = AllocaSize / ElementSize;
857
+ if (NumElems > 0 && (AllocaSize % ElementSize) == 0 )
858
+ VectorTy = FixedVectorType::get (ElemTy, NumElems);
859
+ }
860
+ }
881
861
}
882
862
883
- // Calculate the size of the corresponding vector, accounting for padding of
884
- // inner types, e.g., odd-sized subvectors. Storage size of new vector must
885
- // match that of alloca for correct behaviour of byte offsets and GEP
886
- // computation.
887
- unsigned AllocaSize = DL->getTypeStoreSize (AllocaTy);
888
- unsigned NumElems = AllocaSize / ElementSize;
889
- if (NumElems == 0 ) {
890
- LLVM_DEBUG (dbgs () << " Cannot vectorize an empty aggregate type\n " );
891
- return false ;
892
- }
893
- if (NumElems * ElementSize != AllocaSize) {
894
- LLVM_DEBUG (
895
- dbgs () << " Cannot convert type into vector of the same size\n " );
863
+ if (!VectorTy) {
864
+ LLVM_DEBUG (dbgs () << " Cannot convert type to vector\n " );
896
865
return false ;
897
866
}
898
- auto *VectorTy = FixedVectorType::get (ElemTy, NumElems);
899
- assert (VectorTy && " Failed to create vector type." );
900
867
901
868
const unsigned MaxElements =
902
869
(MaxVectorRegs * 32 ) / DL->getTypeSizeInBits (VectorTy->getElementType ());
@@ -928,6 +895,15 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
928
895
929
896
LLVM_DEBUG (dbgs () << " Attempting promotion to: " << *VectorTy << " \n " );
930
897
898
+ Type *VecEltTy = VectorTy->getElementType ();
899
+ unsigned ElementSizeInBits = DL->getTypeSizeInBits (VecEltTy);
900
+ if (ElementSizeInBits != DL->getTypeAllocSizeInBits (VecEltTy)) {
901
+ LLVM_DEBUG (dbgs () << " Cannot convert to vector if the allocation size "
902
+ " does not match the type's size\n " );
903
+ return false ;
904
+ }
905
+ unsigned ElementSize = ElementSizeInBits / 8 ;
906
+ assert (ElementSize > 0 );
931
907
for (auto *U : Uses) {
932
908
Instruction *Inst = cast<Instruction>(U->getUser ());
933
909
@@ -967,7 +943,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
967
943
if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
968
944
// If we can't compute a vector index from this GEP, then we can't
969
945
// promote this alloca to vector.
970
- Value *Index = GEPToVectorIndex (GEP, &Alloca, ElemTy , *DL, NewGEPInsts);
946
+ Value *Index = GEPToVectorIndex (GEP, &Alloca, VecEltTy , *DL, NewGEPInsts);
971
947
if (!Index)
972
948
return RejectUser (Inst, " cannot compute vector index for GEP" );
973
949
0 commit comments