@@ -848,15 +848,15 @@ Value *GenXPacketize::packetizeLLVMInstruction(Instruction *Inst) {
             Inst->getType()->getPointerAddressSpace());
       else {
         // Map <N x OldTy>* to <N x NewTy*> using cast then GEP
-        auto *TmpTy =
-            PointerType::get(llvm::ArrayType::get(DstScalarTy, B->VWidth),
-                             Inst->getType()->getPointerAddressSpace());
+        auto *TmpTy = llvm::ArrayType::get(DstScalarTy, B->VWidth);
+        auto *TmpPtrTy = PointerType::get(
+            TmpTy, Inst->getType()->getPointerAddressSpace());
         auto *TmpInst =
-            B->CAST((Instruction::CastOps)Opcode, PacketizedSrc, TmpTy);
+            B->CAST((Instruction::CastOps)Opcode, PacketizedSrc, TmpPtrTy);
         SmallVector<Value *, 2> VecIndices;
         VecIndices.push_back(B->C(0));
         VecIndices.push_back(B->CInc<uint32_t>(0, B->VWidth));
-        ReplacedInst = B->GEPA(TmpInst, VecIndices);
+        ReplacedInst = B->GEPA(TmpTy, TmpInst, VecIndices);
         break;
       }
     }
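The hunk above splits the old TmpTy into the array type and its pointer type so that GEPA can be handed the source element type explicitly. A minimal sketch of the same cast-then-GEP shape on a bare llvm::IRBuilder, assuming PacketBuilder's CAST/GEPA/CInc helpers boil down to the calls below (the function name and parameters are illustrative, not part of the pass):

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

// Map a pointer to <N x OldTy> onto per-lane pointers to NewTy by casting to
// [N x NewTy]* and GEPing with a <0, 1, ..., N-1> step vector.
llvm::Value *perLanePointers(llvm::IRBuilder<> &B, llvm::Value *Src,
                             llvm::Type *NewElemTy, unsigned Width,
                             unsigned AddrSpace) {
  auto *ArrTy = llvm::ArrayType::get(NewElemTy, Width);
  auto *Cast = B.CreateBitCast(Src, llvm::PointerType::get(ArrTy, AddrSpace));

  // Constant step vector <0, 1, ..., Width-1>, one index per SIMD lane.
  llvm::SmallVector<llvm::Constant *, 16> Steps;
  for (unsigned I = 0; I < Width; ++I)
    Steps.push_back(B.getInt32(I));
  llvm::Value *Lane = llvm::ConstantVector::get(Steps);

  // With opaque pointers the source element type is passed explicitly.
  return B.CreateGEP(ArrTy, Cast, {B.getInt32(0), Lane});
}
```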
@@ -869,31 +869,26 @@ Value *GenXPacketize::packetizeLLVMInstruction(Instruction *Inst) {
   }
   case Instruction::GetElementPtr: {
     auto *GepInst = cast<GetElementPtrInst>(Inst);
-    auto *Base = GepInst->getPointerOperand();
-    Value *VecSrc = nullptr;
-    if (isa<GlobalValue>(Base))
-      VecSrc = Base;
-    else if (isa<Argument>(Base))
-      VecSrc = Base;
-    else if (isa<Instruction>(Base) &&
-             UniformInsts.count(cast<Instruction>(Base)))
-      VecSrc = Base;
-    else
-      VecSrc = getPacketizeValue(Base);
+    auto *VecSrc = GepInst->getPointerOperand();
+    auto *VecSrcTy = GepInst->getSourceElementType();
+    if (!isa<GlobalValue>(VecSrc) && !isa<Argument>(VecSrc) &&
+        !(isa<Instruction>(VecSrc) &&
+          UniformInsts.count(cast<Instruction>(VecSrc))))
+      VecSrc = getPacketizeValue(VecSrc);
     if (!isa<AllocaInst>(VecSrc)) {
       // just packetize the GEP to a vector GEP.
       SmallVector<Value *, 8> VecIndices;
       for (uint32_t Idx = 0; Idx < GepInst->getNumIndices(); ++Idx)
         VecIndices.push_back(getPacketizeValue(GepInst->getOperand(1 + Idx)));
-      ReplacedInst = B->GEPA(VecSrc, VecIndices);
+      ReplacedInst = B->GEPA(VecSrcTy, VecSrc, VecIndices);
     } else {
       if (GepInst->hasAllConstantIndices()) {
         // SOA GEP with scalar src and constant indices, result will be <N x
         // Ty>* Ex. gep [4 x <8 x float>]*, 0, 0 --> <8 x float>*
         SmallVector<Value *, 8> VecIndices;
         for (uint32_t Idx = 0; Idx < GepInst->getNumIndices(); ++Idx)
           VecIndices.push_back(GepInst->getOperand(1 + Idx));
-        ReplacedInst = B->GEPA(VecSrc, VecIndices);
+        ReplacedInst = B->GEPA(VecSrcTy, VecSrc, VecIndices);
       } else {
         // SOA GEP with non-uniform indices. Need to vector GEP to each SIMD
         // lane.
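Here the rebuilt GEPs take their source element type from the original instruction via getSourceElementType() instead of peeling it off the pointer operand, which may be opaque. A hedged sketch of that pattern with a plain IRBuilder (the helper name is made up for illustration):

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

// Rebuild a GEP with new (e.g. widened) indices; the element type comes from
// the original GEP, not from its pointer operand.
llvm::Value *rebuildGep(llvm::IRBuilder<> &B, llvm::GetElementPtrInst *Gep,
                        llvm::ArrayRef<llvm::Value *> NewIndices) {
  return B.CreateGEP(Gep->getSourceElementType(), Gep->getPointerOperand(),
                     NewIndices);
}
```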
@@ -903,22 +898,22 @@ Value *GenXPacketize::packetizeLLVMInstruction(Instruction *Inst) {
           VecIndices.push_back(getPacketizeValue(GepInst->getOperand(1 + Idx)));
         // Step to the SIMD lane
         VecIndices.push_back(B->CInc<uint32_t>(0, B->VWidth));
-        ReplacedInst = B->GEPA(VecSrc, VecIndices);
+        ReplacedInst = B->GEPA(VecSrcTy, VecSrc, VecIndices);
       }
     }
     break;
   }
   case Instruction::Load: {
     auto *LI = cast<LoadInst>(Inst);
-    auto *Src = LI->getPointerOperand();
-    auto *VecSrc = getPacketizeValue(Src);
+    auto *VecSrc = getPacketizeValue(LI->getPointerOperand());
+    auto *VecSrcTy = B->getVectorType(LI->getType());
     if (VecSrc->getType()->isVectorTy()) {
       IGC_ASSERT(
           cast<VectorType>(VecSrc->getType())->getElementType()->isPointerTy());
       auto Align = IGCLLVM::getAlignmentValue(LI);
-      ReplacedInst = B->MASKED_GATHER(VecSrc, Align);
+      ReplacedInst = B->MASKED_GATHER(VecSrcTy, VecSrc, Align);
     } else
-      ReplacedInst = B->ALIGNED_LOAD(VecSrc, IGCLLVM::getAlign(*LI));
+      ReplacedInst = B->ALIGNED_LOAD(VecSrcTy, VecSrc, IGCLLVM::getAlign(*LI));
     break;
   }
   case Instruction::Store: {
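For loads, the packetized result type is now derived from the scalar load's type and passed to MASKED_GATHER/ALIGNED_LOAD, since an opaque pointer operand no longer carries it. Roughly, assuming those helpers wrap the corresponding IRBuilder calls (names and signature below are illustrative):

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Alignment.h"

// Two flavours of the packetized load, both taking the result type explicitly:
// a gather when VecSrc is a vector of per-lane pointers, a plain aligned load
// when it is a single uniform pointer.
llvm::Value *emitVectorLoad(llvm::IRBuilder<> &B, llvm::Value *VecSrc,
                            llvm::Type *VecTy, llvm::Align Alignment,
                            llvm::Value *Mask) {
  if (VecSrc->getType()->isVectorTy())
    return B.CreateMaskedGather(VecTy, VecSrc, Alignment, Mask);
  return B.CreateAlignedLoad(VecTy, VecSrc, Alignment);
}
```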
@@ -1068,15 +1063,15 @@ Value *GenXPacketize::packetizeLLVMInstruction(Instruction *Inst) {
     } else {
       // vector struct input, need to loop over components and build up new
       // struct allocation
-      auto *Alloca = B->ALLOCA(
-          B->getVectorType(IGCLLVM::getNonOpaquePtrEltTy(Inst->getType())));
-      uint32_t NumElems =
-          IGCLLVM::getNonOpaquePtrEltTy(Inst->getType())->getArrayNumElements();
+      auto *Ty = IGCLLVM::getNonOpaquePtrEltTy(Inst->getType());
+      auto *VecTy = B->getVectorType(Ty);
+      auto *Alloca = B->ALLOCA(VecTy);
+      uint32_t NumElems = Ty->getArrayNumElements();
       for (uint32_t Idx = 0; Idx < NumElems; ++Idx) {
-        auto *TrueSrcElem = B->LOAD(TrueSrc, {0, Idx});
-        auto *FalseSrcElem = B->LOAD(FalseSrc, {0, Idx});
+        auto *TrueSrcElem = B->LOAD(VecTy, TrueSrc, {0, Idx});
+        auto *FalseSrcElem = B->LOAD(VecTy, FalseSrc, {0, Idx});
         // mask store true components
-        auto *GEP = B->GEP(Alloca, {0, Idx});
+        auto *GEP = B->GEP(VecTy, Alloca, {0, Idx});
         B->MASKED_STORE(TrueSrcElem, GEP, 4, VecCond);
         // store false components to inverted mask
         B->MASKED_STORE(FalseSrcElem, GEP, 4, B->NOT(VecCond));
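In this struct-select path the aggregate type is computed once and threaded through ALLOCA, LOAD and GEP. A small sketch of the per-element access it relies on, under the assumption that LOAD(Ty, Ptr, {0, Idx}) amounts to a GEP into the aggregate followed by a typed load (the helper name is hypothetical):

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

// Load element Idx of an aggregate through a pointer: GEP with the aggregate
// type spelled out, then a typed load of the selected element.
llvm::Value *loadAggElement(llvm::IRBuilder<> &B, llvm::Type *AggTy,
                            llvm::Value *AggPtr, unsigned Idx) {
  llvm::Value *ElemPtr =
      B.CreateGEP(AggTy, AggPtr, {B.getInt32(0), B.getInt32(Idx)});
  llvm::Type *ElemTy = llvm::GetElementPtrInst::getTypeAtIndex(AggTy, Idx);
  return B.CreateLoad(ElemTy, ElemPtr);
}
```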
@@ -1743,7 +1738,7 @@ void GenXPacketize::fixupLLVMIntrinsics(Function &F) {
 GlobalVariable *GenXPacketize::findGlobalExecMask() {
   // look for the global EMask variable if exists
   for (auto &Global : M->getGlobalList()) {
-    auto *Ty = IGCLLVM::getNonOpaquePtrEltTy(Global.getType());
+    auto *Ty = Global.getValueType();
     if (Ty->isVectorTy() &&
         cast<IGCLLVM::FixedVectorType>(Ty)->getNumElements() ==
             CMSimdCFLower::MAX_SIMD_CF_WIDTH) {
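GlobalVariable::getValueType() returns the global's declared type directly, so the non-opaque-pointer helper can be dropped here. A hedged sketch of the same lookup shape; the function name and the Width parameter are placeholders standing in for the pass's own predicate and CMSimdCFLower::MAX_SIMD_CF_WIDTH:

```cpp
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"

// Scan module globals for a fixed-width vector variable; getValueType() hands
// back the declared type without touching the (possibly opaque) pointer type.
llvm::GlobalVariable *findWideMask(llvm::Module &M, unsigned Width) {
  for (llvm::GlobalVariable &Global : M.globals()) {
    llvm::Type *Ty = Global.getValueType();
    if (auto *VTy = llvm::dyn_cast<llvm::FixedVectorType>(Ty))
      if (VTy->getNumElements() == Width)
        return &Global;
  }
  return nullptr;
}
```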