@@ -141,8 +141,9 @@ static Value *ZExtOrTruncIfNeeded(Value *From, Type *To,
141
141
Value *Res = From;
142
142
if (From->getType ()->isVectorTy () &&
143
143
From->getType ()->getVectorNumElements () == 1 ) {
144
- Res = CastInst::CreateBitOrPointerCast (
144
+ auto *TmpRes = CastInst::CreateBitOrPointerCast (
145
145
Res, From->getType ()->getVectorElementType (), " " , InsertBefore);
146
+ Res = TmpRes;
146
147
}
147
148
if (FromTySz < ToTySz)
148
149
Res = CastInst::CreateZExtOrBitCast (Res, To, " " , InsertBefore);
@@ -204,18 +205,18 @@ GenXThreadPrivateMemory::RestoreVectorAfterNormalization(Instruction *From,
204
205
Restored = CastInst::Create (Instruction::Trunc, From, To, " " );
205
206
} else if (EltSz == genx::QWordBits &&
206
207
!(m_useGlobalMem && To->getScalarType ()->isIntegerTy (64 ))) {
207
- auto *NewFrom = From;
208
208
if (!From->getType ()->getScalarType ()->isPointerTy () &&
209
209
To->getScalarType ()->isPointerTy ()) {
210
210
assert (From->getType ()->getScalarType ()->isIntegerTy (genx::DWordBits));
211
211
Type *NewTy =
212
212
VectorType::get (Type::getInt64Ty (*m_ctx),
213
213
From->getType ()->getVectorNumElements () / 2 );
214
- NewFrom = CastInst::CreateBitOrPointerCast (From, NewTy);
214
+ auto * NewFrom = CastInst::CreateBitOrPointerCast (From, NewTy);
215
215
NewFrom->insertAfter (From);
216
- Restored = CastInst::Create (CastInst::IntToPtr, NewFrom, To);
216
+ From = NewFrom;
217
+ Restored = CastInst::Create (CastInst::IntToPtr, From, To);
217
218
} else
218
- Restored = CastInst::CreateBitOrPointerCast (NewFrom , To);
219
+ Restored = CastInst::CreateBitOrPointerCast (From , To);
219
220
}
220
221
if (Restored != From)
221
222
Restored->insertAfter (From);
@@ -303,11 +304,12 @@ Value *GenXThreadPrivateMemory::lookForPtrReplacement(Value *Ptr) const {
303
304
} else if (isa<ExtractElementInst>(Ptr) &&
304
305
lookForPtrReplacement (
305
306
cast<ExtractElementInst>(Ptr)->getVectorOperand ())) {
306
- if (Ptr->getType ()->isPointerTy ())
307
- return CastInst::Create (Instruction::PtrToInt, Ptr,
308
- Type::getInt32Ty (*m_ctx), " " ,
309
- cast<Instruction>(Ptr));
310
- else
307
+ if (Ptr->getType ()->isPointerTy ()) {
308
+ auto *PTI = CastInst::Create (Instruction::PtrToInt, Ptr,
309
+ Type::getInt32Ty (*m_ctx));
310
+ PTI->insertAfter (cast<Instruction>(Ptr));
311
+ return PTI;
312
+ } else
311
313
return Ptr;
312
314
} else if (auto *CI = dyn_cast<IGCLLVM::CallInst>(Ptr)) {
313
315
if (!CI->isIndirectCall () &&
@@ -707,8 +709,9 @@ bool GenXThreadPrivateMemory::replacePhi(PHINode *Phi) {
707
709
V->getType ()->isVectorTy () != NonVecTy->isVectorTy ()) {
708
710
if (V->getType ()->isVectorTy ()) {
709
711
assert (V->getType ()->getVectorNumElements () == 1 );
710
- V = CastInst::Create (CastInst::BitCast, V, NonVecTy->getScalarType (),
711
- " " , cast<Instruction>(V));
712
+ auto *VCast = CastInst::Create (CastInst::BitCast, V, NonVecTy->getScalarType ());
713
+ VCast->insertAfter (cast<Instruction>(V));
714
+ V = VCast;
712
715
}
713
716
} else {
714
717
assert (0 && " New phi types mismatch" );
@@ -980,7 +983,6 @@ void GenXThreadPrivateMemory::addUsers(Value *V) {
980
983
981
984
void GenXThreadPrivateMemory::collectEachPossibleTPMUsers () {
982
985
assert (m_AIUsers.empty ());
983
- m_AlreadyAdded.clear ();
984
986
// At first collect every alloca user
985
987
for (auto B = m_allocaToIntrinsic.begin (), E = m_allocaToIntrinsic.end ();
986
988
B != E; ++B) {
@@ -990,15 +992,9 @@ void GenXThreadPrivateMemory::collectEachPossibleTPMUsers() {
990
992
}
991
993
// Then collect all pointer args - they may be used
992
994
// in loads/stores we need to lower to svm intrinsics
993
- // Process args if only we are sure
994
- // it's necessary
995
- if (m_useGlobalMem) {
996
- for (auto &Arg : m_args) {
997
- // SVM-pointer func arg users should be handled too
998
- if (checkSVMNecessary (Arg))
999
- addUsers (Arg);
1000
- }
1001
- }
995
+ // m_args already contains only args that require processing
996
+ for (auto &Arg : m_args)
997
+ addUsers (Arg);
1002
998
}
1003
999
1004
1000
void GenXThreadPrivateMemory::addUsersIfNeeded (Value *V) {
@@ -1030,10 +1026,9 @@ bool GenXThreadPrivateMemory::runOnModule(Module &M) {
1030
1026
m_ST = STP->getSubtarget ();
1031
1027
for (auto &F : M)
1032
1028
visit (F);
1033
- if (std::find_if (m_alloca.begin (), m_alloca.end (), checkSVMNecessaryPred) !=
1034
- m_alloca.end () ||
1035
- std::find_if (m_args.begin (), m_args.end (), checkSVMNecessaryPred) !=
1036
- m_args.end ()) {
1029
+ if (!m_useGlobalMem &&
1030
+ std::find_if (m_alloca.begin (), m_alloca.end (), checkSVMNecessaryPred) !=
1031
+ m_alloca.end ()) {
1037
1032
LLVM_DEBUG (dbgs () << " Switching TPM to SVM\n " );
1038
1033
// TODO: move the name string to vc-intrinsics *MD::useGlobalMem
1039
1034
M.addModuleFlag (Module::ModFlagBehavior::Error, " genx.useGlobalMem" , 1 );
@@ -1095,6 +1090,7 @@ bool GenXThreadPrivateMemory::runOnFunction(Function &F) {
1095
1090
}
1096
1091
1097
1092
// Main loop where instructions are replaced one by one.
1093
+ m_AlreadyAdded.clear ();
1098
1094
collectEachPossibleTPMUsers ();
1099
1095
while (!m_AIUsers.empty ()) {
1100
1096
Instruction *I = m_AIUsers.front ();
@@ -1145,11 +1141,10 @@ bool GenXThreadPrivateMemory::runOnFunction(Function &F) {
1145
1141
}
1146
1142
1147
1143
for (auto AllocaPair : m_allocaToIntrinsic) {
1148
- if (!AllocaPair.first ->use_empty ()) {
1149
- for (const auto &U : AllocaPair.first ->users ()) {
1150
- assert (U->getNumUses () == 0 );
1151
- cast<Instruction>(U)->eraseFromParent ();
1152
- }
1144
+ while (!AllocaPair.first ->user_empty ()) {
1145
+ const auto &U = AllocaPair.first ->user_back ();
1146
+ assert (U->getNumUses () == 0 );
1147
+ cast<Instruction>(U)->eraseFromParent ();
1153
1148
}
1154
1149
assert (AllocaPair.first ->use_empty () &&
1155
1150
" uses of replaced alloca aren't empty" );
@@ -1188,6 +1183,13 @@ void GenXThreadPrivateMemory::visitAllocaInst(AllocaInst &I) {
1188
1183
1189
1184
void GenXThreadPrivateMemory::visitFunction (Function &F) {
1190
1185
for (auto &Arg : F.args ())
1191
- if (Arg.getType ()->isPointerTy ())
1186
+ if (Arg.getType ()->isPointerTy () && checkSVMNecessaryPred (&Arg)) {
1187
+ LLVM_DEBUG (dbgs () << " Switching TPM to SVM: svm arg\n " );
1188
+ // TODO: move the name string to vc-intrinsics *MD::useGlobalMem
1189
+ if (!m_useGlobalMem)
1190
+ F.getParent ()->addModuleFlag (Module::ModFlagBehavior::Error,
1191
+ " genx.useGlobalMem" , 1 );
1192
+ m_useGlobalMem = true ;
1192
1193
m_args.push_back (&Arg);
1194
+ }
1193
1195
}
0 commit comments