Skip to content

Commit 1f6cea3

Browse files
zuban32 authored and sys_zuul committed
Make TPM processing more optimal time-wise
Change-Id: I9c468c280286594d8d4d2827bfb5dcddabf2bff6
1 parent 9695142 commit 1f6cea3

File tree

1 file changed

+34
-32
lines changed

1 file changed

+34
-32
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXThreadPrivateMemory.cpp

Lines changed: 34 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -141,8 +141,9 @@ static Value *ZExtOrTruncIfNeeded(Value *From, Type *To,
141141
Value *Res = From;
142142
if (From->getType()->isVectorTy() &&
143143
From->getType()->getVectorNumElements() == 1) {
144-
Res = CastInst::CreateBitOrPointerCast(
144+
auto *TmpRes = CastInst::CreateBitOrPointerCast(
145145
Res, From->getType()->getVectorElementType(), "", InsertBefore);
146+
Res = TmpRes;
146147
}
147148
if (FromTySz < ToTySz)
148149
Res = CastInst::CreateZExtOrBitCast(Res, To, "", InsertBefore);
@@ -204,18 +205,18 @@ GenXThreadPrivateMemory::RestoreVectorAfterNormalization(Instruction *From,
204205
Restored = CastInst::Create(Instruction::Trunc, From, To, "");
205206
} else if (EltSz == genx::QWordBits &&
206207
!(m_useGlobalMem && To->getScalarType()->isIntegerTy(64))) {
207-
auto *NewFrom = From;
208208
if (!From->getType()->getScalarType()->isPointerTy() &&
209209
To->getScalarType()->isPointerTy()) {
210210
assert(From->getType()->getScalarType()->isIntegerTy(genx::DWordBits));
211211
Type *NewTy =
212212
VectorType::get(Type::getInt64Ty(*m_ctx),
213213
From->getType()->getVectorNumElements() / 2);
214-
NewFrom = CastInst::CreateBitOrPointerCast(From, NewTy);
214+
auto *NewFrom = CastInst::CreateBitOrPointerCast(From, NewTy);
215215
NewFrom->insertAfter(From);
216-
Restored = CastInst::Create(CastInst::IntToPtr, NewFrom, To);
216+
From = NewFrom;
217+
Restored = CastInst::Create(CastInst::IntToPtr, From, To);
217218
} else
218-
Restored = CastInst::CreateBitOrPointerCast(NewFrom, To);
219+
Restored = CastInst::CreateBitOrPointerCast(From, To);
219220
}
220221
if (Restored != From)
221222
Restored->insertAfter(From);
@@ -303,11 +304,12 @@ Value *GenXThreadPrivateMemory::lookForPtrReplacement(Value *Ptr) const {
303304
} else if (isa<ExtractElementInst>(Ptr) &&
304305
lookForPtrReplacement(
305306
cast<ExtractElementInst>(Ptr)->getVectorOperand())) {
306-
if (Ptr->getType()->isPointerTy())
307-
return CastInst::Create(Instruction::PtrToInt, Ptr,
308-
Type::getInt32Ty(*m_ctx), "",
309-
cast<Instruction>(Ptr));
310-
else
307+
if (Ptr->getType()->isPointerTy()) {
308+
auto *PTI = CastInst::Create(Instruction::PtrToInt, Ptr,
309+
Type::getInt32Ty(*m_ctx));
310+
PTI->insertAfter(cast<Instruction>(Ptr));
311+
return PTI;
312+
} else
311313
return Ptr;
312314
} else if (auto *CI = dyn_cast<IGCLLVM::CallInst>(Ptr)) {
313315
if (!CI->isIndirectCall() &&
@@ -707,8 +709,9 @@ bool GenXThreadPrivateMemory::replacePhi(PHINode *Phi) {
707709
V->getType()->isVectorTy() != NonVecTy->isVectorTy()) {
708710
if (V->getType()->isVectorTy()) {
709711
assert(V->getType()->getVectorNumElements() == 1);
710-
V = CastInst::Create(CastInst::BitCast, V, NonVecTy->getScalarType(),
711-
"", cast<Instruction>(V));
712+
auto *VCast = CastInst::Create(CastInst::BitCast, V, NonVecTy->getScalarType());
713+
VCast->insertAfter(cast<Instruction>(V));
714+
V = VCast;
712715
}
713716
} else {
714717
assert(0 && "New phi types mismatch");
@@ -980,7 +983,6 @@ void GenXThreadPrivateMemory::addUsers(Value *V) {
980983

981984
void GenXThreadPrivateMemory::collectEachPossibleTPMUsers() {
982985
assert(m_AIUsers.empty());
983-
m_AlreadyAdded.clear();
984986
// At first collect every alloca user
985987
for (auto B = m_allocaToIntrinsic.begin(), E = m_allocaToIntrinsic.end();
986988
B != E; ++B) {
@@ -990,15 +992,9 @@ void GenXThreadPrivateMemory::collectEachPossibleTPMUsers() {
990992
}
991993
// Then collect all pointer args - they may be used
992994
// in loads/stores we need to lower to svm intrinsics
993-
// Process args if only we are sure
994-
// it's necessary
995-
if (m_useGlobalMem) {
996-
for (auto &Arg : m_args) {
997-
// SVM-pointer func arg users should be handled too
998-
if (checkSVMNecessary(Arg))
999-
addUsers(Arg);
1000-
}
1001-
}
995+
// m_args already contains only args that require processing
996+
for (auto &Arg : m_args)
997+
addUsers(Arg);
1002998
}
1003999

10041000
void GenXThreadPrivateMemory::addUsersIfNeeded(Value *V) {
@@ -1030,10 +1026,9 @@ bool GenXThreadPrivateMemory::runOnModule(Module &M) {
10301026
m_ST = STP->getSubtarget();
10311027
for (auto &F : M)
10321028
visit(F);
1033-
if (std::find_if(m_alloca.begin(), m_alloca.end(), checkSVMNecessaryPred) !=
1034-
m_alloca.end() ||
1035-
std::find_if(m_args.begin(), m_args.end(), checkSVMNecessaryPred) !=
1036-
m_args.end()) {
1029+
if (!m_useGlobalMem &&
1030+
std::find_if(m_alloca.begin(), m_alloca.end(), checkSVMNecessaryPred) !=
1031+
m_alloca.end()) {
10371032
LLVM_DEBUG(dbgs() << "Switching TPM to SVM\n");
10381033
// TODO: move the name string to vc-intrinsics *MD::useGlobalMem
10391034
M.addModuleFlag(Module::ModFlagBehavior::Error, "genx.useGlobalMem", 1);
@@ -1095,6 +1090,7 @@ bool GenXThreadPrivateMemory::runOnFunction(Function &F) {
10951090
}
10961091

10971092
// Main loop where instructions are replaced one by one.
1093+
m_AlreadyAdded.clear();
10981094
collectEachPossibleTPMUsers();
10991095
while (!m_AIUsers.empty()) {
11001096
Instruction *I = m_AIUsers.front();
@@ -1145,11 +1141,10 @@ bool GenXThreadPrivateMemory::runOnFunction(Function &F) {
11451141
}
11461142

11471143
for (auto AllocaPair : m_allocaToIntrinsic) {
1148-
if (!AllocaPair.first->use_empty()) {
1149-
for (const auto &U : AllocaPair.first->users()) {
1150-
assert(U->getNumUses() == 0);
1151-
cast<Instruction>(U)->eraseFromParent();
1152-
}
1144+
while (!AllocaPair.first->user_empty()) {
1145+
const auto &U = AllocaPair.first->user_back();
1146+
assert(U->getNumUses() == 0);
1147+
cast<Instruction>(U)->eraseFromParent();
11531148
}
11541149
assert(AllocaPair.first->use_empty() &&
11551150
"uses of replaced alloca aren't empty");
@@ -1188,6 +1183,13 @@ void GenXThreadPrivateMemory::visitAllocaInst(AllocaInst &I) {
11881183

11891184
void GenXThreadPrivateMemory::visitFunction(Function &F) {
11901185
for (auto &Arg : F.args())
1191-
if (Arg.getType()->isPointerTy())
1186+
if (Arg.getType()->isPointerTy() && checkSVMNecessaryPred(&Arg)) {
1187+
LLVM_DEBUG(dbgs() << "Switching TPM to SVM: svm arg\n");
1188+
// TODO: move the name string to vc-intrinsics *MD::useGlobalMem
1189+
if (!m_useGlobalMem)
1190+
F.getParent()->addModuleFlag(Module::ModFlagBehavior::Error,
1191+
"genx.useGlobalMem", 1);
1192+
m_useGlobalMem = true;
11921193
m_args.push_back(&Arg);
1194+
}
11931195
}

0 commit comments

Comments
 (0)