Commit bcf67bf

Gang Y Chen authored and igcbot committed
[Autobackout][FuncReg] Revert of change: cfc2144
Selective scalarization to reduce unnecessary insert/extract; keep vector-phi for vector coalescing.
1 parent 242dc4a commit bcf67bf

File tree

3 files changed: +180 -142 lines changed

IGC/AdaptorOCL/UnifyIROCL.cpp

Lines changed: 2 additions & 2 deletions

@@ -510,10 +510,10 @@ static void CommonOCLBasedPasses(
     mpm.add(createSROAPass());
     mpm.add(createIGCInstructionCombiningPass());

-    // true means selective scalarization
+    // "false" to createScalarizerPass() means that vector load/stores are NOT scalarized
     if (IGC_IS_FLAG_ENABLED(DisableScalarizerGPGPU) == false)
     {
-        mpm.add(createScalarizerPass(true));
+        mpm.add(createScalarizerPass(false));
     }

     // Create a dummy kernel to attach the symbol table if necessary
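For context, a minimal sketch of what the boolean controls (hypothetical IR, not taken from this commit): with createScalarizerPass(false) a vector load such as the one below is left intact, while non-memory vector instructions are still scalarized per lane; with true, the load itself would also be split into scalar loads, as implemented by the new LoadInst/StoreInst handling in Scalarizer.cpp below.

    ; hypothetical input IR
    define <2 x float> @scale(<2 x float>* %p) {
    entry:
      ; kept as a vector load when the pass is created with "false";
      ; with "true" it would be rewritten into two scalar loads
      %v = load <2 x float>, <2 x float>* %p
      %r = fmul <2 x float> %v, <float 2.0, float 2.0>
      ret <2 x float> %r
    }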

IGC/Compiler/Optimizer/Scalarizer.cpp

Lines changed: 163 additions & 128 deletions
@@ -69,12 +69,12 @@ IGC_INITIALIZE_PASS_END(ScalarizeFunction, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG

 char ScalarizeFunction::ID = 0;

-ScalarizeFunction::ScalarizeFunction(bool selectiveScalarization) : FunctionPass(ID)
+ScalarizeFunction::ScalarizeFunction(bool scalarizingVectorLDSTType) : FunctionPass(ID)
 {
     initializeScalarizeFunctionPass(*PassRegistry::getPassRegistry());

     for (int i = 0; i < Instruction::OtherOpsEnd; i++) m_transposeCtr[i] = 0;
-    m_SelectiveScalarization = selectiveScalarization;
+    m_ScalarizingVectorLDSTType = scalarizingVectorLDSTType;

     // Initialize SCM buffers and allocation
     m_SCMAllocationArray = new SCMEntry[ESTIMATED_INST_NUM];
@@ -121,13 +121,6 @@ bool ScalarizeFunction::runOnFunction(Function& F)
     m_SCM.clear();
     releaseAllSCMEntries();
     m_DRL.clear();
-    m_Excludes.clear();
-
-    // collecting instructions that we want to avoid scalarization
-    if (m_SelectiveScalarization)
-    {
-        buildExclusiveSet();
-    }

     // Scalarization. Iterate over all the instructions
     // Always hold the iterator at the instruction following the one being scalarized (so the
@@ -139,10 +132,7 @@ bool ScalarizeFunction::runOnFunction(Function& F)
         Instruction* currInst = &*sI;
         // Move iterator to next instruction BEFORE scalarizing current instruction
         ++sI;
-        if (!m_Excludes.count(currInst))
-        {
-            dispatchInstructionToScalarize(currInst);
-        }
+        dispatchInstructionToScalarize(currInst);
     }

     resolveVectorValues();
@@ -171,119 +161,6 @@ bool ScalarizeFunction::runOnFunction(Function& F)
     return true;
 }

-void ScalarizeFunction::buildExclusiveSet()
-{
-    inst_iterator sI = inst_begin(m_currFunc);
-    inst_iterator sE = inst_end(m_currFunc);
-    std::vector<llvm::Value*> workset;
-    while (sI != sE)
-    {
-        Instruction* currInst = &*sI;
-        ++sI;
-        if (CallInst* CI = dyn_cast<CallInst>(currInst))
-        {
-            unsigned numOperands = CI->getNumArgOperands();
-            for (unsigned i = 0; i < numOperands; i++)
-            {
-                Value* operand = CI->getArgOperand(i);
-                if (isa<VectorType>(operand->getType()))
-                {
-                    workset.push_back(operand);
-                }
-            }
-        }
-        else if (auto IEI = dyn_cast<InsertElementInst>(currInst))
-        {
-            Value* scalarIndexVal = IEI->getOperand(2);
-            // If the index is not a constant - we cannot statically remove this inst
-            if (!isa<ConstantInt>(scalarIndexVal)) {
-                workset.push_back(IEI);
-            }
-        }
-        else if (auto EEI = dyn_cast<ExtractElementInst>(currInst))
-        {
-            Value* scalarIndexVal = EEI->getOperand(1);
-            // If the index is not a constant - we cannot statically remove this inst
-            if (!isa<ConstantInt>(scalarIndexVal)) {
-                workset.push_back(EEI->getOperand(0));
-            }
-        }
-        else if (auto STI = dyn_cast<StoreInst>(currInst))
-        {
-            auto V = STI->getValueOperand();
-            if (V->getType()->isVectorTy())
-            {
-                workset.push_back(V);
-            }
-        }
-    }
-    while (!workset.empty())
-    {
-        auto Def = workset.back();
-        workset.pop_back();
-        if (m_Excludes.count(Def))
-        {
-            continue;
-        }
-        if (auto IEI = dyn_cast<InsertElementInst>(Def))
-        {
-            m_Excludes.insert(IEI);
-            if (!m_Excludes.count(IEI->getOperand(0)) &&
-                (isa<PHINode>(IEI->getOperand(0)) ||
-                 isa<ShuffleVectorInst>(IEI->getOperand(0)) ||
-                 isa<InsertElementInst>(IEI->getOperand(0))))
-            {
-                workset.push_back(IEI->getOperand(0));
-            }
-        }
-        else if (auto SVI = dyn_cast<ShuffleVectorInst>(Def))
-        {
-            m_Excludes.insert(SVI);
-            if (!m_Excludes.count(SVI->getOperand(0)) &&
-                (isa<PHINode>(SVI->getOperand(0)) ||
-                 isa<ShuffleVectorInst>(SVI->getOperand(0)) ||
-                 isa<InsertElementInst>(SVI->getOperand(0))))
-            {
-                workset.push_back(SVI->getOperand(0));
-            }
-            if (!m_Excludes.count(SVI->getOperand(1)) &&
-                (isa<PHINode>(SVI->getOperand(1)) ||
-                 isa<ShuffleVectorInst>(SVI->getOperand(1)) ||
-                 isa<InsertElementInst>(SVI->getOperand(1))))
-            {
-                workset.push_back(SVI->getOperand(1));
-            }
-        }
-        else if (auto PHI = dyn_cast<PHINode>(Def))
-        {
-            m_Excludes.insert(PHI);
-            for (int i = 0, n = PHI->getNumOperands(); i < n; ++i)
-                if (!m_Excludes.count(PHI->getOperand(i)) &&
-                    (isa<PHINode>(PHI->getOperand(i)) ||
-                     isa<ShuffleVectorInst>(PHI->getOperand(i)) ||
-                     isa<InsertElementInst>(PHI->getOperand(i))))
-                {
-                    workset.push_back(PHI->getOperand(i));
-                }
-        }
-        else
-        {
-            continue;
-        }
-        // check use
-        for (auto U : Def->users())
-        {
-            if (!m_Excludes.count(U) &&
-                (isa<PHINode>(U) ||
-                 isa<ShuffleVectorInst>(U) ||
-                 isa<InsertElementInst>(U)))
-            {
-                workset.push_back(U);
-            }
-        }
-    }
-}
-
 void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
 {
     V_PRINT(scalarizer, "\tScalarizing Instruction: " << *I << "\n");
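The deleted buildExclusiveSet routine was the "selective" part of the reverted change: it seeded a worklist with vector call arguments, vector store values, and dynamically indexed insert/extract-element values, then walked the connected web of PHINode, ShuffleVectorInst, and InsertElementInst definitions and users, excluding the whole web from scalarization so that vector PHIs survived for vector coalescing. A hypothetical IR web it would have excluded (illustration only, not from this commit):

    define <4 x float> @fill(<4 x float> %init, float %x, i32 %n) {
    entry:
      br label %loop
    loop:
      ; vector PHI and its insertelement web stayed vectorized under the reverted scheme
      %acc = phi <4 x float> [ %init, %entry ], [ %acc.next, %loop ]
      %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
      ; non-constant lane index: not statically removable, so it seeds the workset
      %acc.next = insertelement <4 x float> %acc, float %x, i32 %i
      %i.next = add i32 %i, 1
      %done = icmp eq i32 %i.next, %n
      br i1 %done, label %exit, label %loop
    exit:
      ret <4 x float> %acc.next
    }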
@@ -358,6 +235,13 @@ void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
     case Instruction::GetElementPtr:
         scalarizeInstruction(dyn_cast<GetElementPtrInst>(I));
         break;
+    case Instruction::Load:
+        scalarizeInstruction(dyn_cast<LoadInst>(I));
+        break;
+    case Instruction::Store:
+        scalarizeInstruction(dyn_cast<StoreInst>(I));
+        break;
+
     // The remaining instructions are not supported for scalarization. Keep "as is"
     default:
         recoverNonScalarizableInst(I);
@@ -1008,6 +892,149 @@ void ScalarizeFunction::scalarizeInstruction(GetElementPtrInst* GI)
     m_removedInsts.insert(GI);
 }

+void ScalarizeFunction::scalarizeInstruction(LoadInst* LI)
+{
+    V_PRINT(scalarizer, "\t\tLoad instruction\n");
+    IGC_ASSERT_MESSAGE(LI, "instruction type dynamic cast failed");
+
+    VectorType* dataType = dyn_cast<VectorType>(LI->getType());
+    if (isScalarizableLoadStoreType(dataType) && m_pDL)
+    {
+        // Prepare empty SCM entry for the instruction
+        SCMEntry* newEntry = getSCMEntry(LI);
+
+        // Get additional info from instruction
+        unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
+        unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
+        IGC_ASSERT(elementSize);
+        IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiple of element size");
+        IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiple of element size");
+        unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
+
+        // Obtain scalarized arguments
+        // 1 - to allow scalarizing Load with any pointer type
+        // 0 - to limit scalarizing to the special case where the packetizer benefits from the scalarizing
+#if 1
+        // Apply the bit-cast on the GEP base and add the base-offset, then fix the index by multiplying it by numElements (assuming one index only).
+        Value* GepPtr = LI->getOperand(0);
+        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
+        Type* indexType = Type::getInt32Ty(*m_moduleContext);
+        // Generate new (scalar) instructions
+        SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> newScalarizedInsts;
+        newScalarizedInsts.resize(numDupElements);
+        for (unsigned dup = 0; dup < numDupElements; dup++)
+        {
+            Constant* laneVal = ConstantInt::get(indexType, dup);
+            Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", LI);
+            newScalarizedInsts[dup] = new LoadInst(pGEP->getType()->getPointerElementType(), pGEP, LI->getName(), LI);
+        }
+#else
+        GetElementPtrInst* operand = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
+        if (!operand || operand->getNumIndices() != 1)
+        {
+            return recoverNonScalarizableInst(LI);
+        }
+        // Apply the bit-cast on the GEP base and add the base-offset, then fix the index by multiplying it by numElements (assuming one index only).
+        Value* GepPtr = operand->getPointerOperand();
+        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
+        Type* indexType = operand->getOperand(1)->getType();
+        // Generate new (scalar) instructions
+        Value* newScalarizedInsts[MAX_INPUT_VECTOR_WIDTH];
+        Constant* elementNumVal = ConstantInt::get(indexType, numElements);
+        for (unsigned dup = 0; dup < numDupElements; dup++)
+        {
+            Constant* laneVal = ConstantInt::get(indexType, dup);
+            Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", LI);
+            Value* pIndex = BinaryOperator::CreateMul(operand->getOperand(1), elementNumVal, "GEPIndex_s", LI);
+            pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", LI);
+            newScalarizedInsts[dup] = new LoadInst(pGEP, LI->getName(), LI);
+        }
+#endif
+        // Add new value/s to SCM
+        updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), LI, true);
+
+        // Remove original instruction
+        m_removedInsts.insert(LI);
+        return;
+    }
+    return recoverNonScalarizableInst(LI);
+}
+
+void ScalarizeFunction::scalarizeInstruction(StoreInst* SI)
+{
+    V_PRINT(scalarizer, "\t\tStore instruction\n");
+    IGC_ASSERT_MESSAGE(SI, "instruction type dynamic cast failed");
+
+    int indexPtr = SI->getPointerOperandIndex();
+    int indexData = 1 - indexPtr;
+    VectorType* dataType = dyn_cast<VectorType>(SI->getOperand(indexData)->getType());
+    if (isScalarizableLoadStoreType(dataType) && m_pDL)
+    {
+        // Get additional info from instruction
+        unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
+        unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
+        IGC_ASSERT(elementSize);
+        IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiple of element size");
+        IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiple of element size");
+
+        unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
+
+        // Obtain scalarized arguments
+        // 1 - to allow scalarizing Store with any pointer type
+        // 0 - to limit scalarizing to the special case where the packetizer benefits from the scalarizing
+#if 1
+        SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> operand0;
+
+        bool opIsConst;
+        obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
+
+        // Apply the bit-cast on the GEP base and add the base-offset, then fix the index by multiplying it by numElements (assuming one index only).
+        Value* GepPtr = SI->getOperand(indexPtr);
+        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
+        Type* indexType = Type::getInt32Ty(*m_moduleContext);
+        // Generate new (scalar) instructions
+        for (unsigned dup = 0; dup < numDupElements; dup++)
+        {
+            Constant* laneVal = ConstantInt::get(indexType, dup);
+            Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", SI);
+            new StoreInst(operand0[dup], pGEP, SI);
+        }
+#else
+        GetElementPtrInst* operand1 = dyn_cast<GetElementPtrInst>(SI->getOperand(indexPtr));
+        if (!operand1 || operand1->getNumIndices() != 1)
+        {
+            return recoverNonScalarizableInst(SI);
+        }
+        Value* operand0[MAX_INPUT_VECTOR_WIDTH];
+        bool opIsConst;
+        obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
+
+        // Apply the bit-cast on the GEP base and add the base-offset, then fix the index by multiplying it by numElements (assuming one index only).
+        Value* GepPtr = operand1->getPointerOperand();
+        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
+        Type* indexType = operand1->getOperand(1)->getType();
+        // Generate new (scalar) instructions
+        Constant* elementNumVal = ConstantInt::get(indexType, numElements);
+        for (unsigned dup = 0; dup < numDupElements; dup++)
+        {
+            Constant* laneVal = ConstantInt::get(indexType, dup);
+            Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", SI);
+            Value* pIndex = BinaryOperator::CreateMul(operand1->getOperand(1), elementNumVal, "GEPIndex_s", SI);
+            pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", SI);
+            new StoreInst(operand0[dup], pGEP, SI);
+        }
+#endif
+        // Remove original instruction
+        m_removedInsts.insert(SI);
+        return;
+    }
+    return recoverNonScalarizableInst(SI);
+}
+
 void ScalarizeFunction::obtainScalarizedValues(SmallVectorImpl<Value*>& retValues, bool* retIsConstant,
     Value* origValue, Instruction* origInst, int destIdx)
 {
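To make the new Load/Store handlers concrete, here is a hypothetical before/after sketch (illustrative IR, not from this commit, assuming the pass was created with true): each vector access becomes one pointer bitcast ("ptrVec2ptrScl") plus one "GEP_lane" and one scalar access per element, which is the pattern the code above emits.

    ; before (hypothetical input)
    define void @copy2(<2 x float>* %src, <2 x float>* %dst) {
    entry:
      %v = load <2 x float>, <2 x float>* %src
      store <2 x float> %v, <2 x float>* %dst
      ret void
    }

    ; after scalarization of the load and the store
    define void @copy2(<2 x float>* %src, <2 x float>* %dst) {
    entry:
      %srcScl = bitcast <2 x float>* %src to float*          ; "ptrVec2ptrScl"
      %v0.ptr = getelementptr float, float* %srcScl, i32 0   ; "GEP_lane" 0
      %v0 = load float, float* %v0.ptr
      %v1.ptr = getelementptr float, float* %srcScl, i32 1   ; "GEP_lane" 1
      %v1 = load float, float* %v1.ptr
      %dstScl = bitcast <2 x float>* %dst to float*
      %s0.ptr = getelementptr float, float* %dstScl, i32 0
      store float %v0, float* %s0.ptr
      %s1.ptr = getelementptr float, float* %dstScl, i32 1
      store float %v1, float* %s1.ptr
      ret void
    }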
@@ -1384,9 +1411,17 @@ void ScalarizeFunction::resolveDeferredInstructions()
     m_DRL.clear();
 }

-extern "C" FunctionPass* createScalarizerPass(bool selectiveScalarization)
+bool ScalarizeFunction::isScalarizableLoadStoreType(VectorType* type)
+{
+    // Scalarizing Load/Store is worth doing only if:
+    // 1. Gather/Scatter are supported
+    // 2. The Load/Store type is a vector
+    return (m_ScalarizingVectorLDSTType && (NULL != type));
+}
+
+extern "C" FunctionPass* createScalarizerPass(bool scalarizingVectorLDSTType)
 {
-    return new ScalarizeFunction(selectiveScalarization);
+    return new ScalarizeFunction(scalarizingVectorLDSTType);
 }