@@ -69,12 +69,12 @@ IGC_INITIALIZE_PASS_END(ScalarizeFunction, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG
 
 char ScalarizeFunction::ID = 0;
 
-ScalarizeFunction::ScalarizeFunction(bool scalarizingVectorLDSTType) : FunctionPass(ID)
+ScalarizeFunction::ScalarizeFunction(bool selectiveScalarization) : FunctionPass(ID)
 {
     initializeScalarizeFunctionPass(*PassRegistry::getPassRegistry());
 
     for (int i = 0; i < Instruction::OtherOpsEnd; i++) m_transposeCtr[i] = 0;
-    m_ScalarizingVectorLDSTType = scalarizingVectorLDSTType;
+    m_SelectiveScalarization = selectiveScalarization;
 
     // Initialize SCM buffers and allocation
     m_SCMAllocationArray = new SCMEntry[ESTIMATED_INST_NUM];
@@ -121,6 +121,13 @@ bool ScalarizeFunction::runOnFunction(Function& F)
     m_SCM.clear();
     releaseAllSCMEntries();
     m_DRL.clear();
+    m_Excludes.clear();
+
+    // Collect the instructions that we want to exclude from scalarization
+    if (m_SelectiveScalarization)
+    {
+        buildExclusiveSet();
+    }
 
     // Scalarization. Iterate over all the instructions
     // Always hold the iterator at the instruction following the one being scalarized (so the
@@ -132,7 +139,10 @@ bool ScalarizeFunction::runOnFunction(Function& F)
         Instruction* currInst = &*sI;
         // Move iterator to next instruction BEFORE scalarizing current instruction
         ++sI;
-        dispatchInstructionToScalarize(currInst);
+        if (!m_Excludes.count(currInst))
+        {
+            dispatchInstructionToScalarize(currInst);
+        }
     }
 
     resolveVectorValues();
@@ -161,6 +171,119 @@ bool ScalarizeFunction::runOnFunction(Function& F)
     return true;
 }
 
+void ScalarizeFunction::buildExclusiveSet()
+{
+    inst_iterator sI = inst_begin(m_currFunc);
+    inst_iterator sE = inst_end(m_currFunc);
+    std::vector<llvm::Value*> workset;
+    while (sI != sE)
+    {
+        Instruction* currInst = &*sI;
+        ++sI;
+        if (CallInst* CI = dyn_cast<CallInst>(currInst))
+        {
+            unsigned numOperands = CI->getNumArgOperands();
+            for (unsigned i = 0; i < numOperands; i++)
+            {
+                Value* operand = CI->getArgOperand(i);
+                if (isa<VectorType>(operand->getType()))
+                {
+                    workset.push_back(operand);
+                }
+            }
+        }
+        else if (auto IEI = dyn_cast<InsertElementInst>(currInst))
+        {
+            Value* scalarIndexVal = IEI->getOperand(2);
+            // If the index is not a constant - we cannot statically remove this inst
+            if (!isa<ConstantInt>(scalarIndexVal)) {
+                workset.push_back(IEI);
+            }
+        }
+        else if (auto EEI = dyn_cast<ExtractElementInst>(currInst))
+        {
+            Value* scalarIndexVal = EEI->getOperand(1);
+            // If the index is not a constant - we cannot statically remove this inst
+            if (!isa<ConstantInt>(scalarIndexVal)) {
+                workset.push_back(EEI->getOperand(0));
+            }
+        }
+        else if (auto STI = dyn_cast<StoreInst>(currInst))
+        {
+            auto V = STI->getValueOperand();
+            if (V->getType()->isVectorTy())
+            {
+                workset.push_back(V);
+            }
+        }
+    }
+    while (!workset.empty())
+    {
+        auto Def = workset.back();
+        workset.pop_back();
+        if (m_Excludes.count(Def))
+        {
+            continue;
+        }
+        if (auto IEI = dyn_cast<InsertElementInst>(Def))
+        {
+            m_Excludes.insert(IEI);
+            if (!m_Excludes.count(IEI->getOperand(0)) &&
+                (isa<PHINode>(IEI->getOperand(0)) ||
+                 isa<ShuffleVectorInst>(IEI->getOperand(0)) ||
+                 isa<InsertElementInst>(IEI->getOperand(0))))
+            {
+                workset.push_back(IEI->getOperand(0));
+            }
+        }
+        else if (auto SVI = dyn_cast<ShuffleVectorInst>(Def))
+        {
+            m_Excludes.insert(SVI);
+            if (!m_Excludes.count(SVI->getOperand(0)) &&
+                (isa<PHINode>(SVI->getOperand(0)) ||
+                 isa<ShuffleVectorInst>(SVI->getOperand(0)) ||
+                 isa<InsertElementInst>(SVI->getOperand(0))))
+            {
+                workset.push_back(SVI->getOperand(0));
+            }
+            if (!m_Excludes.count(SVI->getOperand(1)) &&
+                (isa<PHINode>(SVI->getOperand(1)) ||
+                 isa<ShuffleVectorInst>(SVI->getOperand(1)) ||
+                 isa<InsertElementInst>(SVI->getOperand(1))))
+            {
+                workset.push_back(SVI->getOperand(1));
+            }
+        }
+        else if (auto PHI = dyn_cast<PHINode>(Def))
+        {
+            m_Excludes.insert(PHI);
+            for (int i = 0, n = PHI->getNumOperands(); i < n; ++i)
+                if (!m_Excludes.count(PHI->getOperand(i)) &&
+                    (isa<PHINode>(PHI->getOperand(i)) ||
+                     isa<ShuffleVectorInst>(PHI->getOperand(i)) ||
+                     isa<InsertElementInst>(PHI->getOperand(i))))
+                {
+                    workset.push_back(PHI->getOperand(i));
+                }
+        }
+        else
+        {
+            continue;
+        }
+        // Check the users of this def as well
+        for (auto U : Def->users())
+        {
+            if (!m_Excludes.count(U) &&
+                (isa<PHINode>(U) ||
+                 isa<ShuffleVectorInst>(U) ||
+                 isa<InsertElementInst>(U)))
+            {
+                workset.push_back(U);
+            }
+        }
+    }
+}
+
 void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
 {
     V_PRINT(scalarizer, "\tScalarizing Instruction: " << *I << "\n");
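
Illustration (not from the patch): a minimal sketch of the seed pattern buildExclusiveSet() protects — an insertelement whose index is not a ConstantInt. The helper name and the IRBuilder plumbing are assumptions; only CreateInsertElement mirrors the case handled above.

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Hypothetical helper: because dynIdx is not a ConstantInt, the pass cannot
// statically rewrite this access into scalar slots, so the instruction goes
// onto the workset and, via the def-use walk above, into m_Excludes along
// with any PHI/shufflevector/insertelement feeding or consuming it.
Value* buildDynamicInsertSeed(IRBuilder<>& B, Value* vec, Value* elt, Value* dynIdx)
{
    return B.CreateInsertElement(vec, elt, dynIdx, "dyn_ins");
}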
@@ -235,13 +358,6 @@ void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
     case Instruction::GetElementPtr:
         scalarizeInstruction(dyn_cast<GetElementPtrInst>(I));
         break;
-    case Instruction::Load:
-        scalarizeInstruction(dyn_cast<LoadInst>(I));
-        break;
-    case Instruction::Store:
-        scalarizeInstruction(dyn_cast<StoreInst>(I));
-        break;
-
     // The remaining instructions are not supported for scalarization. Keep "as is"
     default:
         recoverNonScalarizableInst(I);
@@ -892,149 +1008,6 @@ void ScalarizeFunction::scalarizeInstruction(GetElementPtrInst* GI)
     m_removedInsts.insert(GI);
 }
 
-void ScalarizeFunction::scalarizeInstruction(LoadInst* LI)
-{
-    V_PRINT(scalarizer, "\t\tLoad instruction\n");
-    IGC_ASSERT_MESSAGE(LI, "instruction type dynamic cast failed");
-
-    VectorType* dataType = dyn_cast<VectorType>(LI->getType());
-    if (isScalarizableLoadStoreType(dataType) && m_pDL)
-    {
-        // Prepare empty SCM entry for the instruction
-        SCMEntry* newEntry = getSCMEntry(LI);
-
-        // Get additional info from instruction
-        unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
-        unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
-        IGC_ASSERT(elementSize);
-        IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiple of element size");
-        IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiple of element size");
-        unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
-
-        // Obtain scalarized arguments
-        // 1 - to allow scalarizing Load with any pointer type
-        // 0 - to limit scalarizing to the special case where the packetizer benefits from the scalarizing
-#if 1
-        // Apply the bit-cast on the GEP base and add base-offset, then fix the index by multiplying it by numElements (assuming one index only).
-        Value* GepPtr = LI->getOperand(0);
-        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
-        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
-        Type* indexType = Type::getInt32Ty(*m_moduleContext);
-        // Generate new (scalar) instructions
-        SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> newScalarizedInsts;
-        newScalarizedInsts.resize(numDupElements);
-        for (unsigned dup = 0; dup < numDupElements; dup++)
-        {
-            Constant* laneVal = ConstantInt::get(indexType, dup);
-            Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", LI);
-            newScalarizedInsts[dup] = new LoadInst(pGEP->getType()->getPointerElementType(), pGEP, LI->getName(), LI);
-        }
-#else
-        GetElementPtrInst* operand = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
-        if (!operand || operand->getNumIndices() != 1)
-        {
-            return recoverNonScalarizableInst(LI);
-        }
-        // Apply the bit-cast on the GEP base and add base-offset, then fix the index by multiplying it by numElements (assuming one index only).
-        Value* GepPtr = operand->getPointerOperand();
-        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
-        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
-        Type* indexType = operand->getOperand(1)->getType();
-        // Generate new (scalar) instructions
-        Value* newScalarizedInsts[MAX_INPUT_VECTOR_WIDTH];
-        Constant* elementNumVal = ConstantInt::get(indexType, numElements);
-        for (unsigned dup = 0; dup < numDupElements; dup++)
-        {
-            Constant* laneVal = ConstantInt::get(indexType, dup);
-            Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", LI);
-            Value* pIndex = BinaryOperator::CreateMul(operand->getOperand(1), elementNumVal, "GEPIndex_s", LI);
-            pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", LI);
-            newScalarizedInsts[dup] = new LoadInst(pGEP, LI->getName(), LI);
-        }
-#endif
-        // Add new value/s to SCM
-        updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), LI, true);
-
-        // Remove original instruction
-        m_removedInsts.insert(LI);
-        return;
-    }
-    return recoverNonScalarizableInst(LI);
-}
-
-void ScalarizeFunction::scalarizeInstruction(StoreInst* SI)
-{
-    V_PRINT(scalarizer, "\t\tStore instruction\n");
-    IGC_ASSERT_MESSAGE(SI, "instruction type dynamic cast failed");
-
-    int indexPtr = SI->getPointerOperandIndex();
-    int indexData = 1 - indexPtr;
-    VectorType* dataType = dyn_cast<VectorType>(SI->getOperand(indexData)->getType());
-    if (isScalarizableLoadStoreType(dataType) && m_pDL)
-    {
-        // Get additional info from instruction
-        unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
-        unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
-        IGC_ASSERT(elementSize);
-        IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiple of element size");
-        IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiple of element size");
-
-        unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
-
-        // Obtain scalarized arguments
-        // 1 - to allow scalarizing Store with any pointer type
-        // 0 - to limit scalarizing to the special case where the packetizer benefits from the scalarizing
-#if 1
-        SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> operand0;
-
-        bool opIsConst;
-        obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
-
-        // Apply the bit-cast on the GEP base and add base-offset, then fix the index by multiplying it by numElements (assuming one index only).
-        Value* GepPtr = SI->getOperand(indexPtr);
-        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
-        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
-        Type* indexType = Type::getInt32Ty(*m_moduleContext);
-        // Generate new (scalar) instructions
-        for (unsigned dup = 0; dup < numDupElements; dup++)
-        {
-            Constant* laneVal = ConstantInt::get(indexType, dup);
-            Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", SI);
-            new StoreInst(operand0[dup], pGEP, SI);
-        }
-#else
-        GetElementPtrInst* operand1 = dyn_cast<GetElementPtrInst>(SI->getOperand(indexPtr));
-        if (!operand1 || operand1->getNumIndices() != 1)
-        {
-            return recoverNonScalarizableInst(SI);
-        }
-        Value* operand0[MAX_INPUT_VECTOR_WIDTH];
-        bool opIsConst;
-        obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
-
-        // Apply the bit-cast on the GEP base and add base-offset, then fix the index by multiplying it by numElements (assuming one index only).
-        Value* GepPtr = operand1->getPointerOperand();
-        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
-        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
-        Type* indexType = operand1->getOperand(1)->getType();
-        // Generate new (scalar) instructions
-        Constant* elementNumVal = ConstantInt::get(indexType, numElements);
-        for (unsigned dup = 0; dup < numDupElements; dup++)
-        {
-            Constant* laneVal = ConstantInt::get(indexType, dup);
-            Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", SI);
-            Value* pIndex = BinaryOperator::CreateMul(operand1->getOperand(1), elementNumVal, "GEPIndex_s", SI);
-            pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", SI);
-            new StoreInst(operand0[dup], pGEP, SI);
-        }
-#endif
-        // Remove original instruction
-        m_removedInsts.insert(SI);
-        return;
-    }
-    return recoverNonScalarizableInst(SI);
-}
-
 void ScalarizeFunction::obtainScalarizedValues(SmallVectorImpl<Value*>& retValues, bool* retIsConstant,
     Value* origValue, Instruction* origInst, int destIdx)
 {
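
Context for the removal above (not from the tree): the deleted load path amounted to casting the vector pointer to an element pointer and emitting one GEP plus one load per lane. A condensed sketch, assuming the same pre-opaque-pointer LLVM the deleted code targets; the helper name and parameters are hypothetical.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Hypothetical condensed form of the deleted per-lane load expansion.
void emitPerLaneLoads(IRBuilder<>& B, LoadInst* LI, VectorType* VT,
                      SmallVectorImpl<Value*>& lanes)
{
    PointerType* PT = cast<PointerType>(LI->getPointerOperand()->getType());
    // <N x T>* -> T* in the same address space, as "ptrVec2ptrScl" did
    Value* base = B.CreatePointerCast(
        LI->getPointerOperand(),
        VT->getElementType()->getPointerTo(PT->getAddressSpace()));
    for (unsigned i = 0, n = (unsigned)VT->getNumElements(); i < n; ++i)
    {
        // One scalar slot per lane, like the "GEP_lane" loop in the deleted code
        Value* gep = B.CreateConstGEP1_32(VT->getElementType(), base, i);
        lanes.push_back(B.CreateLoad(VT->getElementType(), gep));
    }
}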
@@ -1411,17 +1384,9 @@ void ScalarizeFunction::resolveDeferredInstructions()
     m_DRL.clear();
 }
 
-bool ScalarizeFunction::isScalarizableLoadStoreType(VectorType* type)
-{
-    // Scalarizing Load/Store is worth doing only if:
-    // 1. Gather/Scatter are supported
-    // 2. The Load/Store type is a vector
-    return (m_ScalarizingVectorLDSTType && (NULL != type));
-}
-
-extern "C" FunctionPass* createScalarizerPass(bool scalarizingVectorLDSTType)
+extern "C" FunctionPass* createScalarizerPass(bool selectiveScalarization)
 {
-    return new ScalarizeFunction(scalarizingVectorLDSTType);
+    return new ScalarizeFunction(selectiveScalarization);
 }
 
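
Usage sketch (not part of the patch): how a driver might opt into the renamed flag. The wrapper function and legacy pass-manager setup are assumptions; only createScalarizerPass comes from the diff above.

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Pass.h"

extern "C" llvm::FunctionPass* createScalarizerPass(bool selectiveScalarization);

void addScalarizer(llvm::legacy::PassManager& PM)
{
    // Passing true enables the exclusion analysis: vector values reachable
    // from call arguments, vector stores, and dynamically indexed element
    // accesses keep their vector form; everything else is scalarized as before.
    PM.add(createScalarizerPass(true));
}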