@@ -69,12 +69,12 @@ IGC_INITIALIZE_PASS_END(ScalarizeFunction, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG
 
 char ScalarizeFunction::ID = 0;
 
-ScalarizeFunction::ScalarizeFunction(bool scalarizingVectorLDSTType) : FunctionPass(ID)
+ScalarizeFunction::ScalarizeFunction(bool selectiveScalarization) : FunctionPass(ID)
 {
     initializeScalarizeFunctionPass(*PassRegistry::getPassRegistry());
 
     for (int i = 0; i < Instruction::OtherOpsEnd; i++) m_transposeCtr[i] = 0;
-    m_ScalarizingVectorLDSTType = scalarizingVectorLDSTType;
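+    // When enabled, buildExclusiveSet() runs before scalarization and
+    // collects vector values that must be left in vector form.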
+    m_SelectiveScalarization = selectiveScalarization;
 
     // Initialize SCM buffers and allocation
     m_SCMAllocationArray = new SCMEntry[ESTIMATED_INST_NUM];
@@ -121,6 +121,13 @@ bool ScalarizeFunction::runOnFunction(Function& F)
     m_SCM.clear();
     releaseAllSCMEntries();
     m_DRL.clear();
+    m_Excludes.clear();
+
+    // Collect the instructions that we want to exclude from scalarization
+    if (m_SelectiveScalarization)
+    {
+        buildExclusiveSet();
+    }
 
     // Scalarization. Iterate over all the instructions
     // Always hold the iterator at the instruction following the one being scalarized (so the
@@ -132,7 +139,14 @@ bool ScalarizeFunction::runOnFunction(Function& F)
         Instruction* currInst = &*sI;
         // Move iterator to next instruction BEFORE scalarizing current instruction
         ++sI;
-        dispatchInstructionToScalarize(currInst);
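+        // Instructions collected by buildExclusiveSet() must keep their
+        // vector form, so recover them instead of scalarizing.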
+        if (m_Excludes.count(currInst))
+        {
+            recoverNonScalarizableInst(currInst);
+        }
+        else
+        {
+            dispatchInstructionToScalarize(currInst);
+        }
     }
 
     resolveVectorValues();
@@ -161,6 +175,111 @@ bool ScalarizeFunction::runOnFunction(Function& F)
     return true;
 }
 
+void ScalarizeFunction::buildExclusiveSet()
+{
+    inst_iterator sI = inst_begin(m_currFunc);
+    inst_iterator sE = inst_end(m_currFunc);
+    std::vector<llvm::Value*> workset;
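+
+    // Pass 1: seed the work list with vector values that must remain in
+    // vector form - vector arguments of calls, insert-element instructions
+    // with a non-constant lane index, and the source vectors of
+    // extract-element instructions with a non-constant lane index.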
+    while (sI != sE)
+    {
+        Instruction* currInst = &*sI;
+        ++sI;
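+        // A call consumes its vector operands whole, so any vector argument
+        // must be excluded from scalarization.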
+        if (CallInst* CI = dyn_cast<CallInst>(currInst))
+        {
+            unsigned numOperands = CI->getNumArgOperands();
+            for (unsigned i = 0; i < numOperands; i++)
+            {
+                Value* operand = CI->getArgOperand(i);
+                if (isa<VectorType>(operand->getType()))
+                {
+                    workset.push_back(operand);
+                }
+            }
+        }
+        else if (auto IEI = dyn_cast<InsertElementInst>(currInst))
+        {
+            Value* scalarIndexVal = IEI->getOperand(2);
+            // If the index is not a constant, we cannot statically remove this inst
+            if (!isa<ConstantInt>(scalarIndexVal)) {
+                workset.push_back(IEI);
+            }
+        }
+        else if (auto EEI = dyn_cast<ExtractElementInst>(currInst))
+        {
+            Value* scalarIndexVal = EEI->getOperand(1);
+            // If the index is not a constant, we cannot statically remove this inst
+            if (!isa<ConstantInt>(scalarIndexVal)) {
+                workset.push_back(EEI->getOperand(0));
+            }
+        }
+    }
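+
+    // Pass 2: grow the exclusion set transitively. Starting from the seeds,
+    // follow chains of insert-element, shuffle-vector, and PHI nodes through
+    // both their operands and their users, so the entire web of instructions
+    // feeding an excluded vector value is kept in vector form.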
+    while (!workset.empty())
+    {
+        auto Def = workset.back();
+        workset.pop_back();
+        if (m_Excludes.count(Def))
+        {
+            continue;
+        }
+        if (auto IEI = dyn_cast<InsertElementInst>(Def))
+        {
+            m_Excludes.insert(IEI);
+            if (!m_Excludes.count(IEI->getOperand(0)) &&
+                (isa<PHINode>(IEI->getOperand(0)) ||
+                 isa<ShuffleVectorInst>(IEI->getOperand(0)) ||
+                 isa<InsertElementInst>(IEI->getOperand(0))))
+            {
+                workset.push_back(IEI->getOperand(0));
+            }
+        }
+        else if (auto SVI = dyn_cast<ShuffleVectorInst>(Def))
+        {
+            m_Excludes.insert(SVI);
+            if (!m_Excludes.count(SVI->getOperand(0)) &&
+                (isa<PHINode>(SVI->getOperand(0)) ||
+                 isa<ShuffleVectorInst>(SVI->getOperand(0)) ||
+                 isa<InsertElementInst>(SVI->getOperand(0))))
+            {
+                workset.push_back(SVI->getOperand(0));
+            }
+            if (!m_Excludes.count(SVI->getOperand(1)) &&
+                (isa<PHINode>(SVI->getOperand(1)) ||
+                 isa<ShuffleVectorInst>(SVI->getOperand(1)) ||
+                 isa<InsertElementInst>(SVI->getOperand(1))))
+            {
+                workset.push_back(SVI->getOperand(1));
+            }
+        }
+        else if (auto PHI = dyn_cast<PHINode>(Def))
+        {
+            m_Excludes.insert(PHI);
+            for (int i = 0, n = PHI->getNumOperands(); i < n; ++i)
+                if (!m_Excludes.count(PHI->getOperand(i)) &&
+                    (isa<PHINode>(PHI->getOperand(i)) ||
+                     isa<ShuffleVectorInst>(PHI->getOperand(i)) ||
+                     isa<InsertElementInst>(PHI->getOperand(i))))
+                {
+                    workset.push_back(PHI->getOperand(i));
+                }
+        }
+        else
+        {
+            continue;
+        }
+        // Check uses as well: any PHI, shuffle, or insert-element that
+        // consumes an excluded value must itself stay in vector form.
+        for (auto U : Def->users())
+        {
+            if (!m_Excludes.count(U) &&
+                (isa<PHINode>(U) ||
+                 isa<ShuffleVectorInst>(U) ||
+                 isa<InsertElementInst>(U)))
+            {
+                workset.push_back(U);
+            }
+        }
+    }
+}
+
 void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
 {
     V_PRINT(scalarizer, "\tScalarizing Instruction: " << *I << "\n");
@@ -235,13 +354,6 @@ void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
     case Instruction::GetElementPtr:
         scalarizeInstruction(dyn_cast<GetElementPtrInst>(I));
         break;
-    case Instruction::Load:
-        scalarizeInstruction(dyn_cast<LoadInst>(I));
-        break;
-    case Instruction::Store:
-        scalarizeInstruction(dyn_cast<StoreInst>(I));
-        break;
-
     // The remaining instructions are not supported for scalarization. Keep "as is"
     default:
         recoverNonScalarizableInst(I);
@@ -892,149 +1004,6 @@ void ScalarizeFunction::scalarizeInstruction(GetElementPtrInst* GI)
     m_removedInsts.insert(GI);
 }
 
-void ScalarizeFunction::scalarizeInstruction(LoadInst* LI)
-{
-    V_PRINT(scalarizer, "\t\tLoad instruction\n");
-    IGC_ASSERT_MESSAGE(LI, "instruction type dynamic cast failed");
-
-    VectorType* dataType = dyn_cast<VectorType>(LI->getType());
-    if (isScalarizableLoadStoreType(dataType) && m_pDL)
-    {
-        // Prepare empty SCM entry for the instruction
-        SCMEntry* newEntry = getSCMEntry(LI);
-
-        // Get additional info from instruction
-        unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
-        unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
-        IGC_ASSERT(elementSize);
-        IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiply of element size");
-        IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiply of element size");
-        unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
-
-        // Obtain scalarized arguments
-        // 1 - to allow scalarizing Load with any pointer type
-        // 0 - to limit scalarizing to special case where packetizer benifit from the scalarizing
-#if 1
-        // Apply the bit-cast on the GEP base and add base-offset then fix the index by multiply it with numElements. (assuming one index only).
-        Value* GepPtr = LI->getOperand(0);
-        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
-        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
-        Type* indexType = Type::getInt32Ty(*m_moduleContext);
-        // Generate new (scalar) instructions
-        SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> newScalarizedInsts;
-        newScalarizedInsts.resize(numDupElements);
-        for (unsigned dup = 0; dup < numDupElements; dup++)
-        {
-            Constant* laneVal = ConstantInt::get(indexType, dup);
-            Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", LI);
-            newScalarizedInsts[dup] = new LoadInst(pGEP->getType()->getPointerElementType(), pGEP, LI->getName(), LI);
-        }
-#else
-        GetElementPtrInst* operand = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
-        if (!operand || operand->getNumIndices() != 1)
-        {
-            return recoverNonScalarizableInst(LI);
-        }
-        // Apply the bit-cast on the GEP base and add base-offset then fix the index by multiply it with numElements. (assuming one index only).
-        Value* GepPtr = operand->getPointerOperand();
-        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
-        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
-        Type* indexType = operand->getOperand(1)->getType();
-        // Generate new (scalar) instructions
-        Value* newScalarizedInsts[MAX_INPUT_VECTOR_WIDTH];
-        Constant* elementNumVal = ConstantInt::get(indexType, numElements);
-        for (unsigned dup = 0; dup < numDupElements; dup++)
-        {
-            Constant* laneVal = ConstantInt::get(indexType, dup);
-            Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", LI);
-            Value* pIndex = BinaryOperator::CreateMul(operand->getOperand(1), elementNumVal, "GEPIndex_s", LI);
-            pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", LI);
-            newScalarizedInsts[dup] = new LoadInst(pGEP, LI->getName(), LI);
-        }
-#endif
-        // Add new value/s to SCM
-        updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), LI, true);
-
-        // Remove original instruction
-        m_removedInsts.insert(LI);
-        return;
-    }
-    return recoverNonScalarizableInst(LI);
-}
-
-void ScalarizeFunction::scalarizeInstruction(StoreInst* SI)
-{
-    V_PRINT(scalarizer, "\t\tStore instruction\n");
-    IGC_ASSERT_MESSAGE(SI, "instruction type dynamic cast failed");
-
-    int indexPtr = SI->getPointerOperandIndex();
-    int indexData = 1 - indexPtr;
-    VectorType* dataType = dyn_cast<VectorType>(SI->getOperand(indexData)->getType());
-    if (isScalarizableLoadStoreType(dataType) && m_pDL)
-    {
-        // Get additional info from instruction
-        unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
-        unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
-        IGC_ASSERT(elementSize);
-        IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiply of element size");
-        IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiply of element size");
-
-        unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
-
-        // Obtain scalarized arguments
-        // 1 - to allow scalarizing Load with any pointer type
-        // 0 - to limit scalarizing to special case where packetizer benifit from the scalarizing
-#if 1
-        SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> operand0;
-
-        bool opIsConst;
-        obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
-
-        // Apply the bit-cast on the GEP base and add base-offset then fix the index by multiply it with numElements. (assuming one index only).
-        Value* GepPtr = SI->getOperand(indexPtr);
-        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
-        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
-        Type* indexType = Type::getInt32Ty(*m_moduleContext);
-        // Generate new (scalar) instructions
-        for (unsigned dup = 0; dup < numDupElements; dup++)
-        {
-            Constant* laneVal = ConstantInt::get(indexType, dup);
-            Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", SI);
-            new StoreInst(operand0[dup], pGEP, SI);
-        }
-#else
-        GetElementPtrInst* operand1 = dyn_cast<GetElementPtrInst>(SI->getOperand(indexPtr));
-        if (!operand1 || operand1->getNumIndices() != 1)
-        {
-            return recoverNonScalarizableInst(SI);
-        }
-        Value* operand0[MAX_INPUT_VECTOR_WIDTH];
-        bool opIsConst;
-        obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
-
-        // Apply the bit-cast on the GEP base and add base-offset then fix the index by multiply it with numElements. (assuming one index only).
-        Value* GepPtr = operand1->getPointerOperand();
-        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
-        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
-        Type* indexType = operand1->getOperand(1)->getType();
-        // Generate new (scalar) instructions
-        Constant* elementNumVal = ConstantInt::get(indexType, numElements);
-        for (unsigned dup = 0; dup < numDupElements; dup++)
-        {
-            Constant* laneVal = ConstantInt::get(indexType, dup);
-            Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", SI);
-            Value* pIndex = BinaryOperator::CreateMul(operand1->getOperand(1), elementNumVal, "GEPIndex_s", SI);
-            pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", SI);
-            new StoreInst(operand0[dup], pGEP, SI);
-        }
-#endif
-        // Remove original instruction
-        m_removedInsts.insert(SI);
-        return;
-    }
-    return recoverNonScalarizableInst(SI);
-}
-
 void ScalarizeFunction::obtainScalarizedValues(SmallVectorImpl<Value*>& retValues, bool* retIsConstant,
     Value* origValue, Instruction* origInst, int destIdx)
 {
@@ -1411,17 +1380,9 @@ void ScalarizeFunction::resolveDeferredInstructions()
     m_DRL.clear();
 }
 
-bool ScalarizeFunction::isScalarizableLoadStoreType(VectorType* type)
-{
-    // Scalarize Load/Store worth doing only if:
-    // 1. Gather/Scatter are supported
-    // 2. Load/Store type is a vector
-    return (m_ScalarizingVectorLDSTType && (NULL != type));
-}
-
-extern "C" FunctionPass* createScalarizerPass(bool scalarizingVectorLDSTType)
+extern "C" FunctionPass* createScalarizerPass(bool selectiveScalarization)
 {
-    return new ScalarizeFunction(scalarizingVectorLDSTType);
+    return new ScalarizeFunction(selectiveScalarization);
 }
 