@@ -69,12 +69,12 @@ IGC_INITIALIZE_PASS_END(ScalarizeFunction, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG
char ScalarizeFunction::ID = 0;

- ScalarizeFunction::ScalarizeFunction(bool selectiveScalarization) : FunctionPass(ID)
+ ScalarizeFunction::ScalarizeFunction(bool scalarizingVectorLDSTType) : FunctionPass(ID)
{
    initializeScalarizeFunctionPass(*PassRegistry::getPassRegistry());

    for (int i = 0; i < Instruction::OtherOpsEnd; i++) m_transposeCtr[i] = 0;
-     m_SelectiveScalarization = selectiveScalarization;
+     m_ScalarizingVectorLDSTType = scalarizingVectorLDSTType;

    // Initialize SCM buffers and allocation
    m_SCMAllocationArray = new SCMEntry[ESTIMATED_INST_NUM];
@@ -121,13 +121,6 @@ bool ScalarizeFunction::runOnFunction(Function& F)
    m_SCM.clear();
    releaseAllSCMEntries();
    m_DRL.clear();
-     m_Excludes.clear();
-
-     // collecting instructions that we want to avoid scalarization
-     if (m_SelectiveScalarization)
-     {
-         buildExclusiveSet();
-     }

    // Scalarization. Iterate over all the instructions
    // Always hold the iterator at the instruction following the one being scalarized (so the
@@ -139,10 +132,7 @@ bool ScalarizeFunction::runOnFunction(Function& F)
        Instruction* currInst = &*sI;
        // Move iterator to next instruction BEFORE scalarizing current instruction
        ++sI;
-         if (!m_Excludes.count(currInst))
-         {
-             dispatchInstructionToScalarize(currInst);
-         }
+         dispatchInstructionToScalarize(currInst);
    }

    resolveVectorValues();
@@ -171,119 +161,6 @@ bool ScalarizeFunction::runOnFunction(Function& F)
    return true;
}

- void ScalarizeFunction::buildExclusiveSet()
- {
-     inst_iterator sI = inst_begin(m_currFunc);
-     inst_iterator sE = inst_end(m_currFunc);
-     std::vector<llvm::Value*> workset;
-     while (sI != sE)
-     {
-         Instruction* currInst = &*sI;
-         ++sI;
-         if (CallInst* CI = dyn_cast<CallInst>(currInst))
-         {
-             unsigned numOperands = CI->getNumArgOperands();
-             for (unsigned i = 0; i < numOperands; i++)
-             {
-                 Value* operand = CI->getArgOperand(i);
-                 if (isa<VectorType>(operand->getType()))
-                 {
-                     workset.push_back(operand);
-                 }
-             }
-         }
-         else if (auto IEI = dyn_cast<InsertElementInst>(currInst))
-         {
-             Value* scalarIndexVal = IEI->getOperand(2);
-             // If the index is not a constant - we cannot statically remove this inst
-             if (!isa<ConstantInt>(scalarIndexVal)) {
-                 workset.push_back(IEI);
-             }
-         }
-         else if (auto EEI = dyn_cast<ExtractElementInst>(currInst))
-         {
-             Value* scalarIndexVal = EEI->getOperand(1);
-             // If the index is not a constant - we cannot statically remove this inst
-             if (!isa<ConstantInt>(scalarIndexVal)) {
-                 workset.push_back(EEI->getOperand(0));
-             }
-         }
-         else if (auto STI = dyn_cast<StoreInst>(currInst))
-         {
-             auto V = STI->getValueOperand();
-             if (V->getType()->isVectorTy())
-             {
-                 workset.push_back(V);
-             }
-         }
-     }
-     while (!workset.empty())
-     {
-         auto Def = workset.back();
-         workset.pop_back();
-         if (m_Excludes.count(Def))
-         {
-             continue;
-         }
-         if (auto IEI = dyn_cast<InsertElementInst>(Def))
-         {
-             m_Excludes.insert(IEI);
-             if (!m_Excludes.count(IEI->getOperand(0)) &&
-                 (isa<PHINode>(IEI->getOperand(0)) ||
-                  isa<ShuffleVectorInst>(IEI->getOperand(0)) ||
-                  isa<InsertElementInst>(IEI->getOperand(0))))
-             {
-                 workset.push_back(IEI->getOperand(0));
-             }
-         }
-         else if (auto SVI = dyn_cast<ShuffleVectorInst>(Def))
-         {
-             m_Excludes.insert(SVI);
-             if (!m_Excludes.count(SVI->getOperand(0)) &&
-                 (isa<PHINode>(SVI->getOperand(0)) ||
-                  isa<ShuffleVectorInst>(SVI->getOperand(0)) ||
-                  isa<InsertElementInst>(SVI->getOperand(0))))
-             {
-                 workset.push_back(SVI->getOperand(0));
-             }
-             if (!m_Excludes.count(SVI->getOperand(1)) &&
-                 (isa<PHINode>(SVI->getOperand(1)) ||
-                  isa<ShuffleVectorInst>(SVI->getOperand(1)) ||
-                  isa<InsertElementInst>(SVI->getOperand(1))))
-             {
-                 workset.push_back(SVI->getOperand(1));
-             }
-         }
-         else if (auto PHI = dyn_cast<PHINode>(Def))
-         {
-             m_Excludes.insert(PHI);
-             for (int i = 0, n = PHI->getNumOperands(); i < n; ++i)
-                 if (!m_Excludes.count(PHI->getOperand(i)) &&
-                     (isa<PHINode>(PHI->getOperand(i)) ||
-                      isa<ShuffleVectorInst>(PHI->getOperand(i)) ||
-                      isa<InsertElementInst>(PHI->getOperand(i))))
-                 {
-                     workset.push_back(PHI->getOperand(i));
-                 }
-         }
-         else
-         {
-             continue;
-         }
-         // check use
-         for (auto U : Def->users())
-         {
-             if (!m_Excludes.count(U) &&
-                 (isa<PHINode>(U) ||
-                  isa<ShuffleVectorInst>(U) ||
-                  isa<InsertElementInst>(U)))
-             {
-                 workset.push_back(U);
-             }
-         }
-     }
- }
-
void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
{
    V_PRINT(scalarizer, "\tScalarizing Instruction: " << *I << "\n");
@@ -358,6 +235,13 @@ void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
    case Instruction::GetElementPtr:
        scalarizeInstruction(dyn_cast<GetElementPtrInst>(I));
        break;
+     case Instruction::Load:
+         scalarizeInstruction(dyn_cast<LoadInst>(I));
+         break;
+     case Instruction::Store:
+         scalarizeInstruction(dyn_cast<StoreInst>(I));
+         break;
+
    // The remaining instructions are not supported for scalarization. Keep "as is"
    default:
        recoverNonScalarizableInst(I);
@@ -1008,6 +892,149 @@ void ScalarizeFunction::scalarizeInstruction(GetElementPtrInst* GI)
    m_removedInsts.insert(GI);
}

+ void ScalarizeFunction::scalarizeInstruction(LoadInst* LI)
+ {
+     V_PRINT(scalarizer, "\t\tLoad instruction\n");
+     IGC_ASSERT_MESSAGE(LI, "instruction type dynamic cast failed");
+
+     VectorType* dataType = dyn_cast<VectorType>(LI->getType());
+     if (isScalarizableLoadStoreType(dataType) && m_pDL)
+     {
+         // Prepare empty SCM entry for the instruction
+         SCMEntry* newEntry = getSCMEntry(LI);
+
+         // Get additional info from instruction
+         unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
+         unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
+         IGC_ASSERT(elementSize);
+         IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiple of element size");
+         IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiple of element size");
+         unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
+
+         // Obtain scalarized arguments
+         // 1 - to allow scalarizing Load with any pointer type
+         // 0 - to limit scalarizing to the special case where the packetizer benefits from the scalarizing
+ #if 1
+         // Apply the bit-cast on the GEP base and add the base-offset, then fix the index by multiplying it with numElements (assuming one index only).
+         Value* GepPtr = LI->getOperand(0);
+         PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+         Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
+         Type* indexType = Type::getInt32Ty(*m_moduleContext);
+         // Generate new (scalar) instructions
+         SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> newScalarizedInsts;
+         newScalarizedInsts.resize(numDupElements);
+         for (unsigned dup = 0; dup < numDupElements; dup++)
+         {
+             Constant* laneVal = ConstantInt::get(indexType, dup);
+             Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", LI);
+             newScalarizedInsts[dup] = new LoadInst(pGEP->getType()->getPointerElementType(), pGEP, LI->getName(), LI);
+         }
+ #else
+         GetElementPtrInst* operand = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
+         if (!operand || operand->getNumIndices() != 1)
+         {
+             return recoverNonScalarizableInst(LI);
+         }
+         // Apply the bit-cast on the GEP base and add the base-offset, then fix the index by multiplying it with numElements (assuming one index only).
+         Value* GepPtr = operand->getPointerOperand();
+         PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+         Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
+         Type* indexType = operand->getOperand(1)->getType();
+         // Generate new (scalar) instructions
+         Value* newScalarizedInsts[MAX_INPUT_VECTOR_WIDTH];
+         Constant* elementNumVal = ConstantInt::get(indexType, numElements);
+         for (unsigned dup = 0; dup < numDupElements; dup++)
+         {
+             Constant* laneVal = ConstantInt::get(indexType, dup);
+             Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", LI);
+             Value* pIndex = BinaryOperator::CreateMul(operand->getOperand(1), elementNumVal, "GEPIndex_s", LI);
+             pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", LI);
+             newScalarizedInsts[dup] = new LoadInst(pGEP, LI->getName(), LI);
+         }
+ #endif
+         // Add new value/s to SCM
+         updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), LI, true);
+
+         // Remove original instruction
+         m_removedInsts.insert(LI);
+         return;
+     }
+     return recoverNonScalarizableInst(LI);
+ }
+
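For orientation, here is a minimal, self-contained sketch of the per-lane expansion the new load path above performs, written with IRBuilder instead of raw instruction constructors. It is illustrative only: the helper name expandVectorLoad, the builder style, and the omission of the SCM bookkeeping are assumptions made for brevity, not part of the patch.

    // Minimal sketch (not from the patch): expand "load <N x T>, <N x T>* %p"
    // into N scalar loads through a bitcast base pointer, as the pass does.
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/IRBuilder.h"

    using namespace llvm;

    static void expandVectorLoad(LoadInst* LI)   // hypothetical helper name
    {
        auto* vecTy = cast<VectorType>(LI->getType());
        Type* eltTy = vecTy->getElementType();
        IRBuilder<> builder(LI);                 // insert the scalar ops right before LI
        Value* base = builder.CreatePointerCast(
            LI->getPointerOperand(),
            eltTy->getPointerTo(LI->getPointerAddressSpace()), "ptrVec2ptrScl");
        SmallVector<Value*, 8> lanes;
        for (unsigned i = 0, n = vecTy->getNumElements(); i < n; ++i)
        {
            Value* lanePtr = builder.CreateConstGEP1_32(eltTy, base, i, "GEP_lane");
            lanes.push_back(builder.CreateLoad(eltTy, lanePtr, LI->getName()));
        }
        // The real pass records "lanes" in its SCM entry (updateSCMEntryWithValues)
        // and queues LI for removal instead of rebuilding a vector here.
    }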
+ void ScalarizeFunction::scalarizeInstruction(StoreInst* SI)
+ {
+     V_PRINT(scalarizer, "\t\tStore instruction\n");
+     IGC_ASSERT_MESSAGE(SI, "instruction type dynamic cast failed");
+
+     int indexPtr = SI->getPointerOperandIndex();
+     int indexData = 1 - indexPtr;
+     VectorType* dataType = dyn_cast<VectorType>(SI->getOperand(indexData)->getType());
+     if (isScalarizableLoadStoreType(dataType) && m_pDL)
+     {
+         // Get additional info from instruction
+         unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
+         unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
+         IGC_ASSERT(elementSize);
+         IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiple of element size");
+         IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiple of element size");
+
+         unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
+
+         // Obtain scalarized arguments
+         // 1 - to allow scalarizing Store with any pointer type
+         // 0 - to limit scalarizing to the special case where the packetizer benefits from the scalarizing
+ #if 1
+         SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> operand0;
+
+         bool opIsConst;
+         obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
+
+         // Apply the bit-cast on the GEP base and add the base-offset, then fix the index by multiplying it with numElements (assuming one index only).
+         Value* GepPtr = SI->getOperand(indexPtr);
+         PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+         Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
+         Type* indexType = Type::getInt32Ty(*m_moduleContext);
+         // Generate new (scalar) instructions
+         for (unsigned dup = 0; dup < numDupElements; dup++)
+         {
+             Constant* laneVal = ConstantInt::get(indexType, dup);
+             Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", SI);
+             new StoreInst(operand0[dup], pGEP, SI);
+         }
+ #else
+         GetElementPtrInst* operand1 = dyn_cast<GetElementPtrInst>(SI->getOperand(indexPtr));
+         if (!operand1 || operand1->getNumIndices() != 1)
+         {
+             return recoverNonScalarizableInst(SI);
+         }
+         Value* operand0[MAX_INPUT_VECTOR_WIDTH];
+         bool opIsConst;
+         obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
+
+         // Apply the bit-cast on the GEP base and add the base-offset, then fix the index by multiplying it with numElements (assuming one index only).
+         Value* GepPtr = operand1->getPointerOperand();
+         PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+         Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
+         Type* indexType = operand1->getOperand(1)->getType();
+         // Generate new (scalar) instructions
+         Constant* elementNumVal = ConstantInt::get(indexType, numElements);
+         for (unsigned dup = 0; dup < numDupElements; dup++)
+         {
+             Constant* laneVal = ConstantInt::get(indexType, dup);
+             Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", SI);
+             Value* pIndex = BinaryOperator::CreateMul(operand1->getOperand(1), elementNumVal, "GEPIndex_s", SI);
+             pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", SI);
+             new StoreInst(operand0[dup], pGEP, SI);
+         }
+ #endif
+         // Remove original instruction
+         m_removedInsts.insert(SI);
+         return;
+     }
+     return recoverNonScalarizableInst(SI);
+ }
+
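The store path is the mirror image of the load path: the vector data operand is first broken into per-lane values (obtainScalarizedValues in the patch), and each lane is then stored through the same bitcast-plus-GEP addressing. A hedged sketch, again with an illustrative helper name and IRBuilder shorthand:

    // Minimal sketch (not from the patch): one scalar store per lane, assuming
    // the per-lane values have already been collected into "laneVals".
    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/IRBuilder.h"

    using namespace llvm;

    static void expandVectorStore(StoreInst* SI, ArrayRef<Value*> laneVals)   // hypothetical helper
    {
        auto* vecTy = cast<VectorType>(SI->getValueOperand()->getType());
        Type* eltTy = vecTy->getElementType();
        IRBuilder<> builder(SI);
        Value* base = builder.CreatePointerCast(
            SI->getPointerOperand(),
            eltTy->getPointerTo(SI->getPointerAddressSpace()), "ptrVec2ptrScl");
        for (unsigned i = 0, n = vecTy->getNumElements(); i < n; ++i)
        {
            Value* lanePtr = builder.CreateConstGEP1_32(eltTy, base, i, "GEP_lane");
            builder.CreateStore(laneVals[i], lanePtr);   // one scalar store per lane
        }
        // The original vector store is left in place for the pass to erase later
        // (it is recorded in m_removedInsts in the patch).
    }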
void ScalarizeFunction::obtainScalarizedValues(SmallVectorImpl<Value*>& retValues, bool* retIsConstant,
    Value* origValue, Instruction* origInst, int destIdx)
{
@@ -1384,9 +1411,17 @@ void ScalarizeFunction::resolveDeferredInstructions()
    m_DRL.clear();
}

- extern "C" FunctionPass* createScalarizerPass(bool selectiveScalarization)
+ bool ScalarizeFunction::isScalarizableLoadStoreType(VectorType* type)
+ {
+     // Scalarizing Load/Store is worth doing only if:
+     // 1. Gather/Scatter are supported
+     // 2. Load/Store type is a vector
+     return (m_ScalarizingVectorLDSTType && (NULL != type));
+ }
+
+ extern "C" FunctionPass* createScalarizerPass(bool scalarizingVectorLDSTType)
{
-     return new ScalarizeFunction(selectiveScalarization);
+     return new ScalarizeFunction(scalarizingVectorLDSTType);
}
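A hedged usage sketch of the renamed entry point: callers that previously passed selectiveScalarization now pass the load/store-scalarization flag instead. The legacy pass-manager wiring below is illustrative only; the actual IGC pipeline registers the pass through its own pass-support machinery.

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Pass.h"

    extern "C" llvm::FunctionPass* createScalarizerPass(bool scalarizingVectorLDSTType);

    // Hypothetical helper showing how a pipeline might request vector
    // load/store scalarization from this pass.
    static void addScalarizer(llvm::legacy::PassManager& PM, bool scalarizeVectorLdSt)
    {
        PM.add(createScalarizerPass(scalarizeVectorLdSt));
    }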