@@ -73,7 +73,6 @@ namespace
73
73
FunctionPass (ID)
74
74
{ }
75
75
76
- virtual bool doInitialization (Function& F);
77
76
virtual bool runOnFunction (Function& F);
78
77
79
78
virtual void getAnalysisUsage (AnalysisUsage& AU) const
@@ -94,8 +93,20 @@ namespace
94
93
// in order of offsets and merging adjacent writes.
95
94
void MergeInstructions ();
96
95
96
+ // Returns the dynamic URB base offset and an immediate const offset
97
+ // from the dynamic base. The function calculates the result by walking
98
+ // the use-def chain of pUrbOffset.
99
+ // If pUrbOffset is an immediate constant (==offset) then
100
+ // <nullptr, offset> is returned.
101
+ // For (base + constant) chains <base, sum of constants> is returned; if nothing can be peeled off, <pUrbOffset, 0> is returned.
102
+ std::pair<Value*, unsigned int > GetBaseAndOffset (Value* pUrbOffset);
103
+
97
104
// represents the map (urb index) --> (instruction, instruction index in BB)
98
- std::vector<InstWithIndex> m_writeList;
105
+ // The key consists of a dynamic URB base offset (key.first) and
106
+ // an immediate offset from this dynamic base
107
+ // Dynamic URB base offset is null if URB offset is constant.
108
+ std::map<std::pair<Value*, unsigned int >, InstWithIndex> m_writeList;
109
+
99
110
bool m_bbModified;
100
111
static char ID;
101
112
};
@@ -109,13 +120,6 @@ namespace
109
120
110
121
} // end of unnamed namespace to contain class definition and auxiliary functions
111
122
112
- // / Do initialization of the data structure.
113
- // / We want to allocate space for the vector only once.
114
- bool MergeURBWrites::doInitialization (Function& F)
115
- {
116
- m_writeList.reserve (128 ); // most of the time we won't exceed offset = 127
117
- return false ;
118
- }
119
123
120
124
// / This optimization merges shorter writes to URB to get a smaller number of longer writes
121
125
// / which is more efficient.
@@ -131,7 +135,7 @@ bool MergeURBWrites::doInitialization(Function& F)
131
135
// / locations with one.
132
136
// /
133
137
// / for now, we don't handle the following cases:
134
- // / 1) offset is a runtime value
138
+ // / 1) channel mask is a runtime value
135
139
// / 2) handling of writes of size >4
136
140
// / so e.g. we don't handle |aaaa|bbbbbbbb|cccc| -> |aaaabbbb|bbbbcccc|
137
141
// / this will be addressed in the future.
@@ -172,31 +176,28 @@ void MergeURBWrites::FillWriteList(BasicBlock& BB)
172
176
}
173
177
174
178
// intrinsic has the format: URB_write (%offset, %mask, %data0, ... , %data7)
175
- ConstantInt* pOffset = dyn_cast<ConstantInt>(iit->getOperand (0 ));
176
179
ConstantInt* pImmediateMask = dyn_cast<ConstantInt>(iit->getOperand (1 ));
177
- if (pOffset == nullptr || pImmediateMask == nullptr || (GetChannelMask (intrinsic) > 0x0F ))
180
+ if (pImmediateMask == nullptr || (GetChannelMask (intrinsic) > 0x0F ))
178
181
{
179
182
// for now, we don't handle the following cases:
180
- // 1) offset is a runtime value
181
- // 2) mask is a runtime value
182
- // 3) handling of writes of size >4
183
+ // 1) mask is a runtime value
184
+ // 2) handling of writes of size >4
183
185
// so e.g. we don't handle |aaaa|bbbbbbbb|cccc| -> |aaaabbbb|bbbbcccc|
184
186
// this will be addressed in the future
185
187
continue ;
186
188
}
187
- const unsigned int offset = int_cast<unsigned int >(pOffset->getZExtValue ());
188
- // if we reach outside of the vector, grow it (filling with nullptr)
189
- if (offset >= m_writeList.size ())
190
- {
191
- m_writeList.resize (offset + 1 );
192
- }
193
- auto elem = m_writeList[offset];
189
+
190
+ std::pair<Value*, unsigned int > baseAndOffset =
191
+ GetBaseAndOffset (iit->getOperand (0 ));
192
+
193
+ auto it = m_writeList.find (baseAndOffset);
194
194
// we encountered an instruction writing at the same offset,
195
195
// most likely we write RTAI, VAI or PSIZE to vertex header
196
196
// or we overwrite the old value
197
- if (elem. GetInst () != nullptr )
197
+ if (it != m_writeList. end () )
198
198
{
199
- auto oldMask = GetChannelMask (m_writeList[offset].GetInst ());
199
+ const InstWithIndex& instWithIndex = it->second ;
200
+ auto oldMask = GetChannelMask (instWithIndex.GetInst ());
200
201
auto newMask = GetChannelMask (intrinsic);
201
202
// assume the write lengths are <=4
202
203
// if we have writes to the same channel, we retain the later one,
@@ -219,21 +220,21 @@ void MergeURBWrites::FillWriteList(BasicBlock& BB)
219
220
{
220
221
intrinsic->setOperand (
221
222
opIndex + 2 ,
222
- m_writeList[offset] .GetInst ()->getOperand (opIndex + 2 ));
223
+ instWithIndex .GetInst ()->getOperand (opIndex + 2 ));
223
224
}
224
225
++opIndex;
225
226
takeFromOlderMask = takeFromOlderMask >> 1 ;
226
227
}
227
228
// after transferring the operands, remove the old instruction and store the new one
228
- m_writeList[offset] .GetInst ()->eraseFromParent ();
229
+ instWithIndex .GetInst ()->eraseFromParent ();
229
230
m_bbModified = true ;
230
- m_writeList[offset ] = InstWithIndex (intrinsic, instCounter);
231
+ m_writeList[baseAndOffset ] = InstWithIndex (intrinsic, instCounter);
231
232
}
232
233
}
233
234
else
234
235
{
235
236
// adding new write at this offset
236
- m_writeList[offset ] = InstWithIndex (intrinsic, instCounter);
237
+ m_writeList[baseAndOffset ] = InstWithIndex (intrinsic, instCounter);
237
238
}
238
239
}
239
240
} // FillWriteList()
@@ -253,11 +254,24 @@ void MergeURBWrites::MergeInstructions()
253
254
for (auto ii = m_writeList.begin (); ii != m_writeList.end () && ii != last; ++ii)
254
255
{
255
256
auto next = std::next (ii);
256
- if (ii->GetInst () == nullptr || next->GetInst () == nullptr )
257
+ if (ii->second . GetInst () == nullptr || next->second . GetInst () == nullptr )
257
258
{
258
259
// nothing to do, no write at current or next offset
259
260
continue ;
260
261
}
262
+
263
+ // ii->first.first is the dynamic URB base offset, may be nullptr
264
+ // ii->first.second is the immediate constant offset from ii->first.first
265
+ if (ii->first .first != next->first .first )
266
+ {
267
+ // nothing to do, different base URB offset
268
+ continue ;
269
+ }
270
+ if (ii->first .second + 1 != next->first .second )
271
+ {
272
+ // nothing to do, not a consecutive URB access
273
+ continue ;
274
+ }
261
275
// We have two instructions, merge them by moving operands from the one appearing
262
276
// earlier in the BB to the one appearing later and increasing write length.
263
277
//
@@ -281,13 +295,13 @@ void MergeURBWrites::MergeInstructions()
281
295
// and 'next' corresponds to 'offset+1'.
282
296
//
283
297
// determine which instruction is appearing earlier in the BB
284
- const bool inOrder = ii->GetPlace () < next->GetPlace ();
285
- CallInst* earlierInst = inOrder ? ii->GetInst () : next->GetInst ();
286
- CallInst* laterInst = !inOrder ? ii->GetInst () : next->GetInst ();
298
+ const bool inOrder = ii->second . GetPlace () < next->second . GetPlace ();
299
+ CallInst* earlierInst = inOrder ? ii->second . GetInst () : next->second . GetInst ();
300
+ CallInst* laterInst = !inOrder ? ii->second . GetInst () : next->second . GetInst ();
287
301
288
302
// merge per-channel write masks
289
- auto lowWriteMask = GetChannelMask (ii->GetInst ());
290
- auto highWriteMask = GetChannelMask (next->GetInst ());
303
+ auto lowWriteMask = GetChannelMask (ii->second . GetInst ());
304
+ auto highWriteMask = GetChannelMask (next->second . GetInst ());
291
305
assert (lowWriteMask <= 0x0F && highWriteMask <= 0x0F );
292
306
auto mergedMask = lowWriteMask | (highWriteMask << 4 );
293
307
@@ -306,7 +320,7 @@ void MergeURBWrites::MergeInstructions()
306
320
}
307
321
308
322
// now take the smaller of the two offsets from the instruction in the current slot
309
- laterInst->setOperand (0 , ii->GetInst ()->getOperand (0 ));
323
+ laterInst->setOperand (0 , ii->second . GetInst ()->getOperand (0 ));
310
324
// and update the mask operand
311
325
auto mergedMaskVal = llvm::ConstantInt::get (
312
326
llvm::Type::getInt32Ty (laterInst->getParent ()->getContext ()),
@@ -317,12 +331,59 @@ void MergeURBWrites::MergeInstructions()
317
331
earlierInst->eraseFromParent ();
318
332
m_bbModified = true ;
319
333
++ii; // skip the next slot since we just considered it as 'next'
320
- URBWrite8.push_back (laterInst == ii->GetInst () ? *ii : *next);
334
+ if (nullptr == ii->first .first ) // if URB offset is immediate const
335
+ {
336
+ URBWrite8.push_back (laterInst == ii->second .GetInst () ? ii->second : next->second );
337
+ }
321
338
} // for
322
339
323
340
} // MergeInstructions
324
341
325
342
343
+ std::pair<Value*, unsigned int > MergeURBWrites::GetBaseAndOffset (Value* pUrbOffset)
344
+ {
345
+ Value* pBase = pUrbOffset;
346
+ unsigned int offset = 0 ;
347
+
348
+ auto GetConstant = [](Value* pVal)->unsigned int
349
+ {
350
+ assert (isa<ConstantInt>(pVal));
351
+ ConstantInt* pConst = cast<ConstantInt>(pVal);
352
+ return int_cast<unsigned int >(pConst->getZExtValue ());
353
+ };
354
+
355
+ if (isa<ConstantInt>(pUrbOffset))
356
+ {
357
+ Value* pNullBase = nullptr ;
358
+ return std::make_pair (
359
+ pNullBase,
360
+ GetConstant (pUrbOffset));
361
+ }
362
+ else if (isa<Instruction>(pUrbOffset))
363
+ {
364
+ Instruction* pInstr = cast<Instruction>(pUrbOffset);
365
+ if (pInstr->getOpcode () == Instruction::Add)
366
+ {
367
+ Value* src0 = pInstr->getOperand (0 );
368
+ Value* src1 = pInstr->getOperand (1 );
369
+ if (isa<ConstantInt>(src1))
370
+ {
371
+ auto baseAndOffset = GetBaseAndOffset (src0);
372
+ pBase = baseAndOffset.first ;
373
+ offset = GetConstant (src1) + baseAndOffset.second ;
374
+ }
375
+ else if (isa<ConstantInt>(src0))
376
+ {
377
+ auto baseAndOffset = GetBaseAndOffset (src1);
378
+ pBase = baseAndOffset.first ;
379
+ offset = GetConstant (src0) + baseAndOffset.second ;
380
+ }
381
+ }
382
+ }
383
+
384
+ return std::make_pair (pBase, offset);
385
+ }
386
+
326
387
llvm::FunctionPass* IGC::createMergeURBWritesPass ()
327
388
{
328
389
return new MergeURBWrites ();
0 commit comments