Skip to content

Commit 226e173

Browse files
mmerecki authored and sys_zuul committed
Improve MergeURBWrites to also merge URBWrite instructions with dynamic URB offsets.
Change-Id: I921de05f0a8778637ffea9057872b54834037580
1 parent 64f443f commit 226e173

File tree

1 file changed

+97
-36
lines changed

1 file changed

+97
-36
lines changed

IGC/Compiler/CISACodeGen/MergeURBWrites.cpp

Lines changed: 97 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,6 @@ namespace
7373
FunctionPass(ID)
7474
{ }
7575

76-
virtual bool doInitialization(Function& F);
7776
virtual bool runOnFunction(Function& F);
7877

7978
virtual void getAnalysisUsage(AnalysisUsage& AU) const
@@ -94,8 +93,20 @@ namespace
9493
// in order of offsets and merging adjacent writes.
9594
void MergeInstructions();
9695

96+
// Returns the dynamic URB base offset and an immediate const offset
97+
// from the dynamic base. The function calculates the result by walking
98+
// the use-def chain of pUrbOffset.
99+
// If pUrbOffset is an immediate constant (==offset) then
100+
// <nullptr, offset> is returned.
101+
// In all other cases <pUrbOffset, 0> is returned.
102+
std::pair<Value*, unsigned int> GetBaseAndOffset(Value* pUrbOffset);
103+
97104
// represents the map (urb index) --> (instruction, instruction index in BB)
98-
std::vector<InstWithIndex> m_writeList;
105+
// The key consists of a dynamic URB base offset (key.first) and
106+
// an immediate offset from this dynamic base
107+
// Dynamic URB base offset is null if URB offset is constant.
108+
std::map<std::pair<Value*, unsigned int>, InstWithIndex> m_writeList;
109+
99110
bool m_bbModified;
100111
static char ID;
101112
};
@@ -109,13 +120,6 @@ namespace
109120

110121
} // end of unnamed namespace to contain class definition and auxiliary functions
111122

112-
/// Do initialization of the data structure.
113-
/// We want to allocate space for the vector only once.
114-
bool MergeURBWrites::doInitialization(Function& F)
115-
{
116-
m_writeList.reserve(128); //most of the time we won't exceed offset = 127
117-
return false;
118-
}
119123

120124
/// This optimization merges shorter writes to URB to get a smaller number of longer writes
121125
/// which is more efficient.
@@ -131,7 +135,7 @@ bool MergeURBWrites::doInitialization(Function& F)
131135
/// locations with one.
132136
///
133137
/// for now, we don't handle the following cases:
134-
/// 1) offset is a runtime value
138+
/// 1) channel mask is a runtime value
135139
/// 2) handling of writes of size >4
136140
/// so e.g. we don't handle |aaaa|bbbbbbbb|cccc| -> |aaaabbbb|bbbbcccc|
137141
/// this will be addressed in the future.
@@ -172,31 +176,28 @@ void MergeURBWrites::FillWriteList(BasicBlock& BB)
172176
}
173177

174178
// intrinsic has the format: URB_write (%offset, %mask, %data0, ... , %data7)
175-
ConstantInt* pOffset = dyn_cast<ConstantInt>(iit->getOperand(0));
176179
ConstantInt* pImmediateMask = dyn_cast<ConstantInt>(iit->getOperand(1));
177-
if (pOffset == nullptr || pImmediateMask == nullptr || (GetChannelMask(intrinsic) > 0x0F))
180+
if (pImmediateMask == nullptr || (GetChannelMask(intrinsic) > 0x0F))
178181
{
179182
// for now, we don't handle the following cases:
180-
// 1) offset is a runtime value
181-
// 2) mask is a runtime value
182-
// 3) handling of writes of size >4
183+
// 1) mask is a runtime value
184+
// 2) handling of writes of size >4
183185
// so e.g. we don't handle |aaaa|bbbbbbbb|cccc| -> |aaaabbbb|bbbbcccc|
184186
// this will be addressed in the future
185187
continue;
186188
}
187-
const unsigned int offset = int_cast<unsigned int>(pOffset->getZExtValue());
188-
// if we reach outside of the vector, grow it (filling with nullptr)
189-
if (offset >= m_writeList.size())
190-
{
191-
m_writeList.resize(offset + 1);
192-
}
193-
auto elem = m_writeList[offset];
189+
190+
std::pair<Value*, unsigned int> baseAndOffset =
191+
GetBaseAndOffset(iit->getOperand(0));
192+
193+
auto it = m_writeList.find(baseAndOffset);
194194
// we encountered an instruction writing at the same offset,
195195
// most likely we write RTAI, VAI or PSIZE to vertex header
196196
// or we overwrite the old value
197-
if (elem.GetInst() != nullptr)
197+
if (it != m_writeList.end())
198198
{
199-
auto oldMask = GetChannelMask(m_writeList[offset].GetInst());
199+
const InstWithIndex& instWithIndex = it->second;
200+
auto oldMask = GetChannelMask(instWithIndex.GetInst());
200201
auto newMask = GetChannelMask(intrinsic);
201202
// assume the write lengths are <=4
202203
// if we have writes to the same channel, we retain the later one,
@@ -219,21 +220,21 @@ void MergeURBWrites::FillWriteList(BasicBlock& BB)
219220
{
220221
intrinsic->setOperand(
221222
opIndex + 2,
222-
m_writeList[offset].GetInst()->getOperand(opIndex + 2));
223+
instWithIndex.GetInst()->getOperand(opIndex + 2));
223224
}
224225
++opIndex;
225226
takeFromOlderMask = takeFromOlderMask >> 1;
226227
}
227228
// after transferring the operands, remove the old instruction and store the new one
228-
m_writeList[offset].GetInst()->eraseFromParent();
229+
instWithIndex.GetInst()->eraseFromParent();
229230
m_bbModified = true;
230-
m_writeList[offset] = InstWithIndex(intrinsic, instCounter);
231+
m_writeList[baseAndOffset] = InstWithIndex(intrinsic, instCounter);
231232
}
232233
}
233234
else
234235
{
235236
// adding new write at this offset
236-
m_writeList[offset] = InstWithIndex(intrinsic, instCounter);
237+
m_writeList[baseAndOffset] = InstWithIndex(intrinsic, instCounter);
237238
}
238239
}
239240
} // FillWriteList()
@@ -253,11 +254,24 @@ void MergeURBWrites::MergeInstructions()
253254
for (auto ii = m_writeList.begin(); ii != m_writeList.end() && ii != last; ++ii)
254255
{
255256
auto next = std::next(ii);
256-
if (ii->GetInst() == nullptr || next->GetInst() == nullptr)
257+
if (ii->second.GetInst() == nullptr || next->second.GetInst() == nullptr)
257258
{
258259
//nothing to do, no write at current or next offset
259260
continue;
260261
}
262+
263+
// ii->first.first is the dynamic URB base offset, may be nullptr
264+
// ii->first.second is the immediate constant offset from ii->first.first
265+
if (ii->first.first != next->first.first)
266+
{
267+
// nothing to do, different base URB offset
268+
continue;
269+
}
270+
if (ii->first.second + 1 != next->first.second)
271+
{
272+
// nothing to do, not a consecutive URB access
273+
continue;
274+
}
261275
// We have two instructions, merge them by moving operands from the one appearing
262276
// earlier in the BB to the one appearing later and increasing write length.
263277
//
@@ -281,13 +295,13 @@ void MergeURBWrites::MergeInstructions()
281295
// and 'next' corresponds to 'offset+1'.
282296
//
283297
// determine which instruction is appearing earlier in the BB
284-
const bool inOrder = ii->GetPlace() < next->GetPlace();
285-
CallInst* earlierInst = inOrder ? ii->GetInst() : next->GetInst();
286-
CallInst* laterInst = !inOrder ? ii->GetInst() : next->GetInst();
298+
const bool inOrder = ii->second.GetPlace() < next->second.GetPlace();
299+
CallInst* earlierInst = inOrder ? ii->second.GetInst() : next->second.GetInst();
300+
CallInst* laterInst = !inOrder ? ii->second.GetInst() : next->second.GetInst();
287301

288302
// merge per-channel write masks
289-
auto lowWriteMask = GetChannelMask(ii->GetInst());
290-
auto highWriteMask = GetChannelMask(next->GetInst());
303+
auto lowWriteMask = GetChannelMask(ii->second.GetInst());
304+
auto highWriteMask = GetChannelMask(next->second.GetInst());
291305
assert(lowWriteMask <= 0x0F && highWriteMask <= 0x0F);
292306
auto mergedMask = lowWriteMask | (highWriteMask << 4);
293307

@@ -306,7 +320,7 @@ void MergeURBWrites::MergeInstructions()
306320
}
307321

308322
// now take the smaller of the two offsets from the instruction in the current slot
309-
laterInst->setOperand(0, ii->GetInst()->getOperand(0));
323+
laterInst->setOperand(0, ii->second.GetInst()->getOperand(0));
310324
// and update the mask operand
311325
auto mergedMaskVal = llvm::ConstantInt::get(
312326
llvm::Type::getInt32Ty(laterInst->getParent()->getContext()),
@@ -317,12 +331,59 @@ void MergeURBWrites::MergeInstructions()
317331
earlierInst->eraseFromParent();
318332
m_bbModified = true;
319333
++ii; // skip the next slot since we just considered it as 'next'
320-
URBWrite8.push_back(laterInst == ii->GetInst() ? *ii : *next);
334+
if (nullptr == ii->first.first) // if URB offset is immediate const
335+
{
336+
URBWrite8.push_back(laterInst == ii->second.GetInst() ? ii->second : next->second);
337+
}
321338
} // for
322339

323340
} // MergeInstructions
324341

325342

343+
/// Splits a URB offset value into a dynamic base and an immediate offset.
///
/// Follows the use-def chain of pUrbOffset through `add` instructions that
/// have a ConstantInt operand, summing those constants on the way down.
///
/// @param pUrbOffset  URB offset operand to decompose.
/// @return <nullptr, offset> when the chain ends in a ConstantInt (the offset
///         is fully immediate); otherwise <base, offset>, where base is the
///         first value on the chain that is not an `add` with a ConstantInt
///         operand. A value that cannot be decomposed at all therefore
///         yields <pUrbOffset, 0>.
std::pair<Value*, unsigned int> MergeURBWrites::GetBaseAndOffset(Value* pUrbOffset)
{
    // Reads the zero-extended immediate out of a value known to be a ConstantInt.
    auto ToImmediate = [](Value* pVal)->unsigned int
    {
        assert(isa<ConstantInt>(pVal));
        return int_cast<unsigned int>(cast<ConstantInt>(pVal)->getZExtValue());
    };

    Value* pCurrent = pUrbOffset;   // value still to be decomposed
    unsigned int accumulated = 0;   // sum of the constants peeled off so far

    for (;;)
    {
        if (isa<ConstantInt>(pCurrent))
        {
            // The whole chain folded to an immediate: there is no dynamic base.
            return std::pair<Value*, unsigned int>(
                nullptr,
                ToImmediate(pCurrent) + accumulated);
        }

        if (!isa<Instruction>(pCurrent) ||
            cast<Instruction>(pCurrent)->getOpcode() != Instruction::Add)
        {
            // Not an add: this value is the dynamic base.
            break;
        }

        Instruction* pAdd = cast<Instruction>(pCurrent);
        Value* lhs = pAdd->getOperand(0);
        Value* rhs = pAdd->getOperand(1);

        // The second operand is checked first; when both operands are
        // constants the first one is decomposed on the next iteration.
        if (isa<ConstantInt>(rhs))
        {
            accumulated += ToImmediate(rhs);
            pCurrent = lhs;
        }
        else if (isa<ConstantInt>(lhs))
        {
            accumulated += ToImmediate(lhs);
            pCurrent = rhs;
        }
        else
        {
            // An add of two runtime values is opaque; treat it as the base.
            break;
        }
    }

    return std::make_pair(pCurrent, accumulated);
}
386+
326387
llvm::FunctionPass* IGC::createMergeURBWritesPass()
327388
{
328389
return new MergeURBWrites();

0 commit comments

Comments
 (0)