Skip to content

Commit 77f8e09

Browse files
jgu222igcbot
authored andcommitted
[Autobackout][FuncReg]Revert of change: a0caa5f
Using ptr as address payload's type By design, address payload updating builtins does not create a new address payload. If the payload's type is int, not pointer, the updating builtin would be like the following: (1) int addrP0 = createAddrPayload(...) (2) v = block2d_read (addrP0, ....) (3) int addrP1 = setBlockX(addrP0, ...) In llvm IR, those addrP0 and addrP1 are different llvm values. And it is legal for llvm to reorder instructions into { (1), (3), (2) }, which is incorrect as (2) should use the original addrP0, not the updated one (addrP1). For this reason, address payload should be of pointer type. With ptr type, the above would be: (1) int* addrP0 = createAddrPayload(...) (2) v = block2d_read (addrP0, ....) (3) etBlockX(addrP0, ...) The llvm cannot reorder them as there are dependences b/w (2) and (3). This PR makes address payload's type to be pointer type (int*).
1 parent 5519219 commit 77f8e09

File tree

9 files changed

+270
-203
lines changed

9 files changed

+270
-203
lines changed

IGC/BiFModule/Implementation/IGCBiF_Intrinsics_Lsc.cl

Lines changed: 73 additions & 74 deletions
Large diffs are not rendered by default.

IGC/Compiler/CISACodeGen/CShader.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3390,6 +3390,7 @@ CVariable* CShader::GetSymbol(llvm::Value* value, bool fromConstantPool,
33903390
switch (GII->getIntrinsicID()) {
33913391
case GenISAIntrinsic::GenISA_LSC2DBlockCreateAddrPayload:
33923392
case GenISAIntrinsic::GenISA_LSC2DBlockCopyAddrPayload:
3393+
case GenISAIntrinsic::GenISA_LSC2DBlockSetAddrPayloadField:
33933394
{
33943395
// Address Payload is opaque, no alias to it. It takes 8DW.
33953396
auto thisAlign = (m_Platform->getGRFSize() == 64 ? EALIGN_32WORD : EALIGN_HWORD);

IGC/Compiler/CISACodeGen/DeSSA.cpp

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,9 @@ bool DeSSA::runOnFunction(Function& MF)
397397
CoalesceAliasInst();
398398
CoalesceInsertElements();
399399

400+
// Special coalescing
401+
CoalesceOthers();
402+
400403
// checkPHILoopInput
401404
// PreHeader:
402405
// x = ...
@@ -468,7 +471,7 @@ bool DeSSA::runOnFunction(Function& MF)
468471
for (BasicBlock::iterator BBI = I->begin(), BBE = I->end();
469472
BBI != BBE; ++BBI) {
470473
PHINode* PHI = dyn_cast<PHINode>(BBI);
471-
if (!PHI) {
474+
if (!PHI || isProcessed(PHI)) {
472475
break;
473476
}
474477

@@ -1296,6 +1299,79 @@ DeSSA::CoalesceInsertElements()
12961299
}
12971300
}
12981301

1302+
// This is to coalesce special instructions
1303+
void DeSSA::CoalesceOthers()
1304+
{
1305+
// Coalesce address payload
1306+
// 1. created by GenISA_LSC2DBlockCreateAddrPayload
1307+
// or GenISA_LSC2DBlockCopyAddrPayload;
1308+
// 2. may be updated by GenISA_LSC2DBlockSetAddrPayloadField
1309+
// By design, address payload created originally and later
1310+
// updated should be coalesced and no need to check interference.
1311+
auto push_back_if_absent = [](std::list<Value*>& L, Value* V) {
1312+
if (std::find(L.begin(), L.end(), V) == L.end()) {
1313+
L.push_back(V);
1314+
}
1315+
};
1316+
1317+
auto isAddrPayloadUpdater = [](Value* V) {
1318+
GenIntrinsicInst* G = dyn_cast<GenIntrinsicInst>(V);
1319+
if (!G ||
1320+
G->getIntrinsicID() != GenISAIntrinsic::GenISA_LSC2DBlockSetAddrPayloadField) {
1321+
return false;
1322+
}
1323+
return true;
1324+
};
1325+
1326+
1327+
for (auto II = inst_begin(m_F), E = inst_end(m_F); II != E; ++II) {
1328+
Instruction* I = &*II;
1329+
GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(I);
1330+
if (!GII) {
1331+
continue;
1332+
}
1333+
auto GID = GII->getIntrinsicID();
1334+
if (GID != GenISAIntrinsic::GenISA_LSC2DBlockCreateAddrPayload &&
1335+
GID != GenISAIntrinsic::GenISA_LSC2DBlockCopyAddrPayload) {
1336+
continue;
1337+
}
1338+
// Coalesce all uses of CreateAddrPayload/CopyAddrPayload, including
1339+
// all uses of its address payload updating intrinsics.
1340+
//
1341+
// The algorithm collect all uses (address payload) by doing
1342+
// breadth-first traversal, making sure each value is visited no
1343+
// more than once to avoid cyclic use relation due to PHI node.
1344+
//
1345+
std::list<Value*> allUses{ GII };
1346+
for (auto it = allUses.begin(); it != allUses.end(); ++it) {
1347+
Value* V = *it;
1348+
for (auto user : V->users()) {
1349+
Value* tV = user;
1350+
// Only PHI/addrPayload updating intrinsic can be coalesced
1351+
if (isa<PHINode>(tV) || isAddrPayloadUpdater(tV)) {
1352+
push_back_if_absent(allUses, tV);
1353+
}
1354+
}
1355+
}
1356+
1357+
// allUses have all address payload values. Coalesce them together.
1358+
// They are opaque and have no aliases.
1359+
auto iter = allUses.begin();
1360+
IGC_ASSERT(GII == getNodeValue(GII));
1361+
addReg(GII, EALIGN_GRF);
1362+
m_processedValues[GII] = 1;
1363+
++iter;
1364+
for (auto iterEnd = allUses.end(); iter != iterEnd; ++iter) {
1365+
Value* V = *iter;
1366+
IGC_ASSERT(V == getNodeValue(V));
1367+
addReg(V, EALIGN_GRF);
1368+
unionRegs(GII, V);
1369+
1370+
m_processedValues[V] = 1;
1371+
}
1372+
}
1373+
}
1374+
12991375
Value* DeSSA::getRootValue(Value* Val, e_alignment* pAlign) const
13001376
{
13011377
Value* mapVal = nullptr;

IGC/Compiler/CISACodeGen/DeSSA.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,14 @@ namespace IGC {
318318
}
319319
return llvm::isa<llvm::Argument>(V);
320320
}
321+
322+
// Special coalescing prior the main algorithm kicks in.
323+
void CoalesceOthers();
324+
// Main algo shall skip if a value is in the map.
325+
llvm::DenseMap<llvm::Value*, int> m_processedValues;
326+
bool isProcessed(llvm::Value* V) const {
327+
return m_processedValues.count(V) > 0;
328+
}
321329
};
322330

323331
struct MIIndexCompare {

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22848,6 +22848,7 @@ void EmitPass::emitLSC2DBlockAddrPayload(GenIntrinsicInst* GII)
2284822848
void EmitPass::emitLSC2DBlockSetAddrPayloadField(GenIntrinsicInst* GII)
2284922849
{
2285022850
CVariable* AP = GetSymbol(GII->getOperand(0));
22851+
IGC_ASSERT(AP == m_destination);
2285122852
int tval = (int)cast<ConstantInt>(GII->getOperand(1))->getZExtValue();
2285222853
LSC2DBlockField Field = (LSC2DBlockField)tval;
2285322854
CVariable* cV = GetSymbol(GII->getOperand(2));
@@ -22856,7 +22857,7 @@ void EmitPass::emitLSC2DBlockSetAddrPayloadField(GenIntrinsicInst* GII)
2285622857
switch (Field) {
2285722858
case LSC2DBlockField::BASE:
2285822859
{
22859-
CVariable* baseQW = m_currShader->GetNewAlias(AP, ISA_TYPE_Q, 0, 1, true);
22860+
CVariable* baseQW = m_currShader->GetNewAlias(m_destination, ISA_TYPE_Q, 0, 1, true);
2286022861
m_encoder->SetUniformSIMDSize(SIMDMode::SIMD1);
2286122862
m_encoder->SetDstSubReg(0);
2286222863
m_encoder->Copy(baseQW, cV);
@@ -22871,10 +22872,10 @@ void EmitPass::emitLSC2DBlockSetAddrPayloadField(GenIntrinsicInst* GII)
2287122872
m_encoder->SetDstSubReg(OprdNum);
2287222873
if (IsAddend > 0) {
2287322874
m_encoder->SetSrcSubReg(0, OprdNum);
22874-
m_encoder->Add(AP, AP, cV);
22875+
m_encoder->Add(m_destination, m_destination, cV);
2287522876
}
2287622877
else {
22877-
m_encoder->Copy(AP, cV);
22878+
m_encoder->Copy(m_destination, cV);
2287822879
}
2287922880
m_encoder->Push();
2288022881
break;
@@ -22887,7 +22888,7 @@ void EmitPass::emitLSC2DBlockSetAddrPayloadField(GenIntrinsicInst* GII)
2288722888
: (Field == LSC2DBlockField::HEIGHT ? 3 : 4));
2288822889
m_encoder->SetUniformSIMDSize(SIMDMode::SIMD1);
2288922890
m_encoder->SetDstSubReg(OprdNum);
22890-
m_encoder->Copy(AP, cV);
22891+
m_encoder->Copy(m_destination, cV);
2289122892
m_encoder->Push();
2289222893
break;
2289322894
}

IGC/Compiler/CISACodeGen/WIAnalysis.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1481,7 +1481,8 @@ WIAnalysis::WIDependancy WIAnalysisRunner::calculate_dep(const CallInst* inst)
14811481
GII_id == GenISAIntrinsic::GenISA_bitcastfromstruct ||
14821482
GII_id == GenISAIntrinsic::GenISA_bitcasttostruct ||
14831483
GII_id == GenISAIntrinsic::GenISA_LSC2DBlockCreateAddrPayload ||
1484-
GII_id == GenISAIntrinsic::GenISA_LSC2DBlockCopyAddrPayload)
1484+
GII_id == GenISAIntrinsic::GenISA_LSC2DBlockCopyAddrPayload ||
1485+
GII_id == GenISAIntrinsic::GenISA_LSC2DBlockSetAddrPayloadField)
14851486
{
14861487
switch (GII_id)
14871488
{
@@ -1508,6 +1509,7 @@ WIAnalysis::WIDependancy WIAnalysisRunner::calculate_dep(const CallInst* inst)
15081509
case GenISAIntrinsic::GenISA_ReadFromReservedArgSpace:
15091510
case GenISAIntrinsic::GenISA_LSC2DBlockCreateAddrPayload:
15101511
case GenISAIntrinsic::GenISA_LSC2DBlockCopyAddrPayload:
1512+
case GenISAIntrinsic::GenISA_LSC2DBlockSetAddrPayloadField:
15111513
return WIAnalysis::UNIFORM_THREAD;
15121514
case GenISAIntrinsic::GenISA_getR0:
15131515
case GenISAIntrinsic::GenISA_getPayloadHeader:

0 commit comments

Comments
 (0)