Skip to content

Commit 94ec9d3

Browse files
krystian-andrzejewskiigcbot
authored and committed
Align inactive lanes with address if no mask used
This change avoids delivering unaligned addresses for inactive lanes when the no-mask flag is used.
1 parent e70be3b commit 94ec9d3

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18036,6 +18036,33 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
1803618036
// eOffset is in bytes
1803718037
// offset corresponds to Int2Ptr operand obtained during pattern matching
1803818038
CVariable* eOffset = GetSymbol(varOffset);
18039+
18040+
// This operation is to avoid unaligned addresses on inactive lanes.
18041+
// The most accurate way to address a subspan issue caused by non-uniformness
18042+
// of quads/subgroups related to computations of derivatives is to copy the nearest neighbor
18043+
// as a part of its quad/subgroup instead of enabling a no_mask flag.
18044+
bool alignAddressInInactiveLanes =
18045+
m_encoder->IsSubSpanDestination() && !eOffset->IsUniform();
18046+
if (alignAddressInInactiveLanes)
18047+
{
18048+
uint32_t origTypeSize = CEncoder::GetCISADataTypeSize(eOffset->GetType());
18049+
const VISA_Type aliasType = ISA_TYPE_UW;
18050+
uint32_t aliasTypeSize = CEncoder::GetCISADataTypeSize(aliasType);
18051+
uint32_t stride = origTypeSize / aliasTypeSize;
18052+
CVariable* shortOffVar = m_currShader->GetNewAlias(eOffset, aliasType, 0,
18053+
eOffset->GetNumberElement() * origTypeSize / aliasTypeSize);
18054+
CVariable* maskVar = m_currShader->ImmToVariable(~(align - 1), shortOffVar->GetType());
18055+
for (uint instance = 0; instance < eOffset->GetNumberInstance(); instance++)
18056+
{
18057+
m_encoder->SetSecondHalf(instance == 1);
18058+
m_encoder->SetSrcRegion(0, stride, 1, 0);
18059+
m_encoder->SetDstRegion(stride);
18060+
m_encoder->And(shortOffVar, shortOffVar, maskVar);
18061+
m_encoder->Push();
18062+
m_encoder->SetSecondHalf(false);
18063+
}
18064+
}
18065+
1803918066
if (useA32)
1804018067
{
1804118068
eOffset = TruncatePointer(eOffset);

0 commit comments

Comments
 (0)