Skip to content

Commit 6baed3e

Browse files
jgu222Zuul
authored and
Zuul
committed
Internal minor change
Change-Id: I8071ad82327f23ce7935440160e0afefb3dcaf47
1 parent bb21f32 commit 6baed3e

File tree

1 file changed

+93
-93
lines changed

1 file changed

+93
-93
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 93 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -15752,98 +15752,84 @@ void EmitPass::emitVectorStore(StoreInst* inst, Value* offset, ConstantInt* immO
1575215752
// In addition, if 64bit add is not supported, emitAddPair() will be used to
1575315753
// use 32bit add/addc to emulate 64bit add.
1575415754
//
15755-
// Note that argument 'AddrVar' in prepareAddressForUniform() is uniform, so is its return var.
15756-
// The argument 'DataVar' in prepareDataForUniform() is uniform, so is its return var.
15755+
// Note that argument 'AddrVar' in prepareAddressForUniform() is uniform, so is
15756+
// its return var. The argument 'DataVar' in prepareDataForUniform() is uniform,
15757+
// so is its return var.
1575715758
//
1575815759
CVariable* EmitPass::prepareAddressForUniform(
15759-
CVariable* AddrVar, uint32_t EltBytes, uint32_t NElts, uint32_t ExecSz, e_alignment Align)
15760+
CVariable* AddrVar, uint32_t EltBytes, uint32_t NElts, uint32_t RequiredNElts, e_alignment Align)
1576015761
{
15762+
// If RequiredNElts == 0, use next power of 2 of NElts as return var's num of elements.
15763+
// Otherwise, use RequiredNElts as return var's num of elements.
15764+
uint32_t pow2NElts = (uint32_t)PowerOf2Ceil(NElts);
15765+
uint32_t allocNElts = (RequiredNElts > 0 ? RequiredNElts : pow2NElts);
1576115766
IGC_ASSERT(NElts <= 8 && (EltBytes == 4 || EltBytes == 8));
15762-
if (ExecSz == 1 && AddrVar->IsGRFAligned(Align))
15767+
IGC_ASSERT(allocNElts >= pow2NElts);
15768+
if (allocNElts == NElts && NElts == 1 && AddrVar->IsGRFAligned(Align))
1576315769
{
15770+
// No need to create a new var.
1576415771
return AddrVar;
1576515772
}
1576615773
bool isA64 = (AddrVar->GetElemSize() == 8);
15767-
SIMDMode simdmode = lanesToSIMDMode(ExecSz);
15768-
CVariable* newVar = m_currShader->GetNewVariable(ExecSz, AddrVar->GetType(), Align, true, CName::NONE);
15774+
SIMDMode simdmode = lanesToSIMDMode(pow2NElts);
15775+
CVariable* newVar = m_currShader->GetNewVariable(allocNElts, AddrVar->GetType(), Align, true, CName::NONE);
1576915776

1577015777
CVariable* off;
1577115778
uint32_t incImm = (0x76543210 & maskTrailingOnes<uint32_t>(NElts * 4));
15772-
if ((ExecSz <= 4 && EltBytes == 4) || (ExecSz <= 2 && EltBytes == 8))
15779+
if ((pow2NElts <= 4 && EltBytes == 4) || (pow2NElts <= 2 && EltBytes == 8))
1577315780
{
1577415781
// This case needs a single UV immediate
1577515782
incImm = incImm << (EltBytes == 4 ? 2 : 3);
1577615783
off = m_currShader->ImmToVariable(incImm, ISA_TYPE_UV);
1577715784
}
1577815785
else
1577915786
{
15780-
// Need a temporary var to calculate offsets
15781-
off = m_currShader->GetNewVariable(ExecSz, ISA_TYPE_UD, EALIGN_DWORD, false, CName::NONE);
15787+
// Need a temporary var to calculate offsets.
15788+
// (Note that the temp is non-uniform, otherwise emitAddPair() won't work.)
15789+
off = m_currShader->GetNewVariable(pow2NElts, ISA_TYPE_UD, EALIGN_DWORD, false, CName::NONE);
1578215790

15783-
// actualES is the actual execsize used for computing offsets.
15784-
uint32_t actualES = (uint32_t)PowerOf2Ceil(NElts);
15785-
15786-
// incImm is UV type and can be used in execsize <= 8 only. If ExecSz is greater
15787-
// than the actual number of lanes (for example, 4GRF alignment case), the upper lanes
15788-
// beyond need to be zero'ed.
15789-
if (ExecSz > actualES)
15790-
{
15791-
// Need to zero the upper lanes.
15792-
m_encoder->SetNoMask();
15793-
m_encoder->SetSimdSize(simdmode);
15794-
m_encoder->Copy(off, m_currShader->ImmToVariable(0, ISA_TYPE_UD));
15795-
m_encoder->Push();
15796-
}
15797-
15798-
SIMDMode sm = lanesToSIMDMode(actualES);
15799-
if (incImm > 0 &&
15800-
((actualES <= 4 && EltBytes == 4) || (actualES <= 2 && EltBytes == 8)))
15801-
{
15802-
// This case needs a single UV immediate
15803-
incImm = incImm << (EltBytes == 4 ? 2 : 3);
15791+
// Need a mov and mul
15792+
m_encoder->SetNoMask();
15793+
m_encoder->SetSimdSize(simdmode);
15794+
m_encoder->Copy(off, m_currShader->ImmToVariable(incImm, ISA_TYPE_UV));
15795+
m_encoder->Push();
1580415796

15805-
m_encoder->SetNoMask();
15806-
m_encoder->SetSimdSize(sm);
15807-
m_encoder->Copy(off, m_currShader->ImmToVariable(incImm, ISA_TYPE_UV));
15808-
m_encoder->Push();
15809-
}
15810-
else if (incImm > 0)
15811-
{
15812-
// Need a mov and mul
15813-
m_encoder->SetNoMask();
15814-
m_encoder->SetSimdSize(sm);
15815-
m_encoder->Copy(off, m_currShader->ImmToVariable(incImm, ISA_TYPE_UV));
15816-
m_encoder->Push();
15797+
m_encoder->SetNoMask();
15798+
m_encoder->SetSimdSize(simdmode);
15799+
m_encoder->SetSrcRegion(0, 1, 1, 0);
15800+
m_encoder->SetSrcRegion(1, 0, 1, 0);
15801+
m_encoder->Mul(off, off, m_currShader->ImmToVariable(EltBytes, ISA_TYPE_UW));
15802+
m_encoder->Push();
15803+
}
1581715804

15818-
m_encoder->SetNoMask();
15819-
m_encoder->SetSimdSize(sm);
15820-
m_encoder->SetSrcRegion(0, 1, 1, 0);
15821-
m_encoder->SetSrcRegion(1, 0, 1, 0);
15822-
m_encoder->Mul(off, off, m_currShader->ImmToVariable(EltBytes, ISA_TYPE_UW));
15823-
m_encoder->Push();
15824-
}
15805+
// Only need to initialize pow2NElts elements.
15806+
if (allocNElts > pow2NElts)
15807+
{
15808+
newVar = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, pow2NElts);
1582515809
}
1582615810

15827-
// May need splitting for A64
15828-
bool needSplit = (newVar->GetSize() > (2 * (uint32_t)getGRFSize()));
15811+
// Currently, it's impossible to split because of NElts <= 8. In the future, NElts
15812+
// could be 32 and we could need to split.
15813+
bool needSplit = ((pow2NElts * newVar->GetElemSize()) > (2 * (uint32_t)getGRFSize()));
1582915814
if (needSplit)
1583015815
{
1583115816
IGC_ASSERT(!off->IsImmediate());
15832-
uint32_t bytes1 = (ExecSz / 2) * newVar->GetElemSize();
15833-
uint32_t bytes2 = (ExecSz / 2) * off->GetElemSize();
15834-
CVariable* newVarHi = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, ExecSz / 2);
15835-
CVariable* newVarLo = m_currShader->GetNewAlias(newVar, newVar->GetType(), bytes1, ExecSz / 2);
15836-
CVariable* offHi = m_currShader->GetNewAlias(off, off->GetType(), 0, ExecSz / 2);
15837-
CVariable* offLo = m_currShader->GetNewAlias(off, off->GetType(), bytes2, ExecSz / 2);
15817+
uint32_t halfNElts = pow2NElts / 2;
15818+
uint32_t bytes1 = halfNElts * newVar->GetElemSize();
15819+
uint32_t bytes2 = halfNElts * off->GetElemSize();
15820+
CVariable* newVarHi = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, halfNElts);
15821+
CVariable* newVarLo = m_currShader->GetNewAlias(newVar, newVar->GetType(), bytes1, halfNElts);
15822+
CVariable* offHi = m_currShader->GetNewAlias(off, off->GetType(), 0, halfNElts);
15823+
CVariable* offLo = m_currShader->GetNewAlias(off, off->GetType(), bytes2, halfNElts);
1583815824

15839-
if (m_currShader->m_Platform->hasNoInt64Inst())
15825+
if (isA64 && m_currShader->m_Platform->hasNoInt64Inst())
1584015826
{
1584115827
emitAddPair(newVarHi, AddrVar, offHi);
1584215828
emitAddPair(newVarLo, AddrVar, offLo);
1584315829
}
1584415830
else
1584515831
{
15846-
SIMDMode sm = lanesToSIMDMode(ExecSz / 2);
15832+
SIMDMode sm = lanesToSIMDMode(halfNElts);
1584715833
m_encoder->SetNoMask();
1584815834
m_encoder->SetUniformSIMDSize(sm);
1584915835
m_encoder->SetSrcRegion(0, 0, 1, 0);
@@ -15859,7 +15845,7 @@ CVariable* EmitPass::prepareAddressForUniform(
1585915845
m_encoder->Push();
1586015846
}
1586115847
}
15862-
else if (isA64 && m_currShader->m_Platform->hasNoInt64Inst())
15848+
else if (isA64 && m_currShader->m_Platform->hasNoInt64Inst() && pow2NElts > 1)
1586315849
{
1586415850
emitAddPair(newVar, AddrVar, off);
1586515851
}
@@ -15869,59 +15855,73 @@ CVariable* EmitPass::prepareAddressForUniform(
1586915855
m_encoder->SetUniformSIMDSize(simdmode);
1587015856
m_encoder->SetSrcRegion(0, 0, 1, 0);
1587115857
m_encoder->SetSrcRegion(1, 1, 1, 0);
15872-
m_encoder->Add(newVar, AddrVar, off);
15858+
if (pow2NElts > 1) {
15859+
m_encoder->Add(newVar, AddrVar, off);
15860+
}
15861+
else {
15862+
m_encoder->Copy(newVar, AddrVar);
15863+
}
1587315864
m_encoder->Push();
1587415865
}
1587515866
return newVar;
1587615867
}
1587715868

1587815869
CVariable* EmitPass::prepareDataForUniform(
15879-
CVariable* DataVar, uint32_t ExecSz, e_alignment Align)
15870+
CVariable* DataVar, uint32_t RequiredNElts, e_alignment Align)
1588015871
{
1588115872
uint32_t NElts = DataVar->GetNumberElement();
1588215873
uint32_t EltBytes = DataVar->GetElemSize();
15883-
IGC_ASSERT(ExecSz >= NElts && NElts <= 8 && (EltBytes == 4 || EltBytes == 8));
15884-
if (NElts == ExecSz && !DataVar->IsImmediate() && DataVar->IsGRFAligned(Align))
15874+
uint32_t pow2NElts = (uint32_t)(uint32_t)PowerOf2Ceil(NElts);
15875+
uint32_t allocNElts = RequiredNElts > 0 ? RequiredNElts : pow2NElts;
15876+
IGC_ASSERT(allocNElts >= pow2NElts && NElts <= 8 && (EltBytes == 4 || EltBytes == 8));
15877+
if (NElts == allocNElts && !DataVar->IsImmediate() && DataVar->IsGRFAligned(Align))
1588515878
{
1588615879
return DataVar;
1588715880
}
15888-
CVariable* newVar = m_currShader->GetNewVariable(ExecSz, DataVar->GetType(), Align, true, CName::NONE);
15881+
CVariable* newVar = m_currShader->GetNewVariable(allocNElts, DataVar->GetType(), Align, true, CName::NONE);
1588915882

15890-
// Initialize to DataVar's first element (set Elts from NElts and up to the first element).
15891-
bool needSplit = (newVar->GetSize() > (2 * (uint32_t)getGRFSize()));
15892-
if (needSplit)
15883+
// Need to return a var with pow2NElts elements
15884+
if (allocNElts > pow2NElts)
1589315885
{
15894-
uint32_t esz = ExecSz / 2;
15895-
uint32_t bytes = esz * newVar->GetElemSize();
15896-
CVariable* newVarHi = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, esz);
15897-
CVariable* newVarLo = m_currShader->GetNewAlias(newVar, newVar->GetType(), bytes, esz);
15898-
15899-
m_encoder->SetNoMask();
15900-
m_encoder->SetUniformSIMDSize(lanesToSIMDMode(esz));
15901-
m_encoder->SetSrcRegion(0, 0, 1, 0);
15902-
m_encoder->Copy(newVarHi, DataVar);
15903-
m_encoder->Push();
15904-
15905-
m_encoder->SetNoMask();
15906-
m_encoder->SetUniformSIMDSize(lanesToSIMDMode(esz));
15907-
m_encoder->SetSrcRegion(0, 0, 1, 0);
15908-
m_encoder->Copy(newVarLo, DataVar);
15909-
m_encoder->Push();
15886+
newVar = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, pow2NElts);
1591015887
}
15911-
else
15888+
15889+
// Initialize every element to DataVar's first element, so that the elements from NElts up to pow2NElts-1 hold the first element.
15890+
bool initWithElem0 = (pow2NElts > NElts);
15891+
bool needSplit = ((pow2NElts *newVar->GetElemSize()) > (2 * (uint32_t)getGRFSize()));
15892+
if (initWithElem0)
1591215893
{
15894+
if (needSplit)
15895+
{
15896+
uint32_t esz = pow2NElts / 2;
15897+
uint32_t bytes = esz * newVar->GetElemSize();
15898+
CVariable* newVarHi = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, esz);
15899+
CVariable* newVarLo = m_currShader->GetNewAlias(newVar, newVar->GetType(), bytes, esz);
1591315900

15914-
m_encoder->SetNoMask();
15915-
m_encoder->SetUniformSIMDSize(lanesToSIMDMode(ExecSz));
15916-
m_encoder->SetSrcRegion(0, 0, 1, 0);
15917-
m_encoder->Copy(newVar, DataVar);
15918-
m_encoder->Push();
15901+
m_encoder->SetNoMask();
15902+
m_encoder->SetUniformSIMDSize(lanesToSIMDMode(esz));
15903+
m_encoder->SetSrcRegion(0, 0, 1, 0);
15904+
m_encoder->Copy(newVarHi, DataVar);
15905+
m_encoder->Push();
15906+
15907+
m_encoder->SetNoMask();
15908+
m_encoder->SetUniformSIMDSize(lanesToSIMDMode(esz));
15909+
m_encoder->SetSrcRegion(0, 0, 1, 0);
15910+
m_encoder->Copy(newVarLo, DataVar);
15911+
m_encoder->Push();
15912+
}
15913+
else
15914+
{
15915+
m_encoder->SetNoMask();
15916+
m_encoder->SetUniformSIMDSize(lanesToSIMDMode(pow2NElts));
15917+
m_encoder->SetSrcRegion(0, 0, 1, 0);
15918+
m_encoder->Copy(newVar, DataVar);
15919+
m_encoder->Push();
15920+
}
1591915921
}
1592015922

15921-
if (!DataVar->IsImmediate() && NElts > 1)
15923+
if (!initWithElem0 || NElts != 1)
1592215924
{
15923-
// Copy values over, the elements from NElts to ExecSz-1 are set to the first element
15924-
// in the initialization above.
1592515925
emitVectorCopy(newVar, DataVar, NElts);
1592615926
}
1592715927
return newVar;

0 commit comments

Comments
 (0)