Skip to content

Commit bb21f32

Browse files
jgu222sys_zuul
authored and
sys_zuul
committed
Changes in code.
Change-Id: I0f3eb0d67e8181b8f90a578cd6214e9be7594252
1 parent f0f97c0 commit bb21f32

File tree

1 file changed

+93
-93
lines changed

1 file changed

+93
-93
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 93 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -15752,84 +15752,98 @@ void EmitPass::emitVectorStore(StoreInst* inst, Value* offset, ConstantInt* immO
1575215752
// In addition, if 64bit add is not supported, emitAddPair() will be used to
1575315753
// use 32bit add/addc to emulate 64bit add.
1575415754
//
15755-
// Note that argument 'AddrVar' in prepareAddressForUniform() is uniform, so is
15756-
// its return var. The argument 'DataVar' in prepareDataForUniform() is uniform,
15757-
// so is its return var.
15755+
// Note that argument 'AddrVar' in prepareAddressForUniform() is uniform, so is its return var.
15756+
// The argument 'DataVar' in prepareDataForUniform() is uniform, so is its return var.
1575815757
//
1575915758
CVariable* EmitPass::prepareAddressForUniform(
15760-
CVariable* AddrVar, uint32_t EltBytes, uint32_t NElts, uint32_t RequiredNElts, e_alignment Align)
15759+
CVariable* AddrVar, uint32_t EltBytes, uint32_t NElts, uint32_t ExecSz, e_alignment Align)
1576115760
{
15762-
// If RequiredNElts == 0, use next power of 2 of NElts as return var's num of elements.
15763-
// Otherwise, use RequiredNElts as return var's num of elements.
15764-
uint32_t pow2NElts = (uint32_t)PowerOf2Ceil(NElts);
15765-
uint32_t allocNElts = (RequiredNElts > 0 ? RequiredNElts : pow2NElts);
1576615761
IGC_ASSERT(NElts <= 8 && (EltBytes == 4 || EltBytes == 8));
15767-
IGC_ASSERT(allocNElts >= pow2NElts);
15768-
if (allocNElts == NElts && AddrVar->IsGRFAligned(Align))
15762+
if (ExecSz == 1 && AddrVar->IsGRFAligned(Align))
1576915763
{
15770-
// No need to create a new var.
1577115764
return AddrVar;
1577215765
}
1577315766
bool isA64 = (AddrVar->GetElemSize() == 8);
15774-
SIMDMode simdmode = lanesToSIMDMode(pow2NElts);
15775-
CVariable* newVar = m_currShader->GetNewVariable(allocNElts, AddrVar->GetType(), Align, true, CName::NONE);
15767+
SIMDMode simdmode = lanesToSIMDMode(ExecSz);
15768+
CVariable* newVar = m_currShader->GetNewVariable(ExecSz, AddrVar->GetType(), Align, true, CName::NONE);
1577615769

1577715770
CVariable* off;
1577815771
uint32_t incImm = (0x76543210 & maskTrailingOnes<uint32_t>(NElts * 4));
15779-
if ((pow2NElts <= 4 && EltBytes == 4) || (pow2NElts <= 2 && EltBytes == 8))
15772+
if ((ExecSz <= 4 && EltBytes == 4) || (ExecSz <= 2 && EltBytes == 8))
1578015773
{
1578115774
// This case needs a single UV immediate
1578215775
incImm = incImm << (EltBytes == 4 ? 2 : 3);
1578315776
off = m_currShader->ImmToVariable(incImm, ISA_TYPE_UV);
1578415777
}
1578515778
else
1578615779
{
15787-
// Need a temporary var to calculate offsets.
15788-
// (Note that the temp is non-uniform, otherwise emitAddPair() won't work.)
15789-
off = m_currShader->GetNewVariable(pow2NElts, ISA_TYPE_UD, EALIGN_DWORD, false, CName::NONE);
15780+
// Need a temporary var to calculate offsets
15781+
off = m_currShader->GetNewVariable(ExecSz, ISA_TYPE_UD, EALIGN_DWORD, false, CName::NONE);
1579015782

15791-
// Need a mov and mul
15792-
m_encoder->SetNoMask();
15793-
m_encoder->SetSimdSize(simdmode);
15794-
m_encoder->Copy(off, m_currShader->ImmToVariable(incImm, ISA_TYPE_UV));
15795-
m_encoder->Push();
15783+
// actualES is the actual execsize used for computing offsets.
15784+
uint32_t actualES = (uint32_t)PowerOf2Ceil(NElts);
1579615785

15797-
m_encoder->SetNoMask();
15798-
m_encoder->SetSimdSize(simdmode);
15799-
m_encoder->SetSrcRegion(0, 1, 1, 0);
15800-
m_encoder->SetSrcRegion(1, 0, 1, 0);
15801-
m_encoder->Mul(off, off, m_currShader->ImmToVariable(EltBytes, ISA_TYPE_UW));
15802-
m_encoder->Push();
15803-
}
15786+
// incImm is UV type and can be used in execsize <= 8 only. If ExecSz is greater
15787+
// than the actual number of lanes (for example, 4GRF alignment case), the upper lanes
15788+
// beyond the actual lanes need to be zeroed.
15789+
if (ExecSz > actualES)
15790+
{
15791+
// Need to zero the upper lanes.
15792+
m_encoder->SetNoMask();
15793+
m_encoder->SetSimdSize(simdmode);
15794+
m_encoder->Copy(off, m_currShader->ImmToVariable(0, ISA_TYPE_UD));
15795+
m_encoder->Push();
15796+
}
1580415797

15805-
// Only need to initialize pow2NElts elements.
15806-
if (allocNElts > pow2NElts)
15807-
{
15808-
newVar = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, pow2NElts);
15798+
SIMDMode sm = lanesToSIMDMode(actualES);
15799+
if (incImm > 0 &&
15800+
((actualES <= 4 && EltBytes == 4) || (actualES <= 2 && EltBytes == 8)))
15801+
{
15802+
// This case needs a single UV immediate
15803+
incImm = incImm << (EltBytes == 4 ? 2 : 3);
15804+
15805+
m_encoder->SetNoMask();
15806+
m_encoder->SetSimdSize(sm);
15807+
m_encoder->Copy(off, m_currShader->ImmToVariable(incImm, ISA_TYPE_UV));
15808+
m_encoder->Push();
15809+
}
15810+
else if (incImm > 0)
15811+
{
15812+
// Need a mov and mul
15813+
m_encoder->SetNoMask();
15814+
m_encoder->SetSimdSize(sm);
15815+
m_encoder->Copy(off, m_currShader->ImmToVariable(incImm, ISA_TYPE_UV));
15816+
m_encoder->Push();
15817+
15818+
m_encoder->SetNoMask();
15819+
m_encoder->SetSimdSize(sm);
15820+
m_encoder->SetSrcRegion(0, 1, 1, 0);
15821+
m_encoder->SetSrcRegion(1, 0, 1, 0);
15822+
m_encoder->Mul(off, off, m_currShader->ImmToVariable(EltBytes, ISA_TYPE_UW));
15823+
m_encoder->Push();
15824+
}
1580915825
}
1581015826

15811-
// Currently, it's impossible to split because of NElts <= 8. In the future, NElts
15812-
// could be 32 and we could need to split.
15813-
bool needSplit = ((pow2NElts * newVar->GetElemSize()) > (2 * (uint32_t)getGRFSize()));
15827+
// May need splitting for A64
15828+
bool needSplit = (newVar->GetSize() > (2 * (uint32_t)getGRFSize()));
1581415829
if (needSplit)
1581515830
{
1581615831
IGC_ASSERT(!off->IsImmediate());
15817-
uint32_t halfNElts = pow2NElts / 2;
15818-
uint32_t bytes1 = halfNElts * newVar->GetElemSize();
15819-
uint32_t bytes2 = halfNElts * off->GetElemSize();
15820-
CVariable* newVarHi = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, halfNElts);
15821-
CVariable* newVarLo = m_currShader->GetNewAlias(newVar, newVar->GetType(), bytes1, halfNElts);
15822-
CVariable* offHi = m_currShader->GetNewAlias(off, off->GetType(), 0, halfNElts);
15823-
CVariable* offLo = m_currShader->GetNewAlias(off, off->GetType(), bytes2, halfNElts);
15832+
uint32_t bytes1 = (ExecSz / 2) * newVar->GetElemSize();
15833+
uint32_t bytes2 = (ExecSz / 2) * off->GetElemSize();
15834+
CVariable* newVarHi = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, ExecSz / 2);
15835+
CVariable* newVarLo = m_currShader->GetNewAlias(newVar, newVar->GetType(), bytes1, ExecSz / 2);
15836+
CVariable* offHi = m_currShader->GetNewAlias(off, off->GetType(), 0, ExecSz / 2);
15837+
CVariable* offLo = m_currShader->GetNewAlias(off, off->GetType(), bytes2, ExecSz / 2);
1582415838

15825-
if (isA64 && m_currShader->m_Platform->hasNoInt64Inst())
15839+
if (m_currShader->m_Platform->hasNoInt64Inst())
1582615840
{
1582715841
emitAddPair(newVarHi, AddrVar, offHi);
1582815842
emitAddPair(newVarLo, AddrVar, offLo);
1582915843
}
1583015844
else
1583115845
{
15832-
SIMDMode sm = lanesToSIMDMode(halfNElts);
15846+
SIMDMode sm = lanesToSIMDMode(ExecSz / 2);
1583315847
m_encoder->SetNoMask();
1583415848
m_encoder->SetUniformSIMDSize(sm);
1583515849
m_encoder->SetSrcRegion(0, 0, 1, 0);
@@ -15845,7 +15859,7 @@ CVariable* EmitPass::prepareAddressForUniform(
1584515859
m_encoder->Push();
1584615860
}
1584715861
}
15848-
else if (isA64 && m_currShader->m_Platform->hasNoInt64Inst() && pow2NElts > 1)
15862+
else if (isA64 && m_currShader->m_Platform->hasNoInt64Inst())
1584915863
{
1585015864
emitAddPair(newVar, AddrVar, off);
1585115865
}
@@ -15855,73 +15869,59 @@ CVariable* EmitPass::prepareAddressForUniform(
1585515869
m_encoder->SetUniformSIMDSize(simdmode);
1585615870
m_encoder->SetSrcRegion(0, 0, 1, 0);
1585715871
m_encoder->SetSrcRegion(1, 1, 1, 0);
15858-
if (pow2NElts > 1) {
15859-
m_encoder->Add(newVar, AddrVar, off);
15860-
}
15861-
else {
15862-
m_encoder->Copy(newVar, AddrVar);
15863-
}
15872+
m_encoder->Add(newVar, AddrVar, off);
1586415873
m_encoder->Push();
1586515874
}
1586615875
return newVar;
1586715876
}
1586815877

1586915878
CVariable* EmitPass::prepareDataForUniform(
15870-
CVariable* DataVar, uint32_t RequiredNElts, e_alignment Align)
15879+
CVariable* DataVar, uint32_t ExecSz, e_alignment Align)
1587115880
{
1587215881
uint32_t NElts = DataVar->GetNumberElement();
1587315882
uint32_t EltBytes = DataVar->GetElemSize();
15874-
uint32_t pow2NElts = (uint32_t)(uint32_t)PowerOf2Ceil(NElts);
15875-
uint32_t allocNElts = RequiredNElts > 0 ? RequiredNElts : pow2NElts;
15876-
IGC_ASSERT(allocNElts >= pow2NElts && NElts <= 8 && (EltBytes == 4 || EltBytes == 8));
15877-
if (NElts == allocNElts && !DataVar->IsImmediate() && DataVar->IsGRFAligned(Align))
15883+
IGC_ASSERT(ExecSz >= NElts && NElts <= 8 && (EltBytes == 4 || EltBytes == 8));
15884+
if (NElts == ExecSz && !DataVar->IsImmediate() && DataVar->IsGRFAligned(Align))
1587815885
{
1587915886
return DataVar;
1588015887
}
15881-
CVariable* newVar = m_currShader->GetNewVariable(allocNElts, DataVar->GetType(), Align, true, CName::NONE);
15888+
CVariable* newVar = m_currShader->GetNewVariable(ExecSz, DataVar->GetType(), Align, true, CName::NONE);
1588215889

15883-
// Need to return a var with pow2NElts elements
15884-
if (allocNElts > pow2NElts)
15890+
// Initialize every lane to DataVar's first element (so elements from NElts up to ExecSz-1 hold the first element).
15891+
bool needSplit = (newVar->GetSize() > (2 * (uint32_t)getGRFSize()));
15892+
if (needSplit)
1588515893
{
15886-
newVar = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, pow2NElts);
15887-
}
15894+
uint32_t esz = ExecSz / 2;
15895+
uint32_t bytes = esz * newVar->GetElemSize();
15896+
CVariable* newVarHi = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, esz);
15897+
CVariable* newVarLo = m_currShader->GetNewAlias(newVar, newVar->GetType(), bytes, esz);
1588815898

15889-
// Initialize to DataVar's first element (set Elts from NElts and up to the first element).
15890-
bool initWithElem0 = (pow2NElts > NElts);
15891-
bool needSplit = ((pow2NElts *newVar->GetElemSize()) > (2 * (uint32_t)getGRFSize()));
15892-
if (initWithElem0)
15893-
{
15894-
if (needSplit)
15895-
{
15896-
uint32_t esz = pow2NElts / 2;
15897-
uint32_t bytes = esz * newVar->GetElemSize();
15898-
CVariable* newVarHi = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, esz);
15899-
CVariable* newVarLo = m_currShader->GetNewAlias(newVar, newVar->GetType(), bytes, esz);
15899+
m_encoder->SetNoMask();
15900+
m_encoder->SetUniformSIMDSize(lanesToSIMDMode(esz));
15901+
m_encoder->SetSrcRegion(0, 0, 1, 0);
15902+
m_encoder->Copy(newVarHi, DataVar);
15903+
m_encoder->Push();
1590015904

15901-
m_encoder->SetNoMask();
15902-
m_encoder->SetUniformSIMDSize(lanesToSIMDMode(esz));
15903-
m_encoder->SetSrcRegion(0, 0, 1, 0);
15904-
m_encoder->Copy(newVarHi, DataVar);
15905-
m_encoder->Push();
15905+
m_encoder->SetNoMask();
15906+
m_encoder->SetUniformSIMDSize(lanesToSIMDMode(esz));
15907+
m_encoder->SetSrcRegion(0, 0, 1, 0);
15908+
m_encoder->Copy(newVarLo, DataVar);
15909+
m_encoder->Push();
15910+
}
15911+
else
15912+
{
1590615913

15907-
m_encoder->SetNoMask();
15908-
m_encoder->SetUniformSIMDSize(lanesToSIMDMode(esz));
15909-
m_encoder->SetSrcRegion(0, 0, 1, 0);
15910-
m_encoder->Copy(newVarLo, DataVar);
15911-
m_encoder->Push();
15912-
}
15913-
else
15914-
{
15915-
m_encoder->SetNoMask();
15916-
m_encoder->SetUniformSIMDSize(lanesToSIMDMode(pow2NElts));
15917-
m_encoder->SetSrcRegion(0, 0, 1, 0);
15918-
m_encoder->Copy(newVar, DataVar);
15919-
m_encoder->Push();
15920-
}
15914+
m_encoder->SetNoMask();
15915+
m_encoder->SetUniformSIMDSize(lanesToSIMDMode(ExecSz));
15916+
m_encoder->SetSrcRegion(0, 0, 1, 0);
15917+
m_encoder->Copy(newVar, DataVar);
15918+
m_encoder->Push();
1592115919
}
1592215920

15923-
if (!initWithElem0 || NElts != 1)
15921+
if (!DataVar->IsImmediate() && NElts > 1)
1592415922
{
15923+
// Copy values over, the elements from NElts to ExecSz-1 are set to the first element
15924+
// in the initialization above.
1592515925
emitVectorCopy(newVar, DataVar, NElts);
1592615926
}
1592715927
return newVar;

0 commit comments

Comments
 (0)