@@ -15752,84 +15752,98 @@ void EmitPass::emitVectorStore(StoreInst* inst, Value* offset, ConstantInt* immO
15752
15752
// In addition, if 64bit add is not supported, emitAddPair() will be used to
15753
15753
// use 32bit add/addc to emulate 64bit add.
15754
15754
//
15755
- // Note that argument 'AddrVar' in prepareAddressForUniform() is uniform, so is
15756
- // its return var. The argument 'DataVar' in prepareDataForUniform() is uniform,
15757
- // so is its return var.
15755
+ // Note that the argument 'AddrVar' of prepareAddressForUniform() is uniform, and so is its return var.
15756
+ // Likewise, the argument 'DataVar' of prepareDataForUniform() is uniform, and so is its return var.
15758
15757
//
15759
15758
CVariable* EmitPass::prepareAddressForUniform(
15760
- CVariable* AddrVar, uint32_t EltBytes, uint32_t NElts, uint32_t RequiredNElts , e_alignment Align)
15759
+ CVariable* AddrVar, uint32_t EltBytes, uint32_t NElts, uint32_t ExecSz , e_alignment Align)
15761
15760
{
15762
- // If RequiredNElts == 0, use next power of 2 of NElts as return var's num of elements.
15763
- // otherwise, user RequiredNElts as return var's num of elements.
15764
- uint32_t pow2NElts = (uint32_t)PowerOf2Ceil(NElts);
15765
- uint32_t allocNElts = (RequiredNElts > 0 ? RequiredNElts : pow2NElts);
15766
15761
IGC_ASSERT(NElts <= 8 && (EltBytes == 4 || EltBytes == 8));
15767
- IGC_ASSERT(allocNElts >= pow2NElts);
15768
- if (allocNElts == NElts && AddrVar->IsGRFAligned(Align))
15762
+ if (ExecSz == 1 && AddrVar->IsGRFAligned(Align))
15769
15763
{
15770
- // No need to create a new var.
15771
15764
return AddrVar;
15772
15765
}
15773
15766
bool isA64 = (AddrVar->GetElemSize() == 8);
15774
- SIMDMode simdmode = lanesToSIMDMode(pow2NElts );
15775
- CVariable* newVar = m_currShader->GetNewVariable(allocNElts , AddrVar->GetType(), Align, true, CName::NONE);
15767
+ SIMDMode simdmode = lanesToSIMDMode(ExecSz );
15768
+ CVariable* newVar = m_currShader->GetNewVariable(ExecSz , AddrVar->GetType(), Align, true, CName::NONE);
15776
15769
15777
15770
CVariable* off;
15778
15771
uint32_t incImm = (0x76543210 & maskTrailingOnes<uint32_t>(NElts * 4));
15779
- if ((pow2NElts <= 4 && EltBytes == 4) || (pow2NElts <= 2 && EltBytes == 8))
15772
+ if ((ExecSz <= 4 && EltBytes == 4) || (ExecSz <= 2 && EltBytes == 8))
15780
15773
{
15781
15774
// This case needs a single UV immediate
15782
15775
incImm = incImm << (EltBytes == 4 ? 2 : 3);
15783
15776
off = m_currShader->ImmToVariable(incImm, ISA_TYPE_UV);
15784
15777
}
15785
15778
else
15786
15779
{
15787
- // Need a temporary var to calculate offsets.
15788
- // (Note that the temp is non-uniform, otherwise emitAddrPair() won't work.)
15789
- off = m_currShader->GetNewVariable(pow2NElts, ISA_TYPE_UD, EALIGN_DWORD, false, CName::NONE);
15780
+ // Need a temporary var to calculate offsets
15781
+ off = m_currShader->GetNewVariable(ExecSz, ISA_TYPE_UD, EALIGN_DWORD, false, CName::NONE);
15790
15782
15791
- // Need a mov and mul
15792
- m_encoder->SetNoMask();
15793
- m_encoder->SetSimdSize(simdmode);
15794
- m_encoder->Copy(off, m_currShader->ImmToVariable(incImm, ISA_TYPE_UV));
15795
- m_encoder->Push();
15783
+ // actualES is the actual execsize used for computing offsets.
15784
+ uint32_t actualES = (uint32_t)PowerOf2Ceil(NElts);
15796
15785
15797
- m_encoder->SetNoMask();
15798
- m_encoder->SetSimdSize(simdmode);
15799
- m_encoder->SetSrcRegion(0, 1, 1, 0);
15800
- m_encoder->SetSrcRegion(1, 0, 1, 0);
15801
- m_encoder->Mul(off, off, m_currShader->ImmToVariable(EltBytes, ISA_TYPE_UW));
15802
- m_encoder->Push();
15803
- }
15786
+ // incImm is of UV type and can be used only with execsize <= 8. If ExecSz is greater
15787
+ // than the actual number of lanes (for example, the 4GRF alignment case), the upper lanes
15788
+ // beyond actualES need to be zeroed.
15789
+ if (ExecSz > actualES)
15790
+ {
15791
+ // Need to zero the upper lanes.
15792
+ m_encoder->SetNoMask();
15793
+ m_encoder->SetSimdSize(simdmode);
15794
+ m_encoder->Copy(off, m_currShader->ImmToVariable(0, ISA_TYPE_UD));
15795
+ m_encoder->Push();
15796
+ }
15804
15797
15805
- // Only need to initialize pow2NElts elements.
15806
- if (allocNElts > pow2NElts)
15807
- {
15808
- newVar = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, pow2NElts);
15798
+ SIMDMode sm = lanesToSIMDMode(actualES);
15799
+ if (incImm > 0 &&
15800
+ ((actualES <= 4 && EltBytes == 4) || (actualES <= 2 && EltBytes == 8)))
15801
+ {
15802
+ // This case needs a single UV immediate
15803
+ incImm = incImm << (EltBytes == 4 ? 2 : 3);
15804
+
15805
+ m_encoder->SetNoMask();
15806
+ m_encoder->SetSimdSize(sm);
15807
+ m_encoder->Copy(off, m_currShader->ImmToVariable(incImm, ISA_TYPE_UV));
15808
+ m_encoder->Push();
15809
+ }
15810
+ else if (incImm > 0)
15811
+ {
15812
+ // Need a mov and mul
15813
+ m_encoder->SetNoMask();
15814
+ m_encoder->SetSimdSize(sm);
15815
+ m_encoder->Copy(off, m_currShader->ImmToVariable(incImm, ISA_TYPE_UV));
15816
+ m_encoder->Push();
15817
+
15818
+ m_encoder->SetNoMask();
15819
+ m_encoder->SetSimdSize(sm);
15820
+ m_encoder->SetSrcRegion(0, 1, 1, 0);
15821
+ m_encoder->SetSrcRegion(1, 0, 1, 0);
15822
+ m_encoder->Mul(off, off, m_currShader->ImmToVariable(EltBytes, ISA_TYPE_UW));
15823
+ m_encoder->Push();
15824
+ }
15809
15825
}
15810
15826
15811
- // Currently, it's impossible to split because of NElts <= 8. In the future, NElts
15812
- // could be 32 and we could need to split.
15813
- bool needSplit = ((pow2NElts * newVar->GetElemSize()) > (2 * (uint32_t)getGRFSize()));
15827
+ // May need splitting for A64
15828
+ bool needSplit = (newVar->GetSize() > (2 * (uint32_t)getGRFSize()));
15814
15829
if (needSplit)
15815
15830
{
15816
15831
IGC_ASSERT(!off->IsImmediate());
15817
- uint32_t halfNElts = pow2NElts / 2;
15818
- uint32_t bytes1 = halfNElts * newVar->GetElemSize();
15819
- uint32_t bytes2 = halfNElts * off->GetElemSize();
15820
- CVariable* newVarHi = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, halfNElts);
15821
- CVariable* newVarLo = m_currShader->GetNewAlias(newVar, newVar->GetType(), bytes1, halfNElts);
15822
- CVariable* offHi = m_currShader->GetNewAlias(off, off->GetType(), 0, halfNElts);
15823
- CVariable* offLo = m_currShader->GetNewAlias(off, off->GetType(), bytes2, halfNElts);
15832
+ uint32_t bytes1 = (ExecSz / 2) * newVar->GetElemSize();
15833
+ uint32_t bytes2 = (ExecSz / 2) * off->GetElemSize();
15834
+ CVariable* newVarHi = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, ExecSz / 2);
15835
+ CVariable* newVarLo = m_currShader->GetNewAlias(newVar, newVar->GetType(), bytes1, ExecSz / 2);
15836
+ CVariable* offHi = m_currShader->GetNewAlias(off, off->GetType(), 0, ExecSz / 2);
15837
+ CVariable* offLo = m_currShader->GetNewAlias(off, off->GetType(), bytes2, ExecSz / 2);
15824
15838
15825
- if (isA64 && m_currShader->m_Platform->hasNoInt64Inst())
15839
+ if (m_currShader->m_Platform->hasNoInt64Inst())
15826
15840
{
15827
15841
emitAddPair(newVarHi, AddrVar, offHi);
15828
15842
emitAddPair(newVarLo, AddrVar, offLo);
15829
15843
}
15830
15844
else
15831
15845
{
15832
- SIMDMode sm = lanesToSIMDMode(halfNElts );
15846
+ SIMDMode sm = lanesToSIMDMode(ExecSz / 2 );
15833
15847
m_encoder->SetNoMask();
15834
15848
m_encoder->SetUniformSIMDSize(sm);
15835
15849
m_encoder->SetSrcRegion(0, 0, 1, 0);
@@ -15845,7 +15859,7 @@ CVariable* EmitPass::prepareAddressForUniform(
15845
15859
m_encoder->Push();
15846
15860
}
15847
15861
}
15848
- else if (isA64 && m_currShader->m_Platform->hasNoInt64Inst() && pow2NElts > 1 )
15862
+ else if (isA64 && m_currShader->m_Platform->hasNoInt64Inst())
15849
15863
{
15850
15864
emitAddPair(newVar, AddrVar, off);
15851
15865
}
@@ -15855,73 +15869,59 @@ CVariable* EmitPass::prepareAddressForUniform(
15855
15869
m_encoder->SetUniformSIMDSize(simdmode);
15856
15870
m_encoder->SetSrcRegion(0, 0, 1, 0);
15857
15871
m_encoder->SetSrcRegion(1, 1, 1, 0);
15858
- if (pow2NElts > 1) {
15859
- m_encoder->Add(newVar, AddrVar, off);
15860
- }
15861
- else {
15862
- m_encoder->Copy(newVar, AddrVar);
15863
- }
15872
+ m_encoder->Add(newVar, AddrVar, off);
15864
15873
m_encoder->Push();
15865
15874
}
15866
15875
return newVar;
15867
15876
}
15868
15877
15869
15878
CVariable* EmitPass::prepareDataForUniform(
15870
- CVariable* DataVar, uint32_t RequiredNElts , e_alignment Align)
15879
+ CVariable* DataVar, uint32_t ExecSz , e_alignment Align)
15871
15880
{
15872
15881
uint32_t NElts = DataVar->GetNumberElement();
15873
15882
uint32_t EltBytes = DataVar->GetElemSize();
15874
- uint32_t pow2NElts = (uint32_t)(uint32_t)PowerOf2Ceil(NElts);
15875
- uint32_t allocNElts = RequiredNElts > 0 ? RequiredNElts : pow2NElts;
15876
- IGC_ASSERT(allocNElts >= pow2NElts && NElts <= 8 && (EltBytes == 4 || EltBytes == 8));
15877
- if (NElts == allocNElts && !DataVar->IsImmediate() && DataVar->IsGRFAligned(Align))
15883
+ IGC_ASSERT(ExecSz >= NElts && NElts <= 8 && (EltBytes == 4 || EltBytes == 8));
15884
+ if (NElts == ExecSz && !DataVar->IsImmediate() && DataVar->IsGRFAligned(Align))
15878
15885
{
15879
15886
return DataVar;
15880
15887
}
15881
- CVariable* newVar = m_currShader->GetNewVariable(allocNElts , DataVar->GetType(), Align, true, CName::NONE);
15888
+ CVariable* newVar = m_currShader->GetNewVariable(ExecSz , DataVar->GetType(), Align, true, CName::NONE);
15882
15889
15883
- // Need to return a var with pow2NElts elements
15884
- if (allocNElts > pow2NElts)
15890
+ // Initialize newVar with DataVar's first element (this sets the elements from NElts up to ExecSz-1 to the first element).
15891
+ bool needSplit = (newVar->GetSize() > (2 * (uint32_t)getGRFSize()));
15892
+ if (needSplit)
15885
15893
{
15886
- newVar = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, pow2NElts);
15887
- }
15894
+ uint32_t esz = ExecSz / 2;
15895
+ uint32_t bytes = esz * newVar->GetElemSize();
15896
+ CVariable* newVarHi = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, esz);
15897
+ CVariable* newVarLo = m_currShader->GetNewAlias(newVar, newVar->GetType(), bytes, esz);
15888
15898
15889
- // Initialize to DataVar's first element (set Elts from NElts and up to the first element).
15890
- bool initWithElem0 = (pow2NElts > NElts);
15891
- bool needSplit = ((pow2NElts *newVar->GetElemSize()) > (2 * (uint32_t)getGRFSize()));
15892
- if (initWithElem0)
15893
- {
15894
- if (needSplit)
15895
- {
15896
- uint32_t esz = pow2NElts / 2;
15897
- uint32_t bytes = esz * newVar->GetElemSize();
15898
- CVariable* newVarHi = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, esz);
15899
- CVariable* newVarLo = m_currShader->GetNewAlias(newVar, newVar->GetType(), bytes, esz);
15899
+ m_encoder->SetNoMask();
15900
+ m_encoder->SetUniformSIMDSize(lanesToSIMDMode(esz));
15901
+ m_encoder->SetSrcRegion(0, 0, 1, 0);
15902
+ m_encoder->Copy(newVarHi, DataVar);
15903
+ m_encoder->Push();
15900
15904
15901
- m_encoder->SetNoMask();
15902
- m_encoder->SetUniformSIMDSize(lanesToSIMDMode(esz));
15903
- m_encoder->SetSrcRegion(0, 0, 1, 0);
15904
- m_encoder->Copy(newVarHi, DataVar);
15905
- m_encoder->Push();
15905
+ m_encoder->SetNoMask();
15906
+ m_encoder->SetUniformSIMDSize(lanesToSIMDMode(esz));
15907
+ m_encoder->SetSrcRegion(0, 0, 1, 0);
15908
+ m_encoder->Copy(newVarLo, DataVar);
15909
+ m_encoder->Push();
15910
+ }
15911
+ else
15912
+ {
15906
15913
15907
- m_encoder->SetNoMask();
15908
- m_encoder->SetUniformSIMDSize(lanesToSIMDMode(esz));
15909
- m_encoder->SetSrcRegion(0, 0, 1, 0);
15910
- m_encoder->Copy(newVarLo, DataVar);
15911
- m_encoder->Push();
15912
- }
15913
- else
15914
- {
15915
- m_encoder->SetNoMask();
15916
- m_encoder->SetUniformSIMDSize(lanesToSIMDMode(pow2NElts));
15917
- m_encoder->SetSrcRegion(0, 0, 1, 0);
15918
- m_encoder->Copy(newVar, DataVar);
15919
- m_encoder->Push();
15920
- }
15914
+ m_encoder->SetNoMask();
15915
+ m_encoder->SetUniformSIMDSize(lanesToSIMDMode(ExecSz));
15916
+ m_encoder->SetSrcRegion(0, 0, 1, 0);
15917
+ m_encoder->Copy(newVar, DataVar);
15918
+ m_encoder->Push();
15921
15919
}
15922
15920
15923
- if (!initWithElem0 || NElts != 1)
15921
+ if (!DataVar->IsImmediate() && NElts > 1)
15924
15922
{
15923
+ // Copy the values over. The elements from NElts to ExecSz-1 were already set to the first element
15924
+ // by the initialization above.
15925
15925
emitVectorCopy(newVar, DataVar, NElts);
15926
15926
}
15927
15927
return newVar;
0 commit comments