@@ -15752,98 +15752,84 @@ void EmitPass::emitVectorStore(StoreInst* inst, Value* offset, ConstantInt* immO
15752
15752
// In addition, if 64bit add is not supported, emitAddPair() will be used to
15753
15753
// use 32bit add/addc to emulate 64bit add.
15754
15754
//
15755
- // Note that argument 'AddrVar' in prepareAddressForUniform() is uniform, so is its return var.
15756
- // The argument 'DataVar' in prepareDataForUniform() is uniform, so is its return var.
15755
+ // Note that argument 'AddrVar' in prepareAddressForUniform() is uniform, so is
15756
+ // its return var. The argument 'DataVar' in prepareDataForUniform() is uniform,
15757
+ // so is its return var.
15757
15758
//
15758
15759
CVariable* EmitPass::prepareAddressForUniform(
15759
- CVariable* AddrVar, uint32_t EltBytes, uint32_t NElts, uint32_t ExecSz , e_alignment Align)
15760
+ CVariable* AddrVar, uint32_t EltBytes, uint32_t NElts, uint32_t RequiredNElts , e_alignment Align)
15760
15761
{
15762
+ // If RequiredNElts == 0, use next power of 2 of NElts as return var's num of elements.
15763
+ // Otherwise, use RequiredNElts as return var's num of elements.
15764
+ uint32_t pow2NElts = (uint32_t)PowerOf2Ceil(NElts);
15765
+ uint32_t allocNElts = (RequiredNElts > 0 ? RequiredNElts : pow2NElts);
15761
15766
IGC_ASSERT(NElts <= 8 && (EltBytes == 4 || EltBytes == 8));
15762
- if (ExecSz == 1 && AddrVar->IsGRFAligned(Align))
15767
+ IGC_ASSERT(allocNElts >= pow2NElts);
15768
+ if (allocNElts == NElts && NElts == 1 && AddrVar->IsGRFAligned(Align))
15763
15769
{
15770
+ // No need to create a new var.
15764
15771
return AddrVar;
15765
15772
}
15766
15773
bool isA64 = (AddrVar->GetElemSize() == 8);
15767
- SIMDMode simdmode = lanesToSIMDMode(ExecSz );
15768
- CVariable* newVar = m_currShader->GetNewVariable(ExecSz , AddrVar->GetType(), Align, true, CName::NONE);
15774
+ SIMDMode simdmode = lanesToSIMDMode(pow2NElts );
15775
+ CVariable* newVar = m_currShader->GetNewVariable(allocNElts , AddrVar->GetType(), Align, true, CName::NONE);
15769
15776
15770
15777
CVariable* off;
15771
15778
uint32_t incImm = (0x76543210 & maskTrailingOnes<uint32_t>(NElts * 4));
15772
- if ((ExecSz <= 4 && EltBytes == 4) || (ExecSz <= 2 && EltBytes == 8))
15779
+ if ((pow2NElts <= 4 && EltBytes == 4) || (pow2NElts <= 2 && EltBytes == 8))
15773
15780
{
15774
15781
// This case needs a single UV immediate
15775
15782
incImm = incImm << (EltBytes == 4 ? 2 : 3);
15776
15783
off = m_currShader->ImmToVariable(incImm, ISA_TYPE_UV);
15777
15784
}
15778
15785
else
15779
15786
{
15780
- // Need a temporary var to calculate offsets
15781
- off = m_currShader->GetNewVariable(ExecSz, ISA_TYPE_UD, EALIGN_DWORD, false, CName::NONE);
15787
+ // Need a temporary var to calculate offsets.
15788
+ // (Note that the temp is non-uniform, otherwise emitAddPair() won't work.)
15789
+ off = m_currShader->GetNewVariable(pow2NElts, ISA_TYPE_UD, EALIGN_DWORD, false, CName::NONE);
15782
15790
15783
- // actualES is the actual execsize used for computing offsets.
15784
- uint32_t actualES = (uint32_t)PowerOf2Ceil(NElts);
15785
-
15786
- // incImm is UV type and can be used in execsize <= 8 only. If ExecSz is greater
15787
- // than the actual number of lanes (for example, 4GRF alignment case), the upper lanes
15788
- // beyond need to be zero'ed.
15789
- if (ExecSz > actualES)
15790
- {
15791
- // Need to zero the upper lanes.
15792
- m_encoder->SetNoMask();
15793
- m_encoder->SetSimdSize(simdmode);
15794
- m_encoder->Copy(off, m_currShader->ImmToVariable(0, ISA_TYPE_UD));
15795
- m_encoder->Push();
15796
- }
15797
-
15798
- SIMDMode sm = lanesToSIMDMode(actualES);
15799
- if (incImm > 0 &&
15800
- ((actualES <= 4 && EltBytes == 4) || (actualES <= 2 && EltBytes == 8)))
15801
- {
15802
- // This case needs a single UV immediate
15803
- incImm = incImm << (EltBytes == 4 ? 2 : 3);
15791
+ // Need a mov and mul
15792
+ m_encoder->SetNoMask();
15793
+ m_encoder->SetSimdSize(simdmode);
15794
+ m_encoder->Copy(off, m_currShader->ImmToVariable(incImm, ISA_TYPE_UV));
15795
+ m_encoder->Push();
15804
15796
15805
- m_encoder->SetNoMask();
15806
- m_encoder->SetSimdSize(sm);
15807
- m_encoder->Copy(off, m_currShader->ImmToVariable(incImm, ISA_TYPE_UV));
15808
- m_encoder->Push();
15809
- }
15810
- else if (incImm > 0)
15811
- {
15812
- // Need a mov and mul
15813
- m_encoder->SetNoMask();
15814
- m_encoder->SetSimdSize(sm);
15815
- m_encoder->Copy(off, m_currShader->ImmToVariable(incImm, ISA_TYPE_UV));
15816
- m_encoder->Push();
15797
+ m_encoder->SetNoMask();
15798
+ m_encoder->SetSimdSize(simdmode);
15799
+ m_encoder->SetSrcRegion(0, 1, 1, 0);
15800
+ m_encoder->SetSrcRegion(1, 0, 1, 0);
15801
+ m_encoder->Mul(off, off, m_currShader->ImmToVariable(EltBytes, ISA_TYPE_UW));
15802
+ m_encoder->Push();
15803
+ }
15817
15804
15818
- m_encoder->SetNoMask();
15819
- m_encoder->SetSimdSize(sm);
15820
- m_encoder->SetSrcRegion(0, 1, 1, 0);
15821
- m_encoder->SetSrcRegion(1, 0, 1, 0);
15822
- m_encoder->Mul(off, off, m_currShader->ImmToVariable(EltBytes, ISA_TYPE_UW));
15823
- m_encoder->Push();
15824
- }
15805
+ // Only need to initialize pow2NElts elements.
15806
+ if (allocNElts > pow2NElts)
15807
+ {
15808
+ newVar = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, pow2NElts);
15825
15809
}
15826
15810
15827
- // May need splitting for A64
15828
- bool needSplit = (newVar->GetSize() > (2 * (uint32_t)getGRFSize()));
15811
+ // Currently, it's impossible to split because of NElts <= 8. In the future, NElts
15812
+ // could be 32 and we could need to split.
15813
+ bool needSplit = ((pow2NElts * newVar->GetElemSize()) > (2 * (uint32_t)getGRFSize()));
15829
15814
if (needSplit)
15830
15815
{
15831
15816
IGC_ASSERT(!off->IsImmediate());
15832
- uint32_t bytes1 = (ExecSz / 2) * newVar->GetElemSize();
15833
- uint32_t bytes2 = (ExecSz / 2) * off->GetElemSize();
15834
- CVariable* newVarHi = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, ExecSz / 2);
15835
- CVariable* newVarLo = m_currShader->GetNewAlias(newVar, newVar->GetType(), bytes1, ExecSz / 2);
15836
- CVariable* offHi = m_currShader->GetNewAlias(off, off->GetType(), 0, ExecSz / 2);
15837
- CVariable* offLo = m_currShader->GetNewAlias(off, off->GetType(), bytes2, ExecSz / 2);
15817
+ uint32_t halfNElts = pow2NElts / 2;
15818
+ uint32_t bytes1 = halfNElts * newVar->GetElemSize();
15819
+ uint32_t bytes2 = halfNElts * off->GetElemSize();
15820
+ CVariable* newVarHi = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, halfNElts);
15821
+ CVariable* newVarLo = m_currShader->GetNewAlias(newVar, newVar->GetType(), bytes1, halfNElts);
15822
+ CVariable* offHi = m_currShader->GetNewAlias(off, off->GetType(), 0, halfNElts);
15823
+ CVariable* offLo = m_currShader->GetNewAlias(off, off->GetType(), bytes2, halfNElts);
15838
15824
15839
- if (m_currShader->m_Platform->hasNoInt64Inst())
15825
+ if (isA64 && m_currShader->m_Platform->hasNoInt64Inst())
15840
15826
{
15841
15827
emitAddPair(newVarHi, AddrVar, offHi);
15842
15828
emitAddPair(newVarLo, AddrVar, offLo);
15843
15829
}
15844
15830
else
15845
15831
{
15846
- SIMDMode sm = lanesToSIMDMode(ExecSz / 2 );
15832
+ SIMDMode sm = lanesToSIMDMode(halfNElts );
15847
15833
m_encoder->SetNoMask();
15848
15834
m_encoder->SetUniformSIMDSize(sm);
15849
15835
m_encoder->SetSrcRegion(0, 0, 1, 0);
@@ -15859,7 +15845,7 @@ CVariable* EmitPass::prepareAddressForUniform(
15859
15845
m_encoder->Push();
15860
15846
}
15861
15847
}
15862
- else if (isA64 && m_currShader->m_Platform->hasNoInt64Inst())
15848
+ else if (isA64 && m_currShader->m_Platform->hasNoInt64Inst() && pow2NElts > 1 )
15863
15849
{
15864
15850
emitAddPair(newVar, AddrVar, off);
15865
15851
}
@@ -15869,59 +15855,73 @@ CVariable* EmitPass::prepareAddressForUniform(
15869
15855
m_encoder->SetUniformSIMDSize(simdmode);
15870
15856
m_encoder->SetSrcRegion(0, 0, 1, 0);
15871
15857
m_encoder->SetSrcRegion(1, 1, 1, 0);
15872
- m_encoder->Add(newVar, AddrVar, off);
15858
+ if (pow2NElts > 1) {
15859
+ m_encoder->Add(newVar, AddrVar, off);
15860
+ }
15861
+ else {
15862
+ m_encoder->Copy(newVar, AddrVar);
15863
+ }
15873
15864
m_encoder->Push();
15874
15865
}
15875
15866
return newVar;
15876
15867
}
15877
15868
15878
15869
CVariable* EmitPass::prepareDataForUniform(
15879
- CVariable* DataVar, uint32_t ExecSz , e_alignment Align)
15870
+ CVariable* DataVar, uint32_t RequiredNElts , e_alignment Align)
15880
15871
{
15881
15872
uint32_t NElts = DataVar->GetNumberElement();
15882
15873
uint32_t EltBytes = DataVar->GetElemSize();
15883
- IGC_ASSERT(ExecSz >= NElts && NElts <= 8 && (EltBytes == 4 || EltBytes == 8));
15884
- if (NElts == ExecSz && !DataVar->IsImmediate() && DataVar->IsGRFAligned(Align))
15874
+ uint32_t pow2NElts = (uint32_t)(uint32_t)PowerOf2Ceil(NElts);
15875
+ uint32_t allocNElts = RequiredNElts > 0 ? RequiredNElts : pow2NElts;
15876
+ IGC_ASSERT(allocNElts >= pow2NElts && NElts <= 8 && (EltBytes == 4 || EltBytes == 8));
15877
+ if (NElts == allocNElts && !DataVar->IsImmediate() && DataVar->IsGRFAligned(Align))
15885
15878
{
15886
15879
return DataVar;
15887
15880
}
15888
- CVariable* newVar = m_currShader->GetNewVariable(ExecSz , DataVar->GetType(), Align, true, CName::NONE);
15881
+ CVariable* newVar = m_currShader->GetNewVariable(allocNElts , DataVar->GetType(), Align, true, CName::NONE);
15889
15882
15890
- // Initialize to DataVar's first element (set Elts from NElts and up to the first element).
15891
- bool needSplit = (newVar->GetSize() > (2 * (uint32_t)getGRFSize()));
15892
- if (needSplit)
15883
+ // Need to return a var with pow2NElts elements
15884
+ if (allocNElts > pow2NElts)
15893
15885
{
15894
- uint32_t esz = ExecSz / 2;
15895
- uint32_t bytes = esz * newVar->GetElemSize();
15896
- CVariable* newVarHi = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, esz);
15897
- CVariable* newVarLo = m_currShader->GetNewAlias(newVar, newVar->GetType(), bytes, esz);
15898
-
15899
- m_encoder->SetNoMask();
15900
- m_encoder->SetUniformSIMDSize(lanesToSIMDMode(esz));
15901
- m_encoder->SetSrcRegion(0, 0, 1, 0);
15902
- m_encoder->Copy(newVarHi, DataVar);
15903
- m_encoder->Push();
15904
-
15905
- m_encoder->SetNoMask();
15906
- m_encoder->SetUniformSIMDSize(lanesToSIMDMode(esz));
15907
- m_encoder->SetSrcRegion(0, 0, 1, 0);
15908
- m_encoder->Copy(newVarLo, DataVar);
15909
- m_encoder->Push();
15886
+ newVar = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, pow2NElts);
15910
15887
}
15911
- else
15888
+
15889
+ // Initialize every element to DataVar's first element, so that elements from NElts and up hold the first element.
15890
+ bool initWithElem0 = (pow2NElts > NElts);
15891
+ bool needSplit = ((pow2NElts *newVar->GetElemSize()) > (2 * (uint32_t)getGRFSize()));
15892
+ if (initWithElem0)
15912
15893
{
15894
+ if (needSplit)
15895
+ {
15896
+ uint32_t esz = pow2NElts / 2;
15897
+ uint32_t bytes = esz * newVar->GetElemSize();
15898
+ CVariable* newVarHi = m_currShader->GetNewAlias(newVar, newVar->GetType(), 0, esz);
15899
+ CVariable* newVarLo = m_currShader->GetNewAlias(newVar, newVar->GetType(), bytes, esz);
15913
15900
15914
- m_encoder->SetNoMask();
15915
- m_encoder->SetUniformSIMDSize(lanesToSIMDMode(ExecSz));
15916
- m_encoder->SetSrcRegion(0, 0, 1, 0);
15917
- m_encoder->Copy(newVar, DataVar);
15918
- m_encoder->Push();
15901
+ m_encoder->SetNoMask();
15902
+ m_encoder->SetUniformSIMDSize(lanesToSIMDMode(esz));
15903
+ m_encoder->SetSrcRegion(0, 0, 1, 0);
15904
+ m_encoder->Copy(newVarHi, DataVar);
15905
+ m_encoder->Push();
15906
+
15907
+ m_encoder->SetNoMask();
15908
+ m_encoder->SetUniformSIMDSize(lanesToSIMDMode(esz));
15909
+ m_encoder->SetSrcRegion(0, 0, 1, 0);
15910
+ m_encoder->Copy(newVarLo, DataVar);
15911
+ m_encoder->Push();
15912
+ }
15913
+ else
15914
+ {
15915
+ m_encoder->SetNoMask();
15916
+ m_encoder->SetUniformSIMDSize(lanesToSIMDMode(pow2NElts));
15917
+ m_encoder->SetSrcRegion(0, 0, 1, 0);
15918
+ m_encoder->Copy(newVar, DataVar);
15919
+ m_encoder->Push();
15920
+ }
15919
15921
}
15920
15922
15921
- if (!DataVar->IsImmediate() && NElts > 1)
15923
+ if (!initWithElem0 || NElts != 1)
15922
15924
{
15923
- // Copy values over, the elements from NElts to ExecSz-1 are set to the first element
15924
- // in the initialization above.
15925
15925
emitVectorCopy(newVar, DataVar, NElts);
15926
15926
}
15927
15927
return newVar;
0 commit comments