@@ -4772,35 +4772,24 @@ void EmitPass::emitSimdShuffleDown(llvm::Instruction* inst)
4772
4772
m_encoder->Push();
4773
4773
4774
4774
// Emit mov with direct addressing when delta is a compile-time constant.
4775
- const bool useDirectAddressng = pDelta->IsImmediate()
4775
+ const bool useDirectAddressing = pDelta->IsImmediate()
4776
4776
&& m_currShader->m_Platform->GetPlatformFamily() != IGFX_GEN8_CORE;
4777
- if (useDirectAddressng && m_SimdMode == SIMDMode::SIMD8)
4778
- {
4779
- const uint dataIndex = pDelta->GetImmediateValue() % nbElements;
4780
4777
4781
- m_encoder->SetSrcRegion(0, 1, 1, 0);
4782
- m_encoder->SetSrcSubReg(0, dataIndex);
4783
- m_encoder->Copy(m_destination, pCombinedData);
4784
- m_encoder->Push();
4785
- return;
4786
- }
4787
- if (useDirectAddressng && m_SimdMode == SIMDMode::SIMD16)
4778
+ auto nativeExecSize = numLanes(m_currShader->m_Platform->getMinDispatchMode());
4779
+ auto width = numLanes(m_SimdMode);
4780
+ if (useDirectAddressing && nativeExecSize * 2 >= width)
4788
4781
{
4789
4782
const uint dataIndex = pDelta->GetImmediateValue() % nbElements;
4790
-
4791
- m_encoder->SetSimdSize(SIMDMode::SIMD8);
4792
- m_encoder->SetSrcRegion(0, 1, 1, 0);
4793
- m_encoder->SetSrcSubReg(0, dataIndex);
4794
- m_encoder->Copy(m_destination, pCombinedData);
4795
- m_encoder->Push();
4796
-
4797
- m_encoder->SetSimdSize(SIMDMode::SIMD8);
4798
- m_encoder->SetSrcRegion(0, 1, 1, 0);
4799
- m_encoder->SetSrcSubReg(0, dataIndex + 8);
4800
- m_encoder->SetDstSubReg(8);
4801
- m_encoder->Copy(m_destination, pCombinedData);
4802
- m_encoder->Push();
4803
-
4783
+ int tripCount = width <= nativeExecSize ? 1 : 2;
4784
+ for (int i = 0; i < tripCount; ++i)
4785
+ {
4786
+ m_encoder->SetSimdSize(m_currShader->m_Platform->getMinDispatchMode());
4787
+ m_encoder->SetSrcRegion(0, 1, 1, 0);
4788
+ m_encoder->SetSrcSubReg(0, dataIndex + nativeExecSize * i);
4789
+ m_encoder->SetDstSubReg(nativeExecSize * i);
4790
+ m_encoder->Copy(m_destination, pCombinedData);
4791
+ m_encoder->Push();
4792
+ }
4804
4793
return;
4805
4794
}
4806
4795
@@ -4844,20 +4833,12 @@ void EmitPass::emitSimdShuffleDown(llvm::Instruction* inst)
4844
4833
m_encoder->Add(pLaneId, pLaneId, imm1);
4845
4834
m_encoder->Push();
4846
4835
4847
- m_encoder->SetDstSubVar(1 );
4848
- m_encoder->SetSimdSize(SIMDMode::SIMD8 );
4836
+ m_encoder->SetSimdSize(SIMDMode::SIMD16 );
4837
+ m_encoder->SetDstSubReg(16 );
4849
4838
m_encoder->SetNoMask();
4850
4839
imm1 = m_currShader->ImmToVariable(0x10, ISA_TYPE_UD);
4851
4840
m_encoder->Add(pLaneId, pLaneId, imm1);
4852
4841
m_encoder->Push();
4853
-
4854
- m_encoder->SetDstSubVar(1);
4855
- m_encoder->SetDstSubReg(8);
4856
- m_encoder->SetSimdSize(SIMDMode::SIMD8);
4857
- m_encoder->SetNoMask();
4858
- imm1 = m_currShader->ImmToVariable(0x18, ISA_TYPE_UD);
4859
- m_encoder->Add(pLaneId, pLaneId, imm1);
4860
- m_encoder->Push();
4861
4842
}
4862
4843
4863
4844
CVariable* pShuffleIdx = m_currShader->GetNewVariable(
0 commit comments