Skip to content

Commit 0e51d79

Browse files
bcheng0127sys_zuul
authored and committed
SIMD media block read/write functionality extension.
Change-Id: I7bd06de93a7f49ed4072c18add4f9352b2309514
1 parent b7ca084 commit 0e51d79

File tree

1 file changed

+97
-9
lines changed

1 file changed

+97
-9
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 97 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5509,7 +5509,9 @@ void EmitPass::emitSimdMediaBlockRead(llvm::Instruction* inst)
55095509
else
55105510
{
55115511
m_encoder->Add(pTempVar0, pTempVar0, m_currShader->ImmToVariable(blockWidth, ISA_TYPE_UD));
5512-
dstSubReg = dstSubReg + scale * blockHeight;
5512+
uint32_t subOffset = maxWidth * scale * blockHeight;
5513+
subOffset /= getGRFSize();
5514+
dstSubReg = dstSubReg + subOffset;
55135515
}
55145516
m_encoder->Push();
55155517

@@ -5548,6 +5550,8 @@ void EmitPass::emitSimdMediaBlockRead(llvm::Instruction* inst)
55485550
{
55495551
dstSubReg = 0;
55505552

5553+
uint32_t srcSubReg = 0;
5554+
55515555
// Join data obtained from pass 0 and pass 1 to make
55525556
// xOffset contiguous from 0 to 63 bytes (making SIMD 16)
55535557
// mov (8) r20.0<1>:ud r28.0<8;8,1>:ud {Align1, Q1}
@@ -5559,16 +5563,63 @@ void EmitPass::emitSimdMediaBlockRead(llvm::Instruction* inst)
55595563
// mov (8) r26.0<1>:ud r31.0<8;8,1>:ud {Align1, Q1}
55605564
// mov (8) r27.0<1>:ud r35.0<8;8,1>:ud {Align1, Q2}
55615565

5562-
for (uint32_t i = 0; i < blockHeight; i++)
5563-
{
5564-
for (uint32_t pass = 0; pass < numPasses; pass++)
5566+
5567+
//For 64 bytes GRF, 32 bytes will be extended to
5568+
//.....
5569+
// A0....A1
5570+
// B0....B1
5571+
// C0....C1
5572+
// D0....D1
5573+
// E0....E1
5574+
// F0....F1
5575+
// G0....G1
5576+
// H0....H1
5577+
//
5578+
// r20....A0....B0........r30....A1....B1
5579+
// r21....C0....D0........r31....C1....D1
5580+
// r22....E0....F0........r32....E1....F1
5581+
// r23....G0....H0........r33....G1....H1
5582+
//
5583+
// r40<--r20,....r30
5584+
// r41<--r20.8,r30.8
5585+
// r42<--r21,....r31
5586+
// r43<--r21.8,r31.8
5587+
// r44<--r22,....r32
5588+
// r45<--r22.8,r32.8
5589+
// r46<--r23,....r33
5590+
// r47<--r23.8,r33.8
5591+
//
5592+
//mov (8) r40.0<1>:ud r20.0<8;8,1>:ud {Align1, Q1}
5593+
//mov (8) r40.8<1>:ud r30.0<8;8,1>:ud {Align1, Q1}
5594+
//mov (8) r41<1>:ud r20.8<8;8,1>:ud {Align1, Q1}
5595+
//mov (8) r41.8<1>:ud r30.8<8;8,1>:ud {Align1, Q1}
5596+
5597+
for (uint32_t i = 0; i < blockHeight; i++) //Height
5598+
{
5599+
uint32_t dstSubRegOffset = 0;
5600+
uint32_t srcSubRegOffset = 0;
5601+
5602+
for (uint32_t pass = 0; pass < numPasses; pass++) //Width
55655603
{
55665604
SIMDMode mode = typeSizeInBytes == 8 && blockWidth != 64 ? SIMDMode::SIMD4 : SIMDMode::SIMD8;
55675605
m_encoder->SetSimdSize(mode);
55685606
m_encoder->SetNoMask();
5569-
m_encoder->SetSrcSubVar(0, scale * (i + (blockHeight * pass)));
5607+
5608+
srcSubReg = (scale * (i + (blockHeight * pass)) * maxWidth) / getGRFSize();
5609+
srcSubRegOffset = (i * maxWidth) % getGRFSize();
5610+
5611+
m_encoder->SetSrcSubVar(0, srcSubReg);
5612+
m_encoder->SetSrcSubReg(0, srcSubRegOffset / typeSizeInBytes);
5613+
55705614
m_encoder->SetDstSubVar(dstSubReg);
5571-
dstSubReg += scale;
5615+
m_encoder->SetDstSubReg(dstSubRegOffset / typeSizeInBytes);
5616+
5617+
dstSubRegOffset = ((pass + 1) * maxWidth) % getGRFSize();
5618+
if (dstSubRegOffset == 0)
5619+
{
5620+
dstSubReg += scale;
5621+
}
5622+
55725623
m_encoder->Copy(m_destination, pTempDest);
55735624
m_encoder->Push();
55745625
}
@@ -5641,8 +5692,10 @@ void EmitPass::emitSimdMediaBlockWrite(llvm::Instruction* inst)
56415692
int scale = (blockWidth == 64) ? 2 : 1;
56425693
for (pass = 0; pass < numPasses; pass++)
56435694
{
5644-
uint32_t srcSubVar = pass * scale;
5695+
uint32_t srcSubVar = pass * scale * maxWidth / getGRFSize();
56455696
uint32_t dstSubVar = 0;
5697+
uint32_t srcSubRegOffset = (pass * maxWidth) % getGRFSize();
5698+
uint32_t dstSubRegOffset = 0;
56465699

56475700
CVariable* tempdst = nullptr;
56485701
tempdst = m_currShader->GetNewVariable(
@@ -5655,17 +5708,52 @@ void EmitPass::emitSimdMediaBlockWrite(llvm::Instruction* inst)
56555708
// mov (8) r23.0<1>:d r16.0<8;8,1>:d {Align1, Q1, Compacted}
56565709
// mov (8) r24.0<1>:d r18.0<8;8,1>:d {Align1, Q1, Compacted}
56575710
// mov (8) r25.0<1>:d r20.0<8;8,1>:d {Align1, Q1, Compacted}
5711+
5712+
//FOR 64 bytes GRF:
5713+
// A0....A1....A2....A3........r60....r60.8....r61....r61.8
5714+
// B0....B1....B2....B3........r62....r62.8....r63....r63.8
5715+
// C0....C1....C2....C3........r64....r64.8....r65....r65.8
5716+
// D0....D1....D2....D3........r66....r66.8....r67....r67.8
5717+
// E0....E1....E2....E3........r68....r68.8....r69....r69.8
5718+
// F0....F1....F2....F3........r70....r70.8....r71....r71.8
5719+
// G0....G1....G2....G3........r72....r72.8....r73....r73.8
5720+
// H0....H1....H2....H3........r74....r74.8....r75....r75.8
5721+
//
5722+
// block 0
5723+
// mov (8) r20.0<1>:d r60.0<8;8,1>:d {Align1, Q1, Compacted}
5724+
// mov (8) r20.8<1>:d r62.0<8;8,1>:d {Align1, Q1, Compacted}
5725+
// mov (8) r21.0<1>:d r64.0<8;8,1>:d {Align1, Q1, Compacted}
5726+
// mov (8) r21.8<1>:d r66.0<8;8,1>:d {Align1, Q1, Compacted}
5727+
// ...
5728+
//block 1
5729+
// mov (8) r30.0<1>:d r60.8<8;8,1>:d {Align1, Q1, Compacted}
5730+
// mov (8) r30.8<1>:d r62.8<8;8,1>:d {Align1, Q1, Compacted}
5731+
// mov (8) r31.0<1>:d r64.8<8;8,1>:d {Align1, Q1, Compacted}
5732+
// mov (8) r31.8<1>:d r66.8<8;8,1>:d {Align1, Q1, Compacted}
5733+
//...
5734+
56585735
if (numPasses > 1)
56595736
{
56605737
for (uint i = 0; i < nbElements; ++i)
56615738
{
56625739
SIMDMode mode = (typeSizeInBytes == 8 && blockWidth != 64) ? SIMDMode::SIMD4 : SIMDMode::SIMD8;
56635740
m_encoder->SetSimdSize(mode);
56645741
m_encoder->SetNoMask();
5742+
5743+
//Src
56655744
m_encoder->SetSrcSubVar(0, srcSubVar);
5745+
m_encoder->SetSrcSubReg(0, srcSubRegOffset / typeSizeInBytes);
5746+
//Dst
56665747
m_encoder->SetDstSubVar(dstSubVar);
5667-
dstSubVar += scale;
5668-
srcSubVar = srcSubVar + scale * numPasses;
5748+
m_encoder->SetDstSubReg(dstSubRegOffset / typeSizeInBytes);
5749+
//Strides for dst and src
5750+
dstSubRegOffset = ((i + 1) * maxWidth) % getGRFSize();
5751+
if (dstSubRegOffset == 0)
5752+
{
5753+
dstSubVar += scale;
5754+
}
5755+
srcSubVar = srcSubVar + (scale * numPasses * blockWidth / getGRFSize());
5756+
56695757
m_encoder->Copy(tempdst, data);
56705758
m_encoder->Push();
56715759
}

0 commit comments

Comments
 (0)