@@ -5173,6 +5173,7 @@ void EmitPass::emitSimdMediaBlockRead( llvm::Instruction* inst )
5173
5173
blockWidth = 32 ;
5174
5174
}
5175
5175
5176
+
5176
5177
CVariable* pTempVar0 = nullptr ;
5177
5178
CVariable* pTempVar = nullptr ;
5178
5179
@@ -5199,6 +5200,8 @@ void EmitPass::emitSimdMediaBlockRead( llvm::Instruction* inst )
5199
5200
// mov( 1 ) r36.1<1>:d r13.1<0; 1, 0>:d{ Align1, NoMask }
5200
5201
// send( 8 ) r32.0<1>:ud r36 0xc 0x2490000:ud{ Align1, NoMask } // media block read
5201
5202
5203
+ int scale = (blockWidth == 64 ) ? 2 : 1 ;
5204
+
5202
5205
for ( pass = 0 ; pass < numPasses; pass++ )
5203
5206
{
5204
5207
m_encoder->SetSimdSize (SIMDMode::SIMD1);
@@ -5216,8 +5219,8 @@ void EmitPass::emitSimdMediaBlockRead( llvm::Instruction* inst )
5216
5219
}
5217
5220
else
5218
5221
{
5219
- m_encoder->Add (pTempVar0, pTempVar0, m_currShader->ImmToVariable (32 , ISA_TYPE_UD));
5220
- dstSubReg = dstSubReg + blockHeight;
5222
+ m_encoder->Add (pTempVar0, pTempVar0, m_currShader->ImmToVariable (blockWidth , ISA_TYPE_UD));
5223
+ dstSubReg = dstSubReg + scale * blockHeight;
5221
5224
}
5222
5225
m_encoder->Push ();
5223
5226
@@ -5271,10 +5274,12 @@ void EmitPass::emitSimdMediaBlockRead( llvm::Instruction* inst )
5271
5274
{
5272
5275
for (uint32_t pass = 0 ; pass < numPasses; pass++)
5273
5276
{
5274
- m_encoder->SetSimdSize (typeSizeInBytes == 8 ? SIMDMode::SIMD4 : SIMDMode::SIMD8);
5277
+ SIMDMode mode = typeSizeInBytes == 8 && blockWidth != 64 ? SIMDMode::SIMD4 : SIMDMode::SIMD8;
5278
+ m_encoder->SetSimdSize (mode);
5275
5279
m_encoder->SetNoMask ();
5276
- m_encoder->SetSrcSubVar (0 , i + (blockHeight * pass));
5277
- m_encoder->SetDstSubVar (dstSubReg++);
5280
+ m_encoder->SetSrcSubVar (0 , scale * (i + (blockHeight * pass)));
5281
+ m_encoder->SetDstSubVar (dstSubReg);
5282
+ dstSubReg += scale;
5278
5283
m_encoder->Copy (m_destination, pTempDest);
5279
5284
m_encoder->Push ();
5280
5285
}
@@ -5336,14 +5341,16 @@ void EmitPass::emitSimdMediaBlockWrite( llvm::Instruction* inst )
5336
5341
blockWidth = 32 ;
5337
5342
}
5338
5343
5344
+
5339
5345
CVariable* pTempVar0 = nullptr ;
5340
5346
CVariable* pTempVar = nullptr ;
5341
5347
5342
5348
uint32_t dstSubReg = 0 ;
5343
5349
5350
+ int scale = (blockWidth == 64 ) ? 2 : 1 ;
5344
5351
for ( pass = 0 ; pass < numPasses; pass++ )
5345
5352
{
5346
- uint32_t srcSubVar = pass;
5353
+ uint32_t srcSubVar = pass * scale ;
5347
5354
uint32_t dstSubVar = 0 ;
5348
5355
5349
5356
CVariable* tempdst = m_currShader->GetNewVariable (
@@ -5359,12 +5366,14 @@ void EmitPass::emitSimdMediaBlockWrite( llvm::Instruction* inst )
5359
5366
if ( numPasses > 1 )
5360
5367
{
5361
5368
for ( uint i = 0 ; i < nbElements; ++i )
5362
- {
5363
- m_encoder->SetSimdSize (typeSizeInBytes == 8 ? SIMDMode::SIMD4 : SIMDMode::SIMD8);
5369
+ {
5370
+ SIMDMode mode = (typeSizeInBytes == 8 && blockWidth != 64 ) ? SIMDMode::SIMD4 : SIMDMode::SIMD8;
5371
+ m_encoder->SetSimdSize (mode);
5364
5372
m_encoder->SetNoMask ();
5365
5373
m_encoder->SetSrcSubVar ( 0 , srcSubVar );
5366
- m_encoder->SetDstSubVar ( dstSubVar++ );
5367
- srcSubVar = srcSubVar + numPasses;
5374
+ m_encoder->SetDstSubVar ( dstSubVar );
5375
+ dstSubVar += scale;
5376
+ srcSubVar = srcSubVar + scale * numPasses;
5368
5377
m_encoder->Copy ( tempdst, data );
5369
5378
m_encoder->Push ();
5370
5379
}
@@ -5416,9 +5425,9 @@ void EmitPass::emitSimdMediaBlockWrite( llvm::Instruction* inst )
5416
5425
m_encoder->SetSimdSize ( SIMDMode::SIMD1 );
5417
5426
m_encoder->SetNoMask ();
5418
5427
m_encoder->SetSrcRegion ( 0 , 0 , 1 , 0 );
5419
- m_encoder->Add ( pTempVar0, pTempVar0, m_currShader->ImmToVariable ( 32 , ISA_TYPE_UD ) );
5428
+ m_encoder->Add ( pTempVar0, pTempVar0, m_currShader->ImmToVariable ( blockWidth , ISA_TYPE_UD ) );
5420
5429
m_encoder->Push ();
5421
- dstSubReg = dstSubReg + blockHeight;
5430
+ dstSubReg = dstSubReg + scale * blockHeight;
5422
5431
}
5423
5432
5424
5433
m_encoder->SetDstSubVar ( dstSubReg );
0 commit comments