@@ -6466,7 +6466,7 @@ void EmitPass::emitLSCSimdBlockRead(llvm::Instruction* inst, llvm::Value* ptrVal
6466
6466
uint32_t bytesToRead = getLSCBlockMsgSize(bytesRemaining, m_currShader->m_Platform->getMaxLSCBlockMsgSize(isD64));
6467
6467
uint32_t nBlks = (bytesToRead * 8) / blkBits;
6468
6468
6469
- emitLSCLoad(inst, m_destination, pTempVar, blkBits, nBlks, dstOffset, &resource, useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_TRANSPOSE, immOffset);
6469
+ emitLSCLoad(inst, m_destination, pTempVar, blkBits, nBlks, dstOffset, &resource, useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_TRANSPOSE, immOffset, 1 );
6470
6470
m_encoder->Push();
6471
6471
6472
6472
bytesRemaining -= bytesToRead;
@@ -9913,6 +9913,7 @@ void EmitPass::emitLoadRawIndexed(
9913
9913
bufPtrv,
9914
9914
varOffset,
9915
9915
immOffset,
9916
+ nullptr,
9916
9917
cacheOpts,
9917
9918
addrSpace);
9918
9919
return;
@@ -10207,7 +10208,8 @@ void EmitPass::emitLoad3DInner(LdRawIntrinsic* inst, ResourceDescriptor& resourc
10207
10208
void EmitPass::emitLoad(
10208
10209
LoadInst *inst,
10209
10210
Value *offset,
10210
- ConstantInt *immOffset
10211
+ ConstantInt *immOffset,
10212
+ ConstantInt *immScale
10211
10213
) {
10212
10214
if (m_pCtx->getModuleMetaData()->isHDCFastClearShader)
10213
10215
{
@@ -10221,15 +10223,19 @@ void EmitPass::emitLoad(
10221
10223
translateLSCCacheControlsFromMetadata(inst, true);
10222
10224
LSC_DOC_ADDR_SPACE addrSpace = m_pCtx->m_UserAddrSpaceMD.
10223
10225
Get(inst);
10226
+
10224
10227
emitLSCVectorLoad(
10225
10228
inst,
10226
10229
inst->getPointerOperand(),
10227
10230
offset,
10228
10231
immOffset,
10232
+ immScale,
10229
10233
cacheOpts,
10230
10234
addrSpace);
10231
10235
return;
10232
10236
}
10237
+ IGC_ASSERT_MESSAGE(immScale ? immScale->getSExtValue() == 1 : true,
10238
+ "Immediate Scale not supported on non-LSC path!");
10233
10239
emitVectorLoad(inst, offset, immOffset);
10234
10240
}
10235
10241
@@ -10619,7 +10625,7 @@ void EmitPass::ReadStackDataBlocks(StackDataBlocks& blkData, uint offsetS)
10619
10625
// Fixme: Is it possible for args to be non 8byte aligned?
10620
10626
IGC_ASSERT_MESSAGE(RmnBytes == 8, "Minimum LSC block size is 8 bytes");
10621
10627
}
10622
- emitLSCLoad(nullptr, LdDst, pSP, blkBits, nBlks, ArgOffset, &resource, useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_TRANSPOSE, spOffset);
10628
+ emitLSCLoad(nullptr, LdDst, pSP, blkBits, nBlks, ArgOffset, &resource, useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_TRANSPOSE, spOffset, 1 );
10623
10629
m_encoder->Push();
10624
10630
}
10625
10631
else
@@ -17841,7 +17847,8 @@ void EmitPass::emitLSCVectorLoad_subDW(LSC_CACHE_OPTS cacheOpts, bool UseA32,
17841
17847
ResourceDescriptor &Resource,
17842
17848
CVariable *Dest,
17843
17849
CVariable *Offset, int ImmOffset,
17844
- uint32_t NumElts, uint32_t EltBytes,
17850
+ int ImmScale, uint32_t NumElts,
17851
+ uint32_t EltBytes,
17845
17852
LSC_DOC_ADDR_SPACE addrSpace) {
17846
17853
// NumElts must be 1 !
17847
17854
IGC_ASSERT(NumElts == 1 && (EltBytes == 1 || EltBytes == 2));
@@ -17897,7 +17904,7 @@ void EmitPass::emitLSCVectorLoad_subDW(LSC_CACHE_OPTS cacheOpts, bool UseA32,
17897
17904
emitLSCLoad(cacheOpts, gatherDst,
17898
17905
eOffset, EltBytes * 8, 1, 0, &Resource,
17899
17906
UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
17900
- LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, addrSpace);
17907
+ LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, ImmScale, addrSpace);
17901
17908
m_encoder->Push();
17902
17909
});
17903
17910
@@ -17917,12 +17924,14 @@ void EmitPass::emitLSCVectorLoad_subDW(LSC_CACHE_OPTS cacheOpts, bool UseA32,
17917
17924
// (max size = UQ x 64 = 512 bytes)
17918
17925
// 2. sub-DW-aligned load, vectorSize is 1|2|3|4|8
17919
17926
// (max size = UQ x 8 = 64 bytes)
17920
- void EmitPass::emitLSCVectorLoad_uniform(
17921
- LSC_CACHE_OPTS cacheOpts, bool UseA32,
17922
- ResourceDescriptor& Resource, CVariable* Dest, CVariable* Offset, int ImmOffset,
17923
- uint32_t NumElts, uint32_t EltBytes, uint64_t Align, uint32_t Addrspace,
17924
- LSC_DOC_ADDR_SPACE userAddrSpace)
17925
- {
17927
+ void EmitPass::emitLSCVectorLoad_uniform(LSC_CACHE_OPTS cacheOpts, bool UseA32,
17928
+ ResourceDescriptor &Resource,
17929
+ CVariable *Dest,
17930
+ CVariable *Offset, int ImmOffset,
17931
+ int ImmScale, uint32_t NumElts,
17932
+ uint32_t EltBytes, uint64_t Align,
17933
+ uint32_t Addrspace,
17934
+ LSC_DOC_ADDR_SPACE userAddrSpace) {
17926
17935
IGC_ASSERT(Offset->IsUniform() && (EltBytes == 4 || EltBytes == 8));
17927
17936
CVariable* eOffset = Offset;
17928
17937
CVariable* ldDest = Dest;
@@ -17960,10 +17969,11 @@ void EmitPass::emitLSCVectorLoad_uniform(
17960
17969
}
17961
17970
17962
17971
m_encoder->SetNoMask();
17963
- emitLSCLoad(
17964
- cacheOpts, tDest, eOffset, dSize * 8, vSize, 0, &Resource,
17965
- UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
17966
- LSC_DATA_ORDER_TRANSPOSE, ImmOffset, userAddrSpace);
17972
+ emitLSCLoad(cacheOpts, tDest,
17973
+ eOffset, dSize * 8, vSize, 0, &Resource,
17974
+ UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
17975
+ LSC_DATA_ORDER_TRANSPOSE, ImmOffset, ImmScale,
17976
+ userAddrSpace);
17967
17977
m_encoder->Push();
17968
17978
17969
17979
if (needTemp)
@@ -18000,10 +18010,11 @@ void EmitPass::emitLSCVectorLoad_uniform(
18000
18010
}
18001
18011
18002
18012
m_encoder->SetNoMask();
18003
- emitLSCLoad(
18004
- cacheOpts, tDest, nEOff, dSize * 8, 1, 0, &Resource,
18005
- UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
18006
- LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, userAddrSpace);
18013
+ emitLSCLoad(cacheOpts, tDest,
18014
+ nEOff, dSize * 8, 1, 0, &Resource,
18015
+ UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
18016
+ LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, ImmScale,
18017
+ userAddrSpace);
18007
18018
m_encoder->Push();
18008
18019
18009
18020
if (needTemp)
@@ -18016,6 +18027,7 @@ void EmitPass::emitLSCVectorLoad_uniform(
18016
18027
void EmitPass::emitLSCVectorLoad(Instruction* inst,
18017
18028
Value *Ptr,
18018
18029
Value *varOffset, ConstantInt *immOffset,
18030
+ ConstantInt *immScale,
18019
18031
LSC_CACHE_OPTS cacheOpts,
18020
18032
LSC_DOC_ADDR_SPACE addrSpace) {
18021
18033
Type *Ty = inst->getType();
@@ -18050,23 +18062,26 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
18050
18062
18051
18063
const int immOffsetInt =
18052
18064
immOffset ? static_cast<int>(immOffset->getSExtValue()) : 0;
18065
+ const int immScaleInt =
18066
+ immScale ? static_cast<int>(immScale->getSExtValue()) : 1;
18053
18067
18054
18068
// 1. handle cases eltBytes < 4
18055
18069
if (eltBytes < 4)
18056
18070
{
18057
18071
IGC_ASSERT(elts == 1);
18072
+ // TODO(pjurek): handle immScale for sub-DW (eltBytes < 4) loads.
18058
18073
emitLSCVectorLoad_subDW(cacheOpts, useA32, resource, destCVar,
18059
- eOffset, immOffsetInt, 1, eltBytes, addrSpace);
18074
+ eOffset, immOffsetInt, immScaleInt, 1, eltBytes, addrSpace);
18060
18075
return;
18061
18076
}
18062
18077
18063
18078
// 2. Handle uniform load
18064
18079
if (srcUniform && resource.m_resource->IsUniform())
18065
18080
{
18066
- emitLSCVectorLoad_uniform(
18067
- cacheOpts, useA32 ,
18068
- resource, destCVar, eOffset, immOffsetInt, elts, eltBytes, align,
18069
- ptrType->getPointerAddressSpace(), addrSpace);
18081
+ emitLSCVectorLoad_uniform(cacheOpts, useA32, resource, destCVar,
18082
+ eOffset, immOffsetInt, immScaleInt, elts ,
18083
+ eltBytes, align,
18084
+ ptrType->getPointerAddressSpace(), addrSpace);
18070
18085
return;
18071
18086
}
18072
18087
@@ -18129,13 +18144,13 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
18129
18144
case VectorMessage::MESSAGE_A32_LSC_RW:
18130
18145
emitLSCLoad(
18131
18146
cacheOpts, gatherDst, rawAddrVar, blkBits, numBlks, 0, &resource,
18132
- LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_NONTRANSPOSE, immOffsetInt, addrSpace);
18147
+ LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_NONTRANSPOSE, immOffsetInt, immScaleInt, addrSpace);
18133
18148
break;
18134
18149
case VectorMessage::MESSAGE_A64_LSC_RW:
18135
18150
emitLSCLoad(cacheOpts, gatherDst,
18136
18151
rawAddrVar, blkBits, numBlks, 0, &resource,
18137
18152
LSC_ADDR_SIZE_64b, LSC_DATA_ORDER_NONTRANSPOSE,
18138
- immOffsetInt, addrSpace);
18153
+ immOffsetInt, immScaleInt, addrSpace);
18139
18154
break;
18140
18155
default:
18141
18156
IGC_ASSERT_MESSAGE(0, "Internal Error: unexpected message kind for load!");
@@ -21158,7 +21173,7 @@ void EmitPass::emitLscIntrinsicLoad(llvm::GenIntrinsicInst* inst)
21158
21173
LSC_LOAD, gatherDst,
21159
21174
offset, dataSize, fragElems, 0, &resource,
21160
21175
useA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
21161
- LSC_DATA_ORDER_NONTRANSPOSE, fragImmOffset, cacheOpts, addrSpace);
21176
+ LSC_DATA_ORDER_NONTRANSPOSE, fragImmOffset, 1, cacheOpts, addrSpace);
21162
21177
}
21163
21178
m_encoder->Push();
21164
21179
});
@@ -21226,7 +21241,7 @@ void EmitPass::emitLscIntrinsicPrefetch(llvm::GenIntrinsicInst* inst)
21226
21241
lscOp, fragDst,
21227
21242
offset, dataSize, fragElems, 0, &resource,
21228
21243
useA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
21229
- LSC_DATA_ORDER_NONTRANSPOSE, fragImmOffset, cacheOpts, addrSpace);
21244
+ LSC_DATA_ORDER_NONTRANSPOSE, fragImmOffset, 1, cacheOpts, addrSpace);
21230
21245
m_encoder->Push();
21231
21246
});
21232
21247
//
@@ -21345,13 +21360,14 @@ void EmitPass::emitLSCLoad(
21345
21360
ResourceDescriptor* resource,
21346
21361
LSC_ADDR_SIZE addr_size,
21347
21362
LSC_DATA_ORDER data_order,
21348
- int immOffset)
21363
+ int immOffset,
21364
+ int immScale)
21349
21365
{
21350
21366
LSC_CACHE_OPTS cacheOpts =
21351
21367
translateLSCCacheControlsFromMetadata(inst, true);
21352
21368
LSC_DOC_ADDR_SPACE addrSpace = m_pCtx->m_UserAddrSpaceMD.Get(inst);
21353
21369
emitLSCLoad(cacheOpts, dst, offset, elemSize, numElems, blockOffset,
21354
- resource, addr_size, data_order, immOffset, addrSpace);
21370
+ resource, addr_size, data_order, immOffset, immScale, addrSpace);
21355
21371
}
21356
21372
21357
21373
void EmitPass::emitLSCLoad(
@@ -21365,13 +21381,14 @@ void EmitPass::emitLSCLoad(
21365
21381
LSC_ADDR_SIZE addr_size,
21366
21382
LSC_DATA_ORDER data_order,
21367
21383
int immOffset,
21384
+ int immScale,
21368
21385
LSC_DOC_ADDR_SPACE addrSpace)
21369
21386
{
21370
21387
LSC_DATA_SIZE elemSizeEnum = m_encoder->LSC_GetElementSize(elemSize);
21371
21388
LSC_DATA_ELEMS numElemsEnum = m_encoder->LSC_GetElementNum(numElems);
21372
21389
m_encoder->LSC_LoadGather(LSC_LOAD, dst,
21373
21390
offset, elemSizeEnum, numElemsEnum, blockOffset,
21374
- resource, addr_size, data_order, immOffset,
21391
+ resource, addr_size, data_order, immOffset, immScale,
21375
21392
cacheOpts, addrSpace);
21376
21393
}
21377
21394
0 commit comments