@@ -11347,7 +11347,8 @@ void EmitPass::emitStoreRawIndexed(
11347
11347
cacheOpts,
11348
11348
inst->getAlignment(),
11349
11349
false,
11350
- addrSpace);
11350
+ addrSpace
11351
+ );
11351
11352
return;
11352
11353
}
11353
11354
IGC_ASSERT(immOffset == nullptr);
@@ -11527,7 +11528,8 @@ void EmitPass::emitStore(
11527
11528
cacheOpts,
11528
11529
IGCLLVM::getAlignmentValue(inst),
11529
11530
inst->getMetadata("enable.vmask"),
11530
- addrSpace);
11531
+ addrSpace
11532
+ );
11531
11533
return;
11532
11534
}
11533
11535
IGC_ASSERT_MESSAGE(immScale ? immScale->getSExtValue() == 1 : true,
@@ -17833,7 +17835,9 @@ void EmitPass::emitLSCVectorLoad_subDW(LSC_CACHE_OPTS CacheOpts, bool UseA32,
17833
17835
CVariable *Offset, int ImmOffset,
17834
17836
int ImmScale, uint32_t NumElts,
17835
17837
uint32_t EltBytes,
17836
- LSC_DOC_ADDR_SPACE AddrSpace) {
17838
+ LSC_DOC_ADDR_SPACE AddrSpace,
17839
+ LSC_ADDR_SIZE AddrSize
17840
+ ) {
17837
17841
// NumElts must be 1 !
17838
17842
IGC_ASSERT(NumElts == 1 && (EltBytes == 1 || EltBytes == 2));
17839
17843
@@ -17869,14 +17873,6 @@ void EmitPass::emitLSCVectorLoad_subDW(LSC_CACHE_OPTS CacheOpts, bool UseA32,
17869
17873
eOffset = BroadcastIfUniform(eOffset);
17870
17874
}
17871
17875
17872
- LSC_ADDR_SIZE addressSize = LSC_ADDR_SIZE_INVALID;
17873
- if (UseA32) {
17874
- addressSize = LSC_ADDR_SIZE_32b;
17875
- }
17876
- else {
17877
- addressSize = LSC_ADDR_SIZE_64b;
17878
- }
17879
-
17880
17876
// Need a temp as Dest is smaller than DW.
17881
17877
e_alignment dataAlign = ((4 * alloc_nbelts) <= (uint32_t)getGRFSize()) ? EALIGN_GRF : EALIGN_2GRF;
17882
17878
CVariable* gatherDst =
@@ -17894,7 +17890,7 @@ void EmitPass::emitLSCVectorLoad_subDW(LSC_CACHE_OPTS CacheOpts, bool UseA32,
17894
17890
m_encoder->SetPredicate(flag);
17895
17891
17896
17892
emitLSCLoad(CacheOpts, gatherDst,
17897
- eOffset, EltBytes * 8, 1, 0, &Resource, addressSize ,
17893
+ eOffset, EltBytes * 8, 1, 0, &Resource, AddrSize ,
17898
17894
LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, ImmScale, AddrSpace);
17899
17895
m_encoder->Push();
17900
17896
});
@@ -17916,11 +17912,12 @@ void EmitPass::emitLSCVectorLoad_subDW(LSC_CACHE_OPTS CacheOpts, bool UseA32,
17916
17912
// 2. sub-DW-aligned load, vectorSize is 1|2|3|4|8
17917
17913
// (max size = UQ x 8 = 64 bytes)
17918
17914
void EmitPass::emitLSCVectorLoad_uniform(
17919
- LSC_CACHE_OPTS cacheOpts , bool UseA32, ResourceDescriptor &Resource,
17915
+ LSC_CACHE_OPTS CacheOpts , bool UseA32, ResourceDescriptor &Resource,
17920
17916
CVariable *Dest,
17921
- CVariable *Offset, int ImmOffset,
17922
- int ImmScale, uint32_t NumElts, uint32_t EltBytes, uint64_t Align,
17923
- uint32_t Addrspace, LSC_DOC_ADDR_SPACE userAddrSpace) {
17917
+ CVariable *Offset, int ImmOffset, int ImmScale, uint32_t NumElts,
17918
+ uint32_t EltBytes, uint64_t Align, uint32_t Addrspace,
17919
+ LSC_DOC_ADDR_SPACE UserAddrSpace, LSC_ADDR_SIZE AddrSize) {
17920
+
17924
17921
IGC_ASSERT(Offset->IsUniform() && (EltBytes == 4 || EltBytes == 8));
17925
17922
CVariable *eOffset = Offset;
17926
17923
CVariable* ldDest = Dest;
@@ -17942,16 +17939,6 @@ void EmitPass::emitLSCVectorLoad_uniform(
17942
17939
bool destUniform = Dest->IsUniform();
17943
17940
IGC_ASSERT((vSize <= 64 && (vSize <= 8 || isPowerOf2_32(vSize))));
17944
17941
17945
- LSC_ADDR_SIZE addressSize = LSC_ADDR_SIZE_INVALID;
17946
- if (UseA32)
17947
- {
17948
- addressSize = LSC_ADDR_SIZE_32b;
17949
- }
17950
- else
17951
- {
17952
- addressSize = LSC_ADDR_SIZE_64b;
17953
- }
17954
-
17955
17942
// 1. Do a SIMT1 transposed load
17956
17943
if ((isPowerOf2_32(vSize) || vSize == 3) && vSize <= 64 &&
17957
17944
((Align >= 8 && dSize == 8) || (Align >= 4 && dSize == 4)))
@@ -17968,11 +17955,10 @@ void EmitPass::emitLSCVectorLoad_uniform(
17968
17955
}
17969
17956
17970
17957
m_encoder->SetNoMask();
17971
- emitLSCLoad(cacheOpts, tDest,
17972
- eOffset, dSize * 8, vSize, 0, &Resource,
17973
- addressSize,
17958
+ emitLSCLoad(CacheOpts, tDest,
17959
+ eOffset, dSize * 8, vSize, 0, &Resource, AddrSize,
17974
17960
LSC_DATA_ORDER_TRANSPOSE, ImmOffset, ImmScale,
17975
- userAddrSpace );
17961
+ UserAddrSpace );
17976
17962
m_encoder->Push();
17977
17963
17978
17964
if (needTemp)
@@ -18009,11 +17995,11 @@ void EmitPass::emitLSCVectorLoad_uniform(
18009
17995
}
18010
17996
18011
17997
m_encoder->SetNoMask();
18012
- emitLSCLoad(cacheOpts , tDest,
17998
+ emitLSCLoad(CacheOpts , tDest,
18013
17999
nEOff, dSize * 8, 1, 0, &Resource,
18014
- addressSize ,
18000
+ AddrSize ,
18015
18001
LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, ImmScale,
18016
- userAddrSpace );
18002
+ UserAddrSpace );
18017
18003
m_encoder->Push();
18018
18004
18019
18005
if (needTemp)
@@ -18031,6 +18017,7 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
18031
18017
LSC_CACHE_OPTS cacheOpts,
18032
18018
LSC_DOC_ADDR_SPACE addrSpace
18033
18019
) {
18020
+
18034
18021
Type *Ty = inst->getType();
18035
18022
uint64_t align = 0;
18036
18023
if (auto LI = dyn_cast<LoadInst>(inst))
@@ -18066,22 +18053,30 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
18066
18053
const int immScaleInt =
18067
18054
immScale ? static_cast<int>(immScale->getSExtValue()) : 1;
18068
18055
18056
+ LSC_ADDR_SIZE addrSize = LSC_ADDR_SIZE_INVALID;
18057
+ if (useA32) {
18058
+ addrSize = LSC_ADDR_SIZE_32b;
18059
+ }
18060
+ else {
18061
+ addrSize = LSC_ADDR_SIZE_64b;
18062
+ }
18063
+
18069
18064
// 1. handle cases eltBytes < 4
18070
18065
if (eltBytes < 4)
18071
18066
{
18072
18067
IGC_ASSERT(elts == 1);
18073
18068
emitLSCVectorLoad_subDW(cacheOpts, useA32, resource, destCVar,
18074
- eOffset, immOffsetInt,
18075
- immScaleInt, 1, eltBytes, addrSpace );
18069
+ eOffset, immOffsetInt, immScaleInt, 1, eltBytes,
18070
+ addrSpace, addrSize );
18076
18071
return;
18077
18072
}
18078
18073
18079
18074
// 2. Handle uniform load
18080
18075
if (srcUniform && resource.m_resource->IsUniform()) {
18081
- emitLSCVectorLoad_uniform(cacheOpts, useA32, resource, destCVar,
18082
- eOffset, immOffsetInt ,
18083
- immScaleInt, elts, eltBytes, align,
18084
- ptrType->getPointerAddressSpace(), addrSpace);
18076
+ emitLSCVectorLoad_uniform(
18077
+ cacheOpts, useA32, resource, destCVar ,
18078
+ eOffset, immOffsetInt, immScaleInt, elts, eltBytes, align,
18079
+ ptrType->getPointerAddressSpace(), addrSpace, addrSize );
18085
18080
return;
18086
18081
}
18087
18082
@@ -18138,25 +18133,18 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
18138
18133
gatherDst = m_currShader->GetNewAlias(destCVar,
18139
18134
dVisaTy, (uint16_t)eltOffBytes, (uint16_t)nbelts);
18140
18135
}
18141
- VectorMessage::MESSAGE_KIND messageType = VecMessInfo.insts[i].kind;
18136
+
18142
18137
m_encoder->SetPredicate(flag);
18143
- LSC_ADDR_SIZE addressSize = LSC_ADDR_SIZE_INVALID;
18144
18138
18145
- switch (messageType) {
18146
- case VectorMessage::MESSAGE_A32_LSC_RW:
18147
- addressSize = LSC_ADDR_SIZE_32b;
18148
- break;
18149
- case VectorMessage::MESSAGE_A64_LSC_RW:
18150
- addressSize = LSC_ADDR_SIZE_64b;
18151
- break;
18152
- default:
18153
- IGC_ASSERT_MESSAGE(
18154
- 0, "Internal Error: unexpected message kind for load!");
18155
- }
18139
+ VectorMessage::MESSAGE_KIND messageType = VecMessInfo.insts[i].kind;
18140
+ IGC_ASSERT_MESSAGE(
18141
+ messageType == VectorMessage::MESSAGE_A32_LSC_RW ||
18142
+ messageType == VectorMessage::MESSAGE_A64_LSC_RW,
18143
+ "Internal Error: unexpected message kind for load!");
18156
18144
18157
18145
emitLSCLoad(cacheOpts, gatherDst,
18158
18146
rawAddrVar, blkBits, numBlks, 0, &resource,
18159
- addressSize , LSC_DATA_ORDER_NONTRANSPOSE,
18147
+ addrSize , LSC_DATA_ORDER_NONTRANSPOSE,
18160
18148
immOffsetInt, immScaleInt, addrSpace);
18161
18149
m_encoder->Push();
18162
18150
@@ -18169,13 +18157,15 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
18169
18157
}
18170
18158
18171
18159
// Sub-function of emitLSCVectorStore()
18172
- void EmitPass::emitLSCVectorStore_subDW(LSC_CACHE_OPTS cacheOpts , bool UseA32,
18160
+ void EmitPass::emitLSCVectorStore_subDW(LSC_CACHE_OPTS CacheOpts , bool UseA32,
18173
18161
ResourceDescriptor &Resource,
18174
18162
CVariable *StoreVar, CVariable *Offset,
18175
18163
int ImmOffset, int ImmScale,
18176
18164
uint32_t NumElts, uint32_t EltBytes,
18177
18165
alignment_t Alignment,
18178
- LSC_DOC_ADDR_SPACE addrSpace) {
18166
+ LSC_DOC_ADDR_SPACE AddrSpace,
18167
+ LSC_ADDR_SIZE AddrSize
18168
+ ) {
18179
18169
// NumElts must be 1!
18180
18170
IGC_ASSERT_MESSAGE(NumElts == 1 && (EltBytes == 1 || EltBytes == 2),
18181
18171
"Number of elements must be 1 for an 8bit or 16bit data type in a non-transposed LSC store.");
@@ -18232,12 +18222,11 @@ void EmitPass::emitLSCVectorStore_subDW(LSC_CACHE_OPTS cacheOpts, bool UseA32,
18232
18222
{
18233
18223
m_encoder->SetNoMask();
18234
18224
}
18235
- // NumElts = 1
18236
- emitLSCStore(cacheOpts,
18237
- stVar, eOffset, EltBytes * 8, 1, 0, &Resource,
18238
- UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
18239
- LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, ImmScale, addrSpace);
18240
- m_encoder->Push();
18225
+ // NumElts = 1
18226
+ emitLSCStore(CacheOpts,
18227
+ stVar, eOffset, EltBytes * 8, 1, 0, &Resource, AddrSize,
18228
+ LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, ImmScale, AddrSpace);
18229
+ m_encoder->Push();
18241
18230
});
18242
18231
18243
18232
return;
@@ -18250,13 +18239,15 @@ void EmitPass::emitLSCVectorStore_subDW(LSC_CACHE_OPTS cacheOpts, bool UseA32,
18250
18239
// (max size = UQ x 64 = 512 bytes)
18251
18240
// 2. sub-DW-aligned store, vectorSize is 1|2|3|4|8
18252
18241
// (max size = UQ x 8 = 64 bytes)
18253
- void EmitPass::emitLSCVectorStore_uniform(LSC_CACHE_OPTS cacheOpts , bool UseA32,
18242
+ void EmitPass::emitLSCVectorStore_uniform(LSC_CACHE_OPTS CacheOpts , bool UseA32,
18254
18243
ResourceDescriptor &Resource,
18255
18244
CVariable *StoreVar,
18256
18245
CVariable *Offset, int ImmOffset,
18257
18246
int ImmScale, uint32_t NumElts,
18258
18247
uint32_t EltBytes, alignment_t Align,
18259
- LSC_DOC_ADDR_SPACE addrSpace) {
18248
+ LSC_DOC_ADDR_SPACE AddrSpace,
18249
+ LSC_ADDR_SIZE AddrSize
18250
+ ) {
18260
18251
// If needed, can handle non-uniform StoreVar.
18261
18252
IGC_ASSERT(StoreVar->IsUniform() && Offset->IsUniform() && (EltBytes == 4 || EltBytes == 8));
18262
18253
@@ -18277,6 +18268,7 @@ void EmitPass::emitLSCVectorStore_uniform(LSC_CACHE_OPTS cacheOpts, bool UseA32,
18277
18268
stVar = m_currShader->GetNewAlias(stVar, ISA_TYPE_UD, 0, 0);
18278
18269
}
18279
18270
18271
+
18280
18272
IGC_ASSERT(vSize <= 64 && (vSize < 8 || isPowerOf2_32(vSize)));
18281
18273
if (Align < 4 || (dSize == 8 && Align < 8) || !(isPowerOf2_32(vSize) || vSize == 3))
18282
18274
{
@@ -18301,11 +18293,11 @@ void EmitPass::emitLSCVectorStore_uniform(LSC_CACHE_OPTS cacheOpts, bool UseA32,
18301
18293
18302
18294
ResourceLoop(Resource, [&](CVariable* /*flag*/) {
18303
18295
m_encoder->SetNoMask();
18304
- emitLSCStore(
18305
- cacheOpts, new_stVar, new_eoff, dSize * 8, 1, 0, &Resource,
18306
- UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b ,
18307
- LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, ImmScale, addrSpace );
18308
- m_encoder->Push();
18296
+ emitLSCStore(CacheOpts,
18297
+ new_stVar, new_eoff, dSize * 8, 1, 0, &Resource,
18298
+ AddrSize, LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset ,
18299
+ ImmScale, AddrSpace );
18300
+ m_encoder->Push();
18309
18301
});
18310
18302
return;
18311
18303
}
@@ -18323,10 +18315,9 @@ void EmitPass::emitLSCVectorStore_uniform(LSC_CACHE_OPTS cacheOpts, bool UseA32,
18323
18315
ResourceLoop(Resource, [&](CVariable* /*flag*/) {
18324
18316
m_encoder->SetUniformSIMDSize(SIMDMode::SIMD1);
18325
18317
m_encoder->SetNoMask();
18326
- emitLSCStore(cacheOpts, stVar, eOffset, dSize * 8, vSize, 0, &Resource,
18327
- UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
18328
- LSC_DATA_ORDER_TRANSPOSE,
18329
- ImmOffset, ImmScale, addrSpace);
18318
+ emitLSCStore(CacheOpts,
18319
+ stVar, eOffset, dSize * 8, vSize, 0, &Resource, AddrSize,
18320
+ LSC_DATA_ORDER_TRANSPOSE, ImmOffset, ImmScale, AddrSpace);
18330
18321
m_encoder->Push();
18331
18322
});
18332
18323
return;
@@ -18337,7 +18328,10 @@ void EmitPass::emitLSCVectorStore(Value *Ptr,
18337
18328
ConstantInt *immScale, Value *storedVal,
18338
18329
BasicBlock *BB, LSC_CACHE_OPTS cacheOpts,
18339
18330
alignment_t align, bool dontForceDmask,
18340
- LSC_DOC_ADDR_SPACE addrSpace) {
18331
+ LSC_DOC_ADDR_SPACE addrSpace
18332
+
18333
+ ) {
18334
+
18341
18335
PointerType* ptrType = cast<PointerType>(Ptr->getType());
18342
18336
Type* Ty = storedVal->getType();
18343
18337
IGCLLVM::FixedVectorType* VTy = dyn_cast<IGCLLVM::FixedVectorType>(Ty);
@@ -18390,6 +18384,14 @@ void EmitPass::emitLSCVectorStore(Value *Ptr,
18390
18384
const int immScaleVal =
18391
18385
immScale ? static_cast<int>(immScale->getSExtValue()) : 1;
18392
18386
18387
+ LSC_ADDR_SIZE addrSize = LSC_ADDR_SIZE_INVALID;
18388
+ if (useA32) {
18389
+ addrSize = LSC_ADDR_SIZE_32b;
18390
+ }
18391
+ else {
18392
+ addrSize = LSC_ADDR_SIZE_64b;
18393
+ }
18394
+
18393
18395
// 1. handle cases eltBytes < 4
18394
18396
if (eltBytes < 4)
18395
18397
{
@@ -18398,7 +18400,7 @@ void EmitPass::emitLSCVectorStore(Value *Ptr,
18398
18400
IGC_ASSERT(elts == 1);
18399
18401
emitLSCVectorStore_subDW(cacheOpts, useA32, resource,
18400
18402
storedVar, eOffset, immOffsetVal, immScaleVal,
18401
- 1, eltBytes, align, addrSpace);
18403
+ 1, eltBytes, align, addrSpace, addrSize );
18402
18404
return;
18403
18405
}
18404
18406
@@ -18407,9 +18409,10 @@ void EmitPass::emitLSCVectorStore(Value *Ptr,
18407
18409
// 2. Handle uniform Store
18408
18410
if (dstUniform && srcUniform)
18409
18411
{
18410
- emitLSCVectorStore_uniform(cacheOpts, useA32, resource, storedVar,
18411
- eOffset, immOffsetVal, immScaleVal, elts,
18412
- eltBytes, align, addrSpace);
18412
+ emitLSCVectorStore_uniform(cacheOpts, useA32, resource,
18413
+ storedVar, eOffset, immOffsetVal,
18414
+ immScaleVal, elts, eltBytes, align,
18415
+ addrSpace, addrSize);
18413
18416
return;
18414
18417
}
18415
18418
@@ -18466,22 +18469,18 @@ void EmitPass::emitLSCVectorStore(Value *Ptr,
18466
18469
IGC_ASSERT_MESSAGE(nbelts < (UINT16_MAX), "nbelts > higher than 64k");
18467
18470
CVariable* subStoredVar = m_currShader->GetNewAlias(
18468
18471
storedVar, storedType, (uint16_t)eltOffBytes, (uint16_t)nbelts);
18469
- switch (VecMessInfo.insts[i].kind) {
18470
- case VectorMessage::MESSAGE_A32_LSC_RW:
18471
- emitLSCStore(
18472
- cacheOpts, subStoredVar, rawAddrVar, blkBits, numBlks, 0, &resource,
18473
- LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_NONTRANSPOSE, immOffsetVal, immScaleVal, addrSpace);
18474
- break;
18475
- case VectorMessage::MESSAGE_A64_LSC_RW:
18476
- emitLSCStore(cacheOpts,
18477
- subStoredVar, rawAddrVar, blkBits, numBlks, 0,
18478
- &resource, LSC_ADDR_SIZE_64b,
18479
- LSC_DATA_ORDER_NONTRANSPOSE, immOffsetVal,
18480
- immScaleVal, addrSpace);
18481
- break;
18482
- default:
18483
- IGC_ASSERT_MESSAGE(0, "Internal Error: unexpected Message kind for store");
18484
- }
18472
+
18473
+ VectorMessage::MESSAGE_KIND messageType = VecMessInfo.insts[i].kind;
18474
+ IGC_ASSERT_MESSAGE(
18475
+ messageType == VectorMessage::MESSAGE_A32_LSC_RW ||
18476
+ messageType == VectorMessage::MESSAGE_A64_LSC_RW,
18477
+ "Internal Error: unexpected message kind for store!");
18478
+
18479
+ emitLSCStore(cacheOpts,
18480
+ subStoredVar, rawAddrVar, blkBits, numBlks, 0,
18481
+ &resource, addrSize, LSC_DATA_ORDER_NONTRANSPOSE,
18482
+ immOffsetVal, immScaleVal, addrSpace);
18483
+
18485
18484
m_encoder->Push();
18486
18485
}
18487
18486
});
0 commit comments