@@ -9900,7 +9900,8 @@ void EmitPass::emitLoadRawIndexed(
9900
9900
immOffset,
9901
9901
nullptr,
9902
9902
cacheOpts,
9903
- addrSpace);
9903
+ addrSpace
9904
+ );
9904
9905
return;
9905
9906
}
9906
9907
IGC_ASSERT(immOffset == nullptr);
@@ -10216,11 +10217,13 @@ void EmitPass::emitLoad(
10216
10217
immOffset,
10217
10218
immScale,
10218
10219
cacheOpts,
10219
- addrSpace);
10220
+ addrSpace
10221
+ );
10220
10222
return;
10221
10223
}
10222
10224
IGC_ASSERT_MESSAGE(immScale ? immScale->getSExtValue() == 1 : true,
10223
10225
"Immediate Scale not supported on non-LSC path!");
10226
+
10224
10227
emitVectorLoad(inst, offset, immOffset);
10225
10228
}
10226
10229
@@ -17824,13 +17827,13 @@ CVariable* EmitPass::prepareDataForUniform(
17824
17827
return newVar;
17825
17828
}
17826
17829
17827
- void EmitPass::emitLSCVectorLoad_subDW(LSC_CACHE_OPTS cacheOpts , bool UseA32,
17830
+ void EmitPass::emitLSCVectorLoad_subDW(LSC_CACHE_OPTS CacheOpts , bool UseA32,
17828
17831
ResourceDescriptor &Resource,
17829
17832
CVariable *Dest,
17830
17833
CVariable *Offset, int ImmOffset,
17831
17834
int ImmScale, uint32_t NumElts,
17832
17835
uint32_t EltBytes,
17833
- LSC_DOC_ADDR_SPACE addrSpace ) {
17836
+ LSC_DOC_ADDR_SPACE AddrSpace ) {
17834
17837
// NumElts must be 1 !
17835
17838
IGC_ASSERT(NumElts == 1 && (EltBytes == 1 || EltBytes == 2));
17836
17839
@@ -17866,6 +17869,14 @@ void EmitPass::emitLSCVectorLoad_subDW(LSC_CACHE_OPTS cacheOpts, bool UseA32,
17866
17869
eOffset = BroadcastIfUniform(eOffset);
17867
17870
}
17868
17871
17872
+ LSC_ADDR_SIZE addressSize = LSC_ADDR_SIZE_INVALID;
17873
+ if (UseA32) {
17874
+ addressSize = LSC_ADDR_SIZE_32b;
17875
+ }
17876
+ else {
17877
+ addressSize = LSC_ADDR_SIZE_64b;
17878
+ }
17879
+
17869
17880
// Need a temp as Dest is smaller than DW.
17870
17881
e_alignment dataAlign = ((4 * alloc_nbelts) <= (uint32_t)getGRFSize()) ? EALIGN_GRF : EALIGN_2GRF;
17871
17882
CVariable* gatherDst =
@@ -17882,10 +17893,9 @@ void EmitPass::emitLSCVectorLoad_subDW(LSC_CACHE_OPTS cacheOpts, bool UseA32,
17882
17893
else
17883
17894
m_encoder->SetPredicate(flag);
17884
17895
17885
- emitLSCLoad(cacheOpts, gatherDst,
17886
- eOffset, EltBytes * 8, 1, 0, &Resource,
17887
- UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
17888
- LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, ImmScale, addrSpace);
17896
+ emitLSCLoad(CacheOpts, gatherDst,
17897
+ eOffset, EltBytes * 8, 1, 0, &Resource, addressSize,
17898
+ LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, ImmScale, AddrSpace);
17889
17899
m_encoder->Push();
17890
17900
});
17891
17901
@@ -17905,16 +17915,14 @@ void EmitPass::emitLSCVectorLoad_subDW(LSC_CACHE_OPTS cacheOpts, bool UseA32,
17905
17915
// (max size = UQ x 64 = 512 bytes)
17906
17916
// 2. sub-DW-aligned load, vectorSize is 1|2|3|4|8
17907
17917
// (max size = UQ x 8 = 64 bytes)
17908
- void EmitPass::emitLSCVectorLoad_uniform(LSC_CACHE_OPTS cacheOpts, bool UseA32,
17909
- ResourceDescriptor &Resource,
17910
- CVariable *Dest,
17911
- CVariable *Offset, int ImmOffset,
17912
- int ImmScale, uint32_t NumElts,
17913
- uint32_t EltBytes, uint64_t Align,
17914
- uint32_t Addrspace,
17915
- LSC_DOC_ADDR_SPACE userAddrSpace) {
17918
+ void EmitPass::emitLSCVectorLoad_uniform(
17919
+ LSC_CACHE_OPTS cacheOpts, bool UseA32, ResourceDescriptor &Resource,
17920
+ CVariable *Dest,
17921
+ CVariable *Offset, int ImmOffset,
17922
+ int ImmScale, uint32_t NumElts, uint32_t EltBytes, uint64_t Align,
17923
+ uint32_t Addrspace, LSC_DOC_ADDR_SPACE userAddrSpace) {
17916
17924
IGC_ASSERT(Offset->IsUniform() && (EltBytes == 4 || EltBytes == 8));
17917
- CVariable* eOffset = Offset;
17925
+ CVariable * eOffset = Offset;
17918
17926
CVariable* ldDest = Dest;
17919
17927
uint32_t dSize = EltBytes; // lsc's data size
17920
17928
uint32_t vSize = NumElts; // lsc's vector size
@@ -17934,6 +17942,16 @@ void EmitPass::emitLSCVectorLoad_uniform(LSC_CACHE_OPTS cacheOpts, bool UseA32,
17934
17942
bool destUniform = Dest->IsUniform();
17935
17943
IGC_ASSERT((vSize <= 64 && (vSize <= 8 || isPowerOf2_32(vSize))));
17936
17944
17945
+ LSC_ADDR_SIZE addressSize = LSC_ADDR_SIZE_INVALID;
17946
+ if (UseA32)
17947
+ {
17948
+ addressSize = LSC_ADDR_SIZE_32b;
17949
+ }
17950
+ else
17951
+ {
17952
+ addressSize = LSC_ADDR_SIZE_64b;
17953
+ }
17954
+
17937
17955
// 1. Do a SIMT1 transposed load
17938
17956
if ((isPowerOf2_32(vSize) || vSize == 3) && vSize <= 64 &&
17939
17957
((Align >= 8 && dSize == 8) || (Align >= 4 && dSize == 4)))
@@ -17952,7 +17970,7 @@ void EmitPass::emitLSCVectorLoad_uniform(LSC_CACHE_OPTS cacheOpts, bool UseA32,
17952
17970
m_encoder->SetNoMask();
17953
17971
emitLSCLoad(cacheOpts, tDest,
17954
17972
eOffset, dSize * 8, vSize, 0, &Resource,
17955
- UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b ,
17973
+ addressSize ,
17956
17974
LSC_DATA_ORDER_TRANSPOSE, ImmOffset, ImmScale,
17957
17975
userAddrSpace);
17958
17976
m_encoder->Push();
@@ -17993,7 +18011,7 @@ void EmitPass::emitLSCVectorLoad_uniform(LSC_CACHE_OPTS cacheOpts, bool UseA32,
17993
18011
m_encoder->SetNoMask();
17994
18012
emitLSCLoad(cacheOpts, tDest,
17995
18013
nEOff, dSize * 8, 1, 0, &Resource,
17996
- UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b ,
18014
+ addressSize ,
17997
18015
LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, ImmScale,
17998
18016
userAddrSpace);
17999
18017
m_encoder->Push();
@@ -18007,10 +18025,12 @@ void EmitPass::emitLSCVectorLoad_uniform(LSC_CACHE_OPTS cacheOpts, bool UseA32,
18007
18025
18008
18026
void EmitPass::emitLSCVectorLoad(Instruction* inst,
18009
18027
Value *Ptr,
18010
- Value *varOffset, ConstantInt *immOffset,
18028
+ Value *varOffset,
18029
+ ConstantInt *immOffset,
18011
18030
ConstantInt *immScale,
18012
18031
LSC_CACHE_OPTS cacheOpts,
18013
- LSC_DOC_ADDR_SPACE addrSpace) {
18032
+ LSC_DOC_ADDR_SPACE addrSpace
18033
+ ) {
18014
18034
Type *Ty = inst->getType();
18015
18035
uint64_t align = 0;
18016
18036
if (auto LI = dyn_cast<LoadInst>(inst))
@@ -18050,18 +18070,17 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
18050
18070
if (eltBytes < 4)
18051
18071
{
18052
18072
IGC_ASSERT(elts == 1);
18053
- // todo handle pjurek scale
18054
18073
emitLSCVectorLoad_subDW(cacheOpts, useA32, resource, destCVar,
18055
- eOffset, immOffsetInt, immScaleInt, 1, eltBytes, addrSpace);
18074
+ eOffset, immOffsetInt,
18075
+ immScaleInt, 1, eltBytes, addrSpace);
18056
18076
return;
18057
18077
}
18058
18078
18059
18079
// 2. Handle uniform load
18060
- if (srcUniform && resource.m_resource->IsUniform())
18061
- {
18080
+ if (srcUniform && resource.m_resource->IsUniform()) {
18062
18081
emitLSCVectorLoad_uniform(cacheOpts, useA32, resource, destCVar,
18063
- eOffset, immOffsetInt, immScaleInt, elts,
18064
- eltBytes, align,
18082
+ eOffset, immOffsetInt,
18083
+ immScaleInt, elts, eltBytes, align,
18065
18084
ptrType->getPointerAddressSpace(), addrSpace);
18066
18085
return;
18067
18086
}
@@ -18121,21 +18140,24 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
18121
18140
}
18122
18141
VectorMessage::MESSAGE_KIND messageType = VecMessInfo.insts[i].kind;
18123
18142
m_encoder->SetPredicate(flag);
18143
+ LSC_ADDR_SIZE addressSize = LSC_ADDR_SIZE_INVALID;
18144
+
18124
18145
switch (messageType) {
18125
18146
case VectorMessage::MESSAGE_A32_LSC_RW:
18126
- emitLSCLoad(
18127
- cacheOpts, gatherDst, rawAddrVar, blkBits, numBlks, 0, &resource,
18128
- LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_NONTRANSPOSE, immOffsetInt, immScaleInt, addrSpace);
18147
+ addressSize = LSC_ADDR_SIZE_32b;
18129
18148
break;
18130
18149
case VectorMessage::MESSAGE_A64_LSC_RW:
18131
- emitLSCLoad(cacheOpts, gatherDst,
18132
- rawAddrVar, blkBits, numBlks, 0, &resource,
18133
- LSC_ADDR_SIZE_64b, LSC_DATA_ORDER_NONTRANSPOSE,
18134
- immOffsetInt, immScaleInt, addrSpace);
18150
+ addressSize = LSC_ADDR_SIZE_64b;
18135
18151
break;
18136
18152
default:
18137
- IGC_ASSERT_MESSAGE(0, "Internal Error: unexpected message kind for load!");
18153
+ IGC_ASSERT_MESSAGE(
18154
+ 0, "Internal Error: unexpected message kind for load!");
18138
18155
}
18156
+
18157
+ emitLSCLoad(cacheOpts, gatherDst,
18158
+ rawAddrVar, blkBits, numBlks, 0, &resource,
18159
+ addressSize, LSC_DATA_ORDER_NONTRANSPOSE,
18160
+ immOffsetInt, immScaleInt, addrSpace);
18139
18161
m_encoder->Push();
18140
18162
18141
18163
if (needTemp)
0 commit comments