Skip to content

Commit 1a304b3

Browse files
PawelJurek authored and igcbot committed
Fill scale in LSC_ADDR structure for load instructions
LSC_ADDR has a scale field that vISA can use to multiply the provided offset. This change prepares the interfaces for use in the emitter, so that it can match cases where using the scale is profitable and emit smaller vISA.
1 parent 16c186e commit 1a304b3

File tree

4 files changed: +60 additions, -38 deletions (lines changed)

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8139,6 +8139,7 @@ namespace IGC
81398139
LSC_ADDR_SIZE addr_size,
81408140
LSC_DATA_ORDER data_order,
81418141
int immOffset,
8142+
int immScale,
81428143
LSC_CACHE_OPTS cacheOpts,
81438144
LSC_DOC_ADDR_SPACE addrSpace)
81448145
{
@@ -8151,7 +8152,7 @@ namespace IGC
81518152
LSC_ADDR addr{ };
81528153
VISA_VectorOpnd* globalOffsetOpnd = nullptr;
81538154
addr.type = LSC_ADDR_TYPE_FLAT;
8154-
addr.immScale = 1;
8155+
addr.immScale = immScale;
81558156
addr.immOffset = immOffset;
81568157
addr.size = addr_size;
81578158
addr.addrSpace = addrSpace;

IGC/Compiler/CISACodeGen/CISABuilder.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -261,7 +261,7 @@ namespace IGC
261261
LSC_DATA_ELEMS numElems, unsigned blockOffset,
262262
ResourceDescriptor *resource,
263263
LSC_ADDR_SIZE addr_size, LSC_DATA_ORDER data_order,
264-
int immOffset, LSC_CACHE_OPTS cacheOpts,
264+
int immOffset, int immScale, LSC_CACHE_OPTS cacheOpts,
265265
LSC_DOC_ADDR_SPACE addrSpace);
266266
void LSC_StoreScatter(LSC_OP subOp,
267267
CVariable *src, CVariable *offset,

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 48 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -6466,7 +6466,7 @@ void EmitPass::emitLSCSimdBlockRead(llvm::Instruction* inst, llvm::Value* ptrVal
64666466
uint32_t bytesToRead = getLSCBlockMsgSize(bytesRemaining, m_currShader->m_Platform->getMaxLSCBlockMsgSize(isD64));
64676467
uint32_t nBlks = (bytesToRead * 8) / blkBits;
64686468

6469-
emitLSCLoad(inst, m_destination, pTempVar, blkBits, nBlks, dstOffset, &resource, useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_TRANSPOSE, immOffset);
6469+
emitLSCLoad(inst, m_destination, pTempVar, blkBits, nBlks, dstOffset, &resource, useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_TRANSPOSE, immOffset, 1);
64706470
m_encoder->Push();
64716471

64726472
bytesRemaining -= bytesToRead;
@@ -9913,6 +9913,7 @@ void EmitPass::emitLoadRawIndexed(
99139913
bufPtrv,
99149914
varOffset,
99159915
immOffset,
9916+
nullptr,
99169917
cacheOpts,
99179918
addrSpace);
99189919
return;
@@ -10207,7 +10208,8 @@ void EmitPass::emitLoad3DInner(LdRawIntrinsic* inst, ResourceDescriptor& resourc
1020710208
void EmitPass::emitLoad(
1020810209
LoadInst *inst,
1020910210
Value *offset,
10210-
ConstantInt *immOffset
10211+
ConstantInt *immOffset,
10212+
ConstantInt *immScale
1021110213
) {
1021210214
if (m_pCtx->getModuleMetaData()->isHDCFastClearShader)
1021310215
{
@@ -10221,15 +10223,19 @@ void EmitPass::emitLoad(
1022110223
translateLSCCacheControlsFromMetadata(inst, true);
1022210224
LSC_DOC_ADDR_SPACE addrSpace = m_pCtx->m_UserAddrSpaceMD.
1022310225
Get(inst);
10226+
1022410227
emitLSCVectorLoad(
1022510228
inst,
1022610229
inst->getPointerOperand(),
1022710230
offset,
1022810231
immOffset,
10232+
immScale,
1022910233
cacheOpts,
1023010234
addrSpace);
1023110235
return;
1023210236
}
10237+
IGC_ASSERT_MESSAGE(immScale ? immScale->getSExtValue() == 1 : true,
10238+
"Immediate Scale not supported on non-LSC path!");
1023310239
emitVectorLoad(inst, offset, immOffset);
1023410240
}
1023510241

@@ -10619,7 +10625,7 @@ void EmitPass::ReadStackDataBlocks(StackDataBlocks& blkData, uint offsetS)
1061910625
// Fixme: Is it possible for args to be non 8byte aligned?
1062010626
IGC_ASSERT_MESSAGE(RmnBytes == 8, "Minimum LSC block size is 8 bytes");
1062110627
}
10622-
emitLSCLoad(nullptr, LdDst, pSP, blkBits, nBlks, ArgOffset, &resource, useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_TRANSPOSE, spOffset);
10628+
emitLSCLoad(nullptr, LdDst, pSP, blkBits, nBlks, ArgOffset, &resource, useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_TRANSPOSE, spOffset, 1);
1062310629
m_encoder->Push();
1062410630
}
1062510631
else
@@ -17841,7 +17847,8 @@ void EmitPass::emitLSCVectorLoad_subDW(LSC_CACHE_OPTS cacheOpts, bool UseA32,
1784117847
ResourceDescriptor &Resource,
1784217848
CVariable *Dest,
1784317849
CVariable *Offset, int ImmOffset,
17844-
uint32_t NumElts, uint32_t EltBytes,
17850+
int ImmScale, uint32_t NumElts,
17851+
uint32_t EltBytes,
1784517852
LSC_DOC_ADDR_SPACE addrSpace) {
1784617853
// NumElts must be 1 !
1784717854
IGC_ASSERT(NumElts == 1 && (EltBytes == 1 || EltBytes == 2));
@@ -17897,7 +17904,7 @@ void EmitPass::emitLSCVectorLoad_subDW(LSC_CACHE_OPTS cacheOpts, bool UseA32,
1789717904
emitLSCLoad(cacheOpts, gatherDst,
1789817905
eOffset, EltBytes * 8, 1, 0, &Resource,
1789917906
UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
17900-
LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, addrSpace);
17907+
LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, ImmScale, addrSpace);
1790117908
m_encoder->Push();
1790217909
});
1790317910

@@ -17917,12 +17924,14 @@ void EmitPass::emitLSCVectorLoad_subDW(LSC_CACHE_OPTS cacheOpts, bool UseA32,
1791717924
// (max size = UQ x 64 = 512 bytes)
1791817925
// 2. sub-DW-aligned load, vectorSize is 1|2|3|4|8
1791917926
// (max size = UQ x 8 = 64 bytes)
17920-
void EmitPass::emitLSCVectorLoad_uniform(
17921-
LSC_CACHE_OPTS cacheOpts, bool UseA32,
17922-
ResourceDescriptor& Resource, CVariable* Dest, CVariable* Offset, int ImmOffset,
17923-
uint32_t NumElts, uint32_t EltBytes, uint64_t Align, uint32_t Addrspace,
17924-
LSC_DOC_ADDR_SPACE userAddrSpace)
17925-
{
17927+
void EmitPass::emitLSCVectorLoad_uniform(LSC_CACHE_OPTS cacheOpts, bool UseA32,
17928+
ResourceDescriptor &Resource,
17929+
CVariable *Dest,
17930+
CVariable *Offset, int ImmOffset,
17931+
int ImmScale, uint32_t NumElts,
17932+
uint32_t EltBytes, uint64_t Align,
17933+
uint32_t Addrspace,
17934+
LSC_DOC_ADDR_SPACE userAddrSpace) {
1792617935
IGC_ASSERT(Offset->IsUniform() && (EltBytes == 4 || EltBytes == 8));
1792717936
CVariable* eOffset = Offset;
1792817937
CVariable* ldDest = Dest;
@@ -17960,10 +17969,11 @@ void EmitPass::emitLSCVectorLoad_uniform(
1796017969
}
1796117970

1796217971
m_encoder->SetNoMask();
17963-
emitLSCLoad(
17964-
cacheOpts, tDest, eOffset, dSize * 8, vSize, 0, &Resource,
17965-
UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
17966-
LSC_DATA_ORDER_TRANSPOSE, ImmOffset, userAddrSpace);
17972+
emitLSCLoad(cacheOpts, tDest,
17973+
eOffset, dSize * 8, vSize, 0, &Resource,
17974+
UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
17975+
LSC_DATA_ORDER_TRANSPOSE, ImmOffset, ImmScale,
17976+
userAddrSpace);
1796717977
m_encoder->Push();
1796817978

1796917979
if (needTemp)
@@ -18000,10 +18010,11 @@ void EmitPass::emitLSCVectorLoad_uniform(
1800018010
}
1800118011

1800218012
m_encoder->SetNoMask();
18003-
emitLSCLoad(
18004-
cacheOpts, tDest, nEOff, dSize * 8, 1, 0, &Resource,
18005-
UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
18006-
LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, userAddrSpace);
18013+
emitLSCLoad(cacheOpts, tDest,
18014+
nEOff, dSize * 8, 1, 0, &Resource,
18015+
UseA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
18016+
LSC_DATA_ORDER_NONTRANSPOSE, ImmOffset, ImmScale,
18017+
userAddrSpace);
1800718018
m_encoder->Push();
1800818019

1800918020
if (needTemp)
@@ -18016,6 +18027,7 @@ void EmitPass::emitLSCVectorLoad_uniform(
1801618027
void EmitPass::emitLSCVectorLoad(Instruction* inst,
1801718028
Value *Ptr,
1801818029
Value *varOffset, ConstantInt *immOffset,
18030+
ConstantInt *immScale,
1801918031
LSC_CACHE_OPTS cacheOpts,
1802018032
LSC_DOC_ADDR_SPACE addrSpace) {
1802118033
Type *Ty = inst->getType();
@@ -18050,23 +18062,26 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
1805018062

1805118063
const int immOffsetInt =
1805218064
immOffset ? static_cast<int>(immOffset->getSExtValue()) : 0;
18065+
const int immScaleInt =
18066+
immScale ? static_cast<int>(immScale->getSExtValue()) : 1;
1805318067

1805418068
// 1. handle cases eltBytes < 4
1805518069
if (eltBytes < 4)
1805618070
{
1805718071
IGC_ASSERT(elts == 1);
18072+
// todo handle pjurek scale
1805818073
emitLSCVectorLoad_subDW(cacheOpts, useA32, resource, destCVar,
18059-
eOffset, immOffsetInt, 1, eltBytes, addrSpace);
18074+
eOffset, immOffsetInt, immScaleInt, 1, eltBytes, addrSpace);
1806018075
return;
1806118076
}
1806218077

1806318078
// 2. Handle uniform load
1806418079
if (srcUniform && resource.m_resource->IsUniform())
1806518080
{
18066-
emitLSCVectorLoad_uniform(
18067-
cacheOpts, useA32,
18068-
resource, destCVar, eOffset, immOffsetInt, elts, eltBytes, align,
18069-
ptrType->getPointerAddressSpace(), addrSpace);
18081+
emitLSCVectorLoad_uniform(cacheOpts, useA32, resource, destCVar,
18082+
eOffset, immOffsetInt, immScaleInt, elts,
18083+
eltBytes, align,
18084+
ptrType->getPointerAddressSpace(), addrSpace);
1807018085
return;
1807118086
}
1807218087

@@ -18129,13 +18144,13 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
1812918144
case VectorMessage::MESSAGE_A32_LSC_RW:
1813018145
emitLSCLoad(
1813118146
cacheOpts, gatherDst, rawAddrVar, blkBits, numBlks, 0, &resource,
18132-
LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_NONTRANSPOSE, immOffsetInt, addrSpace);
18147+
LSC_ADDR_SIZE_32b, LSC_DATA_ORDER_NONTRANSPOSE, immOffsetInt, immScaleInt, addrSpace);
1813318148
break;
1813418149
case VectorMessage::MESSAGE_A64_LSC_RW:
1813518150
emitLSCLoad(cacheOpts, gatherDst,
1813618151
rawAddrVar, blkBits, numBlks, 0, &resource,
1813718152
LSC_ADDR_SIZE_64b, LSC_DATA_ORDER_NONTRANSPOSE,
18138-
immOffsetInt, addrSpace);
18153+
immOffsetInt, immScaleInt, addrSpace);
1813918154
break;
1814018155
default:
1814118156
IGC_ASSERT_MESSAGE(0, "Internal Error: unexpected message kind for load!");
@@ -21158,7 +21173,7 @@ void EmitPass::emitLscIntrinsicLoad(llvm::GenIntrinsicInst* inst)
2115821173
LSC_LOAD, gatherDst,
2115921174
offset, dataSize, fragElems, 0, &resource,
2116021175
useA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
21161-
LSC_DATA_ORDER_NONTRANSPOSE, fragImmOffset, cacheOpts, addrSpace);
21176+
LSC_DATA_ORDER_NONTRANSPOSE, fragImmOffset, 1, cacheOpts, addrSpace);
2116221177
}
2116321178
m_encoder->Push();
2116421179
});
@@ -21226,7 +21241,7 @@ void EmitPass::emitLscIntrinsicPrefetch(llvm::GenIntrinsicInst* inst)
2122621241
lscOp, fragDst,
2122721242
offset, dataSize, fragElems, 0, &resource,
2122821243
useA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b,
21229-
LSC_DATA_ORDER_NONTRANSPOSE, fragImmOffset, cacheOpts, addrSpace);
21244+
LSC_DATA_ORDER_NONTRANSPOSE, fragImmOffset, 1, cacheOpts, addrSpace);
2123021245
m_encoder->Push();
2123121246
});
2123221247
//
@@ -21345,13 +21360,14 @@ void EmitPass::emitLSCLoad(
2134521360
ResourceDescriptor* resource,
2134621361
LSC_ADDR_SIZE addr_size,
2134721362
LSC_DATA_ORDER data_order,
21348-
int immOffset)
21363+
int immOffset,
21364+
int immScale)
2134921365
{
2135021366
LSC_CACHE_OPTS cacheOpts =
2135121367
translateLSCCacheControlsFromMetadata(inst, true);
2135221368
LSC_DOC_ADDR_SPACE addrSpace = m_pCtx->m_UserAddrSpaceMD.Get(inst);
2135321369
emitLSCLoad(cacheOpts, dst, offset, elemSize, numElems, blockOffset,
21354-
resource, addr_size, data_order, immOffset, addrSpace);
21370+
resource, addr_size, data_order, immOffset, immScale, addrSpace);
2135521371
}
2135621372

2135721373
void EmitPass::emitLSCLoad(
@@ -21365,13 +21381,14 @@ void EmitPass::emitLSCLoad(
2136521381
LSC_ADDR_SIZE addr_size,
2136621382
LSC_DATA_ORDER data_order,
2136721383
int immOffset,
21384+
int immScale,
2136821385
LSC_DOC_ADDR_SPACE addrSpace)
2136921386
{
2137021387
LSC_DATA_SIZE elemSizeEnum = m_encoder->LSC_GetElementSize(elemSize);
2137121388
LSC_DATA_ELEMS numElemsEnum = m_encoder->LSC_GetElementNum(numElems);
2137221389
m_encoder->LSC_LoadGather(LSC_LOAD, dst,
2137321390
offset, elemSizeEnum, numElemsEnum, blockOffset,
21374-
resource, addr_size, data_order, immOffset,
21391+
resource, addr_size, data_order, immOffset, immScale,
2137521392
cacheOpts, addrSpace);
2137621393
}
2137721394

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -178,7 +178,7 @@ class EmitPass : public llvm::FunctionPass
178178
void emitStore3DInner(llvm::Value* pllValToStore, llvm::Value* pllDstPtr, llvm::Value* pllElmIdx);
179179

180180
void emitLoad(llvm::LoadInst *inst, llvm::Value *varOffset,
181-
llvm::ConstantInt *immOffset
181+
llvm::ConstantInt *immOffset, ConstantInt *immScale = nullptr
182182
); // single load, no pattern
183183
void emitLoad3DInner(llvm::LdRawIntrinsic* inst, ResourceDescriptor& resource, llvm::Value* elemIdxV);
184184

@@ -427,6 +427,7 @@ class EmitPass : public llvm::FunctionPass
427427
void emitLSCVectorLoad(llvm::Instruction *Inst,
428428
llvm::Value *Ptr,
429429
llvm::Value *offset, llvm::ConstantInt *immOffset,
430+
ConstantInt *immScale,
430431
LSC_CACHE_OPTS cacheOpts,
431432
LSC_DOC_ADDR_SPACE addrSpace);
432433
void emitLSCVectorStore(llvm::Value *Ptr,
@@ -544,7 +545,8 @@ class EmitPass : public llvm::FunctionPass
544545
ResourceDescriptor* resource,
545546
LSC_ADDR_SIZE addr_size,
546547
LSC_DATA_ORDER data_order,
547-
int immOffset);
548+
int immOffset,
549+
int immScale);
548550
void emitLSCLoad(
549551
LSC_CACHE_OPTS cacheOpts,
550552
CVariable* dst,
@@ -556,6 +558,7 @@ class EmitPass : public llvm::FunctionPass
556558
LSC_ADDR_SIZE addr_size,
557559
LSC_DATA_ORDER data_order,
558560
int immOffset,
561+
int immScale,
559562
LSC_DOC_ADDR_SPACE addrSpace);
560563
void emitLSCStore(
561564
llvm::Instruction* inst,
@@ -945,13 +948,14 @@ class EmitPass : public llvm::FunctionPass
945948
// sub-function of vector load/store
946949
void emitLSCVectorLoad_subDW(LSC_CACHE_OPTS cacheOpts, bool UseA32,
947950
ResourceDescriptor &Resource, CVariable *Dest,
948-
CVariable *Offset, int ImmOffset,
951+
CVariable *Offset, int ImmOffset, int ImmScale,
949952
uint32_t NumElts, uint32_t EltBytes,
950953
LSC_DOC_ADDR_SPACE addrSpace);
951954
void emitLSCVectorLoad_uniform(LSC_CACHE_OPTS cacheOpts, bool UseA32,
952955
ResourceDescriptor &Resource,
953-
CVariable *Dest, CVariable *Offset,
954-
int ImmOffset, uint32_t NumElts,
956+
CVariable *Dest,
957+
CVariable *Offset, int ImmOffset,
958+
int ImmScale, uint32_t NumElts,
955959
uint32_t EltBytes, uint64_t Align,
956960
uint32_t Addrspace,
957961
LSC_DOC_ADDR_SPACE userAddrSpace);

0 commit comments

Comments
 (0)