@@ -6339,6 +6339,7 @@ void EmitPass::emitLSCSimdBlockWrite(llvm::Instruction* inst, llvm::Value* ptrVa
6339
6339
6340
6340
CVariable* data = GetSymbol(dataPtr);
6341
6341
bool useA64 = isA64Ptr(ptrType, m_currShader->GetContext());
6342
+ LSC_ADDR_SIZE addrSize = useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b;
6342
6343
6343
6344
if (!IsGRFAligned(data, EALIGN_GRF) && !data->IsUniform())
6344
6345
{
@@ -6389,7 +6390,7 @@ void EmitPass::emitLSCSimdBlockWrite(llvm::Instruction* inst, llvm::Value* ptrVa
6389
6390
uint32_t blkBits = 64;
6390
6391
uint32_t nBlks = bytesToRead * 8 / 64;
6391
6392
6392
- emitLSCStore(inst, data, pTempVar, blkBits, nBlks, srcOffset, &resource, useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b , LSC_DATA_ORDER_TRANSPOSE, immOffset, 1);
6393
+ emitLSCStore(inst, data, pTempVar, blkBits, nBlks, srcOffset, &resource, addrSize , LSC_DATA_ORDER_TRANSPOSE, immOffset, 1);
6393
6394
m_encoder->Push();
6394
6395
6395
6396
bytesRemaining -= bytesToRead;
@@ -6416,6 +6417,7 @@ void EmitPass::emitLSCSimdBlockRead(llvm::Instruction* inst, llvm::Value* ptrVal
6416
6417
}
6417
6418
6418
6419
bool useA64 = isA64Ptr(ptrType, m_currShader->GetContext());
6420
+ LSC_ADDR_SIZE addrSize = useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b;
6419
6421
6420
6422
Type* Ty = inst->getType();
6421
6423
IGCLLVM::FixedVectorType* VTy = dyn_cast<IGCLLVM::FixedVectorType>(Ty);
@@ -6451,7 +6453,7 @@ void EmitPass::emitLSCSimdBlockRead(llvm::Instruction* inst, llvm::Value* ptrVal
6451
6453
uint32_t bytesToRead = getLSCBlockMsgSize(bytesRemaining, m_currShader->m_Platform->getMaxLSCBlockMsgSize(isD64));
6452
6454
uint32_t nBlks = (bytesToRead * 8) / blkBits;
6453
6455
6454
- emitLSCLoad(inst, m_destination, pTempVar, blkBits, nBlks, dstOffset, &resource, useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b , LSC_DATA_ORDER_TRANSPOSE, immOffset, 1);
6456
+ emitLSCLoad(inst, m_destination, pTempVar, blkBits, nBlks, dstOffset, &resource, addrSize , LSC_DATA_ORDER_TRANSPOSE, immOffset, 1);
6455
6457
m_encoder->Push();
6456
6458
6457
6459
bytesRemaining -= bytesToRead;
@@ -14390,6 +14392,7 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst* pInsn)
14390
14392
14391
14393
PointerType* PtrTy = dyn_cast<PointerType>(pllDstAddr->getType());
14392
14394
bool isA64 = PtrTy && isA64Ptr(PtrTy, m_currShader->GetContext());
14395
+ LSC_ADDR_SIZE addrSize = isA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b;
14393
14396
bool extendPointer = (bitwidth == 64 && !isA64);
14394
14397
// DG2 onward with LSC we do not have to extend an A32 pointer to an
14395
14398
// A64 pointer for 64bit atomics
@@ -14415,7 +14418,7 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst* pInsn)
14415
14418
pSrc0, pSrc1,
14416
14419
bitwidth,
14417
14420
&resource,
14418
- isA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b ,
14421
+ addrSize ,
14419
14422
0,
14420
14423
LSC_DEFAULT_CACHING);
14421
14424
}
@@ -14470,7 +14473,7 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst* pInsn)
14470
14473
pDst, pDstAddr,
14471
14474
pSrc0, pSrc1,
14472
14475
bitwidth,
14473
- &resource, isA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b ,
14476
+ &resource, addrSize ,
14474
14477
0,
14475
14478
LSC_DEFAULT_CACHING);
14476
14479
}
@@ -14732,7 +14735,7 @@ void EmitPass::emitTypedRead(llvm::Instruction* pInsn)
14732
14735
14733
14736
uint numChannels = iSTD::BitCount(writeMask.getEM());
14734
14737
auto doLSC = shouldGenerateLSC(pInsn, true);
14735
-
14738
+ LSC_ADDR_SIZE addrSize = LSC_ADDR_SIZE_32b;
14736
14739
if (m_currShader->GetIsUniform(pInsn))
14737
14740
{
14738
14741
SIMDMode nativeDispatchMode = m_currShader->m_Platform->getMinDispatchMode();
@@ -14749,7 +14752,7 @@ void EmitPass::emitTypedRead(llvm::Instruction* pInsn)
14749
14752
if (doLSC)
14750
14753
{
14751
14754
m_encoder->LSC_TypedReadWrite(LSC_LOAD_QUAD, &resource, pU, pV, pR, pLOD, tempdst, 4 * 8,
14752
- numLanes(nativeDispatchMode), LSC_ADDR_SIZE_32b , writeMask.getEM(), cacheOpts);
14755
+ numLanes(nativeDispatchMode), addrSize , writeMask.getEM(), cacheOpts);
14753
14756
}
14754
14757
else
14755
14758
{
@@ -14793,7 +14796,7 @@ void EmitPass::emitTypedRead(llvm::Instruction* pInsn)
14793
14796
if (doLSC)
14794
14797
{
14795
14798
m_encoder->LSC_TypedReadWrite(LSC_LOAD_QUAD, &resource, pU, pV, pR, pLOD, m_destination, 4 * 8,
14796
- numLanes(SIMDMode::SIMD16), LSC_ADDR_SIZE_32b , writeMask.getEM(), cacheOpts);
14799
+ numLanes(SIMDMode::SIMD16), addrSize , writeMask.getEM(), cacheOpts);
14797
14800
}
14798
14801
else
14799
14802
{
@@ -14827,7 +14830,7 @@ void EmitPass::emitTypedRead(llvm::Instruction* pInsn)
14827
14830
if (doLSC)
14828
14831
{
14829
14832
m_encoder->LSC_TypedReadWrite(LSC_LOAD_QUAD, &resource, pU, pV, pR, pLOD, tempdst[i], 4 * 8,
14830
- numLanes(SIMDMode::SIMD16), LSC_ADDR_SIZE_32b , writeMask.getEM(), cacheOpts);
14833
+ numLanes(SIMDMode::SIMD16), addrSize , writeMask.getEM(), cacheOpts);
14831
14834
}
14832
14835
else
14833
14836
{
@@ -14884,7 +14887,7 @@ void EmitPass::emitTypedWrite(llvm::Instruction* pInsn)
14884
14887
ResourceDescriptor resource = GetResourceVariable(pllDstBuffer);
14885
14888
LSC_CACHE_OPTS cacheOpts = translateLSCCacheControlsFromMetadata(pInsn, false, true);
14886
14889
m_currShader->HasLscStoreCacheControls(cacheOpts);
14887
-
14890
+ LSC_ADDR_SIZE addrSize = LSC_ADDR_SIZE_32b;
14888
14891
if (m_currShader->GetIsUniform(pInsn))
14889
14892
{
14890
14893
IGC_ASSERT_MESSAGE(0, "Uniform store_uav_typed not implemented yet");
@@ -18025,13 +18028,14 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
18025
18028
else if (auto CI = dyn_cast<LdRawIntrinsic>(inst))
18026
18029
align = CI->getAlignment();
18027
18030
PointerType* ptrType = cast<PointerType>(Ptr->getType());
18031
+ ResourceDescriptor resource = GetResourceVariable(Ptr);
18028
18032
bool useA32 = !IGC::isA64Ptr(ptrType, m_currShader->GetContext());
18033
+ LSC_ADDR_SIZE addrSize = useA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b;
18029
18034
IGCLLVM::FixedVectorType* VTy = dyn_cast<IGCLLVM::FixedVectorType>(Ty);
18030
18035
Type* eltTy = VTy ? VTy->getElementType() : Ty;
18031
18036
uint32_t eltBytes = GetScalarTypeSizeInRegister(eltTy);
18032
18037
uint32_t elts = VTy ? int_cast<uint32_t>(VTy->getNumElements()) : 1;
18033
18038
18034
- ResourceDescriptor resource = GetResourceVariable(Ptr);
18035
18039
CountStatelessIndirectAccess(Ptr, resource);
18036
18040
// eOffset is in bytes
18037
18041
// offset corresponds to Int2Ptr operand obtained during pattern matching
@@ -18080,14 +18084,6 @@ void EmitPass::emitLSCVectorLoad(Instruction* inst,
18080
18084
const int immScaleInt =
18081
18085
immScale ? static_cast<int>(immScale->getSExtValue()) : 1;
18082
18086
18083
- LSC_ADDR_SIZE addrSize = LSC_ADDR_SIZE_INVALID;
18084
- if (useA32) {
18085
- addrSize = LSC_ADDR_SIZE_32b;
18086
- }
18087
- else {
18088
- addrSize = LSC_ADDR_SIZE_64b;
18089
- }
18090
-
18091
18087
// 1. handle cases eltBytes < 4
18092
18088
if (eltBytes < 4)
18093
18089
{
@@ -18379,6 +18375,7 @@ void EmitPass::emitLSCVectorStore(Value *Ptr,
18379
18375
// offset corresponds to Int2Ptr operand obtained during pattern matching
18380
18376
CVariable* eOffset = GetSymbol(varOffset);
18381
18377
bool useA32 = !isA64Ptr(ptrType, m_currShader->GetContext());
18378
+ LSC_ADDR_SIZE addrSize = useA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b;
18382
18379
if (useA32)
18383
18380
{
18384
18381
eOffset = TruncatePointer(eOffset);
@@ -18411,14 +18408,6 @@ void EmitPass::emitLSCVectorStore(Value *Ptr,
18411
18408
const int immScaleVal =
18412
18409
immScale ? static_cast<int>(immScale->getSExtValue()) : 1;
18413
18410
18414
- LSC_ADDR_SIZE addrSize = LSC_ADDR_SIZE_INVALID;
18415
- if (useA32) {
18416
- addrSize = LSC_ADDR_SIZE_32b;
18417
- }
18418
- else {
18419
- addrSize = LSC_ADDR_SIZE_64b;
18420
- }
18421
-
18422
18411
// 1. handle cases eltBytes < 4
18423
18412
if (eltBytes < 4)
18424
18413
{
@@ -21132,6 +21121,7 @@ void EmitPass::emitLscIntrinsicLoad(llvm::GenIntrinsicInst* inst)
21132
21121
ResourceDescriptor resource = GetResourceVariable(Ptr);
21133
21122
CVariable* offset = GetSymbol(Ptr);
21134
21123
bool useA32 = !isA64Ptr(ptrType, m_currShader->GetContext());
21124
+ LSC_ADDR_SIZE addrSize = useA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b;
21135
21125
offset = useA32 ? TruncatePointer(offset) : offset;
21136
21126
bool isBlockLoad = inst->getIntrinsicID() == GenISAIntrinsic::GenISA_LSCLoadBlock;
21137
21127
if (isBlockLoad) {
@@ -21157,14 +21147,14 @@ void EmitPass::emitLscIntrinsicLoad(llvm::GenIntrinsicInst* inst)
21157
21147
m_encoder->LSC_LoadBlock1D(
21158
21148
gatherDst, offset,
21159
21149
dataSize, fragElems, &resource,
21160
- useA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b ,
21150
+ addrSize ,
21161
21151
fragImmOffset,
21162
21152
cacheOpts);
21163
21153
} else {
21164
21154
m_encoder->LSC_LoadGather(
21165
21155
LSC_LOAD, gatherDst,
21166
21156
offset, dataSize, fragElems, 0, &resource,
21167
- useA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b ,
21157
+ addrSize ,
21168
21158
LSC_DATA_ORDER_NONTRANSPOSE, fragImmOffset, 1, cacheOpts, addrSpace);
21169
21159
}
21170
21160
m_encoder->Push();
@@ -21194,6 +21184,7 @@ void EmitPass::emitLscIntrinsicPrefetch(llvm::GenIntrinsicInst* inst)
21194
21184
ResourceDescriptor resource = GetResourceVariable(Ptr);
21195
21185
CVariable* offset = GetSymbol(Ptr);
21196
21186
bool useA32 = !isA64Ptr(ptrType, m_currShader->GetContext());
21187
+ LSC_ADDR_SIZE addrSize = useA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b;
21197
21188
offset = useA32 ? TruncatePointer(offset) : offset;
21198
21189
21199
21190
auto dataSize = (LSC_DATA_SIZE)cast<ConstantInt>(inst->getOperand(2))->getZExtValue();
@@ -21232,7 +21223,7 @@ void EmitPass::emitLscIntrinsicPrefetch(llvm::GenIntrinsicInst* inst)
21232
21223
m_encoder->LSC_LoadGather(
21233
21224
lscOp, fragDst,
21234
21225
offset, dataSize, fragElems, 0, &resource,
21235
- useA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b ,
21226
+ addrSize ,
21236
21227
LSC_DATA_ORDER_NONTRANSPOSE, fragImmOffset, 1, cacheOpts, addrSpace);
21237
21228
m_encoder->Push();
21238
21229
});
@@ -21297,6 +21288,7 @@ void EmitPass::emitLscIntrinsicStore(llvm::GenIntrinsicInst* inst)
21297
21288
PointerType* ptrType = cast<PointerType>(Ptr->getType());
21298
21289
CVariable* offset = GetSymbol(Ptr);
21299
21290
bool useA32 = !isA64Ptr(ptrType, m_currShader->GetContext());
21291
+ LSC_ADDR_SIZE addrSize = useA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b;
21300
21292
offset = useA32 ? TruncatePointer(offset) : offset;
21301
21293
bool isBlockStore = inst->getIntrinsicID() == GenISAIntrinsic::GenISA_LSCStoreBlock;
21302
21294
if (isBlockStore) {
@@ -21322,7 +21314,7 @@ void EmitPass::emitLscIntrinsicStore(llvm::GenIntrinsicInst* inst)
21322
21314
fragData, offset,
21323
21315
dataSize, fragElems,
21324
21316
&resource,
21325
- useA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b ,
21317
+ addrSize ,
21326
21318
fragImmOffset,
21327
21319
cacheOpts);
21328
21320
} else {
@@ -21331,7 +21323,7 @@ void EmitPass::emitLscIntrinsicStore(llvm::GenIntrinsicInst* inst)
21331
21323
fragData, offset,
21332
21324
dataSize, fragElems,
21333
21325
0, &resource,
21334
- useA32 ? LSC_ADDR_SIZE_32b : LSC_ADDR_SIZE_64b ,
21326
+ addrSize ,
21335
21327
LSC_DATA_ORDER_NONTRANSPOSE,
21336
21328
fragImmOffset,
21337
21329
1,
@@ -21575,6 +21567,8 @@ void EmitPass::emitLSCAtomic(llvm::GenIntrinsicInst* inst)
21575
21567
21576
21568
PointerType* ptrType = cast<PointerType>(Ptr->getType());
21577
21569
bool isA64 = isA64Ptr(ptrType, m_currShader->GetContext());
21570
+ LSC_ADDR_SIZE addrSize = isA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b;
21571
+ ResourceDescriptor resource = GetResourceVariable(Ptr);
21578
21572
if (Ptr->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL)
21579
21573
{
21580
21574
m_currShader->SetHasGlobalAtomics();
@@ -21597,7 +21591,6 @@ void EmitPass::emitLSCAtomic(llvm::GenIntrinsicInst* inst)
21597
21591
GetSymbol(inst->getArgOperand(3)) : nullptr;
21598
21592
pAtomicCmp = (pAtomicCmp != nullptr) ? BroadcastIfUniform(pAtomicCmp) : pAtomicCmp;
21599
21593
21600
- ResourceDescriptor resource = GetResourceVariable(Ptr);
21601
21594
// take the bitwidth from the pointer type since the return type might
21602
21595
// differ; e.g. uint lsc_atomic_add(ushort *, uint) D16U32
21603
21596
unsigned short bitwidth =
@@ -21609,7 +21602,7 @@ void EmitPass::emitLSCAtomic(llvm::GenIntrinsicInst* inst)
21609
21602
m_encoder->LSC_AtomicRaw(
21610
21603
atomicOp, pOldValue, pDstAddr, pAtomicVal,
21611
21604
pAtomicCmp, bitwidth, &resource,
21612
- isA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b , immOff,
21605
+ addrSize , immOff,
21613
21606
cacheOpts);
21614
21607
m_encoder->Push();
21615
21608
}
0 commit comments