Skip to content

Commit adb0b17

Browse files
YuriPlyakhinigcbot
authored andcommitted
Fix wrong fence scope generated for group barrier
- Modify GenISA_memoryfence intrinsic to accept memory scope parameter and take it into account, when generating LSC fence (previously GPU scope was always used for global fence and GROUP scope was always used for local fence). - Modify other passes and built-ins which are using this intrinsic to pass through scope parameter, where it is available and makes sense.
1 parent 3bc6fb7 commit adb0b17

File tree

14 files changed

+122
-47
lines changed

14 files changed

+122
-47
lines changed

IGC/AdaptorCommon/DivergentBarrierPass.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,14 @@ void DivergentBarrierPass::updateFenceArgs(
5656
else
5757
return true;
5858
};
59+
auto updateScope = [](const Value* V)
60+
{
61+
// If we don't know at compile-time, conservatively assume LSC_SCOPE_GPU
62+
if (auto* C = dyn_cast<ConstantInt>(V))
63+
return static_cast<uint32_t>(C->getValue().getZExtValue());
64+
else
65+
return static_cast<uint32_t>(LSC_SCOPE_GPU);
66+
};
5967

6068
// Always CommitEnable
6169

@@ -66,6 +74,7 @@ void DivergentBarrierPass::updateFenceArgs(
6674
Args.Global |= update(I->getOperand(5));
6775
Args.L1_Invalidate |= update(I->getOperand(6));
6876
Args.L1_Evict |= update(I->getOperand(7));
77+
Args.Scope = updateScope(I->getOperand(8));
6978
}
7079

7180
CallInst* DivergentBarrierPass::insertFence(
@@ -83,6 +92,7 @@ CallInst* DivergentBarrierPass::insertFence(
8392
IRB.getInt1(FA.Global),
8493
IRB.getInt1(FA.L1_Invalidate),
8594
IRB.getInt1(FA.L1_Evict),
95+
IRB.getInt32(FA.Scope)
8696
};
8797
return IRB.CreateCall(FenceFn, Args);
8898
}

IGC/AdaptorCommon/DivergentBarrierPass.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ namespace IGC
4949
bool Global = false;
5050
bool L1_Invalidate = false;
5151
bool L1_Evict = false;
52+
// init Scope with GROUP which should be used for SLM barriers
53+
// to be consistent with Global argument initialized to false
54+
uint Scope = LSC_SCOPE_GROUP;
5255
};
5356

5457
typedef llvm::DenseMap<uint64_t, WIAnalysis::WIDependancy> SlotDepMap;

IGC/BiFModule/Implementation/IGCBiF_Intrinsics.cl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,7 @@ void __builtin_IB_write_2d_f(int, int2, float4, int);
113113
local uchar* __builtin_IB_AllocLocalMemPool(bool allocAllWorkgroups, uint numAdditionalElements, uint elementSize);
114114

115115
// Memory fences
116-
// See GenISAIntrinsics.td for documentation
117-
void __builtin_IB_memfence(bool commitEnable, bool flushRW, bool flushConstant, bool flushTexture, bool flushIcache, bool isGlobal, bool invalidateL1, bool evictL1);
116+
void __builtin_IB_memfence(bool commitEnable, bool flushRW, bool flushConstant, bool flushTexture, bool flushIcache, bool isGlobal, bool invalidateL1, bool evictL1, uint scope);
118117
void __builtin_IB_flush_sampler_cache(void);
119118
void __builtin_IB_typedmemfence(bool invalidateCache);
120119

IGC/BiFModule/Implementation/IGCBiF_Intrinsics_Lsc.cl

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -263,29 +263,29 @@ bool __builtin_IB_lsc_load_status_global_ulong8(const __global ulong8 *base, int
263263
///////////////////////////////////////////////////////////////////////
264264

265265
// FS - Fence Scope
266-
enum LSC_FS {
266+
typedef enum {
267267
LSC_FS_THREAD_GROUP,
268268
LSC_FS_LOCAL,
269269
LSC_FS_TILE,
270270
LSC_FS_GPU,
271271
LSC_FS_GPUs,
272272
LSC_FS_SYSTEM_RELEASE,
273273
LSC_FS_SYSTEM_ACQUIRE
274-
};
274+
} LSC_FS;
275275

276276
// FT - Fence Type
277-
enum LSC_FT {
277+
typedef enum {
278278
LSC_FT_DEFAULT,
279279
LSC_FT_EVICT,
280280
LSC_FT_INVALIDATE,
281281
LSC_FT_DISCARD,
282282
LSC_FT_CLEAN,
283283
LSC_FT_L3
284-
};
284+
} LSC_FT;
285285

286-
void __builtin_IB_lsc_fence_global_untyped(enum LSC_FS scope, enum LSC_FT flushType); // Mem Port - UGM
287-
void __builtin_IB_lsc_fence_global_untyped_cross_tile(enum LSC_FS scope, enum LSC_FT flushType); // Mem Port - UGML
288-
void __builtin_IB_lsc_fence_global_typed(enum LSC_FS scope, enum LSC_FT flushType); // Mem Port - TGM
286+
void __builtin_IB_lsc_fence_global_untyped(LSC_FS scope, LSC_FT flushType); // Mem Port - UGM
287+
void __builtin_IB_lsc_fence_global_untyped_cross_tile(LSC_FS scope, LSC_FT flushType); // Mem Port - UGML
288+
void __builtin_IB_lsc_fence_global_typed(LSC_FS scope, LSC_FT flushType); // Mem Port - TGM
289289
void __builtin_IB_lsc_fence_local(); // Mem Port - SLM
290290
void __builtin_IB_lsc_fence_evict_to_memory(); // Mem Port - UGM
291291

IGC/BiFModule/Implementation/atomics.cl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ SPDX-License-Identifier: MIT
2020
__local int* __builtin_IB_get_local_lock();
2121
__global int* __builtin_IB_get_global_lock();
2222
void __builtin_IB_eu_thread_pause(uint value);
23-
void __intel_memfence_handler(bool flushRW, bool isGlobal, bool invalidateL1, bool evictL1);
23+
void __intel_memfence_handler(bool flushRW, bool isGlobal, bool invalidateL1, bool evictL1, Scope_t scope);
2424

2525
#define LOCAL_SPINLOCK_START() \
2626
{ \
@@ -51,14 +51,14 @@ SPDX-License-Identifier: MIT
5151
if( ( (Semantics) & ( SEMANTICS_PRE_OP_NEED_FENCE ) ) > 0 ) \
5252
{ \
5353
bool flushL3 = (isGlobal) && ((Scope) == Device || (Scope) == CrossDevice); \
54-
__intel_memfence_handler(flushL3, isGlobal, false, isGlobal); \
54+
__intel_memfence_handler(flushL3, isGlobal, false, isGlobal, Scope); \
5555
}
5656

5757
#define FENCE_POST_OP(Scope, Semantics, isGlobal) \
5858
if( ( (Semantics) & ( SEMANTICS_POST_OP_NEEDS_FENCE ) ) > 0 ) \
5959
{ \
6060
bool flushL3 = (isGlobal) && ((Scope) == Device || (Scope) == CrossDevice); \
61-
__intel_memfence_handler(flushL3, isGlobal, isGlobal, false); \
61+
__intel_memfence_handler(flushL3, isGlobal, isGlobal, false, Scope); \
6262
}
6363

6464
// This fencing scheme allows us to obey the memory model when coherency is

IGC/BiFModule/Implementation/barrier.cl

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,39 +16,62 @@ SPDX-License-Identifier: MIT
1616

1717
// MEMFENCE IMPLEMENTATION
1818

19-
void OPTNONE __intel_memfence_optnone(bool flushRW, bool isGlobal, bool invalidateL1, bool evictL1)
19+
// Use macro to make conversion of scope parameter a compile-time constant for O0 path
20+
// Below choice of some HW scopes is explained.
21+
// 1. CrossDevice -> LSC_FS_SYSTEM_RELEASE:
22+
// sycl::memory_scope::system is lowered to SPIR-V CrossDevice. SYCL SPEC 2020 for system scope:
23+
// "applies to any work-item or host thread in the system that is currently permitted to access
24+
// the memory allocation containing the referenced object".
25+
// Hence, conservatively mapping to “system”.
26+
// 2. Subgroup and Invocation --> LSC_FS_THREAD_GROUP:
27+
// our HW spec doesn’t have corresponding scope for Invocation or Subgroup, hence mapping to
28+
// lowest possible scope
29+
#define CONVERT_SCOPE_SPIRV_TO_VISA(scope) \
30+
((scope) == CrossDevice ? LSC_FS_SYSTEM_RELEASE : \
31+
(scope) == Device ? LSC_FS_GPU : \
32+
LSC_FS_THREAD_GROUP)
33+
34+
void OPTNONE __intel_memfence_optnone(bool flushRW, bool isGlobal, bool invalidateL1, bool evictL1, Scope_t scope)
2035
{
21-
#define MEMFENCE_IF(V1, V5, V6, V7) \
22-
if (flushRW == V1 && isGlobal == V5 && invalidateL1 == V6 && evictL1 == V7) \
23-
{ \
24-
__builtin_IB_memfence(true, V1, false, false, false, V5, V6, V7); \
36+
#define MEMFENCE_IF(V1, V5, V6, V7, V8) \
37+
if (flushRW == V1 && isGlobal == V5 && invalidateL1 == V6 && evictL1 == V7 && scope == V8) \
38+
{ \
39+
LSC_FS lsc_scope = CONVERT_SCOPE_SPIRV_TO_VISA(V8); \
40+
__builtin_IB_memfence(true, V1, false, false, false, V5, V6, V7, lsc_scope); \
2541
} else
2642

2743
// Generate combinations for all MEMFENCE_IF cases, e.g.:
2844
// true, true, true
2945
// true, true, false etc.
46+
#define MF_L4(...) MF_L3(__VA_ARGS__,false) MF_L3(__VA_ARGS__,true)
3047
#define MF_L3(...) MF_L2(__VA_ARGS__,false) MF_L2(__VA_ARGS__,true)
3148
#define MF_L2(...) MF_L1(__VA_ARGS__,false) MF_L1(__VA_ARGS__,true)
32-
#define MF_L1(...) MEMFENCE_IF(__VA_ARGS__,false) MEMFENCE_IF(__VA_ARGS__,true)
33-
MF_L3(false)
34-
MF_L3(true) {}
49+
#define MF_L1(...) MEMFENCE_IF(__VA_ARGS__,CrossDevice) \
50+
MEMFENCE_IF(__VA_ARGS__,Device) \
51+
MEMFENCE_IF(__VA_ARGS__,Workgroup) \
52+
MEMFENCE_IF(__VA_ARGS__,Subgroup) \
53+
MEMFENCE_IF(__VA_ARGS__,Invocation)
54+
MF_L4(false)
55+
MF_L4(true) {}
3556

3657
#undef MEMFENCE_IF
58+
#undef MF_L4
3759
#undef MF_L3
3860
#undef MF_L2
3961
#undef MF_L1
4062
}
41-
void __intel_memfence(bool flushRW, bool isGlobal, bool invalidateL1, bool evictL1)
63+
void __intel_memfence(bool flushRW, bool isGlobal, bool invalidateL1, bool evictL1, Scope_t scope)
4264
{
43-
__builtin_IB_memfence(true, flushRW, false, false, false, isGlobal, invalidateL1, evictL1);
65+
LSC_FS lsc_scope = CONVERT_SCOPE_SPIRV_TO_VISA(scope);
66+
__builtin_IB_memfence(true, flushRW, false, false, false, isGlobal, invalidateL1, evictL1, lsc_scope);
4467
}
4568

46-
void __intel_memfence_handler(bool flushRW, bool isGlobal, bool invalidateL1, bool evictL1)
69+
void __intel_memfence_handler(bool flushRW, bool isGlobal, bool invalidateL1, bool evictL1, Scope_t scope)
4770
{
4871
if (BIF_FLAG_CTRL_GET(OptDisable))
49-
__intel_memfence_optnone(flushRW, isGlobal, invalidateL1, evictL1);
72+
__intel_memfence_optnone(flushRW, isGlobal, invalidateL1, evictL1, scope);
5073
else
51-
__intel_memfence(flushRW, isGlobal, invalidateL1, evictL1);
74+
__intel_memfence(flushRW, isGlobal, invalidateL1, evictL1, scope);
5275
}
5376

5477
// TYPEDMEMFENCE IMPLEMENTATION
@@ -98,19 +121,19 @@ static void __intel_atomic_work_item_fence( Scope_t Memory, uint Semantics )
98121
// although on some platforms they may be elided; platform-specific checks are performed in codegen
99122
if (Semantics & WorkgroupMemory)
100123
{
101-
__intel_memfence_handler(false, false, false, false);
124+
__intel_memfence_handler(false, false, false, false, Memory);
102125
}
103126
if (Semantics & CrossWorkgroupMemory)
104127
{
105128
if (Memory == Device || Memory == CrossDevice)
106129
{
107-
__intel_memfence_handler(true, true, invalidateL1, evictL1);
130+
__intel_memfence_handler(true, true, invalidateL1, evictL1, Memory);
108131
}
109132
else
110133
{
111134
// Single workgroup executes on one DSS and shares the same L1 cache.
112135
// If scope doesn't reach outside of workgroup, L1 flush can be skipped.
113-
__intel_memfence_handler(false, true, false, false);
136+
__intel_memfence_handler(false, true, false, false, Memory);
114137
}
115138
}
116139
}

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15865,8 +15865,9 @@ LSC_FENCE_OP EmitPass::getLSCMemoryFenceOp(bool IsGlobalMemFence, bool Invalidat
1586515865

1586615866
void EmitPass::emitMemoryFence(llvm::Instruction* inst)
1586715867
{
15868-
static constexpr int ExpectedNumberOfArguments = 8;
15869-
IGC_ASSERT(IGCLLVM::getNumArgOperands(cast<CallInst>(inst)) == ExpectedNumberOfArguments);
15868+
static constexpr int ExpectedNumberOfBoolArguments = 8;
15869+
static constexpr int ScopeArgumentNumber = 8;
15870+
IGC_ASSERT(IGCLLVM::getNumArgOperands(cast<CallInst>(inst)) == (ExpectedNumberOfBoolArguments + 1));
1587015871
CodeGenContext* ctx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
1587115872

1587215873
// If passed a non-constant value for any of the parameters,
@@ -15882,8 +15883,9 @@ void EmitPass::emitMemoryFence(llvm::Instruction* inst)
1588215883
// Note: this flag is respected only for LSC case currently.
1588315884
// TODO: add support for non-LSC and typed fence.
1588415885
bool L1_Evict = true;
15886+
LSC_SCOPE Scope = LSC_SCOPE_GPU;
1588515887

15886-
std::array<reference_wrapper<bool>, ExpectedNumberOfArguments> MemFenceArguments{
15888+
std::array<reference_wrapper<bool>, ExpectedNumberOfBoolArguments> MemFenceArguments{
1588715889
CommitEnable,
1588815890
L3_Flush_RW_Data,
1588915891
L3_Flush_Constant_Data,
@@ -15901,6 +15903,11 @@ void EmitPass::emitMemoryFence(llvm::Instruction* inst)
1590115903
}
1590215904
}
1590315905

15906+
if (ConstantInt* CI = llvm::dyn_cast<llvm::ConstantInt>(inst->getOperand(ScopeArgumentNumber)))
15907+
{
15908+
Scope = (LSC_SCOPE) CI->getValue().getZExtValue();
15909+
}
15910+
1590415911
bool EmitFence = true;
1590515912

1590615913
// Check whether we know this is a local fence. If we do, don't emit fence for a BDW+SKL/BXT only.
@@ -15929,8 +15936,11 @@ void EmitPass::emitMemoryFence(llvm::Instruction* inst)
1592915936
{
1593015937
// tgm should use GenISA_typedmemoryfence
1593115938
LSC_SFID sfid = Global_Mem_Fence ? LSC_UGM : LSC_SLM;
15932-
// ToDo: replace with fence instrinsics that take scope/op
15933-
LSC_SCOPE scope = Global_Mem_Fence ? LSC_SCOPE_GPU : LSC_SCOPE_GROUP;
15939+
// SLM only gets .group and .none
15940+
if(!Global_Mem_Fence && Scope >= LSC_SCOPE_LOCAL)
15941+
{
15942+
Scope = LSC_SCOPE_GROUP;
15943+
}
1593415944
// Do L1 evict only when default L1 cache policy is write-back.
1593515945
if (L1_Evict && m_pCtx->type == ShaderType::OPENCL_SHADER)
1593615946
{
@@ -15939,19 +15949,32 @@ void EmitPass::emitMemoryFence(llvm::Instruction* inst)
1593915949
L1_Evict = static_cast<LSC_L1_L3_CC>(CLCtx->m_InternalOptions.StoreCacheDefault) == LSC_L1IAR_WB_L3C_WB;
1594015950
}
1594115951
// Change the scope from `GPU` to `Tile` on single-tile platforms to avoid L3 flush on DG2 and MTL and ARL.
15942-
if (scope == LSC_SCOPE_GPU &&
15952+
if (Scope == LSC_SCOPE_GPU &&
1594315953
!m_currShader->m_Platform->hasMultiTile() &&
1594415954
m_currShader->m_Platform->hasL3FlushOnGPUScopeInvalidate() &&
1594515955
IGC_IS_FLAG_DISABLED(EnableGPUFenceScopeOnSingleTileGPUs))
1594615956
{
15947-
scope = LSC_SCOPE_TILE;
15957+
Scope = LSC_SCOPE_TILE;
1594815958
}
15959+
15960+
// On some products cache is not flushed for scope < tile.
15961+
if (Global_Mem_Fence && (L1_Invalidate || L1_Evict) &&
15962+
(Scope < LSC_SCOPE_TILE) && (
15963+
m_currShader->m_Platform->getPlatformInfo().eProductFamily == IGFX_DG2
15964+
|| m_currShader->m_Platform->getPlatformInfo().eProductFamily == IGFX_PVC
15965+
|| m_currShader->m_Platform->getPlatformInfo().eProductFamily == IGFX_METEORLAKE
15966+
|| m_currShader->m_Platform->getPlatformInfo().eProductFamily == IGFX_ARROWLAKE
15967+
))
15968+
{
15969+
Scope = LSC_SCOPE_TILE;
15970+
}
15971+
1594915972
LSC_FENCE_OP op = getLSCMemoryFenceOp(Global_Mem_Fence, L1_Invalidate, L1_Evict);
1595015973
if (inst->getMetadata("forceFlushNone") || sfid == LSC_SLM)
1595115974
{
1595215975
op = LSC_FENCE_OP_NONE;
1595315976
}
15954-
m_encoder->LSC_Fence(sfid, scope, op);
15977+
m_encoder->LSC_Fence(sfid, Scope, op);
1595515978
m_encoder->Push();
1595615979
return;
1595715980
}

IGC/Compiler/Optimizer/OpenCLPasses/Atomics/ResolveOCLAtomics.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ void ResolveOCLAtomics::findLockUsers(Value* V)
323323
// br label %init_spinlock_var.end
324324
//
325325
// init_spinlock_var.end: ; preds = %init_spinlock_var.start, %entry
326-
// call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true)
326+
// call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i32 0)
327327
// call void @llvm.genx.GenISA.threadgroupbarrier()
328328
void ResolveOCLAtomics::generateLockInitilization(Function* F)
329329
{
@@ -361,11 +361,12 @@ void ResolveOCLAtomics::generateLockInitilization(Function* F)
361361
m_builder->CreateStore(m_localLock->getInitializer(), m_localLock);
362362
m_builder->CreateBr(initSpinLockEndBB);
363363

364-
// insert call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false)
364+
// insert call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i32 0)
365365
// call void @llvm.genx.GenISA.threadgroupbarrier()
366366
// to guarantee synchronization in accessing spin lock variable
367367
Value* trueValue = m_builder->getTrue();
368368
Value* falseValue = m_builder->getFalse();
369+
Value* groupScopeValue = m_builder->getInt32(LSC_SCOPE_GROUP);
369370
Value* localMemFenceArgs[] =
370371
{
371372
trueValue,
@@ -376,6 +377,7 @@ void ResolveOCLAtomics::generateLockInitilization(Function* F)
376377
falseValue,
377378
trueValue,
378379
falseValue,
380+
groupScopeValue
379381
};
380382
m_builder->SetInsertPoint(initSpinLockEndBB, initSpinLockEndBB->getFirstInsertionPt());
381383
Function* localMemFence = GenISAIntrinsic::getDeclaration(m_pModule, GenISAIntrinsic::GenISA_memoryfence);

IGC/Compiler/Optimizer/OpenCLPasses/NamedBarriers/NamedBarriersResolution.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,11 +241,16 @@ void NamedBarriersResolution::HandleNamedBarrierSyncHW(CallInst& NBarrierSyncCal
241241

242242
Value* trueValue = IRB.getInt1(true);
243243
Value* falseValue = IRB.getInt1(false);
244+
Value* gpuScopeValue = IRB.getInt32(LSC_SCOPE_GPU);
245+
Value* groupScopeValue = IRB.getInt32(LSC_SCOPE_GROUP);
244246

245247
ConstantInt* memFenceType = cast<ConstantInt>(NBarrierSyncCall.getArgOperand(1));
246248
// LOCAL = 1
247249
// GLOBAL = 2
248-
Value* isGlobal = ( (int)memFenceType->getValue().getSExtValue() & 2 ) == 2 ? trueValue : falseValue;
250+
bool isGlobal = ( (int)memFenceType->getValue().getSExtValue() & 2 ) == 2;
251+
Value* isGlobalValue = isGlobal ? trueValue : falseValue;
252+
// Conservatively for local barrier set GROUP scope, for global barrier set GPU scope
253+
Value* scopeValue = isGlobal ? gpuScopeValue : groupScopeValue;
249254

250255
GenIntrinsicInst::Create(
251256
GenISAIntrinsic::getDeclaration(module, GenISAIntrinsic::GenISA_memoryfence),
@@ -255,9 +260,10 @@ void NamedBarriersResolution::HandleNamedBarrierSyncHW(CallInst& NBarrierSyncCal
255260
falseValue, // bool flushConstant
256261
falseValue, // bool flushTexture
257262
falseValue, // bool flushIcache
258-
isGlobal, // bool isGlobal
263+
isGlobalValue, // bool isGlobal
259264
falseValue, // bool invalidateL1
260265
falseValue, // bool evictL1
266+
scopeValue // int memory scope
261267
},
262268
"",
263269
&(NBarrierSyncCall));

IGC/Compiler/Optimizer/SynchronizationObjectCoalescing.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,8 @@ void SynchronizationObjectCoalescing::EraseRedundantGlobalScope(llvm::Instructio
630630
{
631631
constexpr uint32_t globalMemFenceArg = 5;
632632
pGenIntrinsicInst->setOperand(globalMemFenceArg, llvm::ConstantInt::getFalse(pGenIntrinsicInst->getOperand(globalMemFenceArg)->getType()));
633+
constexpr uint32_t scopeMemFenceArg = 8;
634+
pGenIntrinsicInst->setOperand(scopeMemFenceArg, llvm::ConstantInt::get(pGenIntrinsicInst->getOperand(scopeMemFenceArg)->getType(), static_cast<uint32_t>(LSC_SCOPE::LSC_SCOPE_GROUP)));
633635
break;
634636
}
635637
case llvm::GenISAIntrinsic::GenISA_LSCFence:

0 commit comments

Comments
 (0)