Skip to content

Commit b5003a8

Browse files
aratajew authored and web-flow committed
Support SPV_INTEL_cache_controls for 1d block reads and writes
This change implements the support for cache controls for block operations from `SPV_INTEL_subgroups` extension. (cherry picked from commit 6559ba2)
1 parent a069632 commit b5003a8

File tree

8 files changed

+835
-51
lines changed

8 files changed

+835
-51
lines changed

IGC/AdaptorOCL/preprocess_spvir/HandleSPIRVDecorations/HandleSpirvDecorationMetadata.cpp

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,42 @@ void HandleSpirvDecorationMetadata::visitPrefetchCallInst(CallInst& I)
184184
}
185185
}
186186

187+
// Handles a call to a 1D subgroup block read: the SPIR-V decorations parsed from
// the metadata of the call's first operand are scanned, and every
// CacheControlLoadINTEL entry is forwarded to handleCacheControlINTELFor1DBlockIO.
void HandleSpirvDecorationMetadata::visit1DBlockReadCallInst(CallInst& I)
{
    // Operand 0 is the pointer the block read is performed on.
    Value* const address = I.getArgOperand(0);
    auto decorations = parseSPIRVDecorationsFromMD(address);

    for (auto& [id, nodes] : decorations)
    {
        // Only IDecCacheControlLoadINTEL matters for reads; anything else is ignored.
        if (id != DecorationIdCacheControlLoad)
            continue;

        handleCacheControlINTELFor1DBlockIO<LoadCacheControl>(I, nodes);
    }
}
204+
205+
// Handles a call to a 1D subgroup block write: the SPIR-V decorations parsed from
// the metadata of the call's first operand are scanned, and every
// CacheControlStoreINTEL entry is forwarded to handleCacheControlINTELFor1DBlockIO.
void HandleSpirvDecorationMetadata::visit1DBlockWriteCallInst(CallInst& I)
{
    // Operand 0 is the pointer the block write is performed on.
    Value* const address = I.getArgOperand(0);
    auto decorations = parseSPIRVDecorationsFromMD(address);

    for (auto& [id, nodes] : decorations)
    {
        // Only IDecCacheControlStoreINTEL matters for writes; anything else is ignored.
        if (id != DecorationIdCacheControlStore)
            continue;

        handleCacheControlINTELFor1DBlockIO<StoreCacheControl>(I, nodes);
    }
}
222+
187223
void HandleSpirvDecorationMetadata::visitCallInst(CallInst& I)
188224
{
189225
Function* F = I.getCalledFunction();
@@ -195,8 +231,12 @@ void HandleSpirvDecorationMetadata::visitCallInst(CallInst& I)
195231
"_Z[0-9]+(intel_sub_group_2d_block_write_[0-9]+b_[0-9]+r[0-9]+x[0-9]+c)");
196232
#if defined(IGC_SCALAR_USE_KHRONOS_SPIRV_TRANSLATOR)
197233
Regex patternPrefetch("_Z[0-9]+__spirv_ocl_prefetch");
234+
Regex pattern1DBlockRead("_Z[0-9]+__spirv_SubgroupBlockReadINTEL");
235+
Regex pattern1DBlockWrite("_Z[0-9]+__spirv_SubgroupBlockWriteINTEL");
198236
#else // IGC Legacy SPIRV Translator
199237
Regex patternPrefetch("__builtin_spirv_OpenCL_prefetch");
238+
Regex pattern1DBlockRead("__builtin_spirv_OpSubgroupBlockReadINTEL");
239+
Regex pattern1DBlockWrite("__builtin_spirv_OpSubgroupBlockWriteINTEL");
200240
#endif
201241

202242
SmallVector<StringRef, 4> Matches;
@@ -214,6 +254,14 @@ void HandleSpirvDecorationMetadata::visitCallInst(CallInst& I)
214254
{
215255
visitPrefetchCallInst(I);
216256
}
257+
else if (pattern1DBlockRead.match(funcName, &Matches))
258+
{
259+
visit1DBlockReadCallInst(I);
260+
}
261+
else if (pattern1DBlockWrite.match(funcName, &Matches))
262+
{
263+
visit1DBlockWriteCallInst(I);
264+
}
217265
}
218266

219267
template<typename T>
@@ -327,3 +375,86 @@ void HandleSpirvDecorationMetadata::handleCacheControlINTELForPrefetch(llvm::Cal
327375
if (F->getNumUses() == 0)
328376
m_BuiltinsToRemove.insert(F);
329377
}
378+
379+
template<typename T>
380+
void HandleSpirvDecorationMetadata::handleCacheControlINTELFor1DBlockIO(CallInst& I, SmallPtrSetImpl<MDNode*>& MDNodes)
381+
{
382+
static_assert(std::is_same_v<T, LoadCacheControl> || std::is_same_v<T, StoreCacheControl>);
383+
CacheControlFromMDNodes cacheControl = resolveCacheControlFromMDNodes<T>(m_pCtx, MDNodes);
384+
if (cacheControl.isEmpty) return;
385+
if (cacheControl.isInvalid)
386+
{
387+
m_pCtx->EmitWarning("Unsupported cache controls configuration requested. Applying default configuration.");
388+
return;
389+
}
390+
391+
Function* F = I.getCalledFunction();
392+
IGC_ASSERT(F);
393+
394+
Type* operationType = nullptr;
395+
std::string funcName;
396+
if constexpr (std::is_same_v<T, LoadCacheControl>)
397+
{
398+
operationType = I.getType();
399+
funcName = "SubgroupBlockReadINTEL";
400+
}
401+
else
402+
{
403+
operationType = I.getArgOperand(1)->getType();
404+
funcName = "SubgroupBlockWriteINTEL";
405+
}
406+
407+
std::string typeName;
408+
uint32_t numElements = 1;
409+
Type* elementType = operationType;
410+
if (auto* vecTy = dyn_cast<IGCLLVM::FixedVectorType>(operationType))
411+
{
412+
numElements = (uint32_t)vecTy->getNumElements();
413+
elementType = vecTy->getElementType();
414+
}
415+
416+
if (elementType->isIntegerTy())
417+
{
418+
switch (elementType->getIntegerBitWidth())
419+
{
420+
case 8:
421+
typeName = "char";
422+
break;
423+
case 16:
424+
typeName = "short";
425+
break;
426+
case 32:
427+
typeName = "int";
428+
break;
429+
case 64:
430+
typeName = "long";
431+
break;
432+
default:
433+
IGC_ASSERT(0 && "Unsupported integer type");
434+
break;
435+
}
436+
}
437+
438+
if(numElements > 1)
439+
typeName += std::to_string(numElements);
440+
441+
SmallVector<Value*, 3> args(I.args());
442+
args.push_back(ConstantInt::get(Type::getInt32Ty(I.getContext()), cacheControl.value));
443+
444+
SmallVector<Type*, 3> argTypes;
445+
for (const auto& arg : args)
446+
argTypes.push_back(arg->getType());
447+
448+
FunctionType* funcTy = FunctionType::get(I.getType(), argTypes, false);
449+
std::string newFuncName = "__internal_" + funcName + "_" + typeName + "_cache_controls";
450+
auto newFunction = m_Module->getOrInsertFunction(newFuncName, funcTy);
451+
452+
auto newCall = CallInst::Create(newFunction, args, "", &I);
453+
I.replaceAllUsesWith(newCall);
454+
I.eraseFromParent();
455+
m_changed = true;
456+
457+
// Cleanup unused function if all calls have been replaced with the internal version
458+
if (F->getNumUses() == 0)
459+
m_BuiltinsToRemove.insert(F);
460+
}

IGC/AdaptorOCL/preprocess_spvir/HandleSPIRVDecorations/HandleSpirvDecorationMetadata.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ namespace IGC
5050
void visit2DBlockReadCallInst(llvm::CallInst& I, llvm::StringRef unmangledName);
5151
void visit2DBlockWriteCallInst(llvm::CallInst& I, llvm::StringRef unmangledName);
5252
void visitPrefetchCallInst(llvm::CallInst& I);
53+
void visit1DBlockReadCallInst(llvm::CallInst& I);
54+
void visit1DBlockWriteCallInst(llvm::CallInst& I);
5355

5456
private:
5557
llvm::Module* m_Module = nullptr;
@@ -67,5 +69,7 @@ namespace IGC
6769
template<typename T>
6870
void handleCacheControlINTELFor2DBlockIO(llvm::CallInst& I, llvm::SmallPtrSetImpl<llvm::MDNode*>& MDNodes, llvm::StringRef unmangledName);
6971
void handleCacheControlINTELForPrefetch(llvm::CallInst& I, llvm::SmallPtrSetImpl<llvm::MDNode*>& MDNodes);
72+
template<typename T>
73+
void handleCacheControlINTELFor1DBlockIO(llvm::CallInst& I, llvm::SmallPtrSetImpl<llvm::MDNode*>& MDNodes);
7074
};
7175
}

IGC/BiFModule/Implementation/IGCBiF_Intrinsics.cl

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,51 @@ void __builtin_IB_simd_block_write_2_global_l( __global ulong*, ulong2 );
612612
void __builtin_IB_simd_block_write_4_global_l( __global ulong*, ulong4 );
613613
void __builtin_IB_simd_block_write_8_global_l( __global ulong*, ulong8 );
614614

615+
// 1D block reads and writes with cache control argument : global address space
// Every declaration below takes a __global base pointer and a trailing
// `uint cacheOpt` argument. The number in each name matches the vector width
// of the value that is returned (reads) or passed as `val` (writes); the
// _h / _b / _l suffixes pair with ushort / uchar / ulong elements, and names
// without a suffix use uint elements.
616+
// Reads: uint elements.
uint __builtin_IB_cache_controls_simd_block_read_1_global(const __global uint* base, uint cacheOpt);
617+
uint2 __builtin_IB_cache_controls_simd_block_read_2_global(const __global uint* base, uint cacheOpt);
618+
uint4 __builtin_IB_cache_controls_simd_block_read_4_global(const __global uint* base, uint cacheOpt);
619+
uint8 __builtin_IB_cache_controls_simd_block_read_8_global(const __global uint* base, uint cacheOpt);
620+
621+
// Reads: ushort elements (_h).
ushort __builtin_IB_cache_controls_simd_block_read_1_global_h(const __global ushort* base, uint cacheOpt);
622+
ushort2 __builtin_IB_cache_controls_simd_block_read_2_global_h(const __global ushort* base, uint cacheOpt);
623+
ushort4 __builtin_IB_cache_controls_simd_block_read_4_global_h(const __global ushort* base, uint cacheOpt);
624+
ushort8 __builtin_IB_cache_controls_simd_block_read_8_global_h(const __global ushort* base, uint cacheOpt);
625+
ushort16 __builtin_IB_cache_controls_simd_block_read_16_global_h(const __global ushort* base, uint cacheOpt);
626+
627+
// Reads: uchar elements (_b).
uchar __builtin_IB_cache_controls_simd_block_read_1_global_b(const __global uchar* base, uint cacheOpt);
628+
uchar2 __builtin_IB_cache_controls_simd_block_read_2_global_b(const __global uchar* base, uint cacheOpt);
629+
uchar4 __builtin_IB_cache_controls_simd_block_read_4_global_b(const __global uchar* base, uint cacheOpt);
630+
uchar8 __builtin_IB_cache_controls_simd_block_read_8_global_b(const __global uchar* base, uint cacheOpt);
631+
uchar16 __builtin_IB_cache_controls_simd_block_read_16_global_b(const __global uchar* base, uint cacheOpt);
632+
633+
// Reads: ulong elements (_l).
ulong __builtin_IB_cache_controls_simd_block_read_1_global_l(const __global ulong* base, uint cacheOpt);
634+
ulong2 __builtin_IB_cache_controls_simd_block_read_2_global_l(const __global ulong* base, uint cacheOpt);
635+
ulong4 __builtin_IB_cache_controls_simd_block_read_4_global_l(const __global ulong* base, uint cacheOpt);
636+
ulong8 __builtin_IB_cache_controls_simd_block_read_8_global_l(const __global ulong* base, uint cacheOpt);
637+
638+
// Writes: uint elements.
void __builtin_IB_cache_controls_simd_block_write_1_global(__global uint* base, uint val, uint cacheOpt);
639+
void __builtin_IB_cache_controls_simd_block_write_2_global(__global uint* base, uint2 val, uint cacheOpt);
640+
void __builtin_IB_cache_controls_simd_block_write_4_global(__global uint* base, uint4 val, uint cacheOpt);
641+
void __builtin_IB_cache_controls_simd_block_write_8_global(__global uint* base, uint8 val, uint cacheOpt);
642+
643+
// Writes: ushort elements (_h).
void __builtin_IB_cache_controls_simd_block_write_1_global_h(__global ushort* base, ushort val, uint cacheOpt);
644+
void __builtin_IB_cache_controls_simd_block_write_2_global_h(__global ushort* base, ushort2 val, uint cacheOpt);
645+
void __builtin_IB_cache_controls_simd_block_write_4_global_h(__global ushort* base, ushort4 val, uint cacheOpt);
646+
void __builtin_IB_cache_controls_simd_block_write_8_global_h(__global ushort* base, ushort8 val, uint cacheOpt);
647+
void __builtin_IB_cache_controls_simd_block_write_16_global_h(__global ushort* base, ushort16 val, uint cacheOpt);
648+
649+
// Writes: uchar elements (_b).
void __builtin_IB_cache_controls_simd_block_write_1_global_b(__global uchar* base, uchar val, uint cacheOpt);
650+
void __builtin_IB_cache_controls_simd_block_write_2_global_b(__global uchar* base, uchar2 val, uint cacheOpt);
651+
void __builtin_IB_cache_controls_simd_block_write_4_global_b(__global uchar* base, uchar4 val, uint cacheOpt);
652+
void __builtin_IB_cache_controls_simd_block_write_8_global_b(__global uchar* base, uchar8 val, uint cacheOpt);
653+
void __builtin_IB_cache_controls_simd_block_write_16_global_b(__global uchar* base, uchar16 val, uint cacheOpt);
654+
655+
// Writes: ulong elements (_l).
void __builtin_IB_cache_controls_simd_block_write_1_global_l(__global ulong* base, ulong val, uint cacheOpt);
656+
void __builtin_IB_cache_controls_simd_block_write_2_global_l(__global ulong* base, ulong2 val, uint cacheOpt);
657+
void __builtin_IB_cache_controls_simd_block_write_4_global_l(__global ulong* base, ulong4 val, uint cacheOpt);
658+
void __builtin_IB_cache_controls_simd_block_write_8_global_l(__global ulong* base, ulong8 val, uint cacheOpt);
659+
615660
// Block read : local address space
616661
uint __builtin_IB_simd_block_read_1_local( const __local uint* );
617662
uint2 __builtin_IB_simd_block_read_2_local( const __local uint* );

0 commit comments

Comments
 (0)