Skip to content

Commit 2ba320d

Browse files
aratajew authored and igcbot committed
Add missing cl_intel_subgroup_extended_block_read_cacheopts macro
This change adds the missing support for 2D block prefetch and enables the `cl_intel_subgroup_extended_block_read_cacheopts` macro for BiFModule.
1 parent 312883c commit 2ba320d

File tree

3 files changed

+46
-0
lines changed

3 files changed

+46
-0
lines changed

IGC/BiFModule/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,7 @@ set(KHR_DEFINES ${KHR_DEFINES} "cl_intel_rt_production")
487487
set(KHR_DEFINES ${KHR_DEFINES} "cl_intel_subgroup_matrix_multiply_accumulate_tf32")
488488
set(KHR_DEFINES ${KHR_DEFINES} "cl_intel_subgroup_extended_block_read")
489489
set(KHR_DEFINES ${KHR_DEFINES} "cl_intel_pvc_lsc_validation")
490+
set(KHR_DEFINES ${KHR_DEFINES} "cl_intel_subgroup_extended_block_read_cacheopts")
490491

491492
igc_bif_build_bc(
492493
OUTPUT "${IGC_BUILD__BIF_DIR}/IBiF_Impl_int.bc"

IGC/BiFModule/Implementation/IGCBiF_Intrinsics_Lsc.cl

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,46 @@ uint8 __builtin_IB_subgroup_block_read_flat_transpose_u32_k8(long baseoffset, in
422422
ulong4 __builtin_IB_subgroup_block_read_flat_transpose_u64_k4(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord);
423423
#endif // cl_intel_subgroup_extended_block_read
424424

425+
#ifdef cl_intel_subgroup_extended_block_read_cacheopts
426+
// 2d block read cacheopts
427+
ushort2 __builtin_IB_subgroup_block_read_cacheopts_u8_m1k32v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
428+
ushort4 __builtin_IB_subgroup_block_read_cacheopts_u8_m2k32v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
429+
ushort8 __builtin_IB_subgroup_block_read_cacheopts_u8_m4k32v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
430+
ushort16 __builtin_IB_subgroup_block_read_cacheopts_u8_m8k32v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
431+
ushort2 __builtin_IB_subgroup_block_read_cacheopts_u16_m1k16v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
432+
ushort4 __builtin_IB_subgroup_block_read_cacheopts_u16_m2k16v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
433+
ushort8 __builtin_IB_subgroup_block_read_cacheopts_u16_m4k16v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
434+
ushort16 __builtin_IB_subgroup_block_read_cacheopts_u16_m8k16v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
435+
uint8 __builtin_IB_subgroup_block_read_cacheopts_transform_u8_k32(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
436+
uint8 __builtin_IB_subgroup_block_read_cacheopts_transform_u16_k16(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
437+
// 2d block write cacheopts
438+
void __builtin_IB_subgroup_block_write_cacheopts_u8_m1k32v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort val, enum LSC_STCC cache_control);
439+
void __builtin_IB_subgroup_block_write_cacheopts_u8_m2k32v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort2 val, enum LSC_STCC cache_control);
440+
void __builtin_IB_subgroup_block_write_cacheopts_u8_m4k32v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort4 val, enum LSC_STCC cache_control);
441+
void __builtin_IB_subgroup_block_write_cacheopts_u8_m8k32v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort8 val, enum LSC_STCC cache_control);
442+
void __builtin_IB_subgroup_block_write_cacheopts_u16_m1k16v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort val, enum LSC_STCC cache_control);
443+
void __builtin_IB_subgroup_block_write_cacheopts_u16_m2k16v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort2 val, enum LSC_STCC cache_control);
444+
void __builtin_IB_subgroup_block_write_cacheopts_u16_m4k16v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort4 val, enum LSC_STCC cache_control);
445+
void __builtin_IB_subgroup_block_write_cacheopts_u16_m8k16v1(long base_address, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, ushort8 val, enum LSC_STCC cache_control);
446+
// equivalent to transpose_transform_u8_k32 and transpose_transform_u16_k16
447+
uint8 __builtin_IB_subgroup_block_read_cacheopts_transpose_u32_k8(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
448+
ulong4 __builtin_IB_subgroup_block_read_cacheopts_transpose_u64_k4(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
449+
450+
// 2d block read prefetch
451+
void __builtin_IB_subgroup_block_read_prefetch_u8_m1k32v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
452+
void __builtin_IB_subgroup_block_read_prefetch_u8_m2k32v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
453+
void __builtin_IB_subgroup_block_read_prefetch_u8_m4k32v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
454+
void __builtin_IB_subgroup_block_read_prefetch_u8_m8k32v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
455+
void __builtin_IB_subgroup_block_read_prefetch_u16_m1k16v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
456+
void __builtin_IB_subgroup_block_read_prefetch_u16_m2k16v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
457+
void __builtin_IB_subgroup_block_read_prefetch_u16_m4k16v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
458+
void __builtin_IB_subgroup_block_read_prefetch_u16_m8k16v2(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
459+
void __builtin_IB_subgroup_block_read_prefetch_transform_u8_k32(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
460+
void __builtin_IB_subgroup_block_read_prefetch_transform_u16_k16(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
461+
// equivalent to transpose_transform_u8_k32 and transpose_transform_u16_k16
462+
void __builtin_IB_subgroup_block_read_prefetch_transpose_u32_k8(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
463+
void __builtin_IB_subgroup_block_read_prefetch_transpose_u64_k4(long baseoffset, int width_minus_one, int height_minus_one, int pitch_minus_one, int2 coord, enum LSC_LDCC cacheOpt);
464+
#endif // cl_intel_subgroup_extended_block_read_cacheopts
425465

426466
// experimental
427467
#ifdef cl_intel_subgroup_extended_block_read

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22710,6 +22710,8 @@ void EmitPass::emitLSCStore(
2271022710
void EmitPass::emitLSC2DBlockOperation(llvm::GenIntrinsicInst* inst)
2271122711
{
2271222712
bool isRead = inst->getIntrinsicID() == GenISAIntrinsic::GenISA_LSC2DBlockRead;
22713+
const bool isPrefetch = inst->getIntrinsicID() == GenISAIntrinsic::GenISA_LSC2DBlockPrefetch;
22714+
isRead |= isPrefetch;
2271322715

2271422716
CVariable* pFlatImageBaseoffset = GetSymbol(inst->getOperand(0));
2271522717
CVariable* pFlatImageWidth = GetSymbol(inst->getOperand(1));
@@ -22728,6 +22730,7 @@ void EmitPass::emitLSC2DBlockOperation(llvm::GenIntrinsicInst* inst)
2272822730

2272922731
CVariable* destination = m_destination;
2273022732
if (numBlocksV == 2 && blockHeight == 1 &&
22733+
!isPrefetch &&
2273122734
elemSizeInBits * blockWidth == 256 &&
2273222735
m_currShader->m_Platform->getPlatformInfo().eProductFamily >= IGFX_PVC)
2273322736
{
@@ -22770,6 +22773,7 @@ void EmitPass::emitLSC2DBlockOperation(llvm::GenIntrinsicInst* inst)
2277022773
m_encoder->Push();
2277122774

2277222775
if (isRead &&
22776+
!isPrefetch &&
2277322777
destination != m_destination)
2277422778
{
2277522779
// m1 v2 block read
@@ -23136,6 +23140,7 @@ void EmitPass::emitLSCIntrinsic(llvm::GenIntrinsicInst* GII)
2313623140
emitLSCFence(GII);
2313723141
break;
2313823142
case GenISAIntrinsic::GenISA_LSC2DBlockRead:
23143+
case GenISAIntrinsic::GenISA_LSC2DBlockPrefetch:
2313923144
case GenISAIntrinsic::GenISA_LSC2DBlockWrite:
2314023145
emitLSC2DBlockOperation(GII);
2314123146
break;

0 commit comments

Comments
 (0)