Skip to content

Commit fab64a9

Browse files
ppogotovigcbot
authored and committed
Reform prefetch instructions.
Reform prefetch instructions to maximize width and minimize the number of blocks.
1 parent 9291fc3 commit fab64a9

File tree

5 files changed

+144
-28
lines changed

5 files changed

+144
-28
lines changed

IGC/Compiler/CustomSafeOptPass.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,30 @@ void CustomSafeOptPass::mergeDotAddToDp4a(llvm::CallInst* I)
452452
}
453453
}
454454

455+
// Reform prefetch instructions to maximize width and minimize the number of blocks. This function assumes that
456+
// it operates on the correct form of the intrinsic (width X array_length matches the requirements).
457+
void CustomSafeOptPass::visitLSC2DBlockPrefetch(CallInst* I) {
458+
const size_t widthInd = 7;
459+
const size_t numBlocksInd = 9;
460+
461+
ConstantInt *constWidth = dyn_cast<ConstantInt>(I->getArgOperand(widthInd));
462+
ConstantInt *constNumBlocks = dyn_cast<ConstantInt>(I->getArgOperand(numBlocksInd));
463+
464+
if (!constWidth || !constNumBlocks) {
465+
return;
466+
}
467+
468+
uint64_t width = constWidth->getZExtValue();
469+
uint64_t numBlocks = constNumBlocks->getZExtValue();
470+
471+
if (numBlocks == 1) {
472+
return;
473+
}
474+
475+
I->setArgOperand(widthInd, ConstantInt::get(Type::getInt32Ty(I->getContext()), width * numBlocks));
476+
I->setArgOperand(numBlocksInd, ConstantInt::get(Type::getInt32Ty(I->getContext()), 1));
477+
}
478+
455479
// Check if Lower 64b to 32b transformation is applicable for binary operator
456480
// i.e. trunc(a op b) == trunc(a) op trunc(b)
457481
static bool isTruncInvariant(unsigned Opcode) {
@@ -878,6 +902,12 @@ void CustomSafeOptPass::visitCallInst(CallInst& C)
878902
break;
879903
}
880904

905+
case GenISAIntrinsic::GenISA_LSC2DBlockPrefetch:
906+
{
907+
visitLSC2DBlockPrefetch(inst);
908+
break;
909+
}
910+
881911
default:
882912
break;
883913
}

IGC/Compiler/CustomSafeOptPass.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ namespace IGC
7272
bool isIdentityMatrix(llvm::ExtractElementInst& I);
7373
void visitAnd(llvm::BinaryOperator& I);
7474
void visitXor(llvm::Instruction& XorInstr);
75+
// Rewrites a GenISA_LSC2DBlockPrefetch call that has a constant width and a constant block
// count into a single, wider block; other calls are left unchanged.
void visitLSC2DBlockPrefetch(llvm::CallInst* I);
7576
void visitShuffleIndex(llvm::CallInst* I);
7677
void visitSelectInst(llvm::SelectInst& S);
7778
void mergeDotAddToDp4a(llvm::CallInst* I);
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; REQUIRES: llvm-14-plus, pvc-supported
10+
;
11+
; RUN: igc_opt %s -S -o - -igc-custom-safe-opt -platform pvc | FileCheck %s
12+
13+
; Width operand 16 with a block count of 1: the FileCheck pattern below expects the
; prefetch call to come out of the pass unchanged.
define spir_kernel void @block_prefetch_d16.1x16x8nn(i64 %val1, i32 %val2) {
14+
call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 16, i32 16, i32 8, i32 1, i1 false, i1 false, i32 4)
15+
;
16+
; CHECK: call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 16, i32 16, i32 8, i32 1, i1 false, i1 false, i32 4)
17+
;
18+
ret void
19+
}
20+
21+
; Width operand 16 with a block count of 2: the FileCheck pattern below expects the
; call to be rewritten to width 32 with a block count of 1.
define spir_kernel void @block_prefetch_d16.2x16x8nn(i64 %val1, i32 %val2) {
22+
call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 16, i32 16, i32 8, i32 2, i1 false, i1 false, i32 4)
23+
;
24+
; CHECK: call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 16, i32 32, i32 8, i32 1, i1 false, i1 false, i32 4)
25+
;
26+
ret void
27+
}
28+
29+
; Width operand 8 with a block count of 4: the FileCheck pattern below expects the
; call to be rewritten to width 32 with a block count of 1.
define spir_kernel void @block_prefetch_d16.4x8x8nn(i64 %val1, i32 %val2) {
30+
call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 16, i32 8, i32 8, i32 4, i1 false, i1 false, i32 4)
31+
;
32+
; CHECK: call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 16, i32 32, i32 8, i32 1, i1 false, i1 false, i32 4)
33+
;
34+
ret void
35+
}
36+
37+
; Width operand 64 with a block count of 4: the FileCheck pattern below expects the
; call to be rewritten to width 256 with a block count of 1.
define spir_kernel void @block_prefetch_d8.4x64x8nn(i64 %val1, i32 %val2) {
38+
call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 8, i32 64, i32 8, i32 4, i1 false, i1 false, i32 4)
39+
;
40+
; CHECK: call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 8, i32 256, i32 8, i32 1, i1 false, i1 false, i32 4)
41+
;
42+
ret void
43+
}
44+
45+
; Width operand 16 with a block count of 4: the FileCheck pattern below expects the
; call to be rewritten to width 64 with a block count of 1.
define spir_kernel void @block_prefetch_d8.4x16x8nn(i64 %val1, i32 %val2) {
46+
call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 8, i32 16, i32 8, i32 4, i1 false, i1 false, i32 4)
47+
;
48+
; CHECK: call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 8, i32 64, i32 8, i32 1, i1 false, i1 false, i32 4)
49+
;
50+
ret void
51+
}
52+
53+
; Width operand 16 with a block count of 2: the FileCheck pattern below expects the
; call to be rewritten to width 32 with a block count of 1.
define spir_kernel void @block_prefetch_d32.2x16x8nn(i64 %val1, i32 %val2) {
54+
call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 32, i32 16, i32 8, i32 2, i1 false, i1 false, i32 4)
55+
;
56+
; CHECK: call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 32, i32 32, i32 8, i32 1, i1 false, i1 false, i32 4)
57+
;
58+
ret void
59+
}
60+
61+
; Width operand 32 with a block count of 2: the FileCheck pattern below expects the
; call to be rewritten to width 64 with a block count of 1.
define spir_kernel void @block_prefetch_d32.2x32x8nn(i64 %val1, i32 %val2) {
62+
call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 32, i32 32, i32 8, i32 2, i1 false, i1 false, i32 4)
63+
;
64+
; CHECK: call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 32, i32 64, i32 8, i32 1, i1 false, i1 false, i32 4)
65+
;
66+
ret void
67+
}
68+
69+
; Width operand 8 with a block count of 4: the FileCheck pattern below expects the
; call to come out of the pass unchanged (still width 8, block count 4).
; NOTE(review): CustomSafeOptPass::visitLSC2DBlockPrefetch, as added in this commit,
; merges any constant width/block-count pair with more than one block and does not
; look at the element-size operand (64 here) - confirm what keeps this call unmerged.
define spir_kernel void @block_prefetch_d64.4x8x8nn(i64 %val1, i32 %val2) {
70+
call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 64, i32 8, i32 8, i32 4, i1 false, i1 false, i32 4)
71+
;
72+
; CHECK: call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 64, i32 8, i32 8, i32 4, i1 false, i1 false, i32 4)
73+
;
74+
ret void
75+
}
76+
77+
; Width operand 8 with a block count of 1: the FileCheck pattern below expects the
; prefetch call to come out of the pass unchanged.
define spir_kernel void @block_prefetch_d64.1x8x8nn(i64 %val1, i32 %val2) {
78+
call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 64, i32 8, i32 8, i32 1, i1 false, i1 false, i32 4)
79+
;
80+
; CHECK: call void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64 %val1, i32 8191, i32 4095, i32 8191, i32 0, i32 %val2, i32 64, i32 8, i32 8, i32 1, i1 false, i1 false, i32 4)
81+
;
82+
ret void
83+
}
84+
85+
declare void @llvm.genx.GenISA.LSC2DBlockPrefetch.isVoid(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32)

IGC/ocloc_tests/Builtins/cl_intel_subgroup_2d_block_io/PVC/prefetches.cl

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -12,37 +12,37 @@ SPDX-License-Identifier: MIT
1212
// RUN: -DINPUT_TYPE=ushort -DFUNCTION=intel_sub_group_2d_block_prefetch_8b_1r32x2c" \
1313
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-8B-1R-32X2C
1414

15-
// CHECK-VISAASM-8B-1R-32X2C: lsc_load_block2d.ugm (M1, 1) %null:d8.2x32x1nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
15+
// CHECK-VISAASM-8B-1R-32X2C: lsc_load_block2d.ugm (M1, 1) %null:d8.64x1nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
1616

1717
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
1818
// RUN: -DINPUT_TYPE=ushort -DFUNCTION=intel_sub_group_2d_block_prefetch_8b_2r32x2c" \
1919
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-8B-2R-32X2C
2020

21-
// CHECK-VISAASM-8B-2R-32X2C: lsc_load_block2d.ugm (M1, 1) %null:d8.2x32x2nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
21+
// CHECK-VISAASM-8B-2R-32X2C: lsc_load_block2d.ugm (M1, 1) %null:d8.64x2nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
2222

2323
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
2424
// RUN: -DINPUT_TYPE=ushort -DFUNCTION=intel_sub_group_2d_block_prefetch_8b_4r32x2c" \
2525
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-8B-4R-32X2C
2626

27-
// CHECK-VISAASM-8B-4R-32X2C: lsc_load_block2d.ugm (M1, 1) %null:d8.2x32x4nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
27+
// CHECK-VISAASM-8B-4R-32X2C: lsc_load_block2d.ugm (M1, 1) %null:d8.64x4nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
2828

2929
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
3030
// RUN: -DINPUT_TYPE=ushort -DFUNCTION=intel_sub_group_2d_block_prefetch_8b_8r32x2c" \
3131
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-8B-8R-32X2C
3232

33-
// CHECK-VISAASM-8B-8R-32X2C: lsc_load_block2d.ugm (M1, 1) %null:d8.2x32x8nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
33+
// CHECK-VISAASM-8B-8R-32X2C: lsc_load_block2d.ugm (M1, 1) %null:d8.64x8nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
3434

3535
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
3636
// RUN: -DINPUT_TYPE=ushort -DFUNCTION=intel_sub_group_2d_block_prefetch_8b_16r32x2c" \
3737
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-8B-16R-32X2C
3838

39-
// CHECK-VISAASM-8B-16R-32X2C: lsc_load_block2d.ugm (M1, 1) %null:d8.2x32x16nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
39+
// CHECK-VISAASM-8B-16R-32X2C: lsc_load_block2d.ugm (M1, 1) %null:d8.64x16nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
4040

4141
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
4242
// RUN: -DINPUT_TYPE=ushort -DFUNCTION=intel_sub_group_2d_block_prefetch_8b_32r32x2c" \
4343
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-8B-32R-32X2C
4444

45-
// CHECK-VISAASM-8B-32R-32X2C: lsc_load_block2d.ugm (M1, 1) %null:d8.2x32x32nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
45+
// CHECK-VISAASM-8B-32R-32X2C: lsc_load_block2d.ugm (M1, 1) %null:d8.64x32nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
4646

4747
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
4848
// RUN: -DINPUT_TYPE=ushort -DFUNCTION=intel_sub_group_2d_block_prefetch_16b_1r16x1c" \
@@ -84,37 +84,37 @@ SPDX-License-Identifier: MIT
8484
// RUN: -DINPUT_TYPE=ushort -DFUNCTION=intel_sub_group_2d_block_prefetch_16b_1r16x2c" \
8585
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-16B-1R-16X2C
8686

87-
// CHECK-VISAASM-16B-1R-16X2C: lsc_load_block2d.ugm (M1, 1) %null:d16.2x16x1nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
87+
// CHECK-VISAASM-16B-1R-16X2C: lsc_load_block2d.ugm (M1, 1) %null:d16.32x1nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
8888

8989
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
9090
// RUN: -DINPUT_TYPE=ushort -DFUNCTION=intel_sub_group_2d_block_prefetch_16b_2r16x2c" \
9191
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-16B-2R-16X2C
9292

93-
// CHECK-VISAASM-16B-2R-16X2C: lsc_load_block2d.ugm (M1, 1) %null:d16.2x16x2nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
93+
// CHECK-VISAASM-16B-2R-16X2C: lsc_load_block2d.ugm (M1, 1) %null:d16.32x2nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
9494

9595
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
9696
// RUN: -DINPUT_TYPE=ushort -DFUNCTION=intel_sub_group_2d_block_prefetch_16b_4r16x2c" \
9797
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-16B-4R-16X2C
9898

99-
// CHECK-VISAASM-16B-4R-16X2C: lsc_load_block2d.ugm (M1, 1) %null:d16.2x16x4nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
99+
// CHECK-VISAASM-16B-4R-16X2C: lsc_load_block2d.ugm (M1, 1) %null:d16.32x4nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
100100

101101
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
102102
// RUN: -DINPUT_TYPE=ushort -DFUNCTION=intel_sub_group_2d_block_prefetch_16b_8r16x2c" \
103103
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-16B-8R-16X2C
104104

105-
// CHECK-VISAASM-16B-8R-16X2C: lsc_load_block2d.ugm (M1, 1) %null:d16.2x16x8nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
105+
// CHECK-VISAASM-16B-8R-16X2C: lsc_load_block2d.ugm (M1, 1) %null:d16.32x8nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
106106

107107
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
108108
// RUN: -DINPUT_TYPE=ushort -DFUNCTION=intel_sub_group_2d_block_prefetch_16b_16r16x2c" \
109109
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-16B-16R-16X2C
110110

111-
// CHECK-VISAASM-16B-16R-16X2C: lsc_load_block2d.ugm (M1, 1) %null:d16.2x16x16nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
111+
// CHECK-VISAASM-16B-16R-16X2C: lsc_load_block2d.ugm (M1, 1) %null:d16.32x16nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
112112

113113
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
114114
// RUN: -DINPUT_TYPE=ushort -DFUNCTION=intel_sub_group_2d_block_prefetch_16b_32r16x2c" \
115115
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-16B-32R-16X2C
116116

117-
// CHECK-VISAASM-16B-32R-16X2C: lsc_load_block2d.ugm (M1, 1) %null:d16.2x16x32nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
117+
// CHECK-VISAASM-16B-32R-16X2C: lsc_load_block2d.ugm (M1, 1) %null:d16.32x32nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
118118

119119
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
120120
// RUN: -DINPUT_TYPE=ushort -DFUNCTION=intel_sub_group_2d_block_prefetch_8b_32r16x1c" \
@@ -234,49 +234,49 @@ SPDX-License-Identifier: MIT
234234
// RUN: -DINPUT_TYPE=uint -DFUNCTION=intel_sub_group_2d_block_prefetch_32b_1r8x2c" \
235235
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-32B-1R-8X2C
236236

237-
// CHECK-VISAASM-32B-1R-8X2C: lsc_load_block2d.ugm (M1, 1) %null:d32.2x8x1nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
237+
// CHECK-VISAASM-32B-1R-8X2C: lsc_load_block2d.ugm (M1, 1) %null:d32.16x1nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
238238

239239
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
240240
// RUN: -DINPUT_TYPE=uint -DFUNCTION=intel_sub_group_2d_block_prefetch_32b_2r8x2c" \
241241
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-32B-2R-8X2C
242242

243-
// CHECK-VISAASM-32B-2R-8X2C: lsc_load_block2d.ugm (M1, 1) %null:d32.2x8x2nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
243+
// CHECK-VISAASM-32B-2R-8X2C: lsc_load_block2d.ugm (M1, 1) %null:d32.16x2nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
244244

245245
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
246246
// RUN: -DINPUT_TYPE=uint -DFUNCTION=intel_sub_group_2d_block_prefetch_32b_4r8x2c" \
247247
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-32B-4R-8X2C
248248

249-
// CHECK-VISAASM-32B-4R-8X2C: lsc_load_block2d.ugm (M1, 1) %null:d32.2x8x4nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
249+
// CHECK-VISAASM-32B-4R-8X2C: lsc_load_block2d.ugm (M1, 1) %null:d32.16x4nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
250250

251251
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
252252
// RUN: -DINPUT_TYPE=uint -DFUNCTION=intel_sub_group_2d_block_prefetch_32b_8r8x2c" \
253253
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-32B-8R-8X2C
254254

255-
// CHECK-VISAASM-32B-8R-8X2C: lsc_load_block2d.ugm (M1, 1) %null:d32.2x8x8nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
255+
// CHECK-VISAASM-32B-8R-8X2C: lsc_load_block2d.ugm (M1, 1) %null:d32.16x8nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
256256

257257
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
258258
// RUN: -DINPUT_TYPE=uint -DFUNCTION=intel_sub_group_2d_block_prefetch_32b_16r8x2c" \
259259
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-32B-16R-8X2C
260260

261-
// CHECK-VISAASM-32B-16R-8X2C: lsc_load_block2d.ugm (M1, 1) %null:d32.2x8x16nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
261+
// CHECK-VISAASM-32B-16R-8X2C: lsc_load_block2d.ugm (M1, 1) %null:d32.16x16nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
262262

263263
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
264264
// RUN: -DINPUT_TYPE=uint -DFUNCTION=intel_sub_group_2d_block_prefetch_32b_32r8x2c" \
265265
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-32B-32R-8X2C
266266

267-
// CHECK-VISAASM-32B-32R-8X2C: lsc_load_block2d.ugm (M1, 1) %null:d32.2x8x32nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
267+
// CHECK-VISAASM-32B-32R-8X2C: lsc_load_block2d.ugm (M1, 1) %null:d32.16x32nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
268268

269269
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
270270
// RUN: -DINPUT_TYPE=uint -DFUNCTION=intel_sub_group_2d_block_prefetch_8b_32r16x2c" \
271271
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-8B-32R-16X2C
272272

273-
// CHECK-VISAASM-8B-32R-16X2C: lsc_load_block2d.ugm (M1, 1) %null:d8.2x16x32nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
273+
// CHECK-VISAASM-8B-32R-16X2C: lsc_load_block2d.ugm (M1, 1) %null:d8.32x32nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
274274

275275
// RUN: ocloc compile -file %s -device pvc -options "-igc_opts 'DumpVISAASMToConsole=1' \
276276
// RUN: -DINPUT_TYPE=uint -DFUNCTION=intel_sub_group_2d_block_prefetch_8b_32r16x4c" \
277277
// RUN: -internal_options "-cl-ext=-all,+cl_intel_subgroup_2d_block_io" | FileCheck %s --check-prefix=CHECK-VISAASM-8B-32R-16X4C
278278

279-
// CHECK-VISAASM-8B-32R-16X4C: lsc_load_block2d.ugm (M1, 1) %null:d8.4x16x32nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
279+
// CHECK-VISAASM-8B-32R-16X4C: lsc_load_block2d.ugm (M1, 1) %null:d8.64x32nn flat[{{.+}},0x1FF,0x2D,0x1FF,V{{[0-9]+}},V{{[0-9]+}}
280280

281281
__attribute__((intel_reqd_sub_group_size(16)))
282282
kernel void test_default(global INPUT_TYPE* input, const global int2* coord) {

0 commit comments

Comments
 (0)