Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 417b60f

Browse files
authored
[SYCL] Use subgroup size 16 as the default for InlineAsm tests (#1476)
A subgroup size of 8 is not supported on all GPU platforms. Use 16 as the default subgroup size instead, unless a test specifically targets a particular size (for example, 8 or 32 in some tests).
1 parent 376bc88 commit 417b60f

22 files changed

+62
-61
lines changed

SYCL/InlineAsm/Negative/asm_bad_opcode.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ struct KernelFunctor {
1313
void operator()(sycl::handler &cgh) {
1414
cgh.parallel_for<KernelFunctor>(
1515
sycl::range<1>{16},
16-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
16+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
1717
#if defined(__SYCL_DEVICE_ONLY__)
1818
asm volatile(".decl tmp1 v_type=G type=d num_elts=16 align=GRF\n"
1919
".decl tmp2 v_type=G type=d num_elts=16 align=GRF\n"
20-
"movi (M1_NM, 8) tmp1(0,1)<1> tmp2(0,0)\n");
20+
"movi (M1_NM, 16) tmp1(0,1)<1> tmp2(0,0)\n");
2121
#endif
2222
});
2323
}

SYCL/InlineAsm/Negative/asm_bad_operand_syntax.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ struct KernelFunctor {
1313
void operator()(sycl::handler &cgh) {
1414
cgh.parallel_for<KernelFunctor>(
1515
sycl::range<1>{16},
16-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
16+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
1717
#if defined(__SYCL_DEVICE_ONLY__)
1818
asm volatile(".decl tmp1 v_type=G type=d num_elts=16 align=GRF\n"
1919
".decl tmp2 v_type=G type=d num_elts=16 align=GRF\n"
20-
"mov (M1_NM, 8) tmp1(0,1)<1>:f tmp2(0,0)<1;1,0>\n");
20+
"mov (M1_NM, 16) tmp1(0,1)<1>:f tmp2(0,0)<1;1,0>\n");
2121
#endif
2222
});
2323
}

SYCL/InlineAsm/Negative/asm_duplicate_label.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ struct KernelFunctor {
1313
void operator()(sycl::handler &cgh) {
1414
cgh.parallel_for<KernelFunctor>(
1515
sycl::range<1>{16},
16-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
16+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
1717
#if defined(__SYCL_DEVICE_ONLY__)
1818
asm volatile(".decl tmp1 v_type=G type=d num_elts=16 align=GRF\n"
1919
".decl tmp2 v_type=G type=d num_elts=16 align=GRF\n"

SYCL/InlineAsm/Negative/asm_illegal_exec_size.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ struct KernelFunctor {
1313
void operator()(sycl::handler &cgh) {
1414
cgh.parallel_for<KernelFunctor>(
1515
sycl::range<1>{16},
16-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
16+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
1717
#if defined(__SYCL_DEVICE_ONLY__)
1818
asm volatile(".decl tmp1 v_type=G type=d num_elts=16 align=GRF\n"
1919
".decl tmp2 v_type=G type=d num_elts=16 align=GRF\n"

SYCL/InlineAsm/Negative/asm_missing_label.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ struct KernelFunctor {
1313
void operator()(sycl::handler &cgh) {
1414
cgh.parallel_for<KernelFunctor>(
1515
sycl::range<1>{16},
16-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
16+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
1717
#if defined(__SYCL_DEVICE_ONLY__)
1818
asm volatile(".decl tmp1 v_type=G type=d num_elts=16 align=GRF\n"
1919
".decl tmp2 v_type=G type=d num_elts=16 align=GRF\n"

SYCL/InlineAsm/Negative/asm_missing_region.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ struct KernelFunctor {
1313
void operator()(sycl::handler &cgh) {
1414
cgh.parallel_for<KernelFunctor>(
1515
sycl::range<1>{16},
16-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
16+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
1717
#if defined(__SYCL_DEVICE_ONLY__)
1818
asm volatile(".decl tmp1 v_type=G type=d num_elts=16 align=GRF\n"
1919
".decl tmp2 v_type=G type=d num_elts=16 align=GRF\n"
20-
"mov (M1_NM, 8) tmp1(0,1)<1> tmp2(0,0)\n");
20+
"mov (M1_NM, 16) tmp1(0,1)<1> tmp2(0,0)\n");
2121
#endif
2222
});
2323
}

SYCL/InlineAsm/Negative/asm_simple.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ struct KernelFunctor {
1313
void operator()(sycl::handler &cgh) {
1414
cgh.parallel_for<KernelFunctor>(
1515
sycl::range<1>{16},
16-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
16+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
1717
#if defined(__SYCL_DEVICE_ONLY__)
1818
asm volatile(".decl tmp1 v_type=G type=d num_elts=16 align=GRF\n"
1919
".decl tmp2 v_type=G type=d num_elts=16 align=GRF\n"

SYCL/InlineAsm/Negative/asm_undefined_decl.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ struct KernelFunctor {
1313
void operator()(sycl::handler &cgh) {
1414
cgh.parallel_for<KernelFunctor>(
1515
sycl::range<1>{16},
16-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
16+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
1717
#if defined(__SYCL_DEVICE_ONLY__)
1818
asm volatile(".decl tmp1 v_type=G type=d num_elts=16 align=GRF\n"
1919
".decl tmp2 v_type=G type=d num_elts=16 align=GRF\n"
20-
"mov (M1_NM, 8) tmp1(0,1)<1> my_super_var(0,0)\n");
20+
"mov (M1_NM, 16) tmp1(0,1)<1> my_super_var(0,0)\n");
2121
#endif
2222
});
2323
}

SYCL/InlineAsm/Negative/asm_undefined_pred.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ struct KernelFunctor {
1313
void operator()(sycl::handler &cgh) {
1414
cgh.parallel_for<KernelFunctor>(
1515
sycl::range<1>{16},
16-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
16+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
1717
#if defined(__SYCL_DEVICE_ONLY__)
1818
asm volatile(".decl tmp1 v_type=G type=d num_elts=16 align=GRF\n"
1919
".decl tmp2 v_type=G type=d num_elts=16 align=GRF\n"
20-
"cmp.lt (M1_NM, 8) P3 tmp1(0,0)<0;1,0> 0x3:ud\n");
20+
"cmp.lt (M1_NM, 16) P3 tmp1(0,0)<0;1,0> 0x3:ud\n");
2121
#endif
2222
});
2323
}

SYCL/InlineAsm/Negative/asm_wrong_declare.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ struct KernelFunctor {
1313
void operator()(sycl::handler &cgh) {
1414
cgh.parallel_for<KernelFunctor>(
1515
sycl::range<1>{16},
16-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
16+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
1717
#if defined(__SYCL_DEVICE_ONLY__)
1818
asm volatile(".decl tmp1 v_type=G type=d num_elts=16 align=GRF\n"
1919
".decl tmp2 v_type=G type=d num_elts=16 align=GRF\n"

SYCL/InlineAsm/asm_arbitrary_ops_order.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ struct KernelFunctor : WithInputBuffers<T, 3>, WithOutputBuffer<T> {
3333

3434
cgh.parallel_for<KernelFunctor<T>>(
3535
sycl::range<1>{this->getOutputBufferSize()},
36-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
36+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
3737
#if defined(__SYCL_DEVICE_ONLY__)
38-
asm("mad (M1, 8) %0(0, 0)<1> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0> %3(0, "
38+
asm("mad (M1, 16) %0(0, 0)<1> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0> %3(0, "
3939
"0)<1;1,0>"
4040
: "=rw"(D[wiID])
4141
: "rw"(A[wiID]), "rw"(B[wiID]), "rw"(C[wiID]));

SYCL/InlineAsm/asm_float_add.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ struct KernelFunctor : WithInputBuffers<T, 2>, WithOutputBuffer<T> {
3030

3131
cgh.parallel_for<KernelFunctor<T>>(
3232
sycl::range<1>{this->getOutputBufferSize()},
33-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
33+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
3434
#if defined(__SYCL_DEVICE_ONLY__)
35-
asm("add (M1, 8) %0(0, 0)<1> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0>"
35+
asm("add (M1, 16) %0(0, 0)<1> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0>"
3636
: "=rw"(C[wiID])
3737
: "rw"(A[wiID]), "rw"(B[wiID]));
3838
#else

SYCL/InlineAsm/asm_float_imm_arg.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ struct KernelFunctor : WithInputBuffers<T, 1>, WithOutputBuffer<T> {
2727

2828
cgh.parallel_for<KernelFunctor<T>>(
2929
sycl::range<1>{this->getOutputBufferSize()},
30-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
30+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
3131
#if defined(__SYCL_DEVICE_ONLY__)
32-
asm("mul (M1, 8) %0(0, 0)<1> %1(0, 0)<1;1,0> %2"
32+
asm("mul (M1, 16) %0(0, 0)<1> %1(0, 0)<1;1,0> %2"
3333
: "=rw"(B[wiID])
3434
: "rw"(A[wiID]), "i"(IMM_ARGUMENT));
3535
#else

SYCL/InlineAsm/asm_float_neg.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@ struct KernelFunctor : WithInputBuffers<T, 1>, WithOutputBuffer<T> {
2525

2626
cgh.parallel_for<KernelFunctor<T>>(
2727
sycl::range<1>{this->getOutputBufferSize()},
28-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
28+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
2929
#if defined(__SYCL_DEVICE_ONLY__)
30-
asm("mov (M1, 8) %0(0, 0)<1> (-)%1(0, 0)<1;1,0>"
30+
asm("mov (M1, 16) %0(0, 0)<1> (-)%1(0, 0)<1;1,0>"
3131
: "=rw"(B[wiID])
3232
: "rw"(A[wiID]));
3333
#else

SYCL/InlineAsm/asm_if.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,13 @@ template <typename T = DataType> struct KernelFunctor : WithOutputBuffer<T> {
1818
bool switchField = false;
1919
CGH.parallel_for<KernelFunctor<T>>(
2020
sycl::range<1>{this->getOutputBufferSize()},
21-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
21+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
2222
int Output = 0;
2323
#if defined(__SYCL_DEVICE_ONLY__)
2424
asm volatile("{\n"
2525
".decl P1 v_type=P num_elts=1\n"
26-
"cmp.eq (M1_NM, 8) P1 %1(0,0)<0;1,0> 0x0:b\n"
27-
"(P1) sel (M1_NM, 8) %0(0,0)<1> 0x7:d 0x8:d"
26+
"cmp.eq (M1_NM, 16) P1 %1(0,0)<0;1,0> 0x0:b\n"
27+
"(P1) sel (M1_NM, 16) %0(0,0)<1> 0x7:d 0x8:d"
2828
"}\n"
2929
: "=rw"(Output)
3030
: "rw"(switchField));

SYCL/InlineAsm/asm_imm_arg.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ struct KernelFunctor : WithInputBuffers<T, 1>, WithOutputBuffer<T> {
2626

2727
cgh.parallel_for<KernelFunctor<T>>(
2828
sycl::range<1>{this->getOutputBufferSize()},
29-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
29+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
3030
#if defined(__SYCL_DEVICE_ONLY__)
31-
asm("add (M1, 8) %0(0, 0)<1> %1(0, 0)<1;1,0> %2"
31+
asm("add (M1, 16) %0(0, 0)<1> %1(0, 0)<1;1,0> %2"
3232
: "=rw"(B[wiID])
3333
: "rw"(A[wiID]), "i"(CONST_ARGUMENT));
3434
#else

SYCL/InlineAsm/asm_loop.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,21 +29,21 @@ struct KernelFunctor : WithInputBuffers<T, 2>, WithOutputBuffer<T> {
2929
CGH);
3030
CGH.parallel_for<KernelFunctor<T>>(
3131
sycl::range<1>{this->getOutputBufferSize()},
32-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
32+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
3333
#if defined(__SYCL_DEVICE_ONLY__)
3434
asm volatile("{\n"
35-
".decl P1 v_type=P num_elts=8\n"
36-
".decl P2 v_type=P num_elts=8\n"
37-
".decl temp v_type=G type=d num_elts=8 align=dword\n"
38-
"mov (M1, 8) %0(0, 0)<1> 0x0:d\n"
39-
"cmp.le (M1, 8) P1 %1(0,0)<1;1,0> 0x0:d\n"
40-
"(P1) goto (M1, 8) label0%=\n"
41-
"mov (M1, 8) temp(0,0)<1> 0x0:d\n"
35+
".decl P1 v_type=P num_elts=16\n"
36+
".decl P2 v_type=P num_elts=16\n"
37+
".decl temp v_type=G type=d num_elts=16 align=dword\n"
38+
"mov (M1, 16) %0(0, 0)<1> 0x0:d\n"
39+
"cmp.le (M1, 16) P1 %1(0,0)<1;1,0> 0x0:d\n"
40+
"(P1) goto (M1, 16) label0%=\n"
41+
"mov (M1, 16) temp(0,0)<1> 0x0:d\n"
4242
"label1%=:\n"
43-
"add (M1, 8) temp(0,0)<1> temp(0,0)<1;1,0> 0x1:w\n"
44-
"add (M1, 8) %0(0,0)<1> %0(0,0)<1;1,0> %2(0,0)<1;1,0>\n"
45-
"cmp.lt (M1, 8) P2 temp(0,0)<0;8,1> %1(0,0)<0;8,1>\n"
46-
"(P2) goto (M1, 8) label1%=\n"
43+
"add (M1, 16) temp(0,0)<1> temp(0,0)<1;1,0> 0x1:w\n"
44+
"add (M1, 16) %0(0,0)<1> %0(0,0)<1;1,0> %2(0,0)<1;1,0>\n"
45+
"cmp.lt (M1, 16) P2 temp(0,0)<0;16,1> %1(0,0)<0;16,1>\n"
46+
"(P2) goto (M1, 16) label1%=\n"
4747
"label0%=:"
4848
"}\n"
4949
: "+rw"(C[wiID])

SYCL/InlineAsm/asm_mul.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ struct KernelFunctor : WithInputBuffers<T, 2>, WithOutputBuffer<T> {
2828

2929
cgh.parallel_for<KernelFunctor<T>>(
3030
sycl::range<1>{this->getOutputBufferSize()},
31-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
31+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
3232
#if defined(__SYCL_DEVICE_ONLY__)
33-
asm("mul (M1, 8) %0(0, 0)<1> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0>"
33+
asm("mul (M1, 16) %0(0, 0)<1> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0>"
3434
: "=rw"(C[wiID])
3535
: "rw"(A[wiID]), "rw"(B[wiID]));
3636
#else

SYCL/InlineAsm/asm_multiple_instructions.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,17 +36,17 @@ struct KernelFunctor : WithInputBuffers<T, 3>, WithOutputBuffer<T> {
3636

3737
cgh.parallel_for<KernelFunctor<T>>(
3838
sycl::range<1>{this->getOutputBufferSize()},
39-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
39+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
4040
#if defined(TO_PASS)
4141
// The code below passing verification
4242
volatile int output = -1;
4343

4444
#if defined(__SYCL_DEVICE_ONLY__)
4545
asm volatile(
4646
"{\n"
47-
"add (M1, 8) %1(0, 0)<1> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0>\n"
48-
"add (M1, 8) %1(0, 0)<1> %1(0, 0)<1;1,0> %3(0, 0)<1;1,0>\n"
49-
"mov (M1, 8) %0(0, 0)<1> %1(0, 0)<1;1,0>\n"
47+
"add (M1, 16) %1(0, 0)<1> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0>\n"
48+
"add (M1, 16) %1(0, 0)<1> %1(0, 0)<1;1,0> %3(0, 0)<1;1,0>\n"
49+
"mov (M1, 16) %0(0, 0)<1> %1(0, 0)<1;1,0>\n"
5050
"}\n"
5151
: "=rw"(output), "+rw"(A[wiID])
5252
: "rw"(B[wiID]), "rw"(C[wiID]));
@@ -58,10 +58,11 @@ struct KernelFunctor : WithInputBuffers<T, 3>, WithOutputBuffer<T> {
5858
D[wiID] = output;
5959
#else
6060
#if defined(__SYCL_DEVICE_ONLY__)
61-
asm volatile("{\n"
62-
"add (M1, 8) %1(0, 0)<1> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0>\n"
63-
"add (M1, 8) %1(0, 0)<1> %1(0, 0)<1;1,0> %3(0, 0)<1;1,0>\n"
64-
"mov (M1, 8) %0(0, 0)<1> %1(0, 0)<1;1,0>\n"
61+
asm volatile(
62+
"{\n"
63+
"add (M1, 16) %1(0, 0)<1> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0>\n"
64+
"add (M1, 16) %1(0, 0)<1> %1(0, 0)<1;1,0> %3(0, 0)<1;1,0>\n"
65+
"mov (M1, 16) %0(0, 0)<1> %1(0, 0)<1;1,0>\n"
6566
"}\n"
6667
: "=rw"(D[wiID]), "+rw"(A[wiID])
6768
: "rw"(B[wiID]), "rw"(C[wiID]));

SYCL/InlineAsm/asm_no_operands.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@ int main() {
2525
// Submitting command group(work) to queue
2626
Queue.submit([&](sycl::handler &cgh) {
2727
// Executing kernel
28-
cgh.parallel_for<no_operands_kernel>(NumOfWorkItems,
29-
[=](sycl::id<1> WIid)
30-
[[intel::reqd_sub_group_size(8)]] {
28+
cgh.parallel_for<no_operands_kernel>(
29+
NumOfWorkItems,
30+
[=](sycl::id<1> WIid) [[intel::reqd_sub_group_size(16)]] {
3131
#if defined(__SYCL_DEVICE_ONLY__)
32-
asm("barrier");
32+
asm("barrier");
3333
#endif
34-
});
34+
});
3535
});
3636
}

SYCL/InlineAsm/asm_no_output.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ template <typename T = dataType> struct KernelFunctor : WithOutputBuffer<T> {
1919
cgh);
2020
cgh.parallel_for<KernelFunctor<T>>(
2121
sycl::range<1>{this->getOutputBufferSize()},
22-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
22+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
2323
volatile int local_var = 47;
2424
local_var += C[0];
2525
#if defined(__SYCL_DEVICE_ONLY__)

SYCL/InlineAsm/asm_switch.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,25 +18,25 @@ template <typename T = DataType> struct KernelFunctor : WithOutputBuffer<T> {
1818
int switchField = 2;
1919
CGH.parallel_for<KernelFunctor<T>>(
2020
sycl::range<1>{this->getOutputBufferSize()},
21-
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
21+
[=](sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
2222
int Output = 0;
2323
#if defined(__SYCL_DEVICE_ONLY__)
2424
asm volatile("{\n"
2525
".decl P1 v_type=P num_elts=1\n"
2626
".decl P2 v_type=P num_elts=1\n"
2727
".decl P3 v_type=P num_elts=1\n"
28-
"cmp.ne (M1_NM, 8) P1 %1(0,0)<0;1,0> 0x0:d\n"
28+
"cmp.ne (M1_NM, 16) P1 %1(0,0)<0;1,0> 0x0:d\n"
2929
"(P1) goto (M1, 1) label0%=\n"
3030
"mov (M1, 8) %0(0,0)<1> 0x9:d\n"
3131
"(P1) goto (M1, 1) label0%=\n"
3232
"label0%=:\n"
33-
"cmp.ne (M1_NM, 8) P2 %1(0,0)<0;1,0> 0x1:d\n"
33+
"cmp.ne (M1_NM, 16) P2 %1(0,0)<0;1,0> 0x1:d\n"
3434
"(P2) goto (M1, 1) label1%=\n"
3535
"mov (M1, 8) %0(0,0)<1> 0x8:d\n"
3636
"label1%=:\n"
37-
"cmp.ne (M1_NM, 8) P3 %1(0,0)<0;1,0> 0x2:d\n"
37+
"cmp.ne (M1_NM, 16) P3 %1(0,0)<0;1,0> 0x2:d\n"
3838
"(P3) goto (M1, 1) label2%=\n"
39-
"mov (M1, 8) %0(0,0)<1> 0x7:d\n"
39+
"mov (M1, 16) %0(0,0)<1> 0x7:d\n"
4040
"label2%=:"
4141
"}\n"
4242
: "=rw"(Output)

0 commit comments

Comments
 (0)