Skip to content

[NFC][SYCL] Remove deprecated spelling of IntelReqdSubGroupSize attribute #2399

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Sep 3, 2020
1 change: 0 additions & 1 deletion clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1299,7 +1299,6 @@ def LoopUnrollHint : InheritableAttr {

def IntelReqdSubGroupSize: InheritableAttr {
let Spellings = [GNU<"intel_reqd_sub_group_size">,
CXX11<"cl", "intel_reqd_sub_group_size">,
CXX11<"intel", "reqd_sub_group_size">];
let Args = [ExprArgument<"SubGroupSize">];
let Subjects = SubjectList<[Function, CXXMethod], ErrorDiag>;
Expand Down
20 changes: 3 additions & 17 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -3476,30 +3476,16 @@ code. See `cl_intel_required_subgroup_size
for details.

SYCL documentation:
The [[cl::intel_reqd_sub_group_size(n)]] and [[intel::reqd_sub_group_size(n)]]
attribute indicates that the kernel must be compiled and executed with a
sub-group of size n. The value of n must be set to a sub-group size supported
by the device, or device compilation will fail.
The [[intel::reqd_sub_group_size(n)]] attribute indicates that the kernel must
be compiled and executed with a sub-group of size n. The value of n must be set
to a sub-group size supported by the device, or device compilation will fail.

In addition to device functions, the required sub-group size attribute may also
be specified in the definition of a named functor object or a lambda function,
as in the examples below:

.. code-block:: c++

class Functor
{
void operator()(item<1> item) [[cl::intel_reqd_sub_group_size(16)]]
{
/* kernel code */
}
}

kernel<class kernel_name>(
[]() [[cl::intel_reqd_sub_group_size(n)]] {
/* kernel code */
});

class Functor
{
[[intel::reqd_sub_group_size(16)]] void operator()(item<1> item)
Expand Down
5 changes: 0 additions & 5 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -11063,11 +11063,6 @@ def err_ivdep_declrefexpr_arg : Error<
def warn_ivdep_redundant : Warning <"ignoring redundant Intel FPGA loop "
"attribute 'ivdep': safelen %select{INF|%1}0 >= safelen %select{INF|%3}2">,
InGroup<IgnoredAttributes>;
def warn_attribute_spelling_deprecated : Warning<
"attribute %0 is deprecated">,
InGroup<DeprecatedAttributes>;
def note_spelling_suggestion : Note<
"did you mean to use %0 instead?">;

// errors of expect.with.probability
def err_probability_not_constant_float : Error<
Expand Down
7 changes: 0 additions & 7 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3013,13 +3013,6 @@ static void handleSubGroupSize(Sema &S, Decl *D, const ParsedAttr &AL) {
if (D->getAttr<IntelReqdSubGroupSizeAttr>())
S.Diag(AL.getLoc(), diag::warn_duplicate_attribute) << AL;

if (AL.getAttributeSpellingListIndex() ==
IntelReqdSubGroupSizeAttr::CXX11_cl_intel_reqd_sub_group_size) {
S.Diag(AL.getLoc(), diag::warn_attribute_spelling_deprecated) << AL;
S.Diag(AL.getLoc(), diag::note_spelling_suggestion)
<< "'intel::reqd_sub_group_size'";
}

S.addIntelReqdSubGroupSizeAttr(D, AL, E);
}

Expand Down
8 changes: 4 additions & 4 deletions clang/test/CodeGenSYCL/reqd-sub-group-size.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

class Functor16 {
public:
[[cl::intel_reqd_sub_group_size(16)]] void operator()() const {}
[[intel::reqd_sub_group_size(16)]] void operator()() const {}
};

[[cl::intel_reqd_sub_group_size(8)]] void foo() {}
[[intel::reqd_sub_group_size(8)]] void foo() {}

class Functor {
public:
Expand All @@ -17,7 +17,7 @@ class Functor {
template <int SIZE>
class Functor5 {
public:
[[cl::intel_reqd_sub_group_size(SIZE)]] void operator()() const {}
[[intel::reqd_sub_group_size(SIZE)]] void operator()() const {}
};

template <typename name, typename Func>
Expand All @@ -33,7 +33,7 @@ void bar() {
kernel<class kernel_name2>(f);

kernel<class kernel_name3>(
[]() [[cl::intel_reqd_sub_group_size(4)]] {});
[]() [[intel::reqd_sub_group_size(4)]]{});

Functor5<2> f5;
kernel<class kernel_name4>(f5);
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/sycl-multi-kernel-attr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

class Functor {
public:
[[cl::intel_reqd_sub_group_size(4), cl::reqd_work_group_size(32, 16, 16)]] void operator()() const {}
[[intel::reqd_sub_group_size(4), cl::reqd_work_group_size(32, 16, 16)]] void operator()() const {}
};

template <typename Name, typename Func>
Expand Down
8 changes: 2 additions & 6 deletions clang/test/SemaSYCL/reqd-sub-group-size-device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@

class Functor16 {
public:
// expected-warning@+2 {{attribute 'intel_reqd_sub_group_size' is deprecated}}
// expected-note@+1 {{did you mean to use 'intel::reqd_sub_group_size' instead?}}
[[cl::intel_reqd_sub_group_size(16)]] void operator()() const {}
[[intel::reqd_sub_group_size(16)]] void operator()() const {}
};

class Functor8 { // expected-error {{conflicting attributes applied to a SYCL kernel}}
Expand Down Expand Up @@ -55,9 +53,7 @@ void bar() {

kernel<class kernel_name5>([]() [[intel::reqd_sub_group_size(2)]]{});
kernel<class kernel_name6>([]() [[intel::reqd_sub_group_size(4)]] { foo(); });
// expected-warning@+2 {{attribute 'intel_reqd_sub_group_size' is deprecated}}
// expected-note@+1 {{did you mean to use 'intel::reqd_sub_group_size' instead?}}
kernel<class kernel_name7>([]() [[cl::intel_reqd_sub_group_size(6)]]{});
kernel<class kernel_name7>([]() [[intel::reqd_sub_group_size(6)]]{});

Functor4 f4;
kernel<class kernel_name8>(f4);
Expand Down
4 changes: 2 additions & 2 deletions clang/test/SemaSYCL/reqd-sub-group-size-host.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// RUN: %clang_cc1 -fsycl -fsycl-is-host -fsyntax-only -verify %s
// expected-no-diagnostics

[[cl::intel_reqd_sub_group_size(8)]] void fun() {}
[[intel::reqd_sub_group_size(8)]] void fun() {}

class Functor {
public:
[[cl::intel_reqd_sub_group_size(16)]] void operator()() {}
[[intel::reqd_sub_group_size(16)]] void operator()() {}
};
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_16_empty.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ struct KernelFunctor : WithOutputBuffer<T> {
void operator()(cl::sycl::handler &cgh) {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
// clang-format on
C[wiID] = 43;
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm volatile("");
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_16_matrix_mult.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ struct KernelFunctor : WithOutputBuffer<T> {
void operator()(cl::sycl::handler &cgh) {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
// clang-format on
volatile int output = 0;
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm volatile("mov (M1,16) %0(0,0)<1> 0x7:d"
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_16_no_input_int.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ struct KernelFunctor : WithOutputBuffer<T> {
void operator()(cl::sycl::handler &cgh) {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
// clang-format on
volatile int output = 0;
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm volatile("mov (M1,16) %0(0,0)<1> 0x7:d"
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_16_no_opts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ struct KernelFunctor : WithOutputBuffer<T> {
void operator()(cl::sycl::handler &cgh) {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
// clang-format on
for (int i = 0; i < 10; ++i) {
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("fence_sw");
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_8_empty.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ struct KernelFunctor : WithOutputBuffer<T> {
void operator()(cl::sycl::handler &cgh) {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
C[wiID] = 43;
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm volatile("");
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_8_no_input_int.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ struct KernelFunctor : WithOutputBuffer<T> {
void operator()(cl::sycl::handler &cgh) {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
volatile int output = 0;
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm volatile("mov (M1,8) %0(0,0)<1> 0x7:d"
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/inline-asm/asm_arbitrary_ops_order.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ struct KernelFunctor : WithInputBuffers<T, 3>, WithOutputBuffer<T> {

cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=
](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("mad (M1, 8) %0(0, 0)<1> %3(0, 0)<1;1,0> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0>"
: "=rw"(D[wiID])
Expand Down
4 changes: 3 additions & 1 deletion sycl/test/inline-asm/asm_decl_in_scope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@ struct KernelFunctor : WithInputBuffers<T, 2>, WithOutputBuffer<T> {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);

cgh.parallel_for<KernelFunctor<T>>(
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
// clang-format on
// declaration of temp within and outside the scope
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("{\n"
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_float_add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ struct KernelFunctor : WithInputBuffers<T, 2>, WithOutputBuffer<T> {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);

cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("add (M1, 8) %0(0, 0)<1> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0>"
: "=rw"(C[wiID])
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/inline-asm/asm_float_imm_arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ struct KernelFunctor : WithInputBuffers<T, 1>, WithOutputBuffer<T> {

cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=
](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("mul (M1, 8) %0(0, 0)<1> %1(0, 0)<1;1,0> %2"
: "=rw"(B[wiID])
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_float_neg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ struct KernelFunctor : WithInputBuffers<T, 1>, WithOutputBuffer<T> {
auto B = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);

cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("mov (M1, 8) %0(0, 0)<1> (-)%1(0, 0)<1;1,0>"
: "=rw"(B[wiID])
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/inline-asm/asm_if.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ template <typename T = DataType> struct KernelFunctor : WithOutputBuffer<T> {
// clang-format off
CGH.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
int Output = 0;
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/inline-asm/asm_imm_arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ struct KernelFunctor : WithInputBuffers<T, 1>, WithOutputBuffer<T> {

cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=
](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("add (M1, 8) %0(0, 0)<1> %1(0, 0)<1;1,0> %2"
: "=rw"(B[wiID])
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/inline-asm/asm_loop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ struct KernelFunctor : WithInputBuffers<T, 2>, WithOutputBuffer<T> {
// clang-format off
CGH.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm volatile(".decl P1 v_type=P num_elts=8\n"
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_mul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ struct KernelFunctor : WithInputBuffers<T, 2>, WithOutputBuffer<T> {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);

cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("mul (M1, 8) %0(0, 0)<1> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0>"
: "=rw"(C[wiID])
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_multiple_instructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ struct KernelFunctor : WithInputBuffers<T, 3>, WithOutputBuffer<T> {
auto D = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);

cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("{\n"
"add (M1, 8) %1(0, 0)<1> %1(0, 0)<1;1,0> %2(0, 0)<1;1,0>\n"
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_no_operands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,13 @@ int main() {
Queue.submit([&](cl::sycl::handler &cgh) {
// Executing kernel
cgh.parallel_for<no_operands_kernel>(
NumOfWorkItems, [=](cl::sycl::id<1> WIid) [[cl::intel_reqd_sub_group_size(8)]] {
NumOfWorkItems, [=](cl::sycl::id<1> WIid)
[[intel::reqd_sub_group_size(8)]] {
// clang-format off
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("barrier");
#endif
});
// clang-format on
});
}
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_no_output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ struct KernelFunctor : WithOutputBuffer<T> {
void operator()(cl::sycl::handler &cgh) {
auto C = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);
cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
volatile int local_var = 47;
local_var += C[0];
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
Expand Down
5 changes: 4 additions & 1 deletion sycl/test/inline-asm/asm_plus_mod.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ struct KernelFunctor : WithInputBuffers<T, 1>, WithOutputBuffer<T> {
auto B = this->getOutputBuffer().template get_access<cl::sycl::access::mode::write>(cgh);

cgh.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()}, [=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(16)]] {
// clang-format off
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(16)]] {
// clang-format on
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
asm("add (M1, 16) %0(0, 0)<1> %0(0, 0)<1;1,0> %1(0, 0)<1;1,0>"
: "+rw"(B[wiID])
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/inline-asm/asm_switch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ template <typename T = DataType> struct KernelFunctor : WithOutputBuffer<T> {
// clang-format off
CGH.parallel_for<KernelFunctor<T>>(
cl::sycl::range<1>{this->getOutputBufferSize()},
[=](cl::sycl::id<1> wiID) [[cl::intel_reqd_sub_group_size(8)]] {
[=](cl::sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] {
// clang-format on
int Output = 0;
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
Expand Down
8 changes: 6 additions & 2 deletions sycl/test/inline-asm/letter_example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@ int main() {
}
q.submit([&](cl::sycl::handler &cgh) {
cgh.parallel_for<kernel_name>(
cl::sycl::range<1>(problem_size), [=](cl::sycl::id<1> idx)
[[cl::intel_reqd_sub_group_size(16)]] {
// clang-format off
cl::sycl::range<1>(problem_size),
[=](cl::sycl::id<1> idx) [[intel::reqd_sub_group_size(16)]] {
// clang-format on
#if defined(INLINE_ASM) && defined(__SYCL_DEVICE_ONLY__)
int i = idx[0];
asm volatile("{\n.decl V52 v_type=G type=d num_elts=16 align=GRF\n"
Expand All @@ -39,7 +41,9 @@ int main() {
:
: "rw"(&a[i]));
#else
// clang-format off
a[idx[0]]++;
// clang-format on
#endif
});
Comment on lines +44 to 48
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clang-format makes an unrelated change here, so I have turned formatting off for these lines.

}).wait();
Expand Down
Loading