Skip to content

Commit fba63e6

Browse files
committed
Merge remote-tracking branch 'intel_llvm/sycl' into llvmspirv_pulldown
2 parents c753ee4 + 06e4ebc commit fba63e6

File tree

7 files changed

+94
-35
lines changed

7 files changed

+94
-35
lines changed

clang/include/clang/Basic/AttrDocs.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2591,6 +2591,9 @@ is ignored on the host.
25912591
The ``intel::use_stall_enable_clusters`` attribute takes no argument and has an
25922592
effect when applied to a function, and no effect otherwise.
25932593

2594+
If ``intel::use_stall_enable_clusters`` is applied to a function called from a device
2595+
kernel, the attribute is not propagated to the kernel; it affects only the function it is applied to.
2596+
25942597
.. code-block:: c++
25952598

25962599
class Foo {
@@ -2604,6 +2607,14 @@ effect when applied to a function, and no effect otherwise.
26042607
[[intel::use_stall_enable_clusters]] void operator()() const {}
26052608
};
26062609

2610+
class Functor
2611+
{
2612+
[[intel::use_stall_enable_clusters]] void operator()(item<1> item)
2613+
{
2614+
/* kernel code */
2615+
}
2616+
};
2617+
26072618
The ``intel::use_stall_enable_clusters`` attribute supports a nonconforming
26082619
behavior when applied to a lambda in the type position.
26092620

@@ -2614,6 +2625,11 @@ behavior when applied to a lambda in the type position.
26142625
lambda();
26152626
}
26162627

2628+
kernel<class kernel_name>(
2629+
[]() [[intel::use_stall_enable_clusters]] {
2630+
/* kernel code */
2631+
});
2632+
26172633
}];
26182634
}
26192635

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -573,7 +573,8 @@ static void collectSYCLAttributes(Sema &S, FunctionDecl *FD,
573573
llvm::copy_if(FD->getAttrs(), std::back_inserter(Attrs), [](Attr *A) {
574574
return isa<SYCLIntelLoopFuseAttr, SYCLIntelFPGAMaxConcurrencyAttr,
575575
SYCLIntelFPGADisableLoopPipeliningAttr,
576-
SYCLIntelFPGAInitiationIntervalAttr>(A);
576+
SYCLIntelFPGAInitiationIntervalAttr,
577+
SYCLIntelUseStallEnableClustersAttr>(A);
577578
});
578579
}
579580
}
@@ -4077,6 +4078,7 @@ static void PropagateAndDiagnoseDeviceAttr(
40774078
case attr::Kind::SYCLIntelFPGAMaxConcurrency:
40784079
case attr::Kind::SYCLIntelFPGADisableLoopPipelining:
40794080
case attr::Kind::SYCLIntelFPGAInitiationInterval:
4081+
case attr::Kind::SYCLIntelUseStallEnableClusters:
40804082
SYCLKernel->addAttr(A);
40814083
break;
40824084
case attr::Kind::IntelNamedSubGroupSize:

clang/test/CodeGenSYCL/stall_enable_device.cpp

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,21 @@
22

33
// Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Device.
44
// The metadata is attached to the function that the attribute is applied to.
5-
// The attributes do not get propagated to kernel metadata i.e. spir_kernel.
5+
// The attribute is propagated to the kernel metadata (i.e. spir_kernel) only when it is applied
6+
// directly to the kernel functor's call operator or to the kernel lambda.
67

78
#include "sycl.hpp"
89

910
using namespace cl::sycl;
1011
queue q;
1112

12-
[[intel::use_stall_enable_clusters]] void test() {}
13+
[[intel::use_stall_enable_clusters]] void func() {}
1314

1415
struct FuncObj {
1516
[[intel::use_stall_enable_clusters]] void operator()() const {}
1617
};
1718

18-
void test1() {
19+
void func1() {
1920
auto lambda = []() [[intel::use_stall_enable_clusters]]{};
2021
lambda();
2122
}
@@ -25,30 +26,49 @@ class Foo {
2526
[[intel::use_stall_enable_clusters]] void operator()() const {}
2627
};
2728

29+
class Functor {
30+
public:
31+
[[intel::use_stall_enable_clusters]] void operator()() const {}
32+
};
33+
2834
int main() {
2935
q.submit([&](handler &h) {
30-
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() #0 !kernel_arg_buffer_location ![[NUM4:[0-9]+]]
31-
// CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM5:[0-9]+]]
36+
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() {{.*}} !stall_enable ![[NUM4:[0-9]+]]
37+
// CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]]
3238
h.single_task<class test_kernel1>(
3339
FuncObj());
3440

35-
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() #0 !kernel_arg_buffer_location ![[NUM4]]
36-
// CHECK define {{.*}}spir_func void @{{.*}}FooclEv(%class._ZTS3Foo.Foo addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM5]]
41+
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() {{.*}} !stall_enable ![[NUM4]]
42+
// CHECK: define {{.*}}spir_func void @{{.*}}FooclEv(%class._ZTS3Foo.Foo addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]]
3743
Foo f;
3844
h.single_task<class test_kernel2>(f);
3945

40-
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel3() #0 !kernel_arg_buffer_location ![[NUM4]]
41-
// CHECK: define {{.*}}spir_func void @_Z4testv() #3 !stall_enable ![[NUM5]]
46+
// Test attribute is not propagated to the kernel metadata i.e. spir_kernel.
47+
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel3()
48+
// CHECK-NOT: !stall_enable
49+
// CHECK-SAME: {
50+
// CHECK: define {{.*}}spir_func void @{{.*}}func{{.*}} !stall_enable ![[NUM4]]
4251
h.single_task<class test_kernel3>(
43-
[]() { test(); });
52+
[]() { func(); });
4453

45-
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4() #0 !kernel_arg_buffer_location ![[NUM4]]
46-
// CHECK: define {{.*}}spir_func void @{{.*}}test1vENKUlvE_clEv(%class.{{.*}}test1{{.*}}.anon addrspace(4)* align 1 dereferenceable_or_null(1) %this) #4 align 2 !stall_enable ![[NUM5]]
54+
// Test attribute is not propagated to the kernel metadata i.e. spir_kernel.
55+
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4()
56+
// CHECK-NOT: !stall_enable
57+
// CHECK-SAME: {
58+
// CHECK: define {{.*}}spir_func void @{{.*}}func1{{.*}}(%class.{{.*}}func1{{.*}}.anon addrspace(4)* align 1 dereferenceable_or_null(1) %this) #4 align 2 !stall_enable ![[NUM4]]
4759
h.single_task<class test_kernel4>(
48-
[]() { test1(); });
60+
[]() { func1(); });
61+
62+
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel5() {{.*}} !stall_enable ![[NUM4]]
63+
h.single_task<class test_kernel5>(
64+
[]() [[intel::use_stall_enable_clusters]]{});
65+
66+
// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel6() {{.*}} !stall_enable ![[NUM4]]
67+
// CHECK: define {{.*}}spir_func void @{{.*}}Functor{{.*}}(%class._ZTS7Functor.Functor addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]]
68+
Functor f1;
69+
h.single_task<class test_kernel6>(f1);
4970
});
5071
return 0;
5172
}
5273

53-
// CHECK: ![[NUM4]] = !{}
54-
// CHECK: ![[NUM5]] = !{i32 1}
74+
// CHECK: ![[NUM4]] = !{i32 1}

clang/test/SemaSYCL/stall_enable_device.cpp

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@ using namespace cl::sycl;
99
queue q;
1010

1111
// Test attribute is present on function definition.
12-
[[intel::use_stall_enable_clusters]] void test() {}
13-
// CHECK: FunctionDecl{{.*}}test
14-
// CHECK: SYCLIntelUseStallEnableClustersAttr
12+
[[intel::use_stall_enable_clusters]] void func() {}
13+
// CHECK: FunctionDecl{{.*}}used func 'void ()'
14+
// CHECK-NEXT: CompoundStmt{{.*}}
15+
// CHECK-NEXT: SYCLIntelUseStallEnableClustersAttr{{.*}}
1516

1617
// Tests for incorrect argument values for Intel FPGA use_stall_enable_clusters function attribute.
1718
#ifdef TRIGGER_ERROR
@@ -24,7 +25,7 @@ struct FuncObj {
2425
[[intel::use_stall_enable_clusters]] void operator()() const {}
2526
// CHECK: CXXRecordDecl{{.*}}implicit struct FuncObj
2627
// CHECK-NEXT: CXXMethodDecl{{.*}}used operator() 'void () const'
27-
// CHECK-NEXT-NEXT:SYCLIntelUseStallEnableClustersAttr
28+
// CHECK: SYCLIntelUseStallEnableClustersAttr{{.*}}
2829
};
2930

3031
// Test attribute is present on lambda function (applied to a function type for the lambda's call operator).
@@ -33,28 +34,46 @@ void test3() {
3334
lambda();
3435
// CHECK: FunctionDecl{{.*}}test3
3536
// CHECK: LambdaExpr
36-
// CHECK: SYCLIntelUseStallEnableClustersAttr
37+
// CHECK: SYCLIntelUseStallEnableClustersAttr{{.*}}
3738
}
3839

40+
// Test attribute is present on functor.
41+
// CHECK: CXXRecordDecl{{.*}}referenced class Functor definition
42+
// CHECK: CXXRecordDecl{{.*}} implicit class Functor
43+
// CHECK: AccessSpecDecl{{.*}} public
44+
// CHECK-NEXT: CXXMethodDecl{{.*}}used operator() 'void () const'
45+
// CHECK: SYCLIntelUseStallEnableClustersAttr{{.*}}
46+
class Functor {
47+
public:
48+
[[intel::use_stall_enable_clusters]] void operator()() const {
49+
}
50+
};
51+
3952
int main() {
4053
q.submit([&](handler &h) {
41-
// Test attribute is not propagated to the kernel.
54+
// Test attribute is propagated to the kernel.
4255
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel1
43-
// CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}}
56+
// CHECK: SYCLIntelUseStallEnableClustersAttr {{.*}}
4457
h.single_task<class test_kernel1>(
4558
FuncObj());
4659

47-
// Test attribute does not present on LambdaExpr called by kernel.
60+
// Test attribute is propagated to the kernel when applied directly to the kernel lambda.
4861
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel2
49-
// CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}}
62+
// CHECK: SYCLIntelUseStallEnableClustersAttr {{.*}}
5063
h.single_task<class test_kernel2>(
5164
[]() [[intel::use_stall_enable_clusters]]{});
5265

5366
// Test attribute is not propagated to the kernel.
5467
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel3
5568
// CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}}
5669
h.single_task<class test_kernel3>(
57-
[]() { test(); });
70+
[]() { func(); });
71+
72+
// Test attribute is applied to kernel if directly applied through functor.
73+
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel4
74+
// CHECK: SYCLIntelUseStallEnableClustersAttr {{.*}}
75+
Functor f2;
76+
h.single_task<class test_kernel4>(f2);
5877
});
5978
return 0;
6079
}

sycl/include/CL/sycl/handler.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2261,10 +2261,10 @@ class __SYCL_EXPORT handler {
22612261
/// if either \param Dest or \param Src is nullptr. The behavior is undefined
22622262
/// if any of the pointer parameters is invalid.
22632263
///
2264-
/// \param Dest is a USM pointer to the destination memory.
22652264
/// \param Src is a USM pointer to the source memory.
2265+
/// \param Dest is a USM pointer to the destination memory.
22662266
/// \param Count is a number of elements of type T to copy.
2267-
template <typename T> void copy(T *Dest, const T *Src, size_t Count) {
2267+
template <typename T> void copy(const T *Src, T *Dest, size_t Count) {
22682268
this->memcpy(Dest, Src, Count * sizeof(T));
22692269
}
22702270

sycl/include/CL/sycl/queue.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -446,11 +446,11 @@ class __SYCL_EXPORT queue {
446446
/// if either \param Dest or \param Src is nullptr. The behavior is undefined
447447
/// if any of the pointer parameters is invalid.
448448
///
449-
/// \param Dest is a USM pointer to the destination memory.
450449
/// \param Src is a USM pointer to the source memory.
450+
/// \param Dest is a USM pointer to the destination memory.
451451
/// \param Count is a number of elements of type T to copy.
452452
/// \return an event representing copy operation.
453-
template <typename T> event copy(T *Dest, const T *Src, size_t Count) {
453+
template <typename T> event copy(const T *Src, T *Dest, size_t Count) {
454454
return this->memcpy(Dest, Src, Count * sizeof(T));
455455
}
456456

@@ -460,13 +460,13 @@ class __SYCL_EXPORT queue {
460460
/// if either \param Dest or \param Src is nullptr. The behavior is undefined
461461
/// if any of the pointer parameters is invalid.
462462
///
463-
/// \param Dest is a USM pointer to the destination memory.
464463
/// \param Src is a USM pointer to the source memory.
464+
/// \param Dest is a USM pointer to the destination memory.
465465
/// \param Count is a number of elements of type T to copy.
466466
/// \param DepEvent is an event that specifies the kernel dependencies.
467467
/// \return an event representing copy operation.
468468
template <typename T>
469-
event copy(T *Dest, const T *Src, size_t Count, event DepEvent) {
469+
event copy(const T *Src, T *Dest, size_t Count, event DepEvent) {
470470
return this->memcpy(Dest, Src, Count * sizeof(T), DepEvent);
471471
}
472472

@@ -476,13 +476,13 @@ class __SYCL_EXPORT queue {
476476
/// if either \param Dest or \param Src is nullptr. The behavior is undefined
477477
/// if any of the pointer parameters is invalid.
478478
///
479-
/// \param Dest is a USM pointer to the destination memory.
480479
/// \param Src is a USM pointer to the source memory.
480+
/// \param Dest is a USM pointer to the destination memory.
481481
/// \param Count is a number of elements of type T to copy.
482482
/// \param DepEvents is a vector of events specifying the kernel dependencies.
483483
/// \return an event representing copy operation.
484484
template <typename T>
485-
event copy(T *Dest, const T *Src, size_t Count,
485+
event copy(const T *Src, T *Dest, size_t Count,
486486
const vector_class<event> &DepEvents) {
487487
return this->memcpy(Dest, Src, Count * sizeof(T), DepEvents);
488488
}

sycl/source/detail/global_handler.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,9 @@ void shutdown() {
151151
}
152152

153153
#ifdef _WIN32
154-
BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) {
154+
extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL,
155+
DWORD fdwReason,
156+
LPVOID lpReserved) {
155157
// Perform actions based on the reason for calling.
156158
switch (fdwReason) {
157159
case DLL_PROCESS_DETACH:

0 commit comments

Comments
 (0)