Skip to content

Commit 27d973e

Browse files
committed
ast and md tests
1 parent a97bc95 commit 27d973e

File tree

2 files changed

+280
-0
lines changed

2 files changed

+280
-0
lines changed
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
// REQUIRES: cuda
2+
3+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2017 -triple nvptx-unknown-unknown -target-cpu sm_90 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
4+
5+
#include "sycl.hpp"
6+
7+
using namespace sycl;
8+
queue q;
9+
10+
// Kernel functor whose call operator carries all three attributes under test
// (max_work_group_size, min_work_groups_per_cu, max_work_groups_per_mp) with
// literal arguments. main() submits an instance of it as kernel_name1.
class Foo {
11+
public:
12+
[[intel::max_work_group_size(8, 8, 8), intel::min_work_groups_per_cu(2),
13+
intel::max_work_groups_per_mp(4)]] void
14+
operator()() const {}
15+
};
16+
17+
// Class-template variant of the kernel functor: the first work-group
// dimension and both per-unit limits are taken from the template parameter N.
// main() instantiates it as Functor<6> for kernel_name3.
template <int N> class Functor {
18+
public:
19+
[[intel::max_work_group_size(N, 8, 8), intel::min_work_groups_per_cu(N),
20+
intel::max_work_groups_per_mp(N)]] void
21+
operator()() const {}
22+
};
23+
24+
// Function template whose attribute arguments depend on N. It is called as
// zoo<16>() from the kernel_name5 lambda in main().
template <int N>
25+
[[intel::max_work_group_size(N, 8, 8), intel::min_work_groups_per_cu(N),
26+
intel::max_work_groups_per_mp(N)]] void
27+
zoo() {}
28+
29+
// Free function with literal attribute arguments. It is called from the
// kernel_name4 lambda in main(), which is the "attribute is propagated" case.
[[intel::max_work_group_size(8, 8, 8), intel::min_work_groups_per_cu(2),
30+
intel::max_work_groups_per_mp(4)]] void
31+
bar() {}
32+
33+
// Submits five single_task kernels, one per attribute placement exercised by
// this test: a plain functor (kernel_name1), an attributed lambda
// (kernel_name2), a class-template functor (kernel_name3), a lambda calling an
// attributed function (kernel_name4) and a lambda calling an attributed
// function template (kernel_name5). The FileCheck directives after main()
// refer to the kernels by these names.
int main() {
34+
q.submit([&](handler &h) {
35+
// Test attribute argument size.
36+
Foo boo;
37+
h.single_task<class kernel_name1>(boo);
38+
39+
// Test attribute is applied on lambda.
40+
h.single_task<class kernel_name2>(
41+
[]() [[intel::max_work_group_size(8, 8, 8),
42+
intel::min_work_groups_per_cu(2),
43+
intel::max_work_groups_per_mp(4)]] {});
44+
45+
// Test class template argument.
46+
Functor<6> f;
47+
h.single_task<class kernel_name3>(f);
48+
49+
// Test attribute is propagated.
50+
h.single_task<class kernel_name4>([]() { bar(); });
51+
52+
// Test function template argument.
53+
h.single_task<class kernel_name5>([]() { zoo<16>(); });
54+
});
55+
return 0;
56+
}
57+
58+
// CHECK: define dso_local void @{{.*}}kernel_name1() #0 {{.*}} !min_work_groups_per_cu ![[MWGPC:[0-9]+]] !max_work_groups_per_mp ![[MWGPM:[0-9]+]] !max_work_group_size ![[MWGS:[0-9]+]]
59+
// CHECK: define dso_local void @{{.*}}kernel_name2() #0 {{.*}} !min_work_groups_per_cu ![[MWGPC:[0-9]+]] !max_work_groups_per_mp ![[MWGPM:[0-9]+]] !max_work_group_size ![[MWGS:[0-9]+]]
60+
// CHECK: define dso_local void @{{.*}}kernel_name3() #0 {{.*}} !min_work_groups_per_cu ![[MWGPC_MWGPM:[0-9]+]] !max_work_groups_per_mp ![[MWGPC_MWGPM]] !max_work_group_size ![[MWGS_2:[0-9]+]]
61+
// CHECK: define dso_local void @{{.*}}kernel_name4() #0 {{.*}} !min_work_groups_per_cu ![[MWGPC:[0-9]+]] !max_work_groups_per_mp ![[MWGPM:[0-9]+]] !max_work_group_size ![[MWGS:[0-9]+]]
62+
// CHECK: define dso_local void @{{.*}}kernel_name5() #0 {{.*}} !min_work_groups_per_cu ![[MWGPC_MWGPM_2:[0-9]+]] !max_work_groups_per_mp ![[MWGPC_MWGPM_2]] !max_work_group_size ![[MWGS_3:[0-9]+]]
63+
64+
// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxntidx", i32 512}
65+
// CHECK: {{.*}}@{{.*}}kernel_name1, !"minnctapersm", i32 2}
66+
// CHECK: {{.*}}@{{.*}}kernel_name1, !"maxclusterrank", i32 4}
67+
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxntidx", i32 512}
68+
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"minnctapersm", i32 2}
69+
// CHECK: {{.*}}@{{.*}}Foo{{.*}}, !"maxclusterrank", i32 4}
70+
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxntidx", i32 512}
71+
// CHECK: {{.*}}@{{.*}}kernel_name2, !"minnctapersm", i32 2}
72+
// CHECK: {{.*}}@{{.*}}kernel_name2, !"maxclusterrank", i32 4}
73+
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxntidx", i32 512}
74+
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"minnctapersm", i32 2}
75+
// CHECK: {{.*}}@{{.*}}main{{.*}}, !"maxclusterrank", i32 4}
76+
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxntidx", i32 384}
77+
// CHECK: {{.*}}@{{.*}}kernel_name3, !"minnctapersm", i32 6}
78+
// CHECK: {{.*}}@{{.*}}kernel_name3, !"maxclusterrank", i32 6}
79+
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxntidx", i32 384}
80+
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"minnctapersm", i32 6}
81+
// CHECK: {{.*}}@{{.*}}Functor{{.*}}, !"maxclusterrank", i32 6}
82+
// CHECK: {{.*}}@{{.*}}kernel_name4, !"maxntidx", i32 512}
83+
// CHECK: {{.*}}@{{.*}}kernel_name4, !"minnctapersm", i32 2}
84+
// CHECK: {{.*}}@{{.*}}kernel_name4, !"maxclusterrank", i32 4}
85+
// CHECK: {{.*}}@{{.*}}bar{{.*}}, !"maxntidx", i32 512}
86+
// CHECK: {{.*}}@{{.*}}bar{{.*}}, !"minnctapersm", i32 2}
87+
// CHECK: {{.*}}@{{.*}}bar{{.*}}, !"maxclusterrank", i32 4}
88+
// CHECK: {{.*}}@{{.*}}kernel_name5, !"maxntidx", i32 1024}
89+
// CHECK: {{.*}}@{{.*}}kernel_name5, !"minnctapersm", i32 16}
90+
// CHECK: {{.*}}@{{.*}}kernel_name5, !"maxclusterrank", i32 16}
91+
// CHECK: {{.*}}@{{.*}}zoo{{.*}}, !"maxntidx", i32 1024}
92+
// CHECK: {{.*}}@{{.*}}zoo{{.*}}, !"minnctapersm", i32 16}
93+
// CHECK: {{.*}}@{{.*}}zoo{{.*}}, !"maxclusterrank", i32 16}
94+
95+
// CHECK: ![[MWGPC]] = !{i32 2}
96+
// CHECK: ![[MWGPM]] = !{i32 4}
97+
// CHECK: ![[MWGS]] = !{i32 8, i32 8, i32 8}
98+
// CHECK: ![[MWGPC_MWGPM]] = !{i32 6}
99+
// CHECK: ![[MWGS_2]] = !{i32 8, i32 8, i32 6}
100+
// CHECK: ![[MWGPC_MWGPM_2]] = !{i32 16}
101+
// CHECK: ![[MWGS_3]] = !{i32 8, i32 8, i32 16}

clang/test/SemaSYCL/lb_sm_90_ast.cpp

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
// REQUIRES: cuda
2+
3+
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2017 -ast-dump -triple nvptx-unknown-unknown -target-cpu sm_90 %s | FileCheck %s
4+
5+
// Tests for AST of Intel max_work_group_size, min_work_groups_per_cu and
6+
// max_work_groups_per_mp attributes.
7+
8+
#include "sycl.hpp"
9+
10+
sycl::queue deviceQueue;
11+
12+
// CHECK: FunctionDecl {{.*}} func1 'void ()'
13+
// CHECK-NEXT: CompoundStmt
14+
// CHECK-NEXT: SYCLIntelMaxWorkGroupSizeAttr {{.*}}
15+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
16+
// CHECK-NEXT: value: Int 8
17+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8
18+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
19+
// CHECK-NEXT: value: Int 8
20+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8
21+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
22+
// CHECK-NEXT: value: Int 8
23+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8
24+
// CHECK-NEXT: SYCLIntelMinWorkGroupsPerComputeUnitAttr {{.*}}
25+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
26+
// CHECK-NEXT: value: Int 4
27+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 4
28+
// CHECK-NEXT: SYCLIntelMaxWorkGroupsPerMultiprocessorAttr {{.*}}
29+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
30+
// CHECK-NEXT: value: Int 2
31+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 2
32+
// Free function with literal attribute arguments (8,8,8 / 4 / 2). The
// FileCheck directives immediately above describe its expected AST, and it is
// called from KernelFunctor::operator().
[[intel::max_work_group_size(8, 8, 8), intel::min_work_groups_per_cu(4),
33+
intel::max_work_groups_per_mp(2)]] void
34+
func1() {}
35+
36+
// Test that checks template parameter support on function.
37+
// CHECK: FunctionTemplateDecl {{.*}} func2
38+
// CHECK: FunctionDecl {{.*}} func2 'void ()'
39+
// CHECK-NEXT: CompoundStmt
40+
// CHECK-NEXT: SYCLIntelMaxWorkGroupSizeAttr {{.*}}
41+
// CHECK-NEXT: DeclRefExpr {{.*}} 'int' NonTypeTemplateParm {{.*}} 'N' 'int'
42+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
43+
// CHECK-NEXT: value: Int 8
44+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8
45+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
46+
// CHECK-NEXT: value: Int 8
47+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8
48+
// CHECK-NEXT: SYCLIntelMinWorkGroupsPerComputeUnitAttr {{.*}}
49+
// CHECK-NEXT: DeclRefExpr {{.*}} 'int' NonTypeTemplateParm {{.*}} 'N' 'int'
50+
// CHECK-NEXT: SYCLIntelMaxWorkGroupsPerMultiprocessorAttr {{.*}}
51+
// CHECK-NEXT: DeclRefExpr {{.*}} 'int' NonTypeTemplateParm {{.*}} 'N' 'int'
52+
53+
// CHECK: FunctionDecl {{.*}} func2 'void ()'
54+
// CHECK-NEXT: TemplateArgument integral 6
55+
// CHECK-NEXT: CompoundStmt
56+
// CHECK-NEXT: SYCLIntelMaxWorkGroupSizeAttr {{.*}}
57+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
58+
// CHECK-NEXT: value: Int 6
59+
// CHECK-NEXT: SubstNonTypeTemplateParmExpr
60+
// CHECK-NEXT: NonTypeTemplateParmDecl
61+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 6
62+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
63+
// CHECK-NEXT: value: Int 8
64+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8
65+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
66+
// CHECK-NEXT: value: Int 8
67+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8
68+
// CHECK-NEXT: SYCLIntelMinWorkGroupsPerComputeUnitAttr {{.*}}
69+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
70+
// CHECK-NEXT: value: Int 6
71+
// CHECK-NEXT: SubstNonTypeTemplateParmExpr
72+
// CHECK-NEXT: NonTypeTemplateParmDecl
73+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 6
74+
// CHECK-NEXT: SYCLIntelMaxWorkGroupsPerMultiprocessorAttr {{.*}}
75+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
76+
// CHECK-NEXT: value: Int 6
77+
// CHECK-NEXT: SubstNonTypeTemplateParmExpr
78+
// CHECK-NEXT: NonTypeTemplateParmDecl
79+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 6
80+
// Function template whose attribute arguments depend on N. The directives
// above expect both the dependent (uninstantiated) form and the N = 6
// instantiation, which main() triggers through func2<6>().
template <int N>
81+
[[intel::max_work_group_size(N, 8, 8), intel::min_work_groups_per_cu(N),
82+
intel::max_work_groups_per_mp(N)]] void func2() {}
83+
84+
// Functor with no attributes of its own; its call operator only calls
// func1(). main() submits it as kernel_name_1, where the directives expect
// func1's attribute values to appear under the kernel's FunctionDecl.
class KernelFunctor {
85+
public:
86+
void operator()() const {
87+
func1();
88+
}
89+
};
90+
91+
// Test that checks template parameter support on class member function.
92+
// Functor template whose call operator takes the first work-group dimension
// and both per-unit limits from N. main() uses KernelFunctor2<3> for
// kernel_name_2.
template <int N>
93+
class KernelFunctor2 {
94+
public:
95+
[[intel::max_work_group_size(N, 8, 8), intel::min_work_groups_per_cu(N),
96+
intel::max_work_groups_per_mp(N)]] void operator()() const {
97+
}
98+
};
99+
100+
// Submits three kernels and then instantiates func2<6>. Each kernel is
// preceded by the FileCheck directives describing the attribute nodes expected
// under its FunctionDecl in the AST dump:
//   kernel_name_1 - KernelFunctor, which calls the attributed func1()
//   kernel_name_2 - KernelFunctor2<3>, attributes instantiated with N = 3
//   kernel_name_3 - lambda carrying the attributes directly
int main() {
101+
deviceQueue.submit([&](sycl::handler &h) {
102+
// CHECK-LABEL: FunctionDecl {{.*}}kernel_name_1
103+
// CHECK: SYCLIntelMaxWorkGroupSizeAttr
104+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
105+
// CHECK-NEXT: value: Int 8
106+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8
107+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
108+
// CHECK-NEXT: value: Int 8
109+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8
110+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
111+
// CHECK-NEXT: value: Int 8
112+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8
113+
// CHECK: SYCLIntelMinWorkGroupsPerComputeUnitAttr
114+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
115+
// CHECK-NEXT: value: Int 4
116+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 4
117+
// CHECK: SYCLIntelMaxWorkGroupsPerMultiprocessorAttr
118+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
119+
// CHECK-NEXT: value: Int 2
120+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 2
121+
KernelFunctor f1;
122+
h.single_task<class kernel_name_1>(f1);
123+
124+
// CHECK-LABEL: FunctionDecl {{.*}}kernel_name_2
125+
// CHECK: SYCLIntelMaxWorkGroupSizeAttr
126+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
127+
// CHECK-NEXT: value: Int 3
128+
// CHECK-NEXT: SubstNonTypeTemplateParmExpr
129+
// CHECK-NEXT: NonTypeTemplateParmDecl
130+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 3
131+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
132+
// CHECK-NEXT: value: Int 8
133+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8
134+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
135+
// CHECK-NEXT: value: Int 8
136+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8
137+
// CHECK: SYCLIntelMinWorkGroupsPerComputeUnitAttr
138+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
139+
// CHECK-NEXT: value: Int 3
140+
// CHECK-NEXT: SubstNonTypeTemplateParmExpr
141+
// CHECK-NEXT: NonTypeTemplateParmDecl
142+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 3
143+
// CHECK: SYCLIntelMaxWorkGroupsPerMultiprocessorAttr
144+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
145+
// CHECK-NEXT: value: Int 3
146+
// CHECK-NEXT: SubstNonTypeTemplateParmExpr
147+
// CHECK-NEXT: NonTypeTemplateParmDecl
148+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 3
149+
KernelFunctor2<3> f2;
150+
h.single_task<class kernel_name_2>(f2);
151+
152+
// CHECK-LABEL: FunctionDecl {{.*}}kernel_name_3
153+
// CHECK: SYCLIntelMaxWorkGroupSizeAttr {{.*}}
154+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
155+
// CHECK-NEXT: value: Int 8
156+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8
157+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
158+
// CHECK-NEXT: value: Int 8
159+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8
160+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
161+
// CHECK-NEXT: value: Int 8
162+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 8
163+
// CHECK-NEXT: SYCLIntelMinWorkGroupsPerComputeUnitAttr {{.*}}
164+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
165+
// CHECK-NEXT: value: Int 4
166+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 4
167+
// CHECK-NEXT: SYCLIntelMaxWorkGroupsPerMultiprocessorAttr {{.*}}
168+
// CHECK-NEXT: ConstantExpr{{.*}}'int'
169+
// CHECK-NEXT: value: Int 6
170+
// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 6
171+
h.single_task<class kernel_name_3>(
172+
[]() [[intel::max_work_group_size(8, 8, 8), intel::min_work_groups_per_cu(4),
173+
intel::max_work_groups_per_mp(6)]]{});
174+
});
175+
176+
func2<6>();
177+
178+
return 0;
179+
}

0 commit comments

Comments
 (0)