Skip to content

Commit b9a41b9

Browse files
[NFC][OpenMP] Add test checking clang offload chunking policy (#83261)
Verify how clang handles `dist_schedule(static, block_chunk)` and `schedule(static, thread_chunk)` clauses for OpenMP offload loop workshare pragmas.
1 parent c7de4a3 commit b9a41b9

File tree

1 file changed

+252
-0
lines changed

1 file changed

+252
-0
lines changed
Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
// RUN: %libomptarget-compile-run-and-check-generic
2+
// RUN: %libomptarget-compileopt-run-and-check-generic
3+
4+
// UNSUPPORTED: aarch64-unknown-linux-gnu
5+
// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
6+
// UNSUPPORTED: x86_64-pc-linux-gnu
7+
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
8+
9+
// clang-format off
10+
11+
#include <omp.h>
12+
#include <stdio.h>
13+
14+
#define N 100
15+
#define BLOCK_SHIFT 8
16+
17+
void print(int *A, int size) {
18+
for (int i = 0; i < size; ++i) {
19+
printf("B%dT%d ", A[i] >> BLOCK_SHIFT, A[i] % (1 << BLOCK_SHIFT));
20+
}
21+
printf("\n");
22+
}
23+
24+
// Test driver. Runs ten offloaded loops over the N-element array A, each with
// a different combination of dist_schedule(static, <block chunk>) and
// schedule(static, <thread chunk>) clauses. Every loop iteration stores
// (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num() into A[i], so
// after the region A records which team/thread executed each iteration.
// After each loop a header line naming the configuration is printed, followed
// by the decoded contents of A via print(); the CHECK lines after main() match
// that output. Returns 0.
int main() {
25+
int A[N];
26+
27+
// Case 1: `target parallel for` (no teams/distribute level), 10 threads
// requested, thread chunk size 2.
#pragma omp target parallel for map(from:A) num_threads(10) schedule(static, 2)
28+
for (int i = 0; i < N; ++i) {
29+
A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
30+
}
31+
printf("omp target parallel for thread chunk size %d\n", 2);
32+
print(A, N);
33+
34+
// Case 2: `target teams distribute` (no parallel-for level), 10 teams
// requested, block chunk size 2.
#pragma omp target teams distribute map(from:A) num_teams(10) \
35+
dist_schedule(static, 2)
36+
for (int i = 0; i < N; ++i) {
37+
A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
38+
}
39+
printf("omp target teams distribute block chunk size %d\n", 2);
40+
print(A, N);
41+
42+
// Case 3: combined construct, block chunk size 2, no schedule clause (thread
// chunk size left to the implementation default).
#pragma omp target teams distribute parallel for map(from:A) \
43+
num_teams(10) dist_schedule(static, 2)
44+
for (int i = 0; i < N; ++i) {
45+
A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
46+
}
47+
printf("omp target teams distribute parallel for block chunk size %d ", 2);
48+
printf("thread chunk size default\n");
49+
print(A, N);
50+
51+
// Case 4: block chunk size 2 with thread chunk size 3 (thread chunk larger
// than the block chunk).
#pragma omp target teams distribute parallel for map(from:A) \
52+
num_teams(10) dist_schedule(static, 2) schedule(static, 3)
53+
for (int i = 0; i < N; ++i) {
54+
A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
55+
}
56+
printf("omp target teams distribute parallel for block chunk size %d ", 2);
57+
printf("thread chunk size %d\n", 3);
58+
print(A, N);
59+
60+
// Case 5: block chunk size 3 with thread chunk size 2.
#pragma omp target teams distribute parallel for map(from:A) \
61+
num_teams(10) dist_schedule(static, 3) schedule(static, 2)
62+
for (int i = 0; i < N; ++i) {
63+
A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
64+
}
65+
printf("omp target teams distribute parallel for block chunk size %d ", 3);
66+
printf("thread chunk size %d\n", 2);
67+
print(A, N);
68+
69+
// Case 6: block chunk size 5 with thread chunk size 2.
#pragma omp target teams distribute parallel for map(from:A) \
70+
num_teams(10) dist_schedule(static, 5) schedule(static, 2)
71+
for (int i = 0; i < N; ++i) {
72+
A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
73+
}
74+
printf("omp target teams distribute parallel for block chunk size %d ", 5);
75+
printf("thread chunk size %d\n", 2);
76+
print(A, N);
77+
78+
// Case 7: block chunk size 49 (N is not a multiple of it) with thread chunk
// size 2.
#pragma omp target teams distribute parallel for map(from:A) num_teams(10) \
79+
dist_schedule(static, 49) schedule(static, 2)
80+
for (int i = 0; i < N; ++i) {
81+
A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
82+
}
83+
printf("omp target teams distribute parallel for block chunk size %d ", 49);
84+
printf("thread chunk size %d\n", 2);
85+
print(A, N);
86+
87+
// Case 8: block chunk size 29, 10 threads requested, no schedule clause.
#pragma omp target teams distribute parallel for map(from:A) \
88+
num_teams(10) num_threads(10) dist_schedule(static, 29)
89+
for (int i = 0; i < N; ++i) {
90+
A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
91+
}
92+
printf("omp target teams distribute parallel for block chunk size %d ", 29);
93+
printf("thread chunk size default\n");
94+
print(A, N);
95+
96+
// Case 9: block chunk size 101, which exceeds the N iterations of the loop;
// 10 threads requested, no schedule clause.
#pragma omp target teams distribute parallel for map(from:A) \
97+
num_teams(10) num_threads(10) dist_schedule(static, 101)
98+
for (int i = 0; i < N; ++i) {
99+
A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
100+
}
101+
printf("omp target teams distribute parallel for block chunk size %d ", 101);
102+
printf("thread chunk size default\n");
103+
print(A, N);
104+
105+
// Case 10: no dist_schedule clause (block chunk size left to the
// implementation default), 9 teams and 10 threads requested, thread chunk
// size 101, which exceeds the N iterations of the loop.
#pragma omp target teams distribute parallel for map(from:A) \
106+
num_teams(9) num_threads(10) schedule(static, 101)
107+
for (int i = 0; i < N; ++i) {
108+
A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num();
109+
}
110+
printf("omp target teams distribute parallel for default block chunk size ");
111+
printf("thread chunk size %d\n", 101);
112+
print(A, N);
113+
return 0;
114+
}
115+
//CHECK: omp target parallel for thread chunk size 2
116+
117+
//CHECK-NEXT: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4
118+
//CHECK-SAME: B0T5 B0T5 B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9
119+
//CHECK-SAME: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4
120+
//CHECK-SAME: B0T5 B0T5 B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9
121+
//CHECK-SAME: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4
122+
//CHECK-SAME: B0T5 B0T5 B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9
123+
//CHECK-SAME: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4
124+
//CHECK-SAME: B0T5 B0T5 B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9
125+
//CHECK-SAME: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4
126+
//CHECK-SAME: B0T5 B0T5 B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9
127+
128+
//CHECK: omp target teams distribute block chunk size 2
129+
130+
//CHECK-NEXT: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
131+
//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
132+
//CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
133+
//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
134+
//CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
135+
//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
136+
//CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
137+
//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
138+
//CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
139+
//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
140+
141+
//CHECK: omp target teams distribute parallel for
142+
//CHECK-SAME: block chunk size 2 thread chunk size default
143+
144+
//CHECK-NEXT: B0T0 B0T1 B1T0 B1T1 B2T0 B2T1 B3T0 B3T1 B4T0 B4T1
145+
//CHECK-SAME: B5T0 B5T1 B6T0 B6T1 B7T0 B7T1 B8T0 B8T1 B9T0 B9T1
146+
//CHECK-SAME: B0T0 B0T1 B1T0 B1T1 B2T0 B2T1 B3T0 B3T1 B4T0 B4T1
147+
//CHECK-SAME: B5T0 B5T1 B6T0 B6T1 B7T0 B7T1 B8T0 B8T1 B9T0 B9T1
148+
//CHECK-SAME: B0T0 B0T1 B1T0 B1T1 B2T0 B2T1 B3T0 B3T1 B4T0 B4T1
149+
//CHECK-SAME: B5T0 B5T1 B6T0 B6T1 B7T0 B7T1 B8T0 B8T1 B9T0 B9T1
150+
151+
//CHECK: omp target teams distribute parallel for
152+
//CHECK-SAME: block chunk size 2 thread chunk size 3
153+
154+
//CHECK-NEXT: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
155+
//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
156+
//CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
157+
//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
158+
//CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
159+
//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
160+
//CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
161+
//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
162+
//CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0
163+
//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0
164+
165+
//CHECK: omp target teams distribute parallel for
166+
//CHECK-SAME: block chunk size 3 thread chunk size 2
167+
168+
//CHECK-NEXT: B0T0 B0T0 B0T1 B1T0 B1T0 B1T1 B2T0 B2T0 B2T1
169+
//CHECK-SAME: B3T0 B3T0 B3T1 B4T0 B4T0 B4T1
170+
//CHECK-SAME: B5T0 B5T0 B5T1 B6T0 B6T0 B6T1 B7T0 B7T0 B7T1
171+
//CHECK-SAME: B8T0 B8T0 B8T1 B9T0 B9T0 B9T1
172+
//CHECK-SAME: B0T0 B0T0 B0T1 B1T0 B1T0 B1T1 B2T0 B2T0 B2T1
173+
//CHECK-SAME: B3T0 B3T0 B3T1 B4T0 B4T0 B4T1
174+
//CHECK-SAME: B5T0 B5T0 B5T1 B6T0 B6T0 B6T1 B7T0 B7T0 B7T1
175+
//CHECK-SAME: B8T0 B8T0 B8T1 B9T0 B9T0 B9T1
176+
//CHECK-SAME: B0T0 B0T0 B0T1 B1T0 B1T0 B1T1 B2T0 B2T0 B2T1
177+
//CHECK-SAME: B3T0 B3T0 B3T1 B4T0 B4T0 B4T1
178+
//CHECK-SAME: B5T0 B5T0 B5T1 B6T0 B6T0 B6T1 B7T0 B7T0 B7T1
179+
//CHECK-SAME: B8T0 B8T0 B8T1 B9T0 B9T0 B9T1
180+
//CHECK-SAME: B0T0 B0T0 B0T1 B1T0 B1T0 B1T1 B2T0 B2T0 B2T1 B3T0
181+
182+
//CHECK: omp target teams distribute parallel for
183+
//CHECK-SAME: block chunk size 5 thread chunk size 2
184+
185+
//CHECK-NEXT: B0T0 B0T0 B0T1 B0T1 B0T2 B1T0 B1T0 B1T1 B1T1 B1T2
186+
//CHECK-SAME: B2T0 B2T0 B2T1 B2T1 B2T2 B3T0 B3T0 B3T1 B3T1 B3T2
187+
//CHECK-SAME: B4T0 B4T0 B4T1 B4T1 B4T2 B5T0 B5T0 B5T1 B5T1 B5T2
188+
//CHECK-SAME: B6T0 B6T0 B6T1 B6T1 B6T2 B7T0 B7T0 B7T1 B7T1 B7T2
189+
//CHECK-SAME: B8T0 B8T0 B8T1 B8T1 B8T2 B9T0 B9T0 B9T1 B9T1 B9T2
190+
//CHECK-SAME: B0T0 B0T0 B0T1 B0T1 B0T2 B1T0 B1T0 B1T1 B1T1 B1T2
191+
//CHECK-SAME: B2T0 B2T0 B2T1 B2T1 B2T2 B3T0 B3T0 B3T1 B3T1 B3T2
192+
//CHECK-SAME: B4T0 B4T0 B4T1 B4T1 B4T2 B5T0 B5T0 B5T1 B5T1 B5T2
193+
//CHECK-SAME: B6T0 B6T0 B6T1 B6T1 B6T2 B7T0 B7T0 B7T1 B7T1 B7T2
194+
//CHECK-SAME: B8T0 B8T0 B8T1 B8T1 B8T2 B9T0 B9T0 B9T1 B9T1 B9T2
195+
196+
//CHECK: omp target teams distribute parallel for
197+
//CHECK-SAME: block chunk size 49 thread chunk size 2
198+
199+
//CHECK-NEXT: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4 B0T5 B0T5
200+
//CHECK-SAME: B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9 B0T10 B0T10 B0T11 B0T11
201+
//CHECK-SAME: B0T12 B0T12 B0T13 B0T13 B0T14 B0T14 B0T15 B0T15 B0T16 B0T16
202+
//CHECK-SAME: B0T17 B0T17 B0T18 B0T18 B0T19 B0T19 B0T20 B0T20 B0T21 B0T21
203+
//CHECK-SAME: B0T22 B0T22 B0T23 B0T23 B0T24
204+
//CHECK-SAME: B1T0 B1T0 B1T1 B1T1 B1T2 B1T2 B1T3 B1T3 B1T4 B1T4 B1T5 B1T5
205+
//CHECK-SAME: B1T6 B1T6 B1T7 B1T7 B1T8 B1T8 B1T9 B1T9 B1T10 B1T10 B1T11 B1T11
206+
//CHECK-SAME: B1T12 B1T12 B1T13 B1T13 B1T14 B1T14 B1T15 B1T15 B1T16 B1T16
207+
//CHECK-SAME: B1T17 B1T17 B1T18 B1T18 B1T19 B1T19 B1T20 B1T20 B1T21 B1T21
208+
//CHECK-SAME: B1T22 B1T22 B1T23 B1T23 B1T24
209+
//CHECK-SAME: B2T0 B2T0
210+
211+
//CHECK: omp target teams distribute parallel for
212+
//CHECK-SAME: block chunk size 29 thread chunk size default
213+
214+
//CHECK-NEXT: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
215+
//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
216+
//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8
217+
//CHECK-SAME: B1T0 B1T1 B1T2 B1T3 B1T4 B1T5 B1T6 B1T7 B1T8 B1T9
218+
//CHECK-SAME: B1T0 B1T1 B1T2 B1T3 B1T4 B1T5 B1T6 B1T7 B1T8 B1T9
219+
//CHECK-SAME: B1T0 B1T1 B1T2 B1T3 B1T4 B1T5 B1T6 B1T7 B1T8
220+
//CHECK-SAME: B2T0 B2T1 B2T2 B2T3 B2T4 B2T5 B2T6 B2T7 B2T8 B2T9
221+
//CHECK-SAME: B2T0 B2T1 B2T2 B2T3 B2T4 B2T5 B2T6 B2T7 B2T8 B2T9
222+
//CHECK-SAME: B2T0 B2T1 B2T2 B2T3 B2T4 B2T5 B2T6 B2T7 B2T8
223+
//CHECK-SAME: B3T0 B3T1 B3T2 B3T3 B3T4 B3T5 B3T6 B3T7 B3T8 B3T9
224+
//CHECK-SAME: B3T0 B3T1 B3T2
225+
226+
//CHECK: omp target teams distribute parallel for
227+
//CHECK-SAME: block chunk size 101 thread chunk size default
228+
229+
//CHECK-NEXT: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
230+
//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
231+
//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
232+
//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
233+
//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
234+
//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
235+
//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
236+
//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
237+
//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
238+
//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9
239+
240+
//CHECK: omp target teams distribute parallel for
241+
//CHECK-SAME: default block chunk size thread chunk size 101
242+
243+
//CHECK-NEXT: B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0
244+
//CHECK-SAME: B1T0 B1T0 B1T0 B1T0 B1T0 B1T0 B1T0 B1T0 B1T0 B1T0
245+
//CHECK-SAME: B2T0 B2T0 B2T0 B2T0 B2T0 B2T0 B2T0 B2T0 B2T0 B2T0
246+
//CHECK-SAME: B3T0 B3T0 B3T0 B3T0 B3T0 B3T0 B3T0 B3T0 B3T0 B3T0
247+
//CHECK-SAME: B4T0 B4T0 B4T0 B4T0 B4T0 B4T0 B4T0 B4T0 B4T0 B4T0
248+
//CHECK-SAME: B5T0 B5T0 B5T0 B5T0 B5T0 B5T0 B5T0 B5T0 B5T0 B5T0
249+
//CHECK-SAME: B6T0 B6T0 B6T0 B6T0 B6T0 B6T0 B6T0 B6T0 B6T0 B6T0
250+
//CHECK-SAME: B7T0 B7T0 B7T0 B7T0 B7T0 B7T0 B7T0 B7T0 B7T0 B7T0
251+
//CHECK-SAME: B8T0 B8T0 B8T0 B8T0 B8T0 B8T0 B8T0 B8T0 B8T0 B8T0
252+
//CHECK-SAME: B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0

0 commit comments

Comments
 (0)