|
| 1 | +// RUN: %libomptarget-compile-run-and-check-generic |
| 2 | +// RUN: %libomptarget-compileopt-run-and-check-generic |
| 3 | + |
| 4 | +// UNSUPPORTED: aarch64-unknown-linux-gnu |
| 5 | +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO |
| 6 | +// UNSUPPORTED: x86_64-pc-linux-gnu |
| 7 | +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO |
| 8 | + |
| 9 | +// clang-format off |
| 10 | + |
| 11 | +#include <omp.h> |
| 12 | +#include <stdio.h> |
| 13 | + |
| 14 | +#define N 100 |
| 15 | +#define BLOCK_SHIFT 8 |
| 16 | + |
| 17 | +void print(int *A, int size) { |
| 18 | + for (int i = 0; i < size; ++i) { |
| 19 | + printf("B%dT%d ", A[i] >> BLOCK_SHIFT, A[i] % (1 << BLOCK_SHIFT)); |
| 20 | + } |
| 21 | + printf("\n"); |
| 22 | +} |
| 23 | + |
| 24 | +int main() { |
| 25 | + int A[N]; |
| 26 | + |
| 27 | +#pragma omp target parallel for map(from:A) num_threads(10) schedule(static, 2) |
| 28 | + for (int i = 0; i < N; ++i) { |
| 29 | + A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num(); |
| 30 | + } |
| 31 | + printf("omp target parallel for thread chunk size %d\n", 2); |
| 32 | + print(A, N); |
| 33 | + |
| 34 | +#pragma omp target teams distribute map(from:A) num_teams(10) \ |
| 35 | + dist_schedule(static, 2) |
| 36 | + for (int i = 0; i < N; ++i) { |
| 37 | + A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num(); |
| 38 | + } |
| 39 | + printf("omp target teams distribute block chunk size %d\n", 2); |
| 40 | + print(A, N); |
| 41 | + |
| 42 | +#pragma omp target teams distribute parallel for map(from:A) \ |
| 43 | + num_teams(10) dist_schedule(static, 2) |
| 44 | + for (int i = 0; i < N; ++i) { |
| 45 | + A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num(); |
| 46 | + } |
| 47 | + printf("omp target teams distribute parallel for block chunk size %d ", 2); |
| 48 | + printf("thread chunk size default\n"); |
| 49 | + print(A, N); |
| 50 | + |
| 51 | +#pragma omp target teams distribute parallel for map(from:A) \ |
| 52 | + num_teams(10) dist_schedule(static, 2) schedule(static, 3) |
| 53 | + for (int i = 0; i < N; ++i) { |
| 54 | + A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num(); |
| 55 | + } |
| 56 | + printf("omp target teams distribute parallel for block chunk size %d ", 2); |
| 57 | + printf("thread chunk size %d\n", 3); |
| 58 | + print(A, N); |
| 59 | + |
| 60 | +#pragma omp target teams distribute parallel for map(from:A) \ |
| 61 | + num_teams(10) dist_schedule(static, 3) schedule(static, 2) |
| 62 | + for (int i = 0; i < N; ++i) { |
| 63 | + A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num(); |
| 64 | + } |
| 65 | + printf("omp target teams distribute parallel for block chunk size %d ", 3); |
| 66 | + printf("thread chunk size %d\n", 2); |
| 67 | + print(A, N); |
| 68 | + |
| 69 | +#pragma omp target teams distribute parallel for map(from:A) \ |
| 70 | + num_teams(10) dist_schedule(static, 5) schedule(static, 2) |
| 71 | + for (int i = 0; i < N; ++i) { |
| 72 | + A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num(); |
| 73 | + } |
| 74 | + printf("omp target teams distribute parallel for block chunk size %d ", 5); |
| 75 | + printf("thread chunk size %d\n", 2); |
| 76 | + print(A, N); |
| 77 | + |
| 78 | +#pragma omp target teams distribute parallel for map(from:A) num_teams(10) \ |
| 79 | + dist_schedule(static, 49) schedule(static, 2) |
| 80 | + for (int i = 0; i < N; ++i) { |
| 81 | + A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num(); |
| 82 | + } |
| 83 | + printf("omp target teams distribute parallel for block chunk size %d ", 49); |
| 84 | + printf("thread chunk size %d\n", 2); |
| 85 | + print(A, N); |
| 86 | + |
| 87 | +#pragma omp target teams distribute parallel for map(from:A) \ |
| 88 | + num_teams(10) num_threads(10) dist_schedule(static, 29) |
| 89 | + for (int i = 0; i < N; ++i) { |
| 90 | + A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num(); |
| 91 | + } |
| 92 | + printf("omp target teams distribute parallel for block chunk size %d ", 29); |
| 93 | + printf("thread chunk size default\n"); |
| 94 | + print(A, N); |
| 95 | + |
| 96 | +#pragma omp target teams distribute parallel for map(from:A) \ |
| 97 | + num_teams(10) num_threads(10) dist_schedule(static, 101) |
| 98 | + for (int i = 0; i < N; ++i) { |
| 99 | + A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num(); |
| 100 | + } |
| 101 | + printf("omp target teams distribute parallel for block chunk size %d ", 101); |
| 102 | + printf("thread chunk size default\n"); |
| 103 | + print(A, N); |
| 104 | + |
| 105 | +#pragma omp target teams distribute parallel for map(from:A) \ |
| 106 | + num_teams(9) num_threads(10) schedule(static, 101) |
| 107 | + for (int i = 0; i < N; ++i) { |
| 108 | + A[i] = (omp_get_team_num() << BLOCK_SHIFT) + omp_get_thread_num(); |
| 109 | + } |
| 110 | + printf("omp target teams distribute parallel for default block chunk size "); |
| 111 | + printf("thread chunk size %d\n", 101); |
| 112 | + print(A, N); |
| 113 | + return 0; |
| 114 | +} |
| 115 | +//CHECK: omp target parallel for thread chunk size 2 |
| 116 | + |
| 117 | +//CHECK-NEXT: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4 |
| 118 | +//CHECK-SAME: B0T5 B0T5 B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9 |
| 119 | +//CHECK-SAME: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4 |
| 120 | +//CHECK-SAME: B0T5 B0T5 B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9 |
| 121 | +//CHECK-SAME: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4 |
| 122 | +//CHECK-SAME: B0T5 B0T5 B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9 |
| 123 | +//CHECK-SAME: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4 |
| 124 | +//CHECK-SAME: B0T5 B0T5 B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9 |
| 125 | +//CHECK-SAME: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4 |
| 126 | +//CHECK-SAME: B0T5 B0T5 B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9 |
| 127 | + |
| 128 | +//CHECK: omp target teams distribute block chunk size 2 |
| 129 | + |
| 130 | +//CHECK-NEXT: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0 |
| 131 | +//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0 |
| 132 | +//CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0 |
| 133 | +//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0 |
| 134 | +//CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0 |
| 135 | +//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0 |
| 136 | +//CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0 |
| 137 | +//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0 |
| 138 | +//CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0 |
| 139 | +//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0 |
| 140 | + |
| 141 | +//CHECK: omp target teams distribute parallel for |
| 142 | +//CHECK-SAME: block chunk size 2 thread chunk size default |
| 143 | + |
| 144 | +//CHECK-NEXT: B0T0 B0T1 B1T0 B1T1 B2T0 B2T1 B3T0 B3T1 B4T0 B4T1 |
| 145 | +//CHECK-SAME: B5T0 B5T1 B6T0 B6T1 B7T0 B7T1 B8T0 B8T1 B9T0 B9T1 |
| 146 | +//CHECK-SAME: B0T0 B0T1 B1T0 B1T1 B2T0 B2T1 B3T0 B3T1 B4T0 B4T1 |
| 147 | +//CHECK-SAME: B5T0 B5T1 B6T0 B6T1 B7T0 B7T1 B8T0 B8T1 B9T0 B9T1 |
| 148 | +//CHECK-SAME: B0T0 B0T1 B1T0 B1T1 B2T0 B2T1 B3T0 B3T1 B4T0 B4T1 |
| 149 | +//CHECK-SAME: B5T0 B5T1 B6T0 B6T1 B7T0 B7T1 B8T0 B8T1 B9T0 B9T1 |
| 150 | + |
| 151 | +//CHECK: omp target teams distribute parallel for |
| 152 | +//CHECK-SAME: block chunk size 2 thread chunk size 3 |
| 153 | + |
| 154 | +//CHECK-NEXT: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0 |
| 155 | +//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0 |
| 156 | +//CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0 |
| 157 | +//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0 |
| 158 | +//CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0 |
| 159 | +//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0 |
| 160 | +//CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0 |
| 161 | +//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0 |
| 162 | +//CHECK-SAME: B0T0 B0T0 B1T0 B1T0 B2T0 B2T0 B3T0 B3T0 B4T0 B4T0 |
| 163 | +//CHECK-SAME: B5T0 B5T0 B6T0 B6T0 B7T0 B7T0 B8T0 B8T0 B9T0 B9T0 |
| 164 | + |
| 165 | +//CHECK: omp target teams distribute parallel for |
| 166 | +//CHECK-SAME: block chunk size 3 thread chunk size 2 |
| 167 | + |
| 168 | +//CHECK-NEXT: B0T0 B0T0 B0T1 B1T0 B1T0 B1T1 B2T0 B2T0 B2T1 |
| 169 | +//CHECK-SAME: B3T0 B3T0 B3T1 B4T0 B4T0 B4T1 |
| 170 | +//CHECK-SAME: B5T0 B5T0 B5T1 B6T0 B6T0 B6T1 B7T0 B7T0 B7T1 |
| 171 | +//CHECK-SAME: B8T0 B8T0 B8T1 B9T0 B9T0 B9T1 |
| 172 | +//CHECK-SAME: B0T0 B0T0 B0T1 B1T0 B1T0 B1T1 B2T0 B2T0 B2T1 |
| 173 | +//CHECK-SAME: B3T0 B3T0 B3T1 B4T0 B4T0 B4T1 |
| 174 | +//CHECK-SAME: B5T0 B5T0 B5T1 B6T0 B6T0 B6T1 B7T0 B7T0 B7T1 |
| 175 | +//CHECK-SAME: B8T0 B8T0 B8T1 B9T0 B9T0 B9T1 |
| 176 | +//CHECK-SAME: B0T0 B0T0 B0T1 B1T0 B1T0 B1T1 B2T0 B2T0 B2T1 |
| 177 | +//CHECK-SAME: B3T0 B3T0 B3T1 B4T0 B4T0 B4T1 |
| 178 | +//CHECK-SAME: B5T0 B5T0 B5T1 B6T0 B6T0 B6T1 B7T0 B7T0 B7T1 |
| 179 | +//CHECK-SAME: B8T0 B8T0 B8T1 B9T0 B9T0 B9T1 |
| 180 | +//CHECK-SAME: B0T0 B0T0 B0T1 B1T0 B1T0 B1T1 B2T0 B2T0 B2T1 B3T0 |
| 181 | + |
| 182 | +//CHECK: omp target teams distribute parallel for |
| 183 | +//CHECK-SAME: block chunk size 5 thread chunk size 2 |
| 184 | + |
| 185 | +//CHECK-NEXT: B0T0 B0T0 B0T1 B0T1 B0T2 B1T0 B1T0 B1T1 B1T1 B1T2 |
| 186 | +//CHECK-SAME: B2T0 B2T0 B2T1 B2T1 B2T2 B3T0 B3T0 B3T1 B3T1 B3T2 |
| 187 | +//CHECK-SAME: B4T0 B4T0 B4T1 B4T1 B4T2 B5T0 B5T0 B5T1 B5T1 B5T2 |
| 188 | +//CHECK-SAME: B6T0 B6T0 B6T1 B6T1 B6T2 B7T0 B7T0 B7T1 B7T1 B7T2 |
| 189 | +//CHECK-SAME: B8T0 B8T0 B8T1 B8T1 B8T2 B9T0 B9T0 B9T1 B9T1 B9T2 |
| 190 | +//CHECK-SAME: B0T0 B0T0 B0T1 B0T1 B0T2 B1T0 B1T0 B1T1 B1T1 B1T2 |
| 191 | +//CHECK-SAME: B2T0 B2T0 B2T1 B2T1 B2T2 B3T0 B3T0 B3T1 B3T1 B3T2 |
| 192 | +//CHECK-SAME: B4T0 B4T0 B4T1 B4T1 B4T2 B5T0 B5T0 B5T1 B5T1 B5T2 |
| 193 | +//CHECK-SAME: B6T0 B6T0 B6T1 B6T1 B6T2 B7T0 B7T0 B7T1 B7T1 B7T2 |
| 194 | +//CHECK-SAME: B8T0 B8T0 B8T1 B8T1 B8T2 B9T0 B9T0 B9T1 B9T1 B9T2 |
| 195 | + |
| 196 | +//CHECK: omp target teams distribute parallel for |
| 197 | +//CHECK-SAME: block chunk size 49 thread chunk size 2 |
| 198 | + |
| 199 | +//CHECK-NEXT: B0T0 B0T0 B0T1 B0T1 B0T2 B0T2 B0T3 B0T3 B0T4 B0T4 B0T5 B0T5 |
| 200 | +//CHECK-SAME: B0T6 B0T6 B0T7 B0T7 B0T8 B0T8 B0T9 B0T9 B0T10 B0T10 B0T11 B0T11 |
| 201 | +//CHECK-SAME: B0T12 B0T12 B0T13 B0T13 B0T14 B0T14 B0T15 B0T15 B0T16 B0T16 |
| 202 | +//CHECK-SAME: B0T17 B0T17 B0T18 B0T18 B0T19 B0T19 B0T20 B0T20 B0T21 B0T21 |
| 203 | +//CHECK-SAME: B0T22 B0T22 B0T23 B0T23 B0T24 |
| 204 | +//CHECK-SAME: B1T0 B1T0 B1T1 B1T1 B1T2 B1T2 B1T3 B1T3 B1T4 B1T4 B1T5 B1T5 |
| 205 | +//CHECK-SAME: B1T6 B1T6 B1T7 B1T7 B1T8 B1T8 B1T9 B1T9 B1T10 B1T10 B1T11 B1T11 |
| 206 | +//CHECK-SAME: B1T12 B1T12 B1T13 B1T13 B1T14 B1T14 B1T15 B1T15 B1T16 B1T16 |
| 207 | +//CHECK-SAME: B1T17 B1T17 B1T18 B1T18 B1T19 B1T19 B1T20 B1T20 B1T21 B1T21 |
| 208 | +//CHECK-SAME: B1T22 B1T22 B1T23 B1T23 B1T24 |
| 209 | +//CHECK-SAME: B2T0 B2T0 |
| 210 | + |
| 211 | +//CHECK: omp target teams distribute parallel for |
| 212 | +//CHECK-SAME: block chunk size 29 thread chunk size default |
| 213 | + |
| 214 | +//CHECK-NEXT: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9 |
| 215 | +//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9 |
| 216 | +//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 |
| 217 | +//CHECK-SAME: B1T0 B1T1 B1T2 B1T3 B1T4 B1T5 B1T6 B1T7 B1T8 B1T9 |
| 218 | +//CHECK-SAME: B1T0 B1T1 B1T2 B1T3 B1T4 B1T5 B1T6 B1T7 B1T8 B1T9 |
| 219 | +//CHECK-SAME: B1T0 B1T1 B1T2 B1T3 B1T4 B1T5 B1T6 B1T7 B1T8 |
| 220 | +//CHECK-SAME: B2T0 B2T1 B2T2 B2T3 B2T4 B2T5 B2T6 B2T7 B2T8 B2T9 |
| 221 | +//CHECK-SAME: B2T0 B2T1 B2T2 B2T3 B2T4 B2T5 B2T6 B2T7 B2T8 B2T9 |
| 222 | +//CHECK-SAME: B2T0 B2T1 B2T2 B2T3 B2T4 B2T5 B2T6 B2T7 B2T8 |
| 223 | +//CHECK-SAME: B3T0 B3T1 B3T2 B3T3 B3T4 B3T5 B3T6 B3T7 B3T8 B3T9 |
| 224 | +//CHECK-SAME: B3T0 B3T1 B3T2 |
| 225 | + |
| 226 | +//CHECK: omp target teams distribute parallel for |
| 227 | +//CHECK-SAME: block chunk size 101 thread chunk size default |
| 228 | + |
| 229 | +//CHECK-NEXT: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9 |
| 230 | +//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9 |
| 231 | +//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9 |
| 232 | +//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9 |
| 233 | +//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9 |
| 234 | +//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9 |
| 235 | +//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9 |
| 236 | +//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9 |
| 237 | +//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9 |
| 238 | +//CHECK-SAME: B0T0 B0T1 B0T2 B0T3 B0T4 B0T5 B0T6 B0T7 B0T8 B0T9 |
| 239 | + |
| 240 | +//CHECK: omp target teams distribute parallel for |
| 241 | +//CHECK-SAME: default block chunk size thread chunk size 101 |
| 242 | + |
| 243 | +//CHECK-NEXT: B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 |
| 244 | +//CHECK-SAME: B1T0 B1T0 B1T0 B1T0 B1T0 B1T0 B1T0 B1T0 B1T0 B1T0 |
| 245 | +//CHECK-SAME: B2T0 B2T0 B2T0 B2T0 B2T0 B2T0 B2T0 B2T0 B2T0 B2T0 |
| 246 | +//CHECK-SAME: B3T0 B3T0 B3T0 B3T0 B3T0 B3T0 B3T0 B3T0 B3T0 B3T0 |
| 247 | +//CHECK-SAME: B4T0 B4T0 B4T0 B4T0 B4T0 B4T0 B4T0 B4T0 B4T0 B4T0 |
| 248 | +//CHECK-SAME: B5T0 B5T0 B5T0 B5T0 B5T0 B5T0 B5T0 B5T0 B5T0 B5T0 |
| 249 | +//CHECK-SAME: B6T0 B6T0 B6T0 B6T0 B6T0 B6T0 B6T0 B6T0 B6T0 B6T0 |
| 250 | +//CHECK-SAME: B7T0 B7T0 B7T0 B7T0 B7T0 B7T0 B7T0 B7T0 B7T0 B7T0 |
| 251 | +//CHECK-SAME: B8T0 B8T0 B8T0 B8T0 B8T0 B8T0 B8T0 B8T0 B8T0 B8T0 |
| 252 | +//CHECK-SAME: B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 B0T0 |
0 commit comments