@@ -17,23 +17,15 @@ using namespace cl::sycl;
17
17
struct wa_half ;
18
18
19
19
template <typename T, int N>
20
- void check (queue &Queue, size_t G = 240 , size_t L = 60 ) {
20
+ void check (queue &Queue, size_t G = 256 , size_t L = 64 ) {
21
21
try {
22
22
nd_range<1 > NdRange (G, L);
23
- buffer<vec<T, N>> buf2 (G);
24
- buffer<vec<T, N>> buf2_up (G);
25
- buffer<vec<T, N>> buf2_down (G);
26
23
buffer<vec<T, N>> buf (G);
27
24
buffer<vec<T, N>> buf_up (G);
28
25
buffer<vec<T, N>> buf_down (G);
29
26
buffer<vec<T, N>> buf_xor (G);
30
27
buffer<size_t > sgsizebuf (1 );
31
28
Queue.submit ([&](handler &cgh) {
32
- auto acc2 = buf2.template get_access <access::mode::read_write>(cgh);
33
- auto acc2_up = buf2_up.template get_access <access::mode::read_write>(cgh);
34
- auto acc2_down =
35
- buf2_down.template get_access <access::mode::read_write>(cgh);
36
-
37
29
auto acc = buf.template get_access <access::mode::read_write>(cgh);
38
30
auto acc_up = buf_up.template get_access <access::mode::read_write>(cgh);
39
31
auto acc_down =
@@ -48,15 +40,6 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
48
40
vec<T, N> vwggid (wggid), vsgid (sgid);
49
41
if (wggid == 0 )
50
42
sgsizeacc[0 ] = SG.get_max_local_range ()[0 ];
51
- /* 1 for odd subgroups and 2 for even*/
52
- acc2[NdItem.get_global_id ()] =
53
- SG.shuffle (vec<T, N>(1 ), vec<T, N>(2 ),
54
- (sgid % 2 ) ? 1 : SG.get_max_local_range ()[0 ]);
55
- /* GID-SGID */
56
- acc2_up[NdItem.get_global_id ()] = SG.shuffle_up (vwggid, vwggid, sgid);
57
- /* GID-SGID or SGLID if GID+SGID > SGsize*/
58
- acc2_down[NdItem.get_global_id ()] =
59
- SG.shuffle_down (vwggid, vec<T, N>(SG.get_local_id ().get (0 )), sgid);
60
43
61
44
/* GID of middle element in every subgroup*/
62
45
acc[NdItem.get_global_id ()] =
@@ -73,9 +56,6 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
73
56
auto acc = buf.template get_access <access::mode::read_write>();
74
57
auto acc_up = buf_up.template get_access <access::mode::read_write>();
75
58
auto acc_down = buf_down.template get_access <access::mode::read_write>();
76
- auto acc2 = buf2.template get_access <access::mode::read_write>();
77
- auto acc2_up = buf2_up.template get_access <access::mode::read_write>();
78
- auto acc2_down = buf2_down.template get_access <access::mode::read_write>();
79
59
auto acc_xor = buf_xor.template get_access <access::mode::read_write>();
80
60
auto sgsizeacc = sgsizebuf.get_access <access::mode::read_write>();
81
61
@@ -98,28 +78,15 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
98
78
exit_if_not_equal_vec<T, N>(
99
79
acc[j], vec<T, N>(j / L * L + SGid * sg_size + sg_size / 2 ),
100
80
" shuffle" );
101
- /* 1 for odd subgroups and 2 for even*/
102
- exit_if_not_equal_vec<T, N>(acc2[j], vec<T, N>((SGid % 2 ) ? 1 : 2 ),
103
- " shuffle2" );
104
81
/* Value is GID+SGID for all elements except the last SGID elements of the SG */
105
82
if (j % L % sg_size + SGid < sg_size && j % L + SGid < L) {
106
- exit_if_not_equal_vec (acc_down[j], vec<T, N>(j + SGid), " shuffle_down" );
107
- exit_if_not_equal_vec (acc2_down[j], vec<T, N>(j + SGid),
108
- " shuffle2_down" );
109
- } else { /* SGLID for GID+SGid */
110
- if (j % L + SGid < L) /* Do not go out LG*/
111
- exit_if_not_equal_vec<T, N>(acc2_down[j],
112
- vec<T, N>((j + SGid) % L % sg_size),
113
- " shuffle2_down" );
83
+ exit_if_not_equal_vec (acc_down[j], vec<T, N>(j + SGid % sg_size),
84
+ " shuffle_down" );
114
85
}
115
86
/* Value is GID-SGID for all elements except the first SGID elements of the SG */
116
87
if (j % L % sg_size >= SGid) {
117
- exit_if_not_equal_vec (acc_up[j], vec<T, N>(j - SGid), " shuffle_up" );
118
- exit_if_not_equal_vec (acc2_up[j], vec<T, N>(j - SGid), " shuffle2_up" );
119
- } else { /* SGLID for GID-SGid */
120
- if (j % L - SGid + sg_size < L) /* Do not go out LG*/
121
- exit_if_not_equal_vec (acc2_up[j], vec<T, N>(j - SGid + sg_size),
122
- " shuffle2_up" );
88
+ exit_if_not_equal_vec (acc_up[j], vec<T, N>(j - SGid % sg_size),
89
+ " shuffle_up" );
123
90
}
124
91
/* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */
125
92
exit_if_not_equal_vec (acc_xor[j],
@@ -133,23 +100,15 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
133
100
}
134
101
}
135
102
136
- template <typename T> void check (queue &Queue, size_t G = 240 , size_t L = 60 ) {
103
+ template <typename T> void check (queue &Queue, size_t G = 256 , size_t L = 64 ) {
137
104
try {
138
105
nd_range<1 > NdRange (G, L);
139
- buffer<T> buf2 (G);
140
- buffer<T> buf2_up (G);
141
- buffer<T> buf2_down (G);
142
106
buffer<T> buf (G);
143
107
buffer<T> buf_up (G);
144
108
buffer<T> buf_down (G);
145
109
buffer<T> buf_xor (G);
146
110
buffer<size_t > sgsizebuf (1 );
147
111
Queue.submit ([&](handler &cgh) {
148
- auto acc2 = buf2.template get_access <access::mode::read_write>(cgh);
149
- auto acc2_up = buf2_up.template get_access <access::mode::read_write>(cgh);
150
- auto acc2_down =
151
- buf2_down.template get_access <access::mode::read_write>(cgh);
152
-
153
112
auto acc = buf.template get_access <access::mode::read_write>(cgh);
154
113
auto acc_up = buf_up.template get_access <access::mode::read_write>(cgh);
155
114
auto acc_down =
@@ -163,14 +122,6 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
163
122
uint32_t sgid = SG.get_group_id ().get (0 );
164
123
if (wggid == 0 )
165
124
sgsizeacc[0 ] = SG.get_max_local_range ()[0 ];
166
- /* 1 for odd subgroups and 2 for even*/
167
- acc2[NdItem.get_global_id ()] =
168
- SG.shuffle <T>(1 , 2 , (sgid % 2 ) ? 1 : SG.get_max_local_range ()[0 ]);
169
- /* GID-SGID */
170
- acc2_up[NdItem.get_global_id ()] = SG.shuffle_up <T>(wggid, wggid, sgid);
171
- /* GID-SGID or SGLID if GID+SGID > SGsize*/
172
- acc2_down[NdItem.get_global_id ()] =
173
- SG.shuffle_down <T>(wggid, SG.get_local_id ().get (0 ), sgid);
174
125
175
126
/* GID of middle element in every subgroup*/
176
127
acc[NdItem.get_global_id ()] =
@@ -187,9 +138,6 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
187
138
auto acc = buf.template get_access <access::mode::read_write>();
188
139
auto acc_up = buf_up.template get_access <access::mode::read_write>();
189
140
auto acc_down = buf_down.template get_access <access::mode::read_write>();
190
- auto acc2 = buf2.template get_access <access::mode::read_write>();
191
- auto acc2_up = buf2_up.template get_access <access::mode::read_write>();
192
- auto acc2_down = buf2_down.template get_access <access::mode::read_write>();
193
141
auto acc_xor = buf_xor.template get_access <access::mode::read_write>();
194
142
auto sgsizeacc = sgsizebuf.get_access <access::mode::read_write>();
195
143
@@ -212,24 +160,13 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
212
160
/* GID of middle element in every subgroup*/
213
161
exit_if_not_equal<T>(acc[j], j / L * L + SGid * sg_size + sg_size / 2 ,
214
162
" shuffle" );
215
- /* 1 for odd subgroups and 2 for even*/
216
- exit_if_not_equal<T>(acc2[j], (SGid % 2 ) ? 1 : 2 , " shuffle2" );
217
163
/* Value is GID+SGID for all elements except the last SGID elements of the SG */
218
164
if (j % L % sg_size + SGid < sg_size && j % L + SGid < L) {
219
165
exit_if_not_equal<T>(acc_down[j], j + SGid, " shuffle_down" );
220
- exit_if_not_equal<T>(acc2_down[j], j + SGid, " shuffle2_down" );
221
- } else { /* SGLID for GID+SGid */
222
- if (j % L + SGid < L) /* Do not go out LG*/
223
- exit_if_not_equal<T>(acc2_down[j], (j + SGid) % L % sg_size,
224
- " shuffle2_down" );
225
166
}
226
167
/* Value is GID-SGID for all elements except the first SGID elements of the SG */
227
168
if (j % L % sg_size >= SGid) {
228
169
exit_if_not_equal<T>(acc_up[j], j - SGid, " shuffle_up" );
229
- exit_if_not_equal<T>(acc2_up[j], j - SGid, " shuffle2_up" );
230
- } else { /* SGLID for GID-SGid */
231
- if (j % L - SGid + sg_size < L) /* Do not go out LG*/
232
- exit_if_not_equal<T>(acc2_up[j], j - SGid + sg_size, " shuffle2_up" );
233
170
}
234
171
/* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */
235
172
exit_if_not_equal<T>(acc_xor[j], SGBeginGid + (SGLid ^ (SGid % sg_size)),
0 commit comments