83
83
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v16i16" Export
84
84
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v16i32" Export
85
85
86
+ #include " common.hpp"
87
+ #include < iostream>
86
88
#include < string.h>
87
89
#include < sycl/sycl.hpp>
88
- #include < iostream>
89
- #include " common.hpp"
90
90
91
91
using namespace sycl ;
92
92
93
93
/// Reference bit reversal, computed one bit at a time.
///
/// Consumes the lowest `bitlength` bits of `a`, starting from the least
/// significant one, and pushes each into the bottom of the result, so the first
/// bit read ends up in the highest of the `bitlength` result positions.
///
/// The function is marked optnone and noinline.
///
/// \param a         Value whose bits are reversed (taken by value and consumed).
/// \param bitlength Number of low bits to process; values <= 0 yield 0.
/// \returns The reversed value.
template <typename TYPE>
__attribute__((optnone, noinline)) TYPE reference_reverse(TYPE a,
                                                          const int bitlength) {
  TYPE reversed = 0;
  for (int remaining = bitlength; remaining > 0; --remaining) {
    // Open a slot at the bottom of the result and copy in the input's current
    // lowest bit.
    reversed <<= 1;
    reversed |= a & 0x1;
    // Discard the bit that was just consumed.
    a >>= 1;
  }
  return reversed;
}
103
104
104
105
template <typename TYPE>
105
106
__attribute__ ((noinline)) TYPE reverse(TYPE a, int bitlength) {
106
- if (bitlength== 8 ) {
107
+ if (bitlength == 8 ) {
107
108
// Avoid bug with __builtin_elementwise_bitreverse(a) on scalar 8-bit types.
108
109
a = ((0x55 & a) << 1 ) | (0x55 & (a >> 1 ));
109
110
a = ((0x33 & a) << 2 ) | (0x33 & (a >> 2 ));
110
111
return (a << 4 ) | (a >> 4 );
111
- } else if (bitlength==16 ) {
112
- // Avoid bug with __builtin_elementwise_bitreverse(a) on scalar 16-bit types.
112
+ } else if (bitlength == 16 ) {
113
+ // Avoid bug with __builtin_elementwise_bitreverse(a) on scalar 16-bit
114
+ // types.
113
115
a = ((0x5555 & a) << 1 ) | (0x5555 & (a >> 1 ));
114
116
a = ((0x3333 & a) << 2 ) | (0x3333 & (a >> 2 ));
115
117
a = ((0x0F0F & a) << 4 ) | (0x0F0F & (a >> 4 ));
@@ -122,83 +124,89 @@ template <class T> class BitreverseTest;
122
124
123
125
#define NUM_TESTS 1024
124
126
125
- template <typename TYPE>
126
- void do_scalar_bitreverse_test () {
127
+ template <typename TYPE> void do_scalar_bitreverse_test () {
127
128
queue q;
128
129
129
- TYPE *Input = (TYPE *) malloc_shared (sizeof (TYPE) * NUM_TESTS, q.get_device (), q.get_context ());
130
- TYPE *Output = (TYPE *) malloc_shared (sizeof (TYPE) * NUM_TESTS, q.get_device (), q.get_context ());
130
+ TYPE *Input = (TYPE *)malloc_shared (sizeof (TYPE) * NUM_TESTS, q.get_device (),
131
+ q.get_context ());
132
+ TYPE *Output = (TYPE *)malloc_shared (sizeof (TYPE) * NUM_TESTS, q.get_device (),
133
+ q.get_context ());
131
134
132
- for (unsigned i= 0 ; i< NUM_TESTS; i++)
135
+ for (unsigned i = 0 ; i < NUM_TESTS; i++)
133
136
Input[i] = get_rand<TYPE>();
134
137
q.submit ([=](handler &cgh) {
135
- cgh.single_task <BitreverseTest<TYPE>> ([=]() {
136
- for (unsigned i= 0 ; i< NUM_TESTS; i++)
137
- Output[i] = reverse (Input[i],sizeof (TYPE)* 8 );
138
+ cgh.single_task <BitreverseTest<TYPE>>([=]() {
139
+ for (unsigned i = 0 ; i < NUM_TESTS; i++)
140
+ Output[i] = reverse (Input[i], sizeof (TYPE) * 8 );
138
141
});
139
142
});
140
143
q.wait ();
141
- for (unsigned i=0 ; i<NUM_TESTS; i++)
142
- if (Output[i]!=reference_reverse (Input[i],sizeof (TYPE)*8 )) {
143
- std::cerr << " Failed for scalar " << std::hex << Input[i] << " sizeof=" << sizeof (TYPE) << " \n " ;
144
+ for (unsigned i = 0 ; i < NUM_TESTS; i++)
145
+ if (Output[i] != reference_reverse (Input[i], sizeof (TYPE) * 8 )) {
146
+ std::cerr << " Failed for scalar " << std::hex << Input[i]
147
+ << " sizeof=" << sizeof (TYPE) << " \n " ;
144
148
exit (-1 );
145
149
}
146
150
147
151
free (Input, q.get_context ());
148
152
free (Output, q.get_context ());
149
153
}
150
154
151
- template <typename VTYPE>
152
- void do_vector_bitreverse_test () {
155
+ template <typename VTYPE> void do_vector_bitreverse_test () {
153
156
queue q;
154
157
155
- VTYPE *Input = (VTYPE *) malloc_shared (sizeof (VTYPE) * NUM_TESTS, q.get_device (), q.get_context ());
156
- VTYPE *Output = (VTYPE *) malloc_shared (sizeof (VTYPE) * NUM_TESTS, q.get_device (), q.get_context ());
158
+ VTYPE *Input = (VTYPE *)malloc_shared (sizeof (VTYPE) * NUM_TESTS,
159
+ q.get_device (), q.get_context ());
160
+ VTYPE *Output = (VTYPE *)malloc_shared (sizeof (VTYPE) * NUM_TESTS,
161
+ q.get_device (), q.get_context ());
157
162
158
- for (unsigned i=0 ; i<NUM_TESTS; i++)
159
- for (unsigned j=0 ; j<__builtin_vectorelements (VTYPE); j++)
160
- Input[i][j] = get_rand<typename std::decay<decltype (Input[0 ][0 ])>::type>();
163
+ for (unsigned i = 0 ; i < NUM_TESTS; i++)
164
+ for (unsigned j = 0 ; j < __builtin_vectorelements (VTYPE); j++)
165
+ Input[i][j] =
166
+ get_rand<typename std::decay<decltype (Input[0 ][0 ])>::type>();
161
167
162
168
q.submit ([=](handler &cgh) {
163
- cgh.single_task <BitreverseTest<VTYPE>> ([=]() {
164
- for (unsigned i= 0 ; i< NUM_TESTS; i++)
165
- Output[i] = reverse (Input[i],sizeof (Input[0 ][0 ])* 8 );
169
+ cgh.single_task <BitreverseTest<VTYPE>>([=]() {
170
+ for (unsigned i = 0 ; i < NUM_TESTS; i++)
171
+ Output[i] = reverse (Input[i], sizeof (Input[0 ][0 ]) * 8 );
166
172
});
167
173
});
168
174
q.wait ();
169
- for (unsigned i=0 ; i<NUM_TESTS; i++) {
170
- auto Reference=reference_reverse (Input[i],sizeof (Input[0 ][0 ])*8 );
171
- for (unsigned j=0 ; j<__builtin_vectorelements (VTYPE); j++)
172
- if (Output[i][j]!=Reference[j]) {
173
- std::cerr << " Failed for vector " << std::hex << Input[i][j] << " sizeof=" << sizeof (Input[0 ][0 ]) << " elements=" << __builtin_vectorelements (VTYPE) << " \n " ;
175
+ for (unsigned i = 0 ; i < NUM_TESTS; i++) {
176
+ auto Reference = reference_reverse (Input[i], sizeof (Input[0 ][0 ]) * 8 );
177
+ for (unsigned j = 0 ; j < __builtin_vectorelements (VTYPE); j++)
178
+ if (Output[i][j] != Reference[j]) {
179
+ std::cerr << " Failed for vector " << std::hex << Input[i][j]
180
+ << " sizeof=" << sizeof (Input[0 ][0 ])
181
+ << " elements=" << __builtin_vectorelements (VTYPE) << " \n " ;
174
182
exit (-1 );
175
183
}
176
184
}
177
185
free (Input, q.get_context ());
178
186
free (Output, q.get_context ());
179
187
}
180
188
181
// Vector aliases built with the ext_vector_type attribute: one alias per
// unsigned element width (8/16/32/64 bits) and element count (2/3/4/8/16).

// Two elements.
using uint8_t2 = uint8_t __attribute__((ext_vector_type(2)));
using uint16_t2 = uint16_t __attribute__((ext_vector_type(2)));
using uint32_t2 = uint32_t __attribute__((ext_vector_type(2)));
using uint64_t2 = uint64_t __attribute__((ext_vector_type(2)));

// Three elements.
using uint8_t3 = uint8_t __attribute__((ext_vector_type(3)));
using uint16_t3 = uint16_t __attribute__((ext_vector_type(3)));
using uint32_t3 = uint32_t __attribute__((ext_vector_type(3)));
using uint64_t3 = uint64_t __attribute__((ext_vector_type(3)));

// Four elements.
using uint8_t4 = uint8_t __attribute__((ext_vector_type(4)));
using uint16_t4 = uint16_t __attribute__((ext_vector_type(4)));
using uint32_t4 = uint32_t __attribute__((ext_vector_type(4)));
using uint64_t4 = uint64_t __attribute__((ext_vector_type(4)));

// Eight elements.
using uint8_t8 = uint8_t __attribute__((ext_vector_type(8)));
using uint16_t8 = uint16_t __attribute__((ext_vector_type(8)));
using uint32_t8 = uint32_t __attribute__((ext_vector_type(8)));
using uint64_t8 = uint64_t __attribute__((ext_vector_type(8)));

// Sixteen elements.
using uint8_t16 = uint8_t __attribute__((ext_vector_type(16)));
using uint16_t16 = uint16_t __attribute__((ext_vector_type(16)));
using uint32_t16 = uint32_t __attribute__((ext_vector_type(16)));
using uint64_t16 = uint64_t __attribute__((ext_vector_type(16)));
@@ -218,7 +226,7 @@ int main() {
218
226
do_vector_bitreverse_test<uint8_t3>();
219
227
do_vector_bitreverse_test<uint16_t3>();
220
228
do_vector_bitreverse_test<uint32_t3>();
221
-
229
+
222
230
do_vector_bitreverse_test<uint8_t4>();
223
231
do_vector_bitreverse_test<uint16_t4>();
224
232
do_vector_bitreverse_test<uint32_t4>();
@@ -233,4 +241,3 @@ int main() {
233
241
234
242
return 0 ;
235
243
}
236
-
0 commit comments