@@ -7,72 +7,30 @@
 */
 
 #include <algorithm>
+#include <array>
 #include <cmath>
-#include <iostream>
-#include <random>
+#include <type_traits>
+#include <utility>
+#include <vector>
 
 #include <gtest/gtest.h>
 
 #include <executorch/extension/llm/custom_ops/spinquant/fast_hadamard_transform.h>
-#include <executorch/extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_special_unstrided_cpu.h>
-#include <executorch/extension/llm/custom_ops/spinquant/third-party/FFHT/dumb_fht.h>
+#include <executorch/extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_test_impl.h>
 
-namespace {
-void reference_fht_impl(float* buf, int n) {
-  dumb_fht(buf, std::log2<int>(n));
-  const auto root_n = std::sqrt(n);
-  for (int ii = 0; ii < n; ++ii) {
-    buf[ii] /= root_n;
-  }
-}
-
-// Alternate implementation of fast_hadamard_transform_28N to mutation
-// test against. Benchmarking suggests this one is slower, which is
-// why it's in the test and the strided implementation is in the
-// header.
-template <typename T>
-void fast_hadamard_transform_28N_with_transpose(T* vec, int log2_vec_size) {
-  const int vec_size = (1 << log2_vec_size);
-  for (int ii = 0; ii < 28; ++ii) {
-    executorch::fast_hadamard_transform(&vec[ii * vec_size], log2_vec_size);
-  }
-  std::unique_ptr<T[]> transposed = std::make_unique<T[]>(28 * vec_size);
-  for (int ii = 0; ii < 28; ++ii) {
-    for (int jj = 0; jj < vec_size; ++jj) {
-      transposed[jj * 28 + ii] = vec[ii * vec_size + jj];
-    }
-  }
-  for (int ii = 0; ii < vec_size; ++ii) {
-    hadamard_mult_28(&transposed[ii * 28]);
-  }
-  for (int jj = 0; jj < vec_size; ++jj) {
-    for (int ii = 0; ii < 28; ++ii) {
-      vec[ii * vec_size + jj] = transposed[jj * 28 + ii];
-    }
-  }
-}
-
-std::vector<float> randomFloats(int howMany) {
-  std::random_device rd;
-  std::mt19937 gen(rd());
-  std::normal_distribution<float> dist;
-  std::vector<float> data(howMany);
-  for (int ii = 0; ii < data.size(); ++ii) {
-    data[ii] = dist(gen);
-  }
-  return data;
-}
-} // namespace
+using executorch::runtime::testing::fast_hadamard_transform_28N_with_transpose;
+using executorch::runtime::testing::random_floats;
+using executorch::runtime::testing::reference_fht_impl;
 
 TEST(FastHadamardTransformTest, SingleElement) {
   // FHT of a single element is a no-op.
-  float data[1] = {42};
-  executorch::fast_hadamard_transform(data, 0);
+  std::array<float, 1> data = {{42}};
+  executorch::fast_hadamard_transform(data.data(), 0);
   EXPECT_EQ(data[0], 42);
 }
 
 TEST(FastHadamardTransformTest, LargerInput) {
-  std::vector<float> data = randomFloats(4096);
+  std::vector<float> data = random_floats(4096);
 
   auto expected = data;
   reference_fht_impl(expected.data(), expected.size());
@@ -86,7 +44,7 @@ TEST(FastHadamardTransformTest, LargerInput) {
 }
 
 TEST(FastHadamardTransform28NTest, Basic) {
-  std::vector<float> data = randomFloats(1024 * 28);
+  std::vector<float> data = random_floats(1024 * 28);
 
   auto expected = data;
   fast_hadamard_transform_28N_with_transpose(expected.data(), 10);
@@ -150,7 +108,7 @@ std::vector<float> dequantize(const std::vector<T>& data, float scale) {
 #define EXPECT_CLOSE(a, b) EXPECT_CLOSE_IMPL(a, b, 2e-4, 1e-4)
 
 void testQuantizedFastHadamardTransform(int logN) {
-  std::vector<float> data = randomFloats(1 << logN);
+  std::vector<float> data = random_floats(1 << logN);
 
   auto [qdata, scale] = quantize<int16_t>(data);
 
@@ -179,7 +137,7 @@ TEST(QuantizedFastHadamardTransformTest, OddLogN) {
 }
 
 TEST(QuantizedFastHadamardTransform28NTest, Basic) {
-  std::vector<float> data = randomFloats(1024 * 28);
+  std::vector<float> data = random_floats(1024 * 28);
 
   auto [qdata, scale] = quantize<int16_t>(data);
 
@@ -192,8 +150,6 @@ TEST(QuantizedFastHadamardTransform28NTest, Basic) {
       actual.data(), 10);
 
   for (int ii = 0; ii < expected.size(); ++ii) {
-    std::cerr << "element " << ii << ": actual: " << actual[ii]
-              << ", expected: " << expected[ii] << std::endl;
     EXPECT_CLOSE(
         dequantize(actual[ii], scale), dequantize(expected[ii], scale));
   }
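
Note on the change: the three `using` declarations replace this file's former anonymous-namespace helpers with versions shared from a new header, fast_hadamard_transform_test_impl.h, under executorch::runtime::testing. That header's contents are not part of this diff; the sketch below is only an assumed interface, inferred from the removed definitions and the call sites in the tests, not the actual header.

// Hypothetical sketch of fast_hadamard_transform_test_impl.h (assumed, not
// shown in this diff). Names and namespace come from the using declarations;
// signatures mirror the helper definitions removed above.
#pragma once

#include <vector>

namespace executorch::runtime::testing {

// Reference (slow) forward Hadamard transform with 1/sqrt(N) normalization,
// used as ground truth for the fast implementation.
void reference_fht_impl(float* buf, int n);

// Standard-normal random test data of the requested length.
std::vector<float> random_floats(int howMany);

// Alternate, transpose-based implementation of fast_hadamard_transform_28N,
// kept for mutation-testing the strided production version.
template <typename T>
void fast_hadamard_transform_28N_with_transpose(T* vec, int log2_vec_size);

} // namespace executorch::runtime::testing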