|
| 1 | +// We're testing x86 target specific features |
| 2 | +//@only-target: x86_64 i686 |
| 3 | +//@compile-flags: -C target-feature=+vpclmulqdq,+avx512f |
| 4 | + |
| 5 | +// The constants in the tests below are just bit patterns. They should not |
| 6 | +// be interpreted as integers; signedness does not make sense for them, but |
| 7 | +// __mXXXi happens to be defined in terms of signed integers. |
| 8 | +#![allow(overflowing_literals)] |
| 9 | +#![feature(avx512_target_feature)] |
| 10 | +#![feature(stdarch_x86_avx512)] |
| 11 | + |
| 12 | +#[cfg(target_arch = "x86")] |
| 13 | +use std::arch::x86::*; |
| 14 | +#[cfg(target_arch = "x86_64")] |
| 15 | +use std::arch::x86_64::*; |
| 16 | +use std::mem::transmute; |
| 17 | + |
| 18 | +fn main() { |
| 19 | + // Mostly copied from library/stdarch/crates/core_arch/src/x86/vpclmulqdq.rs |
| 20 | + |
| 21 | + assert!(is_x86_feature_detected!("pclmulqdq")); |
| 22 | + assert!(is_x86_feature_detected!("vpclmulqdq")); |
| 23 | + assert!(is_x86_feature_detected!("avx512f")); |
| 24 | + |
| 25 | + unsafe { |
| 26 | + test_mm256_clmulepi64_epi128(); |
| 27 | + test_mm512_clmulepi64_epi128(); |
| 28 | + } |
| 29 | +} |
| 30 | + |
| 31 | +macro_rules! verify_kat_pclmul { |
| 32 | + ($broadcast:ident, $clmul:ident, $assert:ident) => { |
| 33 | + // Constants taken from https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf |
| 34 | + let a = _mm_set_epi64x(0x7b5b546573745665, 0x63746f725d53475d); |
| 35 | + let a = $broadcast(a); |
| 36 | + let b = _mm_set_epi64x(0x4869285368617929, 0x5b477565726f6e5d); |
| 37 | + let b = $broadcast(b); |
| 38 | + let r00 = _mm_set_epi64x(0x1d4d84c85c3440c0, 0x929633d5d36f0451); |
| 39 | + let r00 = $broadcast(r00); |
| 40 | + let r01 = _mm_set_epi64x(0x1bd17c8d556ab5a1, 0x7fa540ac2a281315); |
| 41 | + let r01 = $broadcast(r01); |
| 42 | + let r10 = _mm_set_epi64x(0x1a2bf6db3a30862f, 0xbabf262df4b7d5c9); |
| 43 | + let r10 = $broadcast(r10); |
| 44 | + let r11 = _mm_set_epi64x(0x1d1e1f2c592e7c45, 0xd66ee03e410fd4ed); |
| 45 | + let r11 = $broadcast(r11); |
| 46 | + |
| 47 | + $assert($clmul::<0x00>(a, b), r00); |
| 48 | + $assert($clmul::<0x10>(a, b), r01); |
| 49 | + $assert($clmul::<0x01>(a, b), r10); |
| 50 | + $assert($clmul::<0x11>(a, b), r11); |
| 51 | + |
| 52 | + let a0 = _mm_set_epi64x(0x0000000000000000, 0x8000000000000000); |
| 53 | + let a0 = $broadcast(a0); |
| 54 | + let r = _mm_set_epi64x(0x4000000000000000, 0x0000000000000000); |
| 55 | + let r = $broadcast(r); |
| 56 | + $assert($clmul::<0x00>(a0, a0), r); |
| 57 | + } |
| 58 | +} |
| 59 | + |
| 60 | +// this function tests one of the possible 4 instances |
| 61 | +// with different inputs across lanes for the 512-bit version |
| 62 | +#[target_feature(enable = "vpclmulqdq,avx512f")] |
| 63 | +unsafe fn verify_512_helper( |
| 64 | + linear: unsafe fn(__m128i, __m128i) -> __m128i, |
| 65 | + vectorized: unsafe fn(__m512i, __m512i) -> __m512i, |
| 66 | +) { |
| 67 | + let a = _mm512_set_epi64( |
| 68 | + 0xDCB4DB3657BF0B7D, |
| 69 | + 0x18DB0601068EDD9F, |
| 70 | + 0xB76B908233200DC5, |
| 71 | + 0xE478235FA8E22D5E, |
| 72 | + 0xAB05CFFA2621154C, |
| 73 | + 0x1171B47A186174C9, |
| 74 | + 0x8C6B6C0E7595CEC9, |
| 75 | + 0xBE3E7D4934E961BD, |
| 76 | + ); |
| 77 | + let b = _mm512_set_epi64( |
| 78 | + 0x672F6F105A94CEA7, |
| 79 | + 0x8298B8FFCA5F829C, |
| 80 | + 0xA3927047B3FB61D8, |
| 81 | + 0x978093862CDE7187, |
| 82 | + 0xB1927AB22F31D0EC, |
| 83 | + 0xA9A5DA619BE4D7AF, |
| 84 | + 0xCA2590F56884FDC6, |
| 85 | + 0x19BE9F660038BDB5, |
| 86 | + ); |
| 87 | + |
| 88 | + let a_decomp = transmute::<_, [__m128i; 4]>(a); |
| 89 | + let b_decomp = transmute::<_, [__m128i; 4]>(b); |
| 90 | + |
| 91 | + let r = vectorized(a, b); |
| 92 | + |
| 93 | + let e_decomp = [ |
| 94 | + linear(a_decomp[0], b_decomp[0]), |
| 95 | + linear(a_decomp[1], b_decomp[1]), |
| 96 | + linear(a_decomp[2], b_decomp[2]), |
| 97 | + linear(a_decomp[3], b_decomp[3]), |
| 98 | + ]; |
| 99 | + let e = transmute::<_, __m512i>(e_decomp); |
| 100 | + |
| 101 | + assert_eq_m512i(r, e) |
| 102 | +} |
| 103 | + |
| 104 | +// this function tests one of the possible 4 instances |
| 105 | +// with different inputs across lanes for the 256-bit version |
| 106 | +#[target_feature(enable = "vpclmulqdq")] |
| 107 | +unsafe fn verify_256_helper( |
| 108 | + linear: unsafe fn(__m128i, __m128i) -> __m128i, |
| 109 | + vectorized: unsafe fn(__m256i, __m256i) -> __m256i, |
| 110 | +) { |
| 111 | + let a = _mm256_set_epi64x( |
| 112 | + 0xDCB4DB3657BF0B7D, |
| 113 | + 0x18DB0601068EDD9F, |
| 114 | + 0xB76B908233200DC5, |
| 115 | + 0xE478235FA8E22D5E, |
| 116 | + ); |
| 117 | + let b = _mm256_set_epi64x( |
| 118 | + 0x672F6F105A94CEA7, |
| 119 | + 0x8298B8FFCA5F829C, |
| 120 | + 0xA3927047B3FB61D8, |
| 121 | + 0x978093862CDE7187, |
| 122 | + ); |
| 123 | + |
| 124 | + let a_decomp = transmute::<_, [__m128i; 2]>(a); |
| 125 | + let b_decomp = transmute::<_, [__m128i; 2]>(b); |
| 126 | + |
| 127 | + let r = vectorized(a, b); |
| 128 | + |
| 129 | + let e_decomp = [linear(a_decomp[0], b_decomp[0]), linear(a_decomp[1], b_decomp[1])]; |
| 130 | + let e = transmute::<_, __m256i>(e_decomp); |
| 131 | + |
| 132 | + assert_eq_m256i(r, e) |
| 133 | +} |
| 134 | + |
| 135 | +#[target_feature(enable = "vpclmulqdq,avx512f")] |
| 136 | +unsafe fn test_mm512_clmulepi64_epi128() { |
| 137 | + verify_kat_pclmul!(_mm512_broadcast_i32x4, _mm512_clmulepi64_epi128, assert_eq_m512i); |
| 138 | + |
| 139 | + verify_512_helper( |
| 140 | + |a, b| _mm_clmulepi64_si128::<0x00>(a, b), |
| 141 | + |a, b| _mm512_clmulepi64_epi128::<0x00>(a, b), |
| 142 | + ); |
| 143 | + verify_512_helper( |
| 144 | + |a, b| _mm_clmulepi64_si128::<0x01>(a, b), |
| 145 | + |a, b| _mm512_clmulepi64_epi128::<0x01>(a, b), |
| 146 | + ); |
| 147 | + verify_512_helper( |
| 148 | + |a, b| _mm_clmulepi64_si128::<0x10>(a, b), |
| 149 | + |a, b| _mm512_clmulepi64_epi128::<0x10>(a, b), |
| 150 | + ); |
| 151 | + verify_512_helper( |
| 152 | + |a, b| _mm_clmulepi64_si128::<0x11>(a, b), |
| 153 | + |a, b| _mm512_clmulepi64_epi128::<0x11>(a, b), |
| 154 | + ); |
| 155 | +} |
| 156 | + |
| 157 | +#[target_feature(enable = "vpclmulqdq")] |
| 158 | +unsafe fn test_mm256_clmulepi64_epi128() { |
| 159 | + verify_kat_pclmul!(_mm256_broadcastsi128_si256, _mm256_clmulepi64_epi128, assert_eq_m256i); |
| 160 | + |
| 161 | + verify_256_helper( |
| 162 | + |a, b| _mm_clmulepi64_si128::<0x00>(a, b), |
| 163 | + |a, b| _mm256_clmulepi64_epi128::<0x00>(a, b), |
| 164 | + ); |
| 165 | + verify_256_helper( |
| 166 | + |a, b| _mm_clmulepi64_si128::<0x01>(a, b), |
| 167 | + |a, b| _mm256_clmulepi64_epi128::<0x01>(a, b), |
| 168 | + ); |
| 169 | + verify_256_helper( |
| 170 | + |a, b| _mm_clmulepi64_si128::<0x10>(a, b), |
| 171 | + |a, b| _mm256_clmulepi64_epi128::<0x10>(a, b), |
| 172 | + ); |
| 173 | + verify_256_helper( |
| 174 | + |a, b| _mm_clmulepi64_si128::<0x11>(a, b), |
| 175 | + |a, b| _mm256_clmulepi64_epi128::<0x11>(a, b), |
| 176 | + ); |
| 177 | +} |
| 178 | + |
| 179 | +#[track_caller] |
| 180 | +#[target_feature(enable = "avx512f")] |
| 181 | +unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) { |
| 182 | + assert_eq!(transmute::<_, [u64; 8]>(a), transmute::<_, [u64; 8]>(b)) |
| 183 | +} |
| 184 | + |
| 185 | +#[track_caller] |
| 186 | +#[target_feature(enable = "avx")] |
| 187 | +unsafe fn assert_eq_m256i(a: __m256i, b: __m256i) { |
| 188 | + assert_eq!(transmute::<_, [u64; 4]>(a), transmute::<_, [u64; 4]>(b)) |
| 189 | +} |
0 commit comments