|
14 | 14 | #ifndef __AVX10_2SATCVTINTRIN_H
|
15 | 15 | #define __AVX10_2SATCVTINTRIN_H
|
16 | 16 |
|
// Saturating conversion of packed bf16 elements in A to signed 8-bit
// integers (AVX10.2 VCVTBF162IBS, 128-bit form).
#define _mm_ipcvtbf16_epi8(A)                                                  \
  ((__m128i)__builtin_ia32_vcvtbf162ibs128((__v8bf)(__m128bh)(A)))

// Merge-masked variant: result lanes whose bit in U is clear are taken
// from W; the select operates per 16-bit lane (selectw).
#define _mm_mask_ipcvtbf16_epi8(W, U, A)                                       \
  ((__m128i)__builtin_ia32_selectw_128(                                        \
      (__mmask8)(U), (__v8hi)_mm_ipcvtbf16_epi8(A), (__v8hi)(__m128i)(W)))

// Zero-masked variant: result lanes whose bit in U is clear are zeroed.
#define _mm_maskz_ipcvtbf16_epi8(U, A)                                         \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U),                          \
                                       (__v8hi)_mm_ipcvtbf16_epi8(A),          \
                                       (__v8hi)_mm_setzero_si128()))
|
28 | 28 |
|
// Saturating conversion of packed bf16 elements in A to signed 8-bit
// integers (AVX10.2 VCVTBF162IBS, 256-bit form).
#define _mm256_ipcvtbf16_epi8(A)                                               \
  ((__m256i)__builtin_ia32_vcvtbf162ibs256((__v16bf)(__m256bh)(A)))

// Merge-masked variant: result lanes whose bit in U is clear are taken
// from W; the select operates per 16-bit lane (selectw).
#define _mm256_mask_ipcvtbf16_epi8(W, U, A)                                    \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
                                       (__v16hi)_mm256_ipcvtbf16_epi8(A),      \
                                       (__v16hi)(__m256i)(W)))

// Zero-masked variant: result lanes whose bit in U is clear are zeroed.
#define _mm256_maskz_ipcvtbf16_epi8(U, A)                                      \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
                                       (__v16hi)_mm256_ipcvtbf16_epi8(A),      \
                                       (__v16hi)_mm256_setzero_si256()))
|
41 | 41 |
|
// Saturating conversion of packed bf16 elements in A to unsigned 8-bit
// integers (AVX10.2 VCVTBF162IUBS, 128-bit form).
#define _mm_ipcvtbf16_epu8(A)                                                  \
  ((__m128i)__builtin_ia32_vcvtbf162iubs128((__v8bf)(__m128bh)(A)))

// Merge-masked variant: result lanes whose bit in U is clear are taken
// from W; the select operates per 16-bit lane (selectw).
#define _mm_mask_ipcvtbf16_epu8(W, U, A)                                       \
  ((__m128i)__builtin_ia32_selectw_128(                                        \
      (__mmask8)(U), (__v8hi)_mm_ipcvtbf16_epu8(A), (__v8hi)(__m128i)(W)))

// Zero-masked variant: result lanes whose bit in U is clear are zeroed.
#define _mm_maskz_ipcvtbf16_epu8(U, A)                                         \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U),                          \
                                       (__v8hi)_mm_ipcvtbf16_epu8(A),          \
                                       (__v8hi)_mm_setzero_si128()))
|
53 | 53 |
|
// Saturating conversion of packed bf16 elements in A to unsigned 8-bit
// integers (AVX10.2 VCVTBF162IUBS, 256-bit form).
#define _mm256_ipcvtbf16_epu8(A)                                               \
  ((__m256i)__builtin_ia32_vcvtbf162iubs256((__v16bf)(__m256bh)(A)))

// Merge-masked variant: result lanes whose bit in U is clear are taken
// from W; the select operates per 16-bit lane (selectw).
#define _mm256_mask_ipcvtbf16_epu8(W, U, A)                                    \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
                                       (__v16hi)_mm256_ipcvtbf16_epu8(A),      \
                                       (__v16hi)(__m256i)(W)))

// Zero-masked variant: result lanes whose bit in U is clear are zeroed.
#define _mm256_maskz_ipcvtbf16_epu8(U, A)                                      \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
                                       (__v16hi)_mm256_ipcvtbf16_epu8(A),      \
                                       (__v16hi)_mm256_setzero_si256()))
|
66 | 66 |
|
67 | 67 | #define _mm_ipcvtph_epi8(A) \
|
|
228 | 228 | (__v8su)_mm256_setzero_si256(), \
|
229 | 229 | (__mmask8)(U), (const int)R))
|
230 | 230 |
|
// Truncating (round-toward-zero) saturating conversion of packed bf16
// elements in A to signed 8-bit integers (AVX10.2 VCVTTBF162IBS, 128-bit).
#define _mm_ipcvttbf16_epi8(A)                                                 \
  ((__m128i)__builtin_ia32_vcvttbf162ibs128((__v8bf)(__m128bh)(A)))

// Merge-masked variant: result lanes whose bit in U is clear are taken
// from W; the select operates per 16-bit lane (selectw).
#define _mm_mask_ipcvttbf16_epi8(W, U, A)                                      \
  ((__m128i)__builtin_ia32_selectw_128(                                        \
      (__mmask8)(U), (__v8hi)_mm_ipcvttbf16_epi8(A), (__v8hi)(__m128i)(W)))

// Zero-masked variant: result lanes whose bit in U is clear are zeroed.
#define _mm_maskz_ipcvttbf16_epi8(U, A)                                        \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U),                          \
                                       (__v8hi)_mm_ipcvttbf16_epi8(A),         \
                                       (__v8hi)_mm_setzero_si128()))
|
242 | 242 |
|
// Truncating (round-toward-zero) saturating conversion of packed bf16
// elements in A to signed 8-bit integers (AVX10.2 VCVTTBF162IBS, 256-bit).
#define _mm256_ipcvttbf16_epi8(A)                                              \
  ((__m256i)__builtin_ia32_vcvttbf162ibs256((__v16bf)(__m256bh)(A)))

// Merge-masked variant: result lanes whose bit in U is clear are taken
// from W; the select operates per 16-bit lane (selectw).
#define _mm256_mask_ipcvttbf16_epi8(W, U, A)                                   \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
                                       (__v16hi)_mm256_ipcvttbf16_epi8(A),     \
                                       (__v16hi)(__m256i)(W)))

// Zero-masked variant: result lanes whose bit in U is clear are zeroed.
#define _mm256_maskz_ipcvttbf16_epi8(U, A)                                     \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
                                       (__v16hi)_mm256_ipcvttbf16_epi8(A),     \
                                       (__v16hi)_mm256_setzero_si256()))
|
255 | 255 |
|
// Truncating (round-toward-zero) saturating conversion of packed bf16
// elements in A to unsigned 8-bit integers (AVX10.2 VCVTTBF162IUBS, 128-bit).
#define _mm_ipcvttbf16_epu8(A)                                                 \
  ((__m128i)__builtin_ia32_vcvttbf162iubs128((__v8bf)(__m128bh)(A)))

// Merge-masked variant: result lanes whose bit in U is clear are taken
// from W; the select operates per 16-bit lane (selectw).
#define _mm_mask_ipcvttbf16_epu8(W, U, A)                                      \
  ((__m128i)__builtin_ia32_selectw_128(                                        \
      (__mmask8)(U), (__v8hi)_mm_ipcvttbf16_epu8(A), (__v8hi)(__m128i)(W)))

// Zero-masked variant: result lanes whose bit in U is clear are zeroed.
#define _mm_maskz_ipcvttbf16_epu8(U, A)                                        \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U),                          \
                                       (__v8hi)_mm_ipcvttbf16_epu8(A),         \
                                       (__v8hi)_mm_setzero_si128()))
|
267 | 267 |
|
// Truncating (round-toward-zero) saturating conversion of packed bf16
// elements in A to unsigned 8-bit integers (AVX10.2 VCVTTBF162IUBS, 256-bit).
#define _mm256_ipcvttbf16_epu8(A)                                              \
  ((__m256i)__builtin_ia32_vcvttbf162iubs256((__v16bf)(__m256bh)(A)))

// Merge-masked variant: result lanes whose bit in U is clear are taken
// from W; the select operates per 16-bit lane (selectw).
#define _mm256_mask_ipcvttbf16_epu8(W, U, A)                                   \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
                                       (__v16hi)_mm256_ipcvttbf16_epu8(A),     \
                                       (__v16hi)(__m256i)(W)))

// Zero-masked variant: result lanes whose bit in U is clear are zeroed.
#define _mm256_maskz_ipcvttbf16_epu8(U, A)                                     \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
                                       (__v16hi)_mm256_ipcvttbf16_epu8(A),     \
                                       (__v16hi)_mm256_setzero_si256()))
|
280 | 280 |
|
281 | 281 | #define _mm_ipcvttph_epi8(A) \
|
|
0 commit comments