Skip to content

Commit a74886b

Browse files
author
Daniel Smith
committed
Add cmplt_ep(i|u)32
1 parent 33a4dd5 commit a74886b

File tree

5 files changed

+163
-3
lines changed

5 files changed

+163
-3
lines changed

crates/core_arch/src/simd.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,12 @@ simd_ty!(i32x16[i32]:
198198
| x0, x1, x2, x3, x4, x5, x6, x7,
199199
x8, x9, x10, x11, x12, x13, x14, x15);
200200

201+
simd_ty!(u32x16[u32]:
202+
u32, u32, u32, u32, u32, u32, u32, u32,
203+
u32, u32, u32, u32, u32, u32, u32, u32
204+
| x0, x1, x2, x3, x4, x5, x6, x7,
205+
x8, x9, x10, x11, x12, x13, x14, x15);
206+
201207
simd_ty!(i64x8[i64]:
202208
i64, i64, i64, i64, i64, i64, i64, i64
203209
| x0, x1, x2, x3, x4, x5, x6, x7);

crates/core_arch/src/x86/avx512f.rs

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,13 +87,67 @@ pub unsafe fn _mm512_setr_epi32(
8787
transmute(r)
8888
}
8989

90+
/// Sets packed 32-bit integers in `dst` with the supplied values.
91+
#[inline]
92+
#[target_feature(enable = "avx512f")]
93+
pub unsafe fn _mm512_set_epi32(
94+
e15: i32,
95+
e14: i32,
96+
e13: i32,
97+
e12: i32,
98+
e11: i32,
99+
e10: i32,
100+
e9: i32,
101+
e8: i32,
102+
e7: i32,
103+
e6: i32,
104+
e5: i32,
105+
e4: i32,
106+
e3: i32,
107+
e2: i32,
108+
e1: i32,
109+
e0: i32,
110+
) -> __m512i {
111+
_mm512_setr_epi32(
112+
e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
113+
)
114+
}
115+
116+
/// Broadcast 32-bit integer `a` to all elements of `dst`.
117+
#[inline]
118+
#[target_feature(enable = "avx512f")]
119+
pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i {
120+
transmute(i32x16::splat(a))
121+
}
122+
90123
/// Broadcast 64-bit integer `a` to all elements of `dst`.
91124
#[inline]
92125
#[target_feature(enable = "avx512f")]
93126
pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
94127
transmute(i64x8::splat(a))
95128
}
96129

130+
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector.
131+
///
132+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu32)
133+
#[inline]
134+
#[target_feature(enable = "avx512f")]
135+
#[cfg_attr(test, assert_instr(vpcmp))]
136+
pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
137+
simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16()))
138+
}
139+
140+
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector k
141+
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
142+
///
143+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu32)
144+
#[inline]
145+
#[target_feature(enable = "avx512f")]
146+
#[cfg_attr(test, assert_instr(vpcmp))]
147+
pub unsafe fn _mm512_mask_cmplt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
148+
_mm512_cmplt_epu32_mask(a, b) & m
149+
}
150+
97151
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
98152
///
99153
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64)
@@ -261,6 +315,27 @@ pub unsafe fn _mm512_mask_cmp_epu64_mask(
261315
transmute(r)
262316
}
263317

318+
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector.
319+
///
320+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi32)
321+
#[inline]
322+
#[target_feature(enable = "avx512f")]
323+
#[cfg_attr(test, assert_instr(vpcmp))]
324+
pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
325+
simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16()))
326+
}
327+
328+
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector k
329+
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
330+
///
331+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi32)
332+
#[inline]
333+
#[target_feature(enable = "avx512f")]
334+
#[cfg_attr(test, assert_instr(vpcmp))]
335+
pub unsafe fn _mm512_mask_cmplt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
336+
_mm512_cmplt_epi32_mask(a, b) & m
337+
}
338+
264339
/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector.
265340
///
266341
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64)
@@ -556,4 +631,75 @@ mod tests {
556631
);
557632
assert_eq_m512i(r, e);
558633
}
634+
635+
#[simd_test(enable = "avx512f")]
636+
unsafe fn test_mm512_cmplt_epu32_mask() {
637+
#[rustfmt::skip]
638+
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
639+
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
640+
let b = _mm512_set1_epi32(-1);
641+
let m = _mm512_cmplt_epu32_mask(a, b);
642+
assert_eq!(m, 0b11001111_11001111);
643+
}
644+
645+
#[simd_test(enable = "avx512f")]
646+
unsafe fn test_mm512_mask_cmplt_epu32_mask() {
647+
#[rustfmt::skip]
648+
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
649+
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
650+
let b = _mm512_set1_epi32(-1);
651+
let mask = 0b01111010_01111010;
652+
let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
653+
assert_eq!(r, 0b01001010_01001010);
654+
}
655+
656+
#[simd_test(enable = "avx512f")]
657+
unsafe fn test_mm512_cmplt_epi32_mask() {
658+
#[rustfmt::skip]
659+
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
660+
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
661+
let b = _mm512_set1_epi32(-1);
662+
let m = _mm512_cmplt_epi32_mask(a, b);
663+
assert_eq!(m, 0b00000101_00000101);
664+
}
665+
666+
#[simd_test(enable = "avx512f")]
667+
unsafe fn test_mm512_mask_cmplt_epi32_mask() {
668+
#[rustfmt::skip]
669+
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
670+
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
671+
let b = _mm512_set1_epi32(-1);
672+
let mask = 0b01100110_01100110;
673+
let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
674+
assert_eq!(r, 0b00000100_00000100);
675+
}
676+
677+
#[simd_test(enable = "avx512f")]
678+
unsafe fn test_mm512_set_epi32() {
679+
let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
680+
assert_eq_m512i(
681+
r,
682+
_mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
683+
)
684+
}
685+
686+
#[simd_test(enable = "avx512f")]
687+
unsafe fn test_mm512_setr_epi32() {
688+
let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
689+
assert_eq_m512i(
690+
r,
691+
_mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
692+
)
693+
}
694+
695+
#[simd_test(enable = "avx512f")]
696+
unsafe fn test_mm512_set1_epi32() {
697+
let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
698+
assert_eq_m512i(r, _mm512_set1_epi32(2));
699+
}
700+
701+
#[simd_test(enable = "avx512f")]
702+
unsafe fn test_mm512_setzero_si512() {
703+
assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
704+
}
559705
}

crates/core_arch/src/x86/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,11 @@ impl m256iExt for __m256i {
513513
pub(crate) trait m512iExt: Sized {
514514
fn as_m512i(self) -> __m512i;
515515

516+
#[inline]
517+
fn as_u32x16(self) -> crate::core_arch::simd::u32x16 {
518+
unsafe { transmute(self.as_m512i()) }
519+
}
520+
516521
#[inline]
517522
fn as_i32x16(self) -> crate::core_arch::simd::i32x16 {
518523
unsafe { transmute(self.as_m512i()) }

crates/core_arch/src/x86_64/avx512f.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,4 +302,10 @@ mod tests {
302302
let r = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
303303
assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0))
304304
}
305+
306+
#[simd_test(enable = "avx512f")]
307+
unsafe fn test_mm512_set1_epi64() {
308+
let r = _mm512_set_epi64(2, 2, 2, 2, 2, 2, 2, 2);
309+
assert_eq_m512i(r, _mm512_set1_epi64(2));
310+
}
305311
}

crates/stdarch-verify/tests/x86-intel.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,9 +213,6 @@ fn verify_all_signatures() {
213213
"_mm256_undefined_si256",
214214
"_bextr2_u32",
215215
"_mm_tzcnt_32",
216-
"_mm512_setzero_si512",
217-
"_mm512_setr_epi32",
218-
"_mm512_set1_epi64",
219216
"_m_paddb",
220217
"_m_paddw",
221218
"_m_paddd",

0 commit comments

Comments
 (0)