Skip to content

Commit adc34a3

Browse files
p32blo authored and BurntSushi committed
Add _mm_packus_epi32 and _mm_cmpeq_epi64 intrinsics
1 parent 308ddc5 commit adc34a3

File tree

1 file changed

+41
-4
lines changed

1 file changed

+41
-4
lines changed

src/x86/sse41.rs

Lines changed: 41 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -211,7 +211,7 @@ pub unsafe fn _mm_insert_epi64(a: i64x2, i: i64, imm8: u8) -> i64x2 {
211211
/// values in dst.
212212
#[inline(always)]
213213
#[target_feature = "+sse4.1"]
214-
#[cfg_attr(test, assert_instr(pmaxsb, imm8 = 0))]
214+
#[cfg_attr(test, assert_instr(pmaxsb))]
215215
pub unsafe fn _mm_max_epi8(a: i8x16, b: i8x16) -> i8x16 {
216216
pmaxsb(a, b)
217217
}
@@ -220,7 +220,7 @@ pub unsafe fn _mm_max_epi8(a: i8x16, b: i8x16) -> i8x16 {
220220
/// maximum.
221221
#[inline(always)]
222222
#[target_feature = "+sse4.1"]
223-
#[cfg_attr(test, assert_instr(pmaxuw, imm8 = 0))]
223+
#[cfg_attr(test, assert_instr(pmaxuw))]
224224
pub unsafe fn _mm_max_epu16(a: u16x8, b: u16x8) -> u16x8 {
225225
pmaxuw(a, b)
226226
}
@@ -229,7 +229,7 @@ pub unsafe fn _mm_max_epu16(a: u16x8, b: u16x8) -> u16x8 {
229229
/// values.
230230
#[inline(always)]
231231
#[target_feature = "+sse4.1"]
232-
#[cfg_attr(test, assert_instr(pmaxsd, imm8 = 0))]
232+
#[cfg_attr(test, assert_instr(pmaxsd))]
233233
pub unsafe fn _mm_max_epi32(a: i32x4, b: i32x4) -> i32x4 {
234234
pmaxsd(a, b)
235235
}
@@ -238,11 +238,28 @@ pub unsafe fn _mm_max_epi32(a: i32x4, b: i32x4) -> i32x4 {
238238
/// maximum values.
239239
#[inline(always)]
240240
#[target_feature = "+sse4.1"]
241-
#[cfg_attr(test, assert_instr(pmaxud, imm8 = 0))]
241+
#[cfg_attr(test, assert_instr(pmaxud))]
242242
pub unsafe fn _mm_max_epu32(a: u32x4, b: u32x4) -> u32x4 {
243243
pmaxud(a, b)
244244
}
245245

246+
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using unsigned saturation.
///
/// The four lanes of `a` fill the low half of the result and the four lanes
/// of `b` fill the high half. Negative inputs saturate to `0`.
247+
#[inline(always)]
248+
#[target_feature = "+sse4.1"]
249+
#[cfg_attr(test, assert_instr(packusdw))]
250+
pub unsafe fn _mm_packus_epi32(a: i32x4, b: i32x4) -> u16x8 {
251+
// Forwards directly to the `llvm.x86.sse41.packusdw` intrinsic declared
// in this file's `extern` block.
packusdw(a, b)
252+
}
253+
254+
/// Compare packed 64-bit integers in `a` and `b` for equality.
///
/// Each lane of the result has all bits set where the corresponding lanes of
/// `a` and `b` are equal, and is `0` otherwise.
255+
#[inline(always)]
256+
#[target_feature = "+sse4.1"]
257+
#[cfg_attr(test, assert_instr(pcmpeqq))]
258+
pub unsafe fn _mm_cmpeq_epi64(a: i64x2, b: i64x2) -> i64x2 {
259+
// Written with the vector type's own `eq` rather than an `extern`
// intrinsic; the `assert_instr` attribute above checks under test that
// this still lowers to `pcmpeqq`.
a.eq(b)
260+
}
261+
262+
246263
/// Returns the dot product of two f64x2 vectors.
247264
///
248265
/// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
@@ -510,6 +527,8 @@ extern "C" {
510527
fn pmaxsd(a: i32x4, b: i32x4) -> i32x4;
511528
#[link_name = "llvm.x86.sse41.pmaxud"]
512529
fn pmaxud(a: u32x4, b: u32x4) -> u32x4;
530+
// LLVM intrinsic called by `_mm_packus_epi32`.
#[link_name = "llvm.x86.sse41.packusdw"]
531+
fn packusdw(a: i32x4, b: i32x4) -> u16x8;
513532
#[link_name = "llvm.x86.sse41.dppd"]
514533
fn dppd(a: f64x2, b: f64x2, imm8: u8) -> f64x2;
515534
#[link_name = "llvm.x86.sse41.dpps"]
@@ -723,6 +742,24 @@ mod tests {
723742
assert_eq!(r, e);
724743
}
725744

745+
// Checks lane placement (`a` in the low half, `b` in the high half) and both
// saturation bounds: values above `u16::MAX` clamp to 65535 and negative
// values clamp to 0.
#[simd_test = "sse4.1"]
746+
unsafe fn _mm_packus_epi32() {
747+
let a = i32x4::new(1, 2, 3, 65536);
748+
let b = i32x4::new(-1, -2, -3, -4);
749+
let r = sse41::_mm_packus_epi32(a, b);
750+
let e = u16x8::new(1, 2, 3, 65535, 0, 0, 0, 0);
751+
assert_eq!(r, e);
752+
}
753+
754+
// Lane 0 of `a` and `b` match and lane 1 differs, so the expected mask is
// all-ones in lane 0 and zero in lane 1.
#[simd_test = "sse4.1"]
755+
unsafe fn _mm_cmpeq_epi64() {
756+
let a = i64x2::new(0, 1);
757+
let b = i64x2::new(0, 0);
758+
let r = sse41::_mm_cmpeq_epi64(a, b);
759+
// All 64 bits set is `-1` as an `i64`; writing `0xFFFFFFFFFFFFFFFF` here
// would be an out-of-range literal for a signed lane.
let e = i64x2::new(-1, 0);
760+
assert_eq!(r, e);
761+
}
762+
726763
#[simd_test = "sse4.1"]
727764
unsafe fn _mm_dp_pd() {
728765
let a = f64x2::new(2.0, 3.0);

0 commit comments

Comments
 (0)