Skip to content

Commit 8a0e6eb

Browse files
p32blognzlbg
authored and committed
Add _mm_mul_epi32 and _mm_mullo_epi32
1 parent 377126d commit 8a0e6eb

File tree

1 file changed

+38
-0
lines changed

1 file changed

+38
-0
lines changed

src/x86/sse41.rs

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -394,6 +394,24 @@ pub unsafe fn _mm_cvtepu32_epi64(a: u32x4) -> i64x2 {
394394
simd_shuffle2::<_, ::v64::u32x2>(a, a, [0, 1]).as_i64x2()
395395
}
396396

397+
/// Multiply the low signed 32-bit integers from each packed 64-bit element in `a` and `b`, and return the signed 64-bit products.
398+
#[inline(always)]
399+
#[target_feature = "+sse4.1"]
400+
#[cfg_attr(test, assert_instr(pmuldq))]
401+
pub unsafe fn _mm_mul_epi32(a: i32x4, b:i32x4) -> i64x2 {
402+
pmuldq(a, b)
403+
}
404+
405+
406+
/// Multiply the packed 32-bit integers in `a` and `b` element by element, producing intermediate 64-bit products,
407+
/// and return the low 32 bits of each intermediate product.
408+
#[inline(always)]
409+
#[target_feature = "+sse4.1"]
410+
#[cfg_attr(test, assert_instr(pmulld))]
411+
pub unsafe fn _mm_mullo_epi32 (a: i32x4, b:i32x4) -> i32x4 {
412+
a * b // plain vector multiply; the `assert_instr` attribute above checks it lowers to `pmulld`
413+
}
414+
397415
/// Returns the dot product of two f64x2 vectors.
398416
///
399417
/// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
@@ -704,6 +722,8 @@ extern "C" {
704722
fn pminud(a: u32x4, b: u32x4) -> u32x4;
705723
#[link_name = "llvm.x86.sse41.packusdw"]
706724
fn packusdw(a: i32x4, b: i32x4) -> u16x8;
725+
#[link_name = "llvm.x86.sse41.pmuldq"]
726+
fn pmuldq(a: i32x4, b: i32x4) -> i64x2;
707727
#[link_name = "llvm.x86.sse41.dppd"]
708728
fn dppd(a: f64x2, b: f64x2, imm8: u8) -> f64x2;
709729
#[link_name = "llvm.x86.sse41.dpps"]
@@ -1137,6 +1157,24 @@ mod tests {
11371157
assert_eq!(r, e);
11381158
}
11391159

1160+
#[simd_test = "sse4.1"]
1161+
unsafe fn _mm_mul_epi32() {
1162+
let a = i32x4::new(1, 1, 1, 1);
1163+
let b = i32x4::new(1, 2, 3, 4);
1164+
let r = sse41::_mm_mul_epi32(a, b);
1165+
let e = i64x2::new(1, 3); // only elements 0 and 2 are multiplied: 1 * 1 and 1 * 3
1166+
assert_eq!(r, e);
1167+
}
1168+
1169+
#[simd_test = "sse4.1"]
1170+
unsafe fn _mm_mullo_epi32() {
1171+
let a = i32x4::new(1, 1, 1, 1);
1172+
let b = i32x4::new(1, 2, 3, 4);
1173+
let r = sse41::_mm_mullo_epi32(a, b);
1174+
let e = i32x4::new(1, 2, 3, 4); // element-wise product; `a` is all ones, so the result equals `b`
1175+
assert_eq!(r, e);
1176+
}
1177+
11401178
#[simd_test = "sse4.1"]
11411179
unsafe fn _mm_dp_pd() {
11421180
let a = f64x2::new(2.0, 3.0);

0 commit comments

Comments
 (0)