Skip to content

Commit 3655a4d

Browse files
p32blognzlbg
authored and committed
Add _mm_testz_si128, _mm_testc_si128 and _mm_testnzc_si128
This should work for any 128-bit vector type, but it only accepts `i64x2` for now.
1 parent 8a0e6eb commit 3655a4d

File tree

1 file changed

+79
-1
lines changed

1 file changed

+79
-1
lines changed

src/x86/sse41.rs

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,6 @@ pub unsafe fn _mm_mul_epi32(a: i32x4, b:i32x4) -> i64x2 {
402402
pmuldq(a, b)
403403
}
404404

405-
406405
/// Multiply the packed 32-bit integers in `a` and `b`, producing intermediate 64-bit integers,
407406
/// and return the low 32 bits of the intermediate integers.
408407
#[inline(always)]
@@ -412,6 +411,27 @@ pub unsafe fn _mm_mullo_epi32 (a: i32x4, b:i32x4) -> i32x4 {
412411
a * b
413412
}
414413

414+
/// Tests whether the bits of `a` selected by `mask` are all zero.
///
/// Returns `1` if the bitwise AND of `a` and `mask` is zero across all
/// 128 bits, and `0` otherwise.
#[inline(always)]
415+
#[target_feature = "+sse4.1"]
416+
#[cfg_attr(test, assert_instr(ptest))]
417+
pub unsafe fn _mm_testz_si128(a: i64x2, mask: i64x2) -> i32 {
418+
ptestz(a, mask)
419+
}
420+
421+
/// Tests whether every bit set in `mask` is also set in `a`.
///
/// Returns `1` if the bitwise AND of `!a` and `mask` is zero across all
/// 128 bits (i.e. `a` covers `mask`), and `0` otherwise.
#[inline(always)]
422+
#[target_feature = "+sse4.1"]
423+
#[cfg_attr(test, assert_instr(ptest))]
424+
pub unsafe fn _mm_testc_si128(a: i64x2, mask: i64x2) -> i32 {
425+
ptestc(a, mask)
426+
}
427+
428+
/// Tests whether the bits of `a` selected by `mask` are neither all zeros
/// nor all ones.
///
/// Returns `1` only if both `a & mask` and `!a & mask` are non-zero, and
/// `0` otherwise.
#[inline(always)]
429+
#[target_feature = "+sse4.1"]
430+
#[cfg_attr(test, assert_instr(ptest))]
431+
pub unsafe fn _mm_testnzc_si128(a: i64x2, mask: i64x2) -> i32 {
432+
ptestnzc(a, mask)
433+
}
434+
415435
/// Returns the dot product of two f64x2 vectors.
416436
///
417437
/// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
@@ -724,6 +744,12 @@ extern "C" {
724744
fn packusdw(a: i32x4, b: i32x4) -> u16x8;
725745
#[link_name = "llvm.x86.sse41.pmuldq"]
726746
fn pmuldq(a: i32x4, b: i32x4) -> i64x2;
747+
#[link_name = "llvm.x86.sse41.ptestz"]
748+
fn ptestz(a: i64x2, mask: i64x2) -> i32;
749+
#[link_name = "llvm.x86.sse41.ptestc"]
750+
fn ptestc(a: i64x2, mask: i64x2) -> i32;
751+
#[link_name = "llvm.x86.sse41.ptestnzc"]
752+
fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
727753
#[link_name = "llvm.x86.sse41.dppd"]
728754
fn dppd(a: f64x2, b: f64x2, imm8: u8) -> f64x2;
729755
#[link_name = "llvm.x86.sse41.dpps"]
@@ -1175,6 +1201,58 @@ mod tests {
11751201
assert_eq!(r, e);
11761202
}
11771203

1204+
// Checks that `_mm_testz_si128` returns 1 exactly when `a & mask` is zero.
#[simd_test = "sse4.1"]
1205+
unsafe fn _mm_testz_si128() {
1206+
// An all-zero mask selects no bits, so `a & mask` is zero: expect 1.
let a = i64x2::splat(1);
1207+
let mask = i64x2::splat(0);
1208+
let r = sse41::_mm_testz_si128(a, mask);
1209+
assert_eq!(r, 1);
1210+
// 0b101 & 0b110 == 0b100, which is non-zero: expect 0.
let a = i64x2::splat(0b101);
1211+
let mask = i64x2::splat(0b110);
1212+
let r = sse41::_mm_testz_si128(a, mask);
1213+
assert_eq!(r, 0);
1214+
// 0b011 & 0b100 == 0, the selected bit is clear in `a`: expect 1.
let a = i64x2::splat(0b011);
1215+
let mask = i64x2::splat(0b100);
1216+
let r = sse41::_mm_testz_si128(a, mask);
1217+
assert_eq!(r, 1);
1218+
}
1219+
1220+
// Checks that `_mm_testc_si128` returns 1 exactly when `!a & mask` is zero.
#[simd_test = "sse4.1"]
1221+
unsafe fn _mm_testc_si128() {
1222+
// All bits of `a` are set and the mask is empty, so `!a & mask` is zero: expect 1.
let a = i64x2::splat(-1);
1223+
let mask = i64x2::splat(0);
1224+
let r = sse41::_mm_testc_si128(a, mask);
1225+
assert_eq!(r, 1);
1226+
// Mask bit 1 (0b010) is not set in `a`, so `!a & mask` is non-zero: expect 0.
let a = i64x2::splat(0b101);
1227+
let mask = i64x2::splat(0b110);
1228+
let r = sse41::_mm_testc_si128(a, mask);
1229+
assert_eq!(r, 0);
1230+
// The single mask bit (0b100) is set in `a`, so `!a & mask` is zero: expect 1.
let a = i64x2::splat(0b101);
1231+
let mask = i64x2::splat(0b100);
1232+
let r = sse41::_mm_testc_si128(a, mask);
1233+
assert_eq!(r, 1);
1234+
}
1235+
1236+
// Checks that `_mm_testnzc_si128` returns 1 only when both `a & mask` and
// `!a & mask` are non-zero.
#[simd_test = "sse4.1"]
1237+
unsafe fn _mm_testnzc_si128() {
1238+
// `a` is zero, so `a & mask` is zero: expect 0.
let a = i64x2::splat(0);
1239+
let mask = i64x2::splat(1);
1240+
let r = sse41::_mm_testnzc_si128(a, mask);
1241+
assert_eq!(r, 0);
1242+
// The mask is zero, so both `a & mask` and `!a & mask` are zero: expect 0.
let a = i64x2::splat(-1);
1243+
let mask = i64x2::splat(0);
1244+
let r = sse41::_mm_testnzc_si128(a, mask);
1245+
assert_eq!(r, 0);
1246+
// `a & mask` == 0b100 and `!a & mask` == 0b010 are both non-zero: expect 1.
let a = i64x2::splat(0b101);
1247+
let mask = i64x2::splat(0b110);
1248+
let r = sse41::_mm_testnzc_si128(a, mask);
1249+
assert_eq!(r, 1);
1250+
// `a` covers the whole mask, so `!a & mask` is zero: expect 0.
let a = i64x2::splat(0b101);
1251+
let mask = i64x2::splat(0b101);
1252+
let r = sse41::_mm_testnzc_si128(a, mask);
1253+
assert_eq!(r, 0);
1254+
}
1255+
11781256
#[simd_test = "sse4.1"]
11791257
unsafe fn _mm_dp_pd() {
11801258
let a = f64x2::new(2.0, 3.0);

0 commit comments

Comments
 (0)