@@ -402,7 +402,6 @@ pub unsafe fn _mm_mul_epi32(a: i32x4, b: i32x4) -> i64x2 {
     pmuldq(a, b)
 }
 
-
 /// Multiply the packed 32-bit integers in `a` and `b`, producing intermediate 64-bit integers,
 /// and return the low 32 bits of the intermediate integers.
 #[inline(always)]
@@ -412,6 +411,27 @@ pub unsafe fn _mm_mullo_epi32(a: i32x4, b: i32x4) -> i32x4 {
     a * b
 }
 
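+/// Returns 1 if `a & mask` is all zeros (the `ZF` flag of `PTEST`),
+/// otherwise returns 0.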
+#[inline(always)]
+#[target_feature = "+sse4.1"]
+#[cfg_attr(test, assert_instr(ptest))]
+pub unsafe fn _mm_testz_si128(a: i64x2, mask: i64x2) -> i32 {
+    ptestz(a, mask)
+}
+
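+/// Returns 1 if `!a & mask` is all zeros (the `CF` flag of `PTEST`),
+/// otherwise returns 0.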
+#[inline(always)]
+#[target_feature = "+sse4.1"]
+#[cfg_attr(test, assert_instr(ptest))]
+pub unsafe fn _mm_testc_si128(a: i64x2, mask: i64x2) -> i32 {
+    ptestc(a, mask)
+}
+
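+/// Returns 1 if both `a & mask` and `!a & mask` are non-zero, i.e. when
+/// `PTEST` leaves both `ZF` and `CF` clear; otherwise returns 0.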
+#[inline(always)]
+#[target_feature = "+sse4.1"]
+#[cfg_attr(test, assert_instr(ptest))]
+pub unsafe fn _mm_testnzc_si128(a: i64x2, mask: i64x2) -> i32 {
+    ptestnzc(a, mask)
+}
+
 /// Returns the dot product of two f64x2 vectors.
 ///
 /// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
@@ -724,6 +744,12 @@ extern "C" {
     fn packusdw(a: i32x4, b: i32x4) -> u16x8;
     #[link_name = "llvm.x86.sse41.pmuldq"]
     fn pmuldq(a: i32x4, b: i32x4) -> i64x2;
+    #[link_name = "llvm.x86.sse41.ptestz"]
+    fn ptestz(a: i64x2, mask: i64x2) -> i32;
+    #[link_name = "llvm.x86.sse41.ptestc"]
+    fn ptestc(a: i64x2, mask: i64x2) -> i32;
+    #[link_name = "llvm.x86.sse41.ptestnzc"]
+    fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
     #[link_name = "llvm.x86.sse41.dppd"]
     fn dppd(a: f64x2, b: f64x2, imm8: u8) -> f64x2;
     #[link_name = "llvm.x86.sse41.dpps"]
@@ -1175,6 +1201,58 @@ mod tests {
         assert_eq!(r, e);
     }
 
+    #[simd_test = "sse4.1"]
+    unsafe fn _mm_testz_si128() {
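+        // `_mm_testz_si128` returns 1 only when `a & mask` has no bits set.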
+        let a = i64x2::splat(1);
+        let mask = i64x2::splat(0);
+        let r = sse41::_mm_testz_si128(a, mask);
+        assert_eq!(r, 1);
+        let a = i64x2::splat(0b101);
+        let mask = i64x2::splat(0b110);
+        let r = sse41::_mm_testz_si128(a, mask);
+        assert_eq!(r, 0);
+        let a = i64x2::splat(0b011);
+        let mask = i64x2::splat(0b100);
+        let r = sse41::_mm_testz_si128(a, mask);
+        assert_eq!(r, 1);
+    }
+
+    #[simd_test = "sse4.1"]
+    unsafe fn _mm_testc_si128() {
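+        // `_mm_testc_si128` returns 1 only when `!a & mask` has no bits set.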
+        let a = i64x2::splat(-1);
+        let mask = i64x2::splat(0);
+        let r = sse41::_mm_testc_si128(a, mask);
+        assert_eq!(r, 1);
+        let a = i64x2::splat(0b101);
+        let mask = i64x2::splat(0b110);
+        let r = sse41::_mm_testc_si128(a, mask);
+        assert_eq!(r, 0);
+        let a = i64x2::splat(0b101);
+        let mask = i64x2::splat(0b100);
+        let r = sse41::_mm_testc_si128(a, mask);
+        assert_eq!(r, 1);
+    }
+
+    #[simd_test = "sse4.1"]
+    unsafe fn _mm_testnzc_si128() {
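+        // `_mm_testnzc_si128` returns 1 only when both `a & mask` and
+        // `!a & mask` have at least one bit set (ZF == 0 and CF == 0).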
+        let a = i64x2::splat(0);
+        let mask = i64x2::splat(1);
+        let r = sse41::_mm_testnzc_si128(a, mask);
+        assert_eq!(r, 0);
+        let a = i64x2::splat(-1);
+        let mask = i64x2::splat(0);
+        let r = sse41::_mm_testnzc_si128(a, mask);
+        assert_eq!(r, 0);
+        let a = i64x2::splat(0b101);
+        let mask = i64x2::splat(0b110);
+        let r = sse41::_mm_testnzc_si128(a, mask);
+        assert_eq!(r, 1);
+        let a = i64x2::splat(0b101);
+        let mask = i64x2::splat(0b101);
+        let r = sse41::_mm_testnzc_si128(a, mask);
+        assert_eq!(r, 0);
+    }
+
     #[simd_test = "sse4.1"]
     unsafe fn _mm_dp_pd() {
         let a = f64x2::new(2.0, 3.0);