@@ -394,6 +394,24 @@ pub unsafe fn _mm_cvtepu32_epi64(a: u32x4) -> i64x2 {
394
394
simd_shuffle2 :: < _ , :: v64:: u32x2 > ( a, a, [ 0 , 1 ] ) . as_i64x2 ( )
395
395
}
396
396
397
+ /// Multiply the low 32-bit integers from each packed 64-bit element in `a` and `b`
398
+ #[ inline( always) ]
399
+ #[ target_feature = "+sse4.1" ]
400
+ #[ cfg_attr( test, assert_instr( pmuldq) ) ]
401
+ pub unsafe fn _mm_mul_epi32 ( a : i32x4 , b : i32x4 ) -> i64x2 {
402
+ pmuldq ( a, b)
403
+ }
404
+
405
+
406
+ /// Multiply the packed 32-bit integers in `a` and `b`, producing intermediate 64-bit integers,
407
+ /// and return the low 32 bits of the intermediate integers.
408
+ #[ inline( always) ]
409
+ #[ target_feature = "+sse4.1" ]
410
+ #[ cfg_attr( test, assert_instr( pmulld) ) ]
411
+ pub unsafe fn _mm_mullo_epi32 ( a : i32x4 , b : i32x4 ) -> i32x4 {
412
+ a * b
413
+ }
414
+
397
415
/// Returns the dot product of two f64x2 vectors.
398
416
///
399
417
/// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
@@ -704,6 +722,8 @@ extern "C" {
704
722
fn pminud ( a : u32x4 , b : u32x4 ) -> u32x4 ;
705
723
#[ link_name = "llvm.x86.sse41.packusdw" ]
706
724
fn packusdw ( a : i32x4 , b : i32x4 ) -> u16x8 ;
725
// Binding to the LLVM intrinsic `llvm.x86.sse41.pmuldq`; `_mm_mul_epi32` forwards to it.
+ #[ link_name = "llvm.x86.sse41.pmuldq" ]
726
+ fn pmuldq ( a : i32x4 , b : i32x4 ) -> i64x2 ;
707
727
#[ link_name = "llvm.x86.sse41.dppd" ]
708
728
fn dppd ( a : f64x2 , b : f64x2 , imm8 : u8 ) -> f64x2 ;
709
729
#[ link_name = "llvm.x86.sse41.dpps" ]
@@ -1137,6 +1157,24 @@ mod tests {
1137
1157
assert_eq ! ( r, e) ;
1138
1158
}
1139
1159
1160
+ #[ simd_test = "sse4.1" ]
1161
+ unsafe fn _mm_mul_epi32 ( ) {
1162
+ let a = i32x4:: new ( 1 , 1 , 1 , 1 ) ;
1163
+ let b = i32x4:: new ( 1 , 2 , 3 , 4 ) ;
1164
+ let r = sse41:: _mm_mul_epi32 ( a, b) ;
1165
+ let e = i64x2:: new ( 1 , 3 ) ;
1166
+ assert_eq ! ( r, e) ;
1167
+ }
1168
+
1169
+ #[ simd_test = "sse4.1" ]
1170
+ unsafe fn _mm_mullo_epi32 ( ) {
1171
+ let a = i32x4:: new ( 1 , 1 , 1 , 1 ) ;
1172
+ let b = i32x4:: new ( 1 , 2 , 3 , 4 ) ;
1173
+ let r = sse41:: _mm_mullo_epi32 ( a, b) ;
1174
+ let e = i32x4:: new ( 1 , 2 , 3 , 4 ) ;
1175
+ assert_eq ! ( r, e) ;
1176
+ }
1177
+
1140
1178
#[ simd_test = "sse4.1" ]
1141
1179
unsafe fn _mm_dp_pd ( ) {
1142
1180
let a = f64x2:: new ( 2.0 , 3.0 ) ;
0 commit comments