@@ -211,7 +211,7 @@ pub unsafe fn _mm_insert_epi64(a: i64x2, i: i64, imm8: u8) -> i64x2 {
211
211
/// values in dst.
212
212
#[ inline( always) ]
213
213
#[ target_feature = "+sse4.1" ]
214
- #[ cfg_attr( test, assert_instr( pmaxsb, imm8 = 0 ) ) ]
214
+ #[ cfg_attr( test, assert_instr( pmaxsb) ) ]
215
215
pub unsafe fn _mm_max_epi8 ( a : i8x16 , b : i8x16 ) -> i8x16 {
216
216
pmaxsb ( a, b)
217
217
}
@@ -220,7 +220,7 @@ pub unsafe fn _mm_max_epi8(a: i8x16, b: i8x16) -> i8x16 {
220
220
/// maximum.
221
221
#[ inline( always) ]
222
222
#[ target_feature = "+sse4.1" ]
223
- #[ cfg_attr( test, assert_instr( pmaxuw, imm8 = 0 ) ) ]
223
+ #[ cfg_attr( test, assert_instr( pmaxuw) ) ]
224
224
pub unsafe fn _mm_max_epu16 ( a : u16x8 , b : u16x8 ) -> u16x8 {
225
225
pmaxuw ( a, b)
226
226
}
@@ -229,7 +229,7 @@ pub unsafe fn _mm_max_epu16(a: u16x8, b: u16x8) -> u16x8 {
229
229
/// values.
230
230
#[ inline( always) ]
231
231
#[ target_feature = "+sse4.1" ]
232
- #[ cfg_attr( test, assert_instr( pmaxsd, imm8 = 0 ) ) ]
232
+ #[ cfg_attr( test, assert_instr( pmaxsd) ) ]
233
233
pub unsafe fn _mm_max_epi32 ( a : i32x4 , b : i32x4 ) -> i32x4 {
234
234
pmaxsd ( a, b)
235
235
}
@@ -238,11 +238,28 @@ pub unsafe fn _mm_max_epi32(a: i32x4, b: i32x4) -> i32x4 {
238
238
/// maximum values.
239
239
#[ inline( always) ]
240
240
#[ target_feature = "+sse4.1" ]
241
- #[ cfg_attr( test, assert_instr( pmaxud, imm8 = 0 ) ) ]
241
+ #[ cfg_attr( test, assert_instr( pmaxud) ) ]
242
242
pub unsafe fn _mm_max_epu32 ( a : u32x4 , b : u32x4 ) -> u32x4 {
243
243
pmaxud ( a, b)
244
244
}
245
245
246
+ /// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers using unsigned saturation
247
+ #[ inline( always) ]
248
+ #[ target_feature = "+sse4.1" ]
249
+ #[ cfg_attr( test, assert_instr( packusdw) ) ]
250
+ pub unsafe fn _mm_packus_epi32 ( a : i32x4 , b : i32x4 ) -> u16x8 {
251
+ packusdw ( a, b)
252
+ }
253
+
254
+ /// Compare packed 64-bit integers in `a` and `b` for equality
255
+ #[ inline( always) ]
256
+ #[ target_feature = "+sse4.1" ]
257
+ #[ cfg_attr( test, assert_instr( pcmpeqq) ) ]
258
+ pub unsafe fn _mm_cmpeq_epi64 ( a : i64x2 , b : i64x2 ) -> i64x2 {
259
+ a. eq ( b)
260
+ }
261
+
262
+
246
263
/// Returns the dot product of two f64x2 vectors.
247
264
///
248
265
/// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
@@ -510,6 +527,8 @@ extern "C" {
510
527
fn pmaxsd ( a : i32x4 , b : i32x4 ) -> i32x4 ;
511
528
#[ link_name = "llvm.x86.sse41.pmaxud" ]
512
529
fn pmaxud ( a : u32x4 , b : u32x4 ) -> u32x4 ;
530
+ #[ link_name = "llvm.x86.sse41.packusdw" ]
531
+ fn packusdw ( a : i32x4 , b : i32x4 ) -> u16x8 ;
513
532
#[ link_name = "llvm.x86.sse41.dppd" ]
514
533
fn dppd ( a : f64x2 , b : f64x2 , imm8 : u8 ) -> f64x2 ;
515
534
#[ link_name = "llvm.x86.sse41.dpps" ]
@@ -723,6 +742,24 @@ mod tests {
723
742
assert_eq ! ( r, e) ;
724
743
}
725
744
745
+ #[ simd_test = "sse4.1" ]
746
+ unsafe fn _mm_packus_epi32 ( ) {
747
+ let a = i32x4:: new ( 1 , 2 , 3 , 4 ) ;
748
+ let b = i32x4:: new ( -1 , -2 , -3 , -4 ) ;
749
+ let r = sse41:: _mm_packus_epi32 ( a, b) ;
750
+ let e = u16x8:: new ( 1 , 2 , 3 , 4 , 0 , 0 , 0 , 0 ) ;
751
+ assert_eq ! ( r, e) ;
752
+ }
753
+
754
+ #[ simd_test = "sse4.1" ]
755
+ unsafe fn _mm_cmpeq_epi64 ( ) {
756
+ let a = i64x2:: new ( 0 , 1 ) ;
757
+ let b = i64x2:: new ( 0 , 0 ) ;
758
+ let r = sse41:: _mm_cmpeq_epi64 ( a, b) ;
759
+ let e = i64x2:: new ( 0xFFFFFFFFFFFFFFFF , 0x0 ) ;
760
+ assert_eq ! ( r, e) ;
761
+ }
762
+
726
763
#[ simd_test = "sse4.1" ]
727
764
unsafe fn _mm_dp_pd ( ) {
728
765
let a = f64x2:: new ( 2.0 , 3.0 ) ;
0 commit comments