@@ -4,7 +4,7 @@ use std::mem;
4
4
5
5
#[ cfg( test) ]
6
6
use stdsimd_test:: assert_instr;
7
- use simd_llvm:: { simd_cast, simd_shuffle4, simd_shuffle8} ;
7
+ use simd_llvm:: { simd_cast, simd_shuffle2 , simd_shuffle4, simd_shuffle8} ;
8
8
9
9
use v128:: * ;
10
10
@@ -261,20 +261,32 @@ pub unsafe fn _mm_cmpeq_epi64(a: i64x2, b: i64x2) -> i64x2 {
261
261
}
262
262
263
263
/// Sign extend the low eight packed 8-bit integers in `a` to packed 16-bit
/// integers.
///
/// Only lanes 0 through 7 of `a` are selected (shuffle indices `[0..=7]`),
/// producing a `::v64::i8x8`; that 8-lane vector is then widened to `i16x8`
/// with `simd_cast`. Lanes 8 through 15 of `a` do not affect the result.
///
/// Under `cfg(test)` the function is annotated with
/// `assert_instr(pmovsxbw)`.
///
/// # Safety
///
/// Declared `unsafe` and compiled with the `+sse4.1` target feature;
/// presumably the caller must ensure SSE4.1 is available on the running
/// CPU (TODO confirm against the crate-level contract).
264
+ #[ inline( always) ]
264
265
#[ target_feature = "+sse4.1" ]
265
266
#[ cfg_attr( test, assert_instr( pmovsxbw) ) ]
266
267
pub unsafe fn _mm_cvtepi8_epi16 ( a : i8x16 ) -> i16x8 {
267
268
simd_cast :: < :: v64:: i8x8 , _ > ( simd_shuffle8 ( a, a, [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] ) )
268
269
}
269
270
270
271
/// Sign extend the low four packed 8-bit integers in `a` to packed 32-bit
/// integers.
///
/// All sixteen lanes of `a` are first widened to a `::v512::i32x16` with
/// `simd_cast`; lanes 0 through 3 of that intermediate vector are then
/// selected (shuffle indices `[0, 1, 2, 3]`) to form the `i32x4` result.
/// Lanes 4 through 15 of `a` do not affect the result.
///
/// Under `cfg(test)` the function is annotated with
/// `assert_instr(pmovsxbd)`.
///
/// # Safety
///
/// Declared `unsafe` and compiled with the `+sse4.1` target feature;
/// presumably the caller must ensure SSE4.1 is available on the running
/// CPU (TODO confirm against the crate-level contract).
272
+ #[ inline( always) ]
271
273
#[ target_feature = "+sse4.1" ]
272
274
#[ cfg_attr( test, assert_instr( pmovsxbd) ) ]
273
275
pub unsafe fn _mm_cvtepi8_epi32 ( a : i8x16 ) -> i32x4 {
274
276
// NOTE(review): unlike `_mm_cvtepi8_epi16`, this widens the full vector
// before selecting lanes rather than selecting first - presumably because
// a 4-lane `i8` vector type was not available; confirm.
let cast = simd_cast :: < _ , :: v512:: i32x16 > ( a) ;
275
277
// Keep only the four lowest widened lanes.
simd_shuffle4 ( cast, cast, [ 0 , 1 , 2 , 3 ] )
276
278
}
277
279
280
+ /// Sign extend packed 8-bit integers in the low 2 bytes of `a` to packed 64-bit integers
281
+ /*
282
+ #[inline(always)]
283
+ #[target_feature = "+sse4.1"]
284
+ #[cfg_attr(test, assert_instr(pmovsxbq))]
285
+ pub unsafe fn _mm_cvtepi8_epi64(a: i8x16) -> i64x2 {
286
+ simd_cast::<::v16::i8x2, _>(simd_shuffle2(a, a, [0, 1]))
287
+ }
288
+ */
289
+
278
290
/// Returns the dot product of two f64x2 vectors.
279
291
///
280
292
/// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
@@ -799,6 +811,20 @@ mod tests {
799
811
assert_eq ! ( r, e) ;
800
812
}
801
813
814
+ /*
815
+ #[simd_test = "sse4.1"]
816
+ unsafe fn _mm_cvtepi8_epi64() {
817
+ let a = i8x16::splat(10);
818
+ let r = sse41::_mm_cvtepi8_epi64(a);
819
+ let e = i64x2::splat(10);
820
+ assert_eq!(r, e);
821
+ let a = i8x16::splat(-10);
822
+ let r = sse41::_mm_cvtepi8_epi64(a);
823
+ let e = i64x2::splat(-10);
824
+ assert_eq!(r, e);
825
+ }
826
+ */
827
+
802
828
#[ simd_test = "sse4.1" ]
803
829
unsafe fn _mm_dp_pd ( ) {
804
830
let a = f64x2:: new ( 2.0 , 3.0 ) ;
0 commit comments