Add _mm_cvtepi8_epi16

p32blo · BurntSushi · commit e4c17a71e864 · 2017-11-06T07:17:27.000-05:00
diff --git a/src/x86/sse41.rs b/src/x86/sse41.rs
@@ -4,6 +4,7 @@ use std::mem;
 
 #[cfg(test)]
 use stdsimd_test::assert_instr;
+use simd_llvm::{simd_cast, simd_shuffle8};
 
 use v128::*;
 
@@ -259,6 +260,12 @@ pub unsafe fn _mm_cmpeq_epi64(a: i64x2, b: i64x2) -> i64x2 {
     a.eq(b)
 }
 
+/// Sign extend the packed 8-bit integers in lanes 0-7 of `a` to packed 16-bit integers.
+#[target_feature = "+sse4.1"]
+#[cfg_attr(test, assert_instr(pmovsxbw))]
+pub unsafe fn _mm_cvtepi8_epi16(a: i8x16) -> i16x8 {
+    simd_cast::<::v64::i8x8, _>(simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]))
+}
 
 /// Returns the dot product of two f64x2 vectors.
 ///
@@ -756,7 +763,19 @@ mod tests {
         let a = i64x2::new(0, 1);
         let b = i64x2::new(0, 0);
         let r = sse41::_mm_cmpeq_epi64(a, b);
-        let e = i64x2::new(0xFFFFFFFFFFFFFFFF, 0x0);
+        let e = i64x2::new(-1, 0);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "sse4.1"]
+    unsafe fn _mm_cvtepi8_epi16() {
+        let a = i8x16::splat(10); // non-negative lanes must widen unchanged
+        let r = sse41::_mm_cvtepi8_epi16(a);
+        let e = i16x8::splat(10);
+        assert_eq!(r, e);
+        let a = i8x16::splat(-10); // negative lanes must keep their sign when widened
+        let r = sse41::_mm_cvtepi8_epi16(a);
+        let e = i16x8::splat(-10);
         assert_eq!(r, e);
     }