Skip to content

Commit 321bd80

Browse files
p32blo authored and BurntSushi committed
Add _mm_cvtepi8_epi32
- This might be wrong since the cast and the shuffle needed to be inverted
1 parent e4c17a7 commit 321bd80

File tree

1 file changed

+22
-2
lines changed

1 file changed

+22
-2
lines changed

src/x86/sse41.rs

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::mem;
44

55
#[cfg(test)]
66
use stdsimd_test::assert_instr;
7-
use simd_llvm::{simd_cast, simd_shuffle8};
7+
use simd_llvm::{simd_cast, simd_shuffle4, simd_shuffle8};
88

99
use v128::*;
1010

@@ -260,13 +260,21 @@ pub unsafe fn _mm_cmpeq_epi64(a: i64x2, b: i64x2) -> i64x2 {
260260
a.eq(b)
261261
}
262262

263-
/// Sign extend packed 8-bit integers in a to packed 16-bit integers
263+
/// Sign extend packed 8-bit integers in `a` to packed 16-bit integers
///
/// Only lanes 0 through 7 of `a` are used: they are selected with a shuffle
/// into an `i8x8`, which is then cast to the `i16x8` result.
264264
#[target_feature = "+sse4.1"]
265265
#[cfg_attr(test, assert_instr(pmovsxbw))]
266266
pub unsafe fn _mm_cvtepi8_epi16(a: i8x16) -> i16x8 {
267267
simd_cast::<::v64::i8x8, _>(simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]))
268268
}
269269

270+
/// Sign extend packed 8-bit integers in `a` to packed 32-bit integers
///
/// Only lanes 0 through 3 of the converted vector are returned. Unlike
/// `_mm_cvtepi8_epi16`, which shuffles before casting, this function casts
/// the whole input to an `i32x16` first and shuffles afterwards.
// NOTE(review): the commit message flags this inverted order as possibly
// wrong; confirm the `pmovsxbd` codegen assertion holds.
271+
#[target_feature = "+sse4.1"]
272+
#[cfg_attr(test, assert_instr(pmovsxbd))]
273+
pub unsafe fn _mm_cvtepi8_epi32(a: i8x16) -> i32x4 {
274+
let cast = simd_cast::<_, ::v512::i32x16>(a); // convert all 16 lanes to `i32` first
275+
simd_shuffle4(cast, cast, [0, 1, 2, 3]) // then keep only lanes 0 through 3
276+
}
277+
270278
/// Returns the dot product of two f64x2 vectors.
271279
///
272280
/// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
@@ -779,6 +787,18 @@ mod tests {
779787
assert_eq!(r, e);
780788
}
781789

790+
// Checks `sse41::_mm_cvtepi8_epi32` with one positive and one negative
// splat input; the negative case checks that the sign is kept when widening.
// NOTE(review): every input lane holds the same value, so this test does not
// verify which input lanes end up in the result.
#[simd_test = "sse4.1"]
791+
unsafe fn _mm_cvtepi8_epi32() {
792+
let a = i8x16::splat(10);
793+
let r = sse41::_mm_cvtepi8_epi32(a);
794+
let e = i32x4::splat(10);
795+
assert_eq!(r, e);
796+
let a = i8x16::splat(-10);
797+
let r = sse41::_mm_cvtepi8_epi32(a);
798+
let e = i32x4::splat(-10);
799+
assert_eq!(r, e);
800+
}
801+
782802
#[simd_test = "sse4.1"]
783803
unsafe fn _mm_dp_pd() {
784804
let a = f64x2::new(2.0, 3.0);

0 commit comments

Comments
 (0)