Skip to content

Commit 9e9185a

Browse files
p32blognzlbg
authored and committed
Add _mm_cvtepi32_epi64 and fix typo
1 parent 46aceed commit 9e9185a

File tree

1 file changed

+21
-1
lines changed

1 file changed

+21
-1
lines changed

src/x86/sse41.rs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,14 +330,22 @@ pub unsafe fn _mm_cvtepi16_epi32(a: i16x8) -> i32x4 {
330330
simd_shuffle4::<_, ::v64::i16x4>(a, a, [0, 1, 2, 3]).as_i32x4()
331331
}
332332

333-
/// Sign extend packed 16-bit integers in a to packed 64-bit integers
333+
/// Sign extend packed 16-bit integers in `a` to packed 64-bit integers.
///
/// Only the two lowest lanes of `a` (indices 0 and 1) are converted; the
/// remaining six lanes do not contribute to the result.
334334
#[inline(always)]
335335
#[target_feature = "+sse4.1"]
336336
#[cfg_attr(test, assert_instr(pmovsxwq))]
337337
pub unsafe fn _mm_cvtepi16_epi64(a: i16x8) -> i64x2 {
338338
simd_shuffle2::<_, ::v32::i16x2>(a, a, [0, 1]).as_i64x2()
339339
}
340340

341+
/// Sign extend packed 32-bit integers in `a` to packed 64-bit integers.
///
/// Only the two lowest lanes of `a` (indices 0 and 1) are converted; the
/// upper two lanes do not contribute to the result.
342+
#[inline(always)]
343+
#[target_feature = "+sse4.1"]
344+
#[cfg_attr(test, assert_instr(pmovsxdq))]
345+
pub unsafe fn _mm_cvtepi32_epi64(a: i32x4) -> i64x2 {
346+
simd_shuffle2::<_, ::v64::i32x2>(a, a, [0, 1]).as_i64x2()
347+
}
348+
341349
/// Returns the dot product of two f64x2 vectors.
342350
///
343351
/// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
@@ -1020,6 +1028,18 @@ mod tests {
10201028
let e = i64x2::splat(-10);
10211029
assert_eq!(r, e);
10221030
}
1031+
1032+
// Checks that `_mm_cvtepi32_epi64` widens both positive and negative inputs
// to the same numeric value.
// NOTE(review): every input lane holds the same value (`splat`), so this test
// cannot detect a wrong lane selection; distinct per-lane values would.
#[simd_test = "sse4.1"]
1033+
unsafe fn _mm_cvtepi32_epi64() {
// Positive case: the value must be unchanged after widening.
1034+
let a = i32x4::splat(10);
1035+
let r = sse41::_mm_cvtepi32_epi64(a);
1036+
let e = i64x2::splat(10);
1037+
assert_eq!(r, e);
// Negative case: the result stays -10, so the sign is extended rather than
// the upper bits being zero-filled.
1038+
let a = i32x4::splat(-10);
1039+
let r = sse41::_mm_cvtepi32_epi64(a);
1040+
let e = i64x2::splat(-10);
1041+
assert_eq!(r, e);
1042+
}
10231043

10241044
#[simd_test = "sse4.1"]
10251045
unsafe fn _mm_dp_pd() {

0 commit comments

Comments
 (0)