Skip to content

Commit 377126d

Browse files
p32blo and gnzlbg
authored and committed
Add remaining _mm_cvtep* intrinsics
1 parent d9794fc commit 377126d

File tree

3 files changed

+55
-6
lines changed

3 files changed

+55
-6
lines changed

src/lib.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -169,17 +169,19 @@ mod v32 {
169169

170170
define_ty! { i16x2, i16, i16 }
171171
define_impl! { i16x2, i16, 2, i16x2, x0, x1 }
172+
define_ty! { u16x2, u16, u16 }
173+
define_impl! { u16x2, u16, 2, i16x2, x0, x1 }
172174

173175
define_ty! { i8x4, i8, i8, i8, i8 }
174176
define_impl! { i8x4, i8, 4, i8x4, x0, x1, x2, x3 }
175-
176177
define_ty! { u8x4, u8, u8, u8, u8 }
177178
define_impl! { u8x4, u8, 4, i8x4, x0, x1, x2, x3 }
178179

179180
define_casts!(
181+
(i16x2, i64x2, as_i64x2),
182+
(u16x2, i64x2, as_i64x2),
180183
(i8x4, i32x4, as_i32x4),
181-
(u8x4, i32x4, as_i32x4),
182-
(i16x2, i64x2, as_i64x2)
184+
(u8x4, i32x4, as_i32x4)
183185
);
184186
}
185187

@@ -189,7 +191,6 @@ mod v16 {
189191

190192
define_ty! { i8x2, i8, i8 }
191193
define_impl! { i8x2, i8, 2, i8x2, x0, x1 }
192-
193194
define_ty! { u8x2, u8, u8 }
194195
define_impl! { u8x2, u8, 2, i8x2, x0, x1 }
195196

src/v64.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,9 @@ define_casts!(
6565
(i32x2, i64x2, as_i64x2),
6666
(u8x8, u16x8, as_u16x8),
6767
(u16x4, u32x4, as_u32x4),
68-
(u32x2, u64x2, as_u64x2)
68+
(u16x4, i32x4, as_i32x4),
69+
(u32x2, u64x2, as_u64x2),
70+
(u32x2, i64x2, as_i64x2)
6971
);
7072

7173
#[cfg(test)]

src/x86/sse41.rs

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,30 @@ pub unsafe fn _mm_cvtepu8_epi64(a: u8x16) -> i64x2 {
370370
simd_shuffle2::<_, ::v16::u8x2>(a, a, [0, 1]).as_i64x2()
371371
}
372372

373+
/// Zero extend packed unsigned 16-bit integers in `a` to packed 32-bit integers
374+
#[inline(always)]
375+
#[target_feature = "+sse4.1"]
376+
#[cfg_attr(test, assert_instr(pmovzxwd))]
377+
pub unsafe fn _mm_cvtepu16_epi32(a: u16x8) -> i32x4 {
378+
simd_shuffle4::<_, ::v64::u16x4>(a, a, [0, 1, 2, 3]).as_i32x4()
379+
}
380+
381+
/// Zero extend packed unsigned 16-bit integers in `a` to packed 64-bit integers
382+
#[inline(always)]
383+
#[target_feature = "+sse4.1"]
384+
#[cfg_attr(test, assert_instr(pmovzxwq))]
385+
pub unsafe fn _mm_cvtepu16_epi64(a: u16x8) -> i64x2 {
386+
simd_shuffle2::<_, ::v32::u16x2>(a, a, [0, 1]).as_i64x2()
387+
}
388+
389+
/// Zero extend packed unsigned 32-bit integers in `a` to packed 64-bit integers
390+
#[inline(always)]
391+
#[target_feature = "+sse4.1"]
392+
#[cfg_attr(test, assert_instr(pmovzxdq))]
393+
pub unsafe fn _mm_cvtepu32_epi64(a: u32x4) -> i64x2 {
394+
simd_shuffle2::<_, ::v64::u32x2>(a, a, [0, 1]).as_i64x2()
395+
}
396+
373397
/// Returns the dot product of two f64x2 vectors.
374398
///
375399
/// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
@@ -1081,15 +1105,37 @@ mod tests {
10811105
assert_eq!(r, e);
10821106
}
10831107

1084-
#[simd_test = "sse4.1"]
1108+
#[simd_test = "sse4.1"]
10851109
unsafe fn _mm_cvtepu8_epi64() {
10861110
let a = u8x16::splat(10);
10871111
let r = sse41::_mm_cvtepu8_epi64(a);
10881112
let e = i64x2::splat(10);
10891113
assert_eq!(r, e);
10901114
}
10911115

1116+
#[simd_test = "sse4.1"]
1117+
unsafe fn _mm_cvtepu16_epi32() {
1118+
let a = u16x8::splat(10);
1119+
let r = sse41::_mm_cvtepu16_epi32(a);
1120+
let e = i32x4::splat(10);
1121+
assert_eq!(r, e);
1122+
}
10921123

1124+
#[simd_test = "sse4.1"]
1125+
unsafe fn _mm_cvtepu16_epi64() {
1126+
let a = u16x8::splat(10);
1127+
let r = sse41::_mm_cvtepu16_epi64(a);
1128+
let e = i64x2::splat(10);
1129+
assert_eq!(r, e);
1130+
}
1131+
1132+
#[simd_test = "sse4.1"]
1133+
unsafe fn _mm_cvtepu32_epi64() {
1134+
let a = u32x4::splat(10);
1135+
let r = sse41::_mm_cvtepu32_epi64(a);
1136+
let e = i64x2::splat(10);
1137+
assert_eq!(r, e);
1138+
}
10931139

10941140
#[simd_test = "sse4.1"]
10951141
unsafe fn _mm_dp_pd() {

0 commit comments

Comments
 (0)