Skip to content

Commit 06e4f74

Browse files
TSPMP authored and alexcrichton committed
Implement all addition MMX intrinsics (rust-lang#266)
* Implement `_mm_add_pi16`
* Implement `_mm_add_pi8`
* Implement `_mm_add_pi32`
* Implement `_mm_adds_pi16`
* Implement `_mm_adds_pi8`
* Implement `_mm_adds_pu8`
* Implement `_mm_adds_pu16`
1 parent af8d8f5 commit 06e4f74

File tree

1 file changed

+140
-1
lines changed

1 file changed

+140
-1
lines changed

coresimd/src/x86/i686/mmx.rs

Lines changed: 140 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,62 @@ pub unsafe fn _mm_setzero_si64() -> __m64 {
2424
mem::transmute(0_i64)
2525
}
2626

27+
/// Add packed 8-bit integers in `a` and `b`.
///
/// Forwards both operands unchanged to the `paddb` binding, which this file
/// declares against the LLVM intrinsic `llvm.x86.mmx.padd.b`.
///
/// NOTE(review): this is the non-saturating form; a lane that overflows
/// presumably wraps around as the hardware instruction does — confirm
/// against the Intel reference.
///
/// # Safety
///
/// NOTE(review): gated on the `mmx` target feature; presumably the caller
/// must ensure the running CPU supports MMX — confirm.
28+
#[inline(always)]
29+
#[target_feature = "+mmx"]
30+
#[cfg_attr(test, assert_instr(paddb))]
31+
pub unsafe fn _mm_add_pi8(a: __m64, b: __m64) -> __m64 {
32+
paddb(a, b)
33+
}
34+
35+
/// Add packed 16-bit integers in `a` and `b`.
///
/// Forwards both operands unchanged to the `paddw` binding, which this file
/// declares against the LLVM intrinsic `llvm.x86.mmx.padd.w`.
///
/// NOTE(review): this is the non-saturating form; a lane that overflows
/// presumably wraps around as the hardware instruction does — confirm
/// against the Intel reference.
///
/// # Safety
///
/// NOTE(review): gated on the `mmx` target feature; presumably the caller
/// must ensure the running CPU supports MMX — confirm.
36+
#[inline(always)]
37+
#[target_feature = "+mmx"]
38+
#[cfg_attr(test, assert_instr(paddw))]
39+
pub unsafe fn _mm_add_pi16(a: __m64, b: __m64) -> __m64 {
40+
paddw(a, b)
41+
}
42+
43+
/// Add packed 32-bit integers in `a` and `b`.
///
/// Forwards both operands unchanged to the `paddd` binding, which this file
/// declares against the LLVM intrinsic `llvm.x86.mmx.padd.d`.
///
/// NOTE(review): this is the non-saturating form; a lane that overflows
/// presumably wraps around as the hardware instruction does — confirm
/// against the Intel reference.
///
/// # Safety
///
/// NOTE(review): gated on the `mmx` target feature; presumably the caller
/// must ensure the running CPU supports MMX — confirm.
44+
#[inline(always)]
45+
#[target_feature = "+mmx"]
46+
#[cfg_attr(test, assert_instr(paddd))]
47+
pub unsafe fn _mm_add_pi32(a: __m64, b: __m64) -> __m64 {
48+
paddd(a, b)
49+
}
50+
51+
/// Add packed 8-bit integers in `a` and `b` using saturation.
///
/// Forwards both operands unchanged to the `paddsb` binding, which this file
/// declares against the LLVM intrinsic `llvm.x86.mmx.padds.b`.
///
/// The accompanying unit test shows the clamping on signed lanes:
/// `-100 + -100` yields `i8::min_value()` and `100 + 100` yields
/// `i8::max_value()`.
///
/// # Safety
///
/// NOTE(review): gated on the `mmx` target feature; presumably the caller
/// must ensure the running CPU supports MMX — confirm.
52+
#[inline(always)]
53+
#[target_feature = "+mmx"]
54+
#[cfg_attr(test, assert_instr(paddsb))]
55+
pub unsafe fn _mm_adds_pi8(a: __m64, b: __m64) -> __m64 {
56+
paddsb(a, b)
57+
}
58+
59+
/// Add packed 16-bit integers in `a` and `b` using saturation.
///
/// Forwards both operands unchanged to the `paddsw` binding, which this file
/// declares against the LLVM intrinsic `llvm.x86.mmx.padds.w`.
///
/// The accompanying unit test shows the clamping on signed lanes:
/// `-32000 + -32000` yields `i16::min_value()` and `32000 + 32000` yields
/// `i16::max_value()`.
///
/// # Safety
///
/// NOTE(review): gated on the `mmx` target feature; presumably the caller
/// must ensure the running CPU supports MMX — confirm.
60+
#[inline(always)]
61+
#[target_feature = "+mmx"]
62+
#[cfg_attr(test, assert_instr(paddsw))]
63+
pub unsafe fn _mm_adds_pi16(a: __m64, b: __m64) -> __m64 {
64+
paddsw(a, b)
65+
}
66+
67+
/// Add packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// Forwards both operands unchanged to the `paddusb` binding, which this
/// file declares against the LLVM intrinsic `llvm.x86.mmx.paddus.b`.
///
/// The accompanying unit test shows the clamping on unsigned lanes:
/// `200 + 200` yields `u8::max_value()`.
///
/// # Safety
///
/// NOTE(review): gated on the `mmx` target feature; presumably the caller
/// must ensure the running CPU supports MMX — confirm.
68+
#[inline(always)]
69+
#[target_feature = "+mmx"]
70+
#[cfg_attr(test, assert_instr(paddusb))]
71+
pub unsafe fn _mm_adds_pu8(a: __m64, b: __m64) -> __m64 {
72+
paddusb(a, b)
73+
}
74+
75+
/// Add packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// Forwards both operands unchanged to the `paddusw` binding, which this
/// file declares against the LLVM intrinsic `llvm.x86.mmx.paddus.w`.
///
/// The accompanying unit test shows the clamping on unsigned lanes:
/// `60000 + 60000` yields `u16::max_value()`.
///
/// # Safety
///
/// NOTE(review): gated on the `mmx` target feature; presumably the caller
/// must ensure the running CPU supports MMX — confirm.
76+
#[inline(always)]
77+
#[target_feature = "+mmx"]
78+
#[cfg_attr(test, assert_instr(paddusw))]
79+
pub unsafe fn _mm_adds_pu16(a: __m64, b: __m64) -> __m64 {
80+
paddusw(a, b)
81+
}
82+
2783
/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
2884
/// using signed saturation.
2985
///
@@ -131,6 +187,20 @@ pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 {
131187

132188
#[allow(improper_ctypes)]
133189
extern "C" {
190+
#[link_name = "llvm.x86.mmx.padd.b"]
191+
fn paddb(a: __m64, b: __m64) -> __m64;
192+
#[link_name = "llvm.x86.mmx.padd.w"]
193+
fn paddw(a: __m64, b: __m64) -> __m64;
194+
#[link_name = "llvm.x86.mmx.padd.d"]
195+
fn paddd(a: __m64, b: __m64) -> __m64;
196+
#[link_name = "llvm.x86.mmx.padds.b"]
197+
fn paddsb(a: __m64, b: __m64) -> __m64;
198+
#[link_name = "llvm.x86.mmx.padds.w"]
199+
fn paddsw(a: __m64, b: __m64) -> __m64;
200+
#[link_name = "llvm.x86.mmx.paddus.b"]
201+
fn paddusb(a: __m64, b: __m64) -> __m64;
202+
#[link_name = "llvm.x86.mmx.paddus.w"]
203+
fn paddusw(a: __m64, b: __m64) -> __m64;
134204
#[link_name = "llvm.x86.mmx.packsswb"]
135205
fn packsswb(a: __m64, b: __m64) -> __m64;
136206
#[link_name = "llvm.x86.mmx.packssdw"]
@@ -157,7 +227,7 @@ extern "C" {
157227

158228
#[cfg(test)]
159229
mod tests {
160-
use v64::{__m64, i16x4, i32x2, i8x8};
230+
use v64::{__m64, i16x4, i32x2, i8x8, u16x4, u8x8};
161231
use x86::i686::mmx;
162232
use stdsimd_test::simd_test;
163233

@@ -167,6 +237,75 @@ mod tests {
167237
assert_eq!(r, mmx::_mm_setzero_si64());
168238
}
169239

240+
// Checks lane-wise 8-bit addition. The inputs are chosen so that two sums
// land exactly on the i8 limits (-1 + -127 = -128, 1 + 126 = 127) without
// any lane overflowing; overflow behaviour is not exercised here.
#[simd_test = "mmx"]
241+
unsafe fn _mm_add_pi8() {
242+
let a = i8x8::new(-1, -1, 1, 1, -1, 0, 1, 0);
243+
let b = i8x8::new(-127, 101, 99, 126, 0, -1, 0, 1);
244+
let r = i8x8::from(mmx::_mm_add_pi8(a.into(), b.into()));
245+
let e = i8x8::new(-128, 100, 100, 127, -1, -1, 1, 1);
246+
assert_eq!(r, e);
247+
}
248+
249+
// Checks lane-wise 16-bit addition. The outer lanes step from one short of
// each i16 limit onto the limit itself; the inner lanes move 30001 / -30001
// back to 30000 / -30000. No lane overflows, so wrap-around is not
// exercised here.
#[simd_test = "mmx"]
250+
unsafe fn _mm_add_pi16() {
251+
let a = i16x4::new(-1, -1, 1, 1);
252+
let b = i16x4::new(
253+
i16::min_value() + 1,
254+
30001,
255+
-30001,
256+
i16::max_value() - 1,
257+
);
258+
let r = i16x4::from(mmx::_mm_add_pi16(a.into(), b.into()));
259+
let e = i16x4::new(i16::min_value(), 30000, -30000, i16::max_value());
260+
assert_eq!(r, e);
261+
}
262+
263+
// Checks lane-wise 32-bit addition. Each lane starts one short of an i32
// limit and is nudged onto it (max - 1 + 1, min + 1 - 1); no lane overflows.
#[simd_test = "mmx"]
264+
unsafe fn _mm_add_pi32() {
265+
let a = i32x2::new(1, -1);
266+
let b = i32x2::new(i32::max_value() - 1, i32::min_value() + 1);
267+
let r = i32x2::from(mmx::_mm_add_pi32(a.into(), b.into()));
268+
let e = i32x2::new(i32::max_value(), i32::min_value());
269+
assert_eq!(r, e);
270+
}
271+
272+
// Checks signed saturating 8-bit addition. Lane 0 (-100 + -100) must clamp
// to i8::min_value() and lane 3 (100 + 100) to i8::max_value(); the
// remaining lanes stay in range and are expected to add normally.
#[simd_test = "mmx"]
273+
unsafe fn _mm_adds_pi8() {
274+
let a = i8x8::new(-100, -1, 1, 100, -1, 0, 1, 0);
275+
let b = i8x8::new(-100, 1, -1, 100, 0, -1, 0, 1);
276+
let r = i8x8::from(mmx::_mm_adds_pi8(a.into(), b.into()));
277+
let e =
278+
i8x8::new(i8::min_value(), 0, 0, i8::max_value(), -1, -1, 1, 1);
279+
assert_eq!(r, e);
280+
}
281+
282+
// Checks signed saturating 16-bit addition. Lane 0 (-32000 + -32000) must
// clamp to i16::min_value() and lane 1 (32000 + 32000) to
// i16::max_value(); lanes 2 and 3 stay in range (4 + -5 = -1, 0 + 1 = 1).
#[simd_test = "mmx"]
283+
unsafe fn _mm_adds_pi16() {
284+
let a = i16x4::new(-32000, 32000, 4, 0);
285+
let b = i16x4::new(-32000, 32000, -5, 1);
286+
let r = i16x4::from(mmx::_mm_adds_pi16(a.into(), b.into()));
287+
let e = i16x4::new(i16::min_value(), i16::max_value(), -1, 1);
288+
assert_eq!(r, e);
289+
}
290+
291+
// Checks unsigned saturating 8-bit addition. The first seven lanes stay in
// range and add normally; the last lane (200 + 200) must clamp to
// u8::max_value().
#[simd_test = "mmx"]
292+
unsafe fn _mm_adds_pu8() {
293+
let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 200);
294+
let b = u8x8::new(0, 10, 20, 30, 40, 50, 60, 200);
295+
let r = u8x8::from(mmx::_mm_adds_pu8(a.into(), b.into()));
296+
let e = u8x8::new(0, 11, 22, 33, 44, 55, 66, u8::max_value());
297+
assert_eq!(r, e);
298+
}
299+
300+
// Checks unsigned saturating 16-bit addition. The first three lanes stay in
// range and add normally; the last lane (60000 + 60000) must clamp to
// u16::max_value().
#[simd_test = "mmx"]
301+
unsafe fn _mm_adds_pu16() {
302+
let a = u16x4::new(0, 1, 2, 60000);
303+
let b = u16x4::new(0, 10, 20, 60000);
304+
let r = u16x4::from(mmx::_mm_adds_pu16(a.into(), b.into()));
305+
let e = u16x4::new(0, 11, 22, u16::max_value());
306+
assert_eq!(r, e);
307+
}
308+
170309
#[simd_test = "mmx"]
171310
unsafe fn _mm_packs_pi16() {
172311
let a = i16x4::new(-1, 2, -3, 4);

0 commit comments

Comments
 (0)