 //! - [x] __sadd8
 //! - [x] __shadd8
 //! - [x] __shsub8
-//! - [ ] __ssub8
+//! - [x] __ssub8
 //! - [ ] __uadd8
 //! - [ ] __uhadd8
 //! - [ ] __uhsub8
 //! - [ ] __uqadd8
 //! - [ ] __uqsub8
-//! - [ ] __usub8
+//! - [x] __usub8
 //! - [x] __usad8
 //! - [x] __usada8
 //! - [x] __qadd16
@@ -130,6 +130,12 @@ extern "C" {
     #[link_name = "llvm.arm.shsub8"]
     fn arm_shsub8(a: i32, b: i32) -> i32;
 
+    #[link_name = "llvm.arm.ssub8"]
+    fn arm_ssub8(a: i32, b: i32) -> i32;
+
+    #[link_name = "llvm.arm.usub8"]
+    fn arm_usub8(a: i32, b: i32) -> i32;
+
     #[link_name = "llvm.arm.shsub16"]
     fn arm_shsub16(a: i32, b: i32) -> i32;
 
@@ -337,6 +343,39 @@ pub unsafe fn __shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
     dsp_call!(arm_shsub8, a, b)
 }
 
+/// Inserts a `USUB8` instruction.
+///
+/// Returns the 8-bit unsigned equivalent of
+///
+/// res\[0\] = a\[0\] - b\[0\]
+/// res\[1\] = a\[1\] - b\[1\]
+/// res\[2\] = a\[2\] - b\[2\]
+/// res\[3\] = a\[3\] - b\[3\]
+///
+/// where \[0\] is the lower 8 bits and \[3\] is the upper 8 bits.
+#[inline]
+#[cfg_attr(test, assert_instr(usub8))]
+pub unsafe fn __usub8(a: uint8x4_t, b: uint8x4_t) -> uint8x4_t {
+    dsp_call!(arm_usub8, a, b)
+}
+
+/// Inserts a `SSUB8` instruction.
+///
+/// Returns the 8-bit signed equivalent of
+///
+/// res\[0\] = a\[0\] - b\[0\]
+/// res\[1\] = a\[1\] - b\[1\]
+/// res\[2\] = a\[2\] - b\[2\]
+/// res\[3\] = a\[3\] - b\[3\]
+///
+/// where \[0\] is the lower 8 bits and \[3\] is the upper 8 bits.
+/// The GE bits of the APSR are set.
+#[inline]
+#[cfg_attr(test, assert_instr(ssub8))]
+pub unsafe fn __ssub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
+    dsp_call!(arm_ssub8, a, b)
+}
+
 /// Signed halving parallel halfword-wise subtraction.
 ///
 /// Returns the 16-bit signed equivalent of
@@ -427,7 +466,7 @@ pub unsafe fn __usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 {
 
 #[cfg(test)]
 mod tests {
-    use crate::core_arch::simd::{i16x2, i8x4};
+    use crate::core_arch::simd::{i16x2, i8x4, u8x4};
     use std::mem::transmute;
    use stdsimd_test::simd_test;
 
@@ -596,6 +635,28 @@ mod tests {
         }
     }
 
+    #[test]
+    fn ssub8() {
+        unsafe {
+            let a = i8x4::new(1, 2, 3, 4);
+            let b = i8x4::new(5, 4, 3, 2);
+            let c = i8x4::new(-4, -2, 0, 2);
+            let r: i8x4 = dsp_call!(super::__ssub8, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
+    #[test]
+    fn usub8() {
+        unsafe {
+            let a = u8x4::new(1, 2, 3, 4);
+            let b = u8x4::new(5, 4, 3, 2);
+            let c = u8x4::new(252, 254, 0, 2);
+            let r: u8x4 = dsp_call!(super::__usub8, a, b);
+            assert_eq!(r, c);
+        }
+    }
+
     #[test]
     fn shsub16() {
         unsafe {