Skip to content

Commit f0c7a7d

Browse files
paolotetignzlbg
authored and committed
ACLE/SIMD32: add ssub8 and usub8
- add `ssub8` and `usub8` - bump instruction limit to 29
1 parent c0c0d43 commit f0c7a7d

File tree

2 files changed

+65
-4
lines changed

2 files changed

+65
-4
lines changed

crates/core_arch/src/acle/simd32.rs

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@
1616
//! - [x] __sadd8
1717
//! - [x] __shadd8
1818
//! - [x] __shsub8
19-
//! - [ ] __ssub8
19+
//! - [x] __ssub8
2020
//! - [ ] __uadd8
2121
//! - [ ] __uhadd8
2222
//! - [ ] __uhsub8
2323
//! - [ ] __uqadd8
2424
//! - [ ] __uqsub8
25-
//! - [ ] __usub8
25+
//! - [x] __usub8
2626
//! - [x] __usad8
2727
//! - [x] __usada8
2828
//! - [x] __qadd16
@@ -130,6 +130,12 @@ extern "C" {
130130
#[link_name = "llvm.arm.shsub8"]
131131
fn arm_shsub8(a: i32, b: i32) -> i32;
132132

133+
#[link_name = "llvm.arm.ssub8"]
134+
fn arm_ssub8(a: i32, b: i32) -> i32;
135+
136+
#[link_name = "llvm.arm.usub8"]
137+
fn arm_usub8(a: i32, b: i32) -> i32;
138+
133139
#[link_name = "llvm.arm.shsub16"]
134140
fn arm_shsub16(a: i32, b: i32) -> i32;
135141

@@ -337,6 +343,39 @@ pub unsafe fn __shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
337343
dsp_call!(arm_shsub8, a, b)
338344
}
339345

346+
/// Inserts a `USUB8` instruction.
347+
///
348+
/// Returns the 8-bit unsigned equivalent of
349+
///
350+
/// res\[0\] = a\[0\] - b\[0\]
351+
/// res\[1\] = a\[1\] - b\[1\]
352+
/// res\[2\] = a\[2\] - b\[2\]
353+
/// res\[3\] = a\[3\] - b\[3\]
354+
///
355+
/// where [0] is the lower 8 bits and [3] is the upper 8 bits.
356+
#[inline]
357+
#[cfg_attr(test, assert_instr(usub8))]
358+
pub unsafe fn __usub8(a: uint8x4_t, b: uint8x4_t) -> uint8x4_t {
359+
dsp_call!(arm_usub8, a, b)
360+
}
361+
362+
/// Inserts a `SSUB8` instruction.
363+
///
364+
/// Returns the 8-bit signed equivalent of
365+
///
366+
/// res\[0\] = a\[0\] - b\[0\]
366+
/// res\[1\] = a\[1\] - b\[1\]
367+
/// res\[2\] = a\[2\] - b\[2\]
368+
/// res\[3\] = a\[3\] - b\[3\]
370+
///
371+
/// where [0] is the lower 8 bits and [3] is the upper 8 bits.
372+
/// The GE bits of the APSR are set.
373+
#[inline]
374+
#[cfg_attr(test, assert_instr(ssub8))]
375+
pub unsafe fn __ssub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
376+
dsp_call!(arm_ssub8, a, b)
377+
}
378+
340379
/// Signed halving parallel halfword-wise subtraction.
341380
///
342381
/// Returns the 16-bit signed equivalent of
@@ -427,7 +466,7 @@ pub unsafe fn __usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 {
427466

428467
#[cfg(test)]
429468
mod tests {
430-
use crate::core_arch::simd::{i16x2, i8x4};
469+
use crate::core_arch::simd::{i16x2, i8x4, u8x4};
431470
use std::mem::transmute;
432471
use stdsimd_test::simd_test;
433472

@@ -596,6 +635,28 @@ mod tests {
596635
}
597636
}
598637

638+
#[test]
639+
fn ssub8() {
640+
unsafe {
641+
let a = i8x4::new(1, 2, 3, 4);
642+
let b = i8x4::new(5, 4, 3, 2);
643+
let c = i8x4::new(-4, -2, 0, 2);
644+
let r: i8x4 = dsp_call!(super::__ssub8, a, b);
645+
assert_eq!(r, c);
646+
}
647+
}
648+
649+
#[test]
650+
fn usub8() {
651+
unsafe {
652+
let a = u8x4::new(1, 2, 3, 4);
653+
let b = u8x4::new(5, 4, 3, 2);
654+
let c = u8x4::new(252, 254, 0, 2);
655+
let r: u8x4 = dsp_call!(super::__usub8, a, b);
656+
assert_eq!(r, c);
657+
}
658+
}
659+
599660
#[test]
600661
fn shsub16() {
601662
unsafe {

crates/stdsimd-test/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
153153

154154
// core_arch/src/acle/simd32
155155
"usad8" => 27,
156-
"qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" => 29,
156+
"qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29,
157157

158158
// Original limit was 20 instructions, but ARM DSP Intrinsics
159159
// are exactly 20 instructions long. So, bump the limit to 22

0 commit comments

Comments
 (0)