Skip to content

Commit 1ec97dc

Browse files
authored
Add more addition Arm intrinsics (#939)
1 parent a9e9acb commit 1ec97dc

File tree

3 files changed

+3513
-1542
lines changed

3 files changed

+3513
-1542
lines changed

crates/core_arch/src/aarch64/neon/mod.rs

Lines changed: 312 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,40 @@ extern "C" {
7979
#[link_name = "llvm.aarch64.neon.abs.v2i64"]
8080
fn vabsq_s64_(a: int64x2_t) -> int64x2_t;
8181

82+
#[link_name = "llvm.aarch64.neon.suqadd.v8i8"]
83+
fn vuqadd_s8_(a: int8x8_t, b: uint8x8_t) -> int8x8_t;
84+
#[link_name = "llvm.aarch64.neon.suqadd.v16i8"]
85+
fn vuqaddq_s8_(a: int8x16_t, b: uint8x16_t) -> int8x16_t;
86+
#[link_name = "llvm.aarch64.neon.suqadd.v4i16"]
87+
fn vuqadd_s16_(a: int16x4_t, b: uint16x4_t) -> int16x4_t;
88+
#[link_name = "llvm.aarch64.neon.suqadd.v8i16"]
89+
fn vuqaddq_s16_(a: int16x8_t, b: uint16x8_t) -> int16x8_t;
90+
#[link_name = "llvm.aarch64.neon.suqadd.v2i32"]
91+
fn vuqadd_s32_(a: int32x2_t, b: uint32x2_t) -> int32x2_t;
92+
#[link_name = "llvm.aarch64.neon.suqadd.v4i32"]
93+
fn vuqaddq_s32_(a: int32x4_t, b: uint32x4_t) -> int32x4_t;
94+
#[link_name = "llvm.aarch64.neon.suqadd.v1i64"]
95+
fn vuqadd_s64_(a: int64x1_t, b: uint64x1_t) -> int64x1_t;
96+
#[link_name = "llvm.aarch64.neon.suqadd.v2i64"]
97+
fn vuqaddq_s64_(a: int64x2_t, b: uint64x2_t) -> int64x2_t;
98+
99+
#[link_name = "llvm.aarch64.neon.usqadd.v8i8"]
100+
fn vsqadd_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
101+
#[link_name = "llvm.aarch64.neon.usqadd.v16i8"]
102+
fn vsqaddq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
103+
#[link_name = "llvm.aarch64.neon.usqadd.v4i16"]
104+
fn vsqadd_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
105+
#[link_name = "llvm.aarch64.neon.usqadd.v8i16"]
106+
fn vsqaddq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
107+
#[link_name = "llvm.aarch64.neon.usqadd.v2i32"]
108+
fn vsqadd_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
109+
#[link_name = "llvm.aarch64.neon.usqadd.v4i32"]
110+
fn vsqaddq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
111+
#[link_name = "llvm.aarch64.neon.usqadd.v1i64"]
112+
fn vsqadd_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
113+
#[link_name = "llvm.aarch64.neon.usqadd.v2i64"]
114+
fn vsqaddq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
115+
82116
#[link_name = "llvm.aarch64.neon.pmull64"]
83117
fn vmull_p64_(a: i64, b: i64) -> int8x16_t;
84118

@@ -348,6 +382,120 @@ pub unsafe fn vabsq_s64(a: int64x2_t) -> int64x2_t {
348382
vabsq_s64_(a)
349383
}
350384

385+
/// Signed saturating accumulate of unsigned value.
///
/// Adds each unsigned 8-bit lane of `b` to the matching signed lane of `a`,
/// clamping sums that do not fit in `i8`. Forwards directly to `vuqadd_s8_`,
/// the binding for `llvm.aarch64.neon.suqadd.v8i8`.
///
/// `unsafe` because of `#[target_feature(enable = "neon")]`: only call this
/// where NEON is available.
386+
#[inline]
387+
#[target_feature(enable = "neon")]
388+
#[cfg_attr(test, assert_instr(suqadd))]
389+
pub unsafe fn vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t {
390+
vuqadd_s8_(a, b)
391+
}
392+
/// Signed saturating accumulate of unsigned value.
///
/// Adds each unsigned 8-bit lane of `b` to the matching signed lane of `a`
/// (16 lanes), clamping sums that do not fit in `i8`. Forwards directly to
/// `vuqaddq_s8_`, the binding for `llvm.aarch64.neon.suqadd.v16i8`.
///
/// `unsafe` because of `#[target_feature(enable = "neon")]`: only call this
/// where NEON is available.
393+
#[inline]
394+
#[target_feature(enable = "neon")]
395+
#[cfg_attr(test, assert_instr(suqadd))]
396+
pub unsafe fn vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t {
397+
vuqaddq_s8_(a, b)
398+
}
399+
/// Signed saturating accumulate of unsigned value.
///
/// Adds each unsigned 16-bit lane of `b` to the matching signed lane of `a`,
/// clamping sums that do not fit in `i16`. Forwards directly to
/// `vuqadd_s16_`, the binding for `llvm.aarch64.neon.suqadd.v4i16`.
///
/// `unsafe` because of `#[target_feature(enable = "neon")]`: only call this
/// where NEON is available.
400+
#[inline]
401+
#[target_feature(enable = "neon")]
402+
#[cfg_attr(test, assert_instr(suqadd))]
403+
pub unsafe fn vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t {
404+
vuqadd_s16_(a, b)
405+
}
406+
/// Signed saturating accumulate of unsigned value.
///
/// Adds each unsigned 16-bit lane of `b` to the matching signed lane of `a`
/// (8 lanes), clamping sums that do not fit in `i16`. Forwards directly to
/// `vuqaddq_s16_`, the binding for `llvm.aarch64.neon.suqadd.v8i16`.
///
/// `unsafe` because of `#[target_feature(enable = "neon")]`: only call this
/// where NEON is available.
407+
#[inline]
408+
#[target_feature(enable = "neon")]
409+
#[cfg_attr(test, assert_instr(suqadd))]
410+
pub unsafe fn vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t {
411+
vuqaddq_s16_(a, b)
412+
}
413+
/// Signed saturating accumulate of unsigned value.
///
/// Adds each unsigned 32-bit lane of `b` to the matching signed lane of `a`,
/// clamping sums that do not fit in `i32`. Forwards directly to
/// `vuqadd_s32_`, the binding for `llvm.aarch64.neon.suqadd.v2i32`.
///
/// `unsafe` because of `#[target_feature(enable = "neon")]`: only call this
/// where NEON is available.
414+
#[inline]
415+
#[target_feature(enable = "neon")]
416+
#[cfg_attr(test, assert_instr(suqadd))]
417+
pub unsafe fn vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t {
418+
vuqadd_s32_(a, b)
419+
}
420+
/// Signed saturating accumulate of unsigned value.
///
/// Adds each unsigned 32-bit lane of `b` to the matching signed lane of `a`
/// (4 lanes), clamping sums that do not fit in `i32`. Forwards directly to
/// `vuqaddq_s32_`, the binding for `llvm.aarch64.neon.suqadd.v4i32`.
///
/// `unsafe` because of `#[target_feature(enable = "neon")]`: only call this
/// where NEON is available.
421+
#[inline]
422+
#[target_feature(enable = "neon")]
423+
#[cfg_attr(test, assert_instr(suqadd))]
424+
pub unsafe fn vuqaddq_s32(a: int32x4_t, b: uint32x4_t) -> int32x4_t {
425+
vuqaddq_s32_(a, b)
426+
}
427+
/// Signed saturating accumulate of unsigned value.
///
/// Adds the unsigned 64-bit lane of `b` to the signed lane of `a`, saturating
/// to the `i64` range. Forwards directly to `vuqadd_s64_`, the binding for
/// `llvm.aarch64.neon.suqadd.v1i64`.
///
/// `unsafe` because of `#[target_feature(enable = "neon")]`: only call this
/// where NEON is available.
428+
#[inline]
429+
#[target_feature(enable = "neon")]
430+
#[cfg_attr(test, assert_instr(suqadd))]
431+
pub unsafe fn vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t {
432+
vuqadd_s64_(a, b)
433+
}
434+
/// Signed saturating accumulate of unsigned value.
///
/// Adds each unsigned 64-bit lane of `b` to the matching signed lane of `a`
/// (2 lanes), clamping sums that do not fit in `i64`. Forwards directly to
/// `vuqaddq_s64_`, the binding for `llvm.aarch64.neon.suqadd.v2i64`.
///
/// `unsafe` because of `#[target_feature(enable = "neon")]`: only call this
/// where NEON is available.
435+
#[inline]
436+
#[target_feature(enable = "neon")]
437+
#[cfg_attr(test, assert_instr(suqadd))]
438+
pub unsafe fn vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t {
439+
vuqaddq_s64_(a, b)
440+
}
441+
442+
/// Unsigned saturating accumulate of signed value.
///
/// Adds each signed 8-bit lane of `b` to the matching unsigned lane of `a`,
/// clamping results that fall below zero or above `u8::MAX`. Forwards
/// directly to `vsqadd_u8_`, the binding for `llvm.aarch64.neon.usqadd.v8i8`.
///
/// `unsafe` because of `#[target_feature(enable = "neon")]`: only call this
/// where NEON is available.
443+
#[inline]
444+
#[target_feature(enable = "neon")]
445+
#[cfg_attr(test, assert_instr(usqadd))]
446+
pub unsafe fn vsqadd_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
447+
vsqadd_u8_(a, b)
448+
}
449+
/// Unsigned saturating accumulate of signed value.
///
/// Adds each signed 8-bit lane of `b` to the matching unsigned lane of `a`
/// (16 lanes), clamping results that fall below zero or above `u8::MAX`.
/// Forwards directly to `vsqaddq_u8_`, the binding for
/// `llvm.aarch64.neon.usqadd.v16i8`.
///
/// `unsafe` because of `#[target_feature(enable = "neon")]`: only call this
/// where NEON is available.
450+
#[inline]
451+
#[target_feature(enable = "neon")]
452+
#[cfg_attr(test, assert_instr(usqadd))]
453+
pub unsafe fn vsqaddq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
454+
vsqaddq_u8_(a, b)
455+
}
456+
/// Unsigned saturating accumulate of signed value.
///
/// Adds each signed 16-bit lane of `b` to the matching unsigned lane of `a`,
/// clamping results that fall below zero or above `u16::MAX`. Forwards
/// directly to `vsqadd_u16_`, the binding for
/// `llvm.aarch64.neon.usqadd.v4i16`.
///
/// `unsafe` because of `#[target_feature(enable = "neon")]`: only call this
/// where NEON is available.
457+
#[inline]
458+
#[target_feature(enable = "neon")]
459+
#[cfg_attr(test, assert_instr(usqadd))]
460+
pub unsafe fn vsqadd_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
461+
vsqadd_u16_(a, b)
462+
}
463+
/// Unsigned saturating accumulate of signed value.
///
/// Adds each signed 16-bit lane of `b` to the matching unsigned lane of `a`
/// (8 lanes), clamping results that fall below zero or above `u16::MAX`.
/// Forwards directly to `vsqaddq_u16_`, the binding for
/// `llvm.aarch64.neon.usqadd.v8i16`.
///
/// `unsafe` because of `#[target_feature(enable = "neon")]`: only call this
/// where NEON is available.
464+
#[inline]
465+
#[target_feature(enable = "neon")]
466+
#[cfg_attr(test, assert_instr(usqadd))]
467+
pub unsafe fn vsqaddq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
468+
vsqaddq_u16_(a, b)
469+
}
470+
/// Unsigned saturating accumulate of signed value.
///
/// Adds each signed 32-bit lane of `b` to the matching unsigned lane of `a`,
/// clamping results that fall below zero or above `u32::MAX`. Forwards
/// directly to `vsqadd_u32_`, the binding for
/// `llvm.aarch64.neon.usqadd.v2i32`.
///
/// `unsafe` because of `#[target_feature(enable = "neon")]`: only call this
/// where NEON is available.
471+
#[inline]
472+
#[target_feature(enable = "neon")]
473+
#[cfg_attr(test, assert_instr(usqadd))]
474+
pub unsafe fn vsqadd_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
475+
vsqadd_u32_(a, b)
476+
}
477+
/// Unsigned saturating accumulate of signed value.
///
/// Adds each signed 32-bit lane of `b` to the matching unsigned lane of `a`
/// (4 lanes), clamping results that fall below zero or above `u32::MAX`.
/// Forwards directly to `vsqaddq_u32_`, the binding for
/// `llvm.aarch64.neon.usqadd.v4i32`.
///
/// `unsafe` because of `#[target_feature(enable = "neon")]`: only call this
/// where NEON is available.
478+
#[inline]
479+
#[target_feature(enable = "neon")]
480+
#[cfg_attr(test, assert_instr(usqadd))]
481+
pub unsafe fn vsqaddq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
482+
vsqaddq_u32_(a, b)
483+
}
484+
/// Unsigned saturating accumulate of signed value.
///
/// Adds the signed 64-bit lane of `b` to the unsigned lane of `a`, saturating
/// to the `u64` range. Forwards directly to `vsqadd_u64_`, the binding for
/// `llvm.aarch64.neon.usqadd.v1i64`.
///
/// `unsafe` because of `#[target_feature(enable = "neon")]`: only call this
/// where NEON is available.
485+
#[inline]
486+
#[target_feature(enable = "neon")]
487+
#[cfg_attr(test, assert_instr(usqadd))]
488+
pub unsafe fn vsqadd_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
489+
vsqadd_u64_(a, b)
490+
}
491+
/// Unsigned saturating accumulate of signed value.
///
/// Adds each signed 64-bit lane of `b` to the matching unsigned lane of `a`
/// (2 lanes), clamping results that fall below zero or above `u64::MAX`.
/// Forwards directly to `vsqaddq_u64_`, the binding for
/// `llvm.aarch64.neon.usqadd.v2i64`.
///
/// `unsafe` because of `#[target_feature(enable = "neon")]`: only call this
/// where NEON is available.
492+
#[inline]
493+
#[target_feature(enable = "neon")]
494+
#[cfg_attr(test, assert_instr(usqadd))]
495+
pub unsafe fn vsqaddq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
496+
vsqaddq_u64_(a, b)
497+
}
498+
351499
/// Add pairwise
352500
#[inline]
353501
#[target_feature(enable = "neon")]
@@ -2328,6 +2476,170 @@ mod tests {
23282476
assert_eq!(r, e);
23292477
}
23302478

2479+
// Lane-wise check of `vuqadd_s8`. The first lane (i8::MIN + u8::MAX) lands
// exactly on i8::MAX, the last lane (i8::MAX + 7) overflows and is expected
// to clamp to i8::MAX, and the lanes in between are plain in-range sums.
#[simd_test(enable = "neon")]
2480+
unsafe fn test_vuqadd_s8() {
2481+
let a = i8x8::new(i8::MIN, -3, -2, -1, 0, 1, 2, i8::MAX);
2482+
let b = u8x8::new(u8::MAX, 1, 2, 3, 4, 5, 6, 7);
2483+
let e = i8x8::new(i8::MAX, -2, 0, 2, 4, 6, 8, i8::MAX);
2484+
let r: i8x8 = transmute(vuqadd_s8(transmute(a), transmute(b)));
2485+
assert_eq!(r, e);
2486+
}
2487+
// Lane-wise check of `vuqaddq_s8` over all 16 lanes. The first lane
// (i8::MIN + u8::MAX) lands exactly on i8::MAX, the last lane (i8::MAX + 15)
// overflows and is expected to clamp to i8::MAX, and the lanes in between
// are plain in-range sums.
#[simd_test(enable = "neon")]
2488+
unsafe fn test_vuqaddq_s8() {
2489+
let a = i8x16::new(
2490+
i8::MIN,
2491+
-7,
2492+
-6,
2493+
-5,
2494+
-4,
2495+
-3,
2496+
-2,
2497+
-1,
2498+
0,
2499+
1,
2500+
2,
2501+
3,
2502+
4,
2503+
5,
2504+
6,
2505+
i8::MAX,
2506+
);
2507+
let b = u8x16::new(u8::MAX, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2508+
let e = i8x16::new(
2509+
i8::MAX,
2510+
-6,
2511+
-4,
2512+
-2,
2513+
0,
2514+
2,
2515+
4,
2516+
6,
2517+
8,
2518+
10,
2519+
12,
2520+
14,
2521+
16,
2522+
18,
2523+
20,
2524+
i8::MAX,
2525+
);
2526+
let r: i8x16 = transmute(vuqaddq_s8(transmute(a), transmute(b)));
2527+
assert_eq!(r, e);
2528+
}
2529+
// Lane-wise check of `vuqadd_s16`. i16::MIN + u16::MAX lands exactly on
// i16::MAX, i16::MAX + 3 is expected to clamp to i16::MAX, and the two
// middle lanes are plain in-range sums.
#[simd_test(enable = "neon")]
2530+
unsafe fn test_vuqadd_s16() {
2531+
let a = i16x4::new(i16::MIN, -1, 0, i16::MAX);
2532+
let b = u16x4::new(u16::MAX, 1, 2, 3);
2533+
let e = i16x4::new(i16::MAX, 0, 2, i16::MAX);
2534+
let r: i16x4 = transmute(vuqadd_s16(transmute(a), transmute(b)));
2535+
assert_eq!(r, e);
2536+
}
2537+
// Lane-wise check of `vuqaddq_s16` over 8 lanes. i16::MIN + u16::MAX lands
// exactly on i16::MAX, i16::MAX + 7 is expected to clamp to i16::MAX, and
// the lanes in between are plain in-range sums.
#[simd_test(enable = "neon")]
2538+
unsafe fn test_vuqaddq_s16() {
2539+
let a = i16x8::new(i16::MIN, -3, -2, -1, 0, 1, 2, i16::MAX);
2540+
let b = u16x8::new(u16::MAX, 1, 2, 3, 4, 5, 6, 7);
2541+
let e = i16x8::new(i16::MAX, -2, 0, 2, 4, 6, 8, i16::MAX);
2542+
let r: i16x8 = transmute(vuqaddq_s16(transmute(a), transmute(b)));
2543+
assert_eq!(r, e);
2544+
}
2545+
// Two-lane check of `vuqadd_s32`. i32::MIN + u32::MAX lands exactly on
// i32::MAX, and i32::MAX + 1 overflows and is expected to clamp to i32::MAX.
#[simd_test(enable = "neon")]
2546+
unsafe fn test_vuqadd_s32() {
2547+
let a = i32x2::new(i32::MIN, i32::MAX);
2548+
let b = u32x2::new(u32::MAX, 1);
2549+
let e = i32x2::new(i32::MAX, i32::MAX);
2550+
let r: i32x2 = transmute(vuqadd_s32(transmute(a), transmute(b)));
2551+
assert_eq!(r, e);
2552+
}
2553+
// Four-lane check of `vuqaddq_s32`. i32::MIN + u32::MAX lands exactly on
// i32::MAX, i32::MAX + 3 is expected to clamp to i32::MAX, and the two
// middle lanes are plain in-range sums.
#[simd_test(enable = "neon")]
2554+
unsafe fn test_vuqaddq_s32() {
2555+
let a = i32x4::new(i32::MIN, -1, 0, i32::MAX);
2556+
let b = u32x4::new(u32::MAX, 1, 2, 3);
2557+
let e = i32x4::new(i32::MAX, 0, 2, i32::MAX);
2558+
let r: i32x4 = transmute(vuqaddq_s32(transmute(a), transmute(b)));
2559+
assert_eq!(r, e);
2560+
}
2561+
// Single-lane check of `vuqadd_s64`. i64::MIN + u64::MAX equals i64::MAX
// exactly, so this covers the largest in-range sum.
// NOTE(review): no overflowing (clamped) input is exercised here; the
// two-lane `vuqaddq_s64` test does cover i64::MAX + 1.
#[simd_test(enable = "neon")]
2562+
unsafe fn test_vuqadd_s64() {
2563+
let a = i64x1::new(i64::MIN);
2564+
let b = u64x1::new(u64::MAX);
2565+
let e = i64x1::new(i64::MAX);
2566+
let r: i64x1 = transmute(vuqadd_s64(transmute(a), transmute(b)));
2567+
assert_eq!(r, e);
2568+
}
2569+
// Two-lane check of `vuqaddq_s64`. i64::MIN + u64::MAX lands exactly on
// i64::MAX, and i64::MAX + 1 overflows and is expected to clamp to i64::MAX.
#[simd_test(enable = "neon")]
2570+
unsafe fn test_vuqaddq_s64() {
2571+
let a = i64x2::new(i64::MIN, i64::MAX);
2572+
let b = u64x2::new(u64::MAX, 1);
2573+
let e = i64x2::new(i64::MAX, i64::MAX);
2574+
let r: i64x2 = transmute(vuqaddq_s64(transmute(a), transmute(b)));
2575+
assert_eq!(r, e);
2576+
}
2577+
2578+
// Lane-wise check of `vsqadd_u8`. The first two lanes (0 + i8::MIN, 1 - 3)
// go negative and are expected to clamp to 0, the last lane (u8::MAX + 3)
// is expected to clamp to u8::MAX, and the remaining lanes are exact sums.
#[simd_test(enable = "neon")]
2579+
unsafe fn test_vsqadd_u8() {
2580+
let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, u8::MAX);
2581+
let b = i8x8::new(i8::MIN, -3, -2, -1, 0, 1, 2, 3);
2582+
let e = u8x8::new(0, 0, 0, 2, 4, 6, 8, u8::MAX);
2583+
let r: u8x8 = transmute(vsqadd_u8(transmute(a), transmute(b)));
2584+
assert_eq!(r, e);
2585+
}
2586+
// Lane-wise check of `vsqaddq_u8` over 16 lanes. The first four lanes go
// negative and are expected to clamp to 0, the last lane (u8::MAX + 7) is
// expected to clamp to u8::MAX, and the remaining lanes are exact sums.
#[simd_test(enable = "neon")]
2587+
unsafe fn test_vsqaddq_u8() {
2588+
let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, u8::MAX);
2589+
let b = i8x16::new(i8::MIN, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7);
2590+
let e = u8x16::new(0, 0, 0, 0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, u8::MAX);
2591+
let r: u8x16 = transmute(vsqaddq_u8(transmute(a), transmute(b)));
2592+
assert_eq!(r, e);
2593+
}
2594+
// Four-lane check of `vsqadd_u16`. 0 + i16::MIN is expected to clamp to 0,
// u16::MAX + 1 is expected to clamp to u16::MAX, and the two middle lanes
// (1 - 1, 2 + 0) are exact sums.
#[simd_test(enable = "neon")]
2595+
unsafe fn test_vsqadd_u16() {
2596+
let a = u16x4::new(0, 1, 2, u16::MAX);
2597+
let b = i16x4::new(i16::MIN, -1, 0, 1);
2598+
let e = u16x4::new(0, 0, 2, u16::MAX);
2599+
let r: u16x4 = transmute(vsqadd_u16(transmute(a), transmute(b)));
2600+
assert_eq!(r, e);
2601+
}
2602+
// Lane-wise check of `vsqaddq_u16` over 8 lanes. The first two lanes
// (0 + i16::MIN, 1 - 3) go negative and are expected to clamp to 0, the
// last lane (u16::MAX + 3) is expected to clamp to u16::MAX, and the
// remaining lanes are exact sums.
#[simd_test(enable = "neon")]
2603+
unsafe fn test_vsqaddq_u16() {
2604+
let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, u16::MAX);
2605+
let b = i16x8::new(i16::MIN, -3, -2, -1, 0, 1, 2, 3);
2606+
let e = u16x8::new(0, 0, 0, 2, 4, 6, 8, u16::MAX);
2607+
let r: u16x8 = transmute(vsqaddq_u16(transmute(a), transmute(b)));
2608+
assert_eq!(r, e);
2609+
}
2610+
// Two-lane check of `vsqadd_u32`, both lanes out of range: 0 + i32::MIN is
// expected to clamp to 0 and u32::MAX + 1 is expected to clamp to u32::MAX.
#[simd_test(enable = "neon")]
2611+
unsafe fn test_vsqadd_u32() {
2612+
let a = u32x2::new(0, u32::MAX);
2613+
let b = i32x2::new(i32::MIN, 1);
2614+
let e = u32x2::new(0, u32::MAX);
2615+
let r: u32x2 = transmute(vsqadd_u32(transmute(a), transmute(b)));
2616+
assert_eq!(r, e);
2617+
}
2618+
// Four-lane check of `vsqaddq_u32`. 0 + i32::MIN is expected to clamp to 0,
// u32::MAX + 1 is expected to clamp to u32::MAX, and the two middle lanes
// (1 - 1, 2 + 0) are exact sums.
#[simd_test(enable = "neon")]
2619+
unsafe fn test_vsqaddq_u32() {
2620+
let a = u32x4::new(0, 1, 2, u32::MAX);
2621+
let b = i32x4::new(i32::MIN, -1, 0, 1);
2622+
let e = u32x4::new(0, 0, 2, u32::MAX);
2623+
let r: u32x4 = transmute(vsqaddq_u32(transmute(a), transmute(b)));
2624+
assert_eq!(r, e);
2625+
}
2626+
// Single-lane check of `vsqadd_u64`: 0 + i64::MIN goes negative and is
// expected to clamp to 0.
// NOTE(review): the upper bound (clamping to u64::MAX) is not exercised
// here; the two-lane `vsqaddq_u64` test covers u64::MAX + 1.
#[simd_test(enable = "neon")]
2627+
unsafe fn test_vsqadd_u64() {
2628+
let a = u64x1::new(0);
2629+
let b = i64x1::new(i64::MIN);
2630+
let e = u64x1::new(0);
2631+
let r: u64x1 = transmute(vsqadd_u64(transmute(a), transmute(b)));
2632+
assert_eq!(r, e);
2633+
}
2634+
// Two-lane check of `vsqaddq_u64`, both lanes out of range: 0 + i64::MIN is
// expected to clamp to 0 and u64::MAX + 1 is expected to clamp to u64::MAX.
#[simd_test(enable = "neon")]
2635+
unsafe fn test_vsqaddq_u64() {
2636+
let a = u64x2::new(0, u64::MAX);
2637+
let b = i64x2::new(i64::MIN, 1);
2638+
let e = u64x2::new(0, u64::MAX);
2639+
let r: u64x2 = transmute(vsqaddq_u64(transmute(a), transmute(b)));
2640+
assert_eq!(r, e);
2641+
}
2642+
23312643
#[simd_test(enable = "neon")]
23322644
unsafe fn test_vpaddq_s16() {
23332645
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);

0 commit comments

Comments
 (0)