Skip to content

Commit 281da2c

Browse files
committed
Add vrndn neon instructions
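This adds the NEON `vrndn` intrinsics (`vrndn_f32`, `vrndnq_f32`, `vrndn_f64`, `vrndnq_f64`), which round each lane to the nearest integral value (ties to even) while keeping the lanes as floating-point values instead of converting them to integers.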
1 parent d385078 commit 281da2c

File tree

3 files changed

+105
-1
lines changed

3 files changed

+105
-1
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6705,6 +6705,32 @@ pub unsafe fn vqabsq_s64(a: int64x2_t) -> int64x2_t {
67056705
vqabsq_s64_(a)
67066706
}
67076707

6708+
/// Floating-point round to integral, to nearest with ties to even
///
/// Takes a `float64x1_t` and returns a `float64x1_t`, so the rounded value
/// stays in floating-point form. The body only forwards `a` to the LLVM
/// intrinsic bound through `link_name` in the `extern "C"` block below.
///
/// # Safety
///
/// NOTE(review): declared `unsafe` and compiled with the `neon` target
/// feature enabled; presumably the caller must ensure that feature is
/// available on the running CPU — confirm.
6709+
#[inline]
6710+
#[target_feature(enable = "neon")]
6711+
// Test builds only: attaches `assert_instr(frintn)` to this function.
#[cfg_attr(test, assert_instr(frintn))]
6712+
pub unsafe fn vrndn_f64(a: float64x1_t) -> float64x1_t {
6713+
// NOTE(review): `improper_ctypes` is presumably silenced because the vector
// type appears in an `extern "C"` signature — confirm.
#[allow(improper_ctypes)]
6714+
extern "C" {
6715+
// On aarch64 the declaration resolves to `llvm.aarch64.neon.frintn.v1f64`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v1f64")]
6716+
fn vrndn_f64_(a: float64x1_t) -> float64x1_t;
6717+
}
6718+
vrndn_f64_(a)
6719+
}
6720+
6721+
/// Floating-point round to integral, to nearest with ties to even
///
/// Takes a `float64x2_t` and returns a `float64x2_t`, so the rounded lanes
/// stay in floating-point form. The body only forwards `a` to the LLVM
/// intrinsic bound through `link_name` in the `extern "C"` block below.
///
/// # Safety
///
/// NOTE(review): declared `unsafe` and compiled with the `neon` target
/// feature enabled; presumably the caller must ensure that feature is
/// available on the running CPU — confirm.
6722+
#[inline]
6723+
#[target_feature(enable = "neon")]
6724+
// Test builds only: attaches `assert_instr(frintn)` to this function.
#[cfg_attr(test, assert_instr(frintn))]
6725+
pub unsafe fn vrndnq_f64(a: float64x2_t) -> float64x2_t {
6726+
// NOTE(review): `improper_ctypes` is presumably silenced because the vector
// type appears in an `extern "C"` signature — confirm.
#[allow(improper_ctypes)]
6727+
extern "C" {
6728+
// On aarch64 the declaration resolves to `llvm.aarch64.neon.frintn.v2f64`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f64")]
6729+
fn vrndnq_f64_(a: float64x2_t) -> float64x2_t;
6730+
}
6731+
vrndnq_f64_(a)
6732+
}
6733+
67086734
#[cfg(test)]
67096735
mod test {
67106736
use super::*;
@@ -12790,4 +12816,20 @@ mod test {
1279012816
let r: i64x2 = transmute(vqabsq_s64(transmute(a)));
1279112817
assert_eq!(r, e);
1279212818
}
12819+
12820+
// Checks `vrndn_f64` on the tie value -1.5: the expected result is -2.0,
// the even one of the two nearest integral values.
#[simd_test(enable = "neon")]
12821+
unsafe fn test_vrndn_f64() {
12822+
let a: f64 = -1.5;
12823+
let e: f64 = -2.0;
12824+
// The scalar is transmuted into the intrinsic's vector argument and the
// result is transmuted back to `f64` for comparison.
let r: f64 = transmute(vrndn_f64(transmute(a)));
12825+
assert_eq!(r, e);
12826+
}
12827+
12828+
// Checks `vrndnq_f64` on two tie values: -1.5 is expected to become -2.0 and
// 2.5 is expected to become 2.0 (the even neighbour in both cases).
#[simd_test(enable = "neon")]
12829+
unsafe fn test_vrndnq_f64() {
12830+
let a: f64x2 = f64x2::new(-1.5, 2.5);
12831+
let e: f64x2 = f64x2::new(-2.0, 2.0);
12832+
// `f64x2` is transmuted into the intrinsic's argument type and the result
// is transmuted back to `f64x2` for comparison.
let r: f64x2 = transmute(vrndnq_f64(transmute(a)));
12833+
assert_eq!(r, e);
12834+
}
1279312835
}

crates/core_arch/src/arm_shared/neon/generated.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11679,6 +11679,38 @@ pub unsafe fn vqabsq_s32(a: int32x4_t) -> int32x4_t {
1167911679
vqabsq_s32_(a)
1168011680
}
1168111681

11682+
/// Floating-point round to integral, to nearest with ties to even
///
/// Takes a `float32x2_t` and returns a `float32x2_t`, so the rounded lanes
/// stay in floating-point form. The body only forwards `a` to the LLVM
/// intrinsic bound through `link_name` in the `extern "C"` block below; the
/// intrinsic name differs between `arm` and `aarch64`.
///
/// # Safety
///
/// NOTE(review): declared `unsafe` and compiled with the `neon` target
/// feature enabled (plus `fp-armv8,v8` on `arm`); presumably the caller must
/// ensure those features are available on the running CPU — confirm.
11683+
#[inline]
11684+
#[target_feature(enable = "neon")]
11685+
// On 32-bit arm the function additionally enables `fp-armv8,v8`.
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
11686+
// Test builds only: attaches `assert_instr` with `vrintn` on arm and
// `frintn` on aarch64.
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
11687+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))]
11688+
pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
11689+
// NOTE(review): `improper_ctypes` is presumably silenced because the vector
// type appears in an `extern "C"` signature — confirm.
#[allow(improper_ctypes)]
11690+
extern "C" {
11691+
// The declaration resolves to `llvm.arm.neon.vrintn.v2f32` on arm and to
// `llvm.aarch64.neon.frintn.v2f32` on aarch64.
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")]
11692+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
11693+
fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
11694+
}
11695+
vrndn_f32_(a)
11696+
}
11697+
11698+
/// Floating-point round to integral, to nearest with ties to even
///
/// Takes a `float32x4_t` and returns a `float32x4_t`, so the rounded lanes
/// stay in floating-point form. The body only forwards `a` to the LLVM
/// intrinsic bound through `link_name` in the `extern "C"` block below; the
/// intrinsic name differs between `arm` and `aarch64`.
///
/// # Safety
///
/// NOTE(review): declared `unsafe` and compiled with the `neon` target
/// feature enabled (plus `fp-armv8,v8` on `arm`); presumably the caller must
/// ensure those features are available on the running CPU — confirm.
11699+
#[inline]
11700+
#[target_feature(enable = "neon")]
11701+
// On 32-bit arm the function additionally enables `fp-armv8,v8`.
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
11702+
// Test builds only: attaches `assert_instr` with `vrintn` on arm and
// `frintn` on aarch64.
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
11703+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))]
11704+
pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
11705+
// NOTE(review): `improper_ctypes` is presumably silenced because the vector
// type appears in an `extern "C"` signature — confirm.
#[allow(improper_ctypes)]
11706+
extern "C" {
11707+
// The declaration resolves to `llvm.arm.neon.vrintn.v4f32` on arm and to
// `llvm.aarch64.neon.frintn.v4f32` on aarch64.
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")]
11708+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
11709+
fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
11710+
}
11711+
vrndnq_f32_(a)
11712+
}
11713+
1168211714
#[cfg(test)]
1168311715
#[allow(overflowing_literals)]
1168411716
mod test {
@@ -19883,4 +19915,20 @@ mod test {
1988319915
let r: i32x4 = transmute(vqabsq_s32(transmute(a)));
1988419916
assert_eq!(r, e);
1988519917
}
19918+
19919+
// Checks `vrndn_f32` on two tie values: -1.5 is expected to become -2.0 and
// 2.5 is expected to become 2.0 (the even neighbour in both cases).
#[simd_test(enable = "neon")]
19920+
unsafe fn test_vrndn_f32() {
19921+
let a: f32x2 = f32x2::new(-1.5, 2.5);
19922+
let e: f32x2 = f32x2::new(-2.0, 2.0);
19923+
// `f32x2` is transmuted into the intrinsic's argument type and the result
// is transmuted back to `f32x2` for comparison.
let r: f32x2 = transmute(vrndn_f32(transmute(a)));
19924+
assert_eq!(r, e);
19925+
}
19926+
19927+
// Checks `vrndnq_f32` on four tie values; each expected lane is the even
// neighbour: -1.5 -> -2.0, 2.5 -> 2.0, -3.5 -> -4.0, 4.5 -> 4.0.
#[simd_test(enable = "neon")]
19928+
unsafe fn test_vrndnq_f32() {
19929+
let a: f32x4 = f32x4::new(-1.5, 2.5, -3.5, 4.5);
19930+
let e: f32x4 = f32x4::new(-2.0, 2.0, -4.0, 4.0);
19931+
// `f32x4` is transmuted into the intrinsic's argument type and the result
// is transmuted back to `f32x4` for comparison.
let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
19932+
assert_eq!(r, e);
19933+
}
1988619934
}

crates/stdarch-gen/neon.spec

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3901,4 +3901,18 @@ validate MAX, 7
39013901

39023902
aarch64 = sqabs
39033903
link-aarch64 = sqabs._EXT_
3904-
generate int64x*_t
3904+
generate int64x*_t
3905+
3906+
/// Floating-point round to integral, to nearest with ties to even
3907+
name = vrndn
3908+
a = -1.5, 2.5, -3.5, 4.5
3909+
validate -2.0, 2.0, -4.0, 4.0
3910+
3911+
aarch64 = frintn
3912+
link-aarch64 = frintn._EXT_
3913+
generate float64x*_t
3914+
3915+
target = fp-armv8
3916+
arm = vrintn
3917+
link-arm = vrintn._EXT_
3918+
generate float*_t

0 commit comments

Comments
 (0)