Skip to content

Commit 8a682cb

Browse files
committed
Add vrndn neon instructions
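This adds the NEON `vrndn` intrinsics (`vrndn_f64`, `vrndnq_f64`, `vrndn_f32`, `vrndnq_f32`), which map to the AArch64 `frintn` instruction. They round each lane to the nearest integral value, with ties going to even, and keep the result as a floating-point vector rather than converting the lanes to integers.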
1 parent 37d9253 commit 8a682cb

File tree

2 files changed

+93
-0
lines changed

2 files changed

+93
-0
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1833,6 +1833,58 @@ pub unsafe fn vrecpeq_f64(a: float64x2_t) -> float64x2_t {
18331833
vrecpeq_f64_(a)
18341834
}
18351835

1836+
/// Floating-point round to integral, to nearest with ties to even
///
/// Takes a `float64x1_t` and returns a `float64x1_t`: the rounded value stays
/// in floating-point form instead of being converted to an integer vector.
/// The operation is delegated to the LLVM intrinsic
/// `llvm.aarch64.neon.frintn.v1f64`, and test builds assert that the function
/// lowers to the `frintn` instruction.
///
/// # Safety
///
/// This function is compiled with the `neon` target feature enabled, so the
/// caller must make sure the executing CPU supports NEON.
1837+
#[inline]
1838+
#[target_feature(enable = "neon")]
1839+
#[cfg_attr(test, assert_instr(frintn))]
1840+
pub unsafe fn vrndn_f64(a: float64x1_t) -> float64x1_t {
1841+
// NOTE(review): the lint is presumably silenced because the SIMD vector type
// is not considered FFI-safe, while this declaration only binds an LLVM
// intrinsic through `link_name` rather than a real C function — confirm.
#[allow(improper_ctypes)]
1842+
extern "C" {
1843+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v1f64")]
1844+
fn vrndn_f64_(a: float64x1_t) -> float64x1_t;
1845+
}
1846+
// Forward the argument unchanged; the intrinsic's result is the return value.
vrndn_f64_(a)
1847+
}
1848+
1849+
/// Floating-point round to integral, to nearest with ties to even
///
/// Takes a `float64x2_t` and returns a `float64x2_t`: each lane is rounded
/// and stays in floating-point form instead of being converted to an integer.
/// The operation is delegated to the LLVM intrinsic
/// `llvm.aarch64.neon.frintn.v2f64`, and test builds assert that the function
/// lowers to the `frintn` instruction.
///
/// # Safety
///
/// This function is compiled with the `neon` target feature enabled, so the
/// caller must make sure the executing CPU supports NEON.
1850+
#[inline]
1851+
#[target_feature(enable = "neon")]
1852+
#[cfg_attr(test, assert_instr(frintn))]
1853+
pub unsafe fn vrndnq_f64(a: float64x2_t) -> float64x2_t {
1854+
// NOTE(review): the lint is presumably silenced because the SIMD vector type
// is not considered FFI-safe, while this declaration only binds an LLVM
// intrinsic through `link_name` rather than a real C function — confirm.
#[allow(improper_ctypes)]
1855+
extern "C" {
1856+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f64")]
1857+
fn vrndnq_f64_(a: float64x2_t) -> float64x2_t;
1858+
}
1859+
// Forward the argument unchanged; the intrinsic's result is the return value.
vrndnq_f64_(a)
1860+
}
1861+
1862+
/// Floating-point round to integral, to nearest with ties to even
///
/// Takes a `float32x2_t` and returns a `float32x2_t`: each lane is rounded
/// and stays in floating-point form instead of being converted to an integer.
/// The operation is delegated to the LLVM intrinsic
/// `llvm.aarch64.neon.frintn.v2f32`, and test builds assert that the function
/// lowers to the `frintn` instruction.
///
/// # Safety
///
/// This function is compiled with the `neon` target feature enabled, so the
/// caller must make sure the executing CPU supports NEON.
1863+
#[inline]
1864+
#[target_feature(enable = "neon")]
1865+
#[cfg_attr(test, assert_instr(frintn))]
1866+
pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
1867+
// NOTE(review): the lint is presumably silenced because the SIMD vector type
// is not considered FFI-safe, while this declaration only binds an LLVM
// intrinsic through `link_name` rather than a real C function — confirm.
#[allow(improper_ctypes)]
1868+
extern "C" {
1869+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
1870+
fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
1871+
}
1872+
// Forward the argument unchanged; the intrinsic's result is the return value.
vrndn_f32_(a)
1873+
}
1874+
1875+
/// Floating-point round to integral, to nearest with ties to even
///
/// Takes a `float32x4_t` and returns a `float32x4_t`: each lane is rounded
/// and stays in floating-point form instead of being converted to an integer.
/// The operation is delegated to the LLVM intrinsic
/// `llvm.aarch64.neon.frintn.v4f32`, and test builds assert that the function
/// lowers to the `frintn` instruction.
///
/// # Safety
///
/// This function is compiled with the `neon` target feature enabled, so the
/// caller must make sure the executing CPU supports NEON.
1876+
#[inline]
1877+
#[target_feature(enable = "neon")]
1878+
#[cfg_attr(test, assert_instr(frintn))]
1879+
pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
1880+
// NOTE(review): the lint is presumably silenced because the SIMD vector type
// is not considered FFI-safe, while this declaration only binds an LLVM
// intrinsic through `link_name` rather than a real C function — confirm.
#[allow(improper_ctypes)]
1881+
extern "C" {
1882+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
1883+
fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
1884+
}
1885+
// Forward the argument unchanged; the intrinsic's result is the return value.
vrndnq_f32_(a)
1886+
}
1887+
18361888
#[cfg(test)]
18371889
mod test {
18381890
use super::*;
@@ -3449,4 +3501,36 @@ mod test {
34493501
let r: f64x2 = transmute(vrecpeq_f64(transmute(a)));
34503502
assert_eq!(r, e);
34513503
}
3504+
3505+
#[simd_test(enable = "neon")]
3506+
unsafe fn test_vrndn_f64() {
3507+
// -1.5 lies exactly halfway between -1.0 and -2.0, so it exercises the
// tie-breaking rule: ties-to-even must pick -2.0.
let a: f64 = -1.5;
3508+
let e: f64 = -2.0;
3509+
// `transmute` converts the plain `f64` into the intrinsic's `float64x1_t`
// argument and turns the returned vector back into an `f64` for comparison.
let r: f64 = transmute(vrndn_f64(transmute(a)));
3510+
assert_eq!(r, e);
3511+
}
3512+
3513+
#[simd_test(enable = "neon")]
3514+
unsafe fn test_vrndnq_f64() {
3515+
// Both lanes are exact ties (x.5), so the expected values are the even
// neighbours: -1.5 -> -2.0 and 2.5 -> 2.0.
let a: f64x2 = f64x2::new(-1.5, 2.5);
3516+
let e: f64x2 = f64x2::new(-2.0, 2.0);
3517+
// `transmute` converts between the test-side `f64x2` value and the
// intrinsic's `float64x2_t` argument/return type.
let r: f64x2 = transmute(vrndnq_f64(transmute(a)));
3518+
assert_eq!(r, e);
3519+
}
3520+
3521+
#[simd_test(enable = "neon")]
3522+
unsafe fn test_vrndn_f32() {
3523+
// Both lanes are exact ties (x.5), so the expected values are the even
// neighbours: -1.5 -> -2.0 and 2.5 -> 2.0.
let a: f32x2 = f32x2::new(-1.5, 2.5);
3524+
let e: f32x2 = f32x2::new(-2.0, 2.0);
3525+
// `transmute` converts between the test-side `f32x2` value and the
// intrinsic's `float32x2_t` argument/return type.
let r: f32x2 = transmute(vrndn_f32(transmute(a)));
3526+
assert_eq!(r, e);
3527+
}
3528+
3529+
#[simd_test(enable = "neon")]
3530+
unsafe fn test_vrndnq_f32() {
3531+
// All four lanes are exact ties (x.5), so the expected values are the even
// neighbours: -1.5 -> -2.0, 2.5 -> 2.0, -3.5 -> -4.0 and 4.5 -> 4.0.
let a: f32x4 = f32x4::new(-1.5, 2.5, -3.5, 4.5);
3532+
let e: f32x4 = f32x4::new(-2.0, 2.0, -4.0, 4.0);
3533+
// `transmute` converts between the test-side `f32x4` value and the
// intrinsic's `float32x4_t` argument/return type.
let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
3534+
assert_eq!(r, e);
3535+
}
34523536
}

crates/stdarch-gen/neon.spec

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,3 +1061,12 @@ generate float64x*_t
10611061
arm = vrecpe
10621062
link-arm = vrecpe._EXT_
10631063
generate float*_t
1064+
1065+
/// Floating-point round to integral, to nearest with ties to even
1066+
name = vrndn
1067+
a = -1.5, 2.5, -3.5, 4.5
1068+
validate -2.0, 2.0, -4.0, 4.0
1069+
1070+
aarch64 = frintn
1071+
link-aarch64 = frintn._EXT_
1072+
generate float64x*_t, float*_t

0 commit comments

Comments
 (0)