Add vrndn neon instructions

CryZe · CryZe · commit 8a682cb69826 · 2021-03-17T16:20:03.000+01:00
This adds the neon instructions for lane-wise rounding without actually
converting the lanes to integers.
diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs
@@ -1833,6 +1833,58 @@ pub unsafe fn vrecpeq_f64(a: float64x2_t) -> float64x2_t {
     vrecpeq_f64_(a)
 }
 
+/// Floating-point round to integral, to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintn))]
+pub unsafe fn vrndn_f64(a: float64x1_t) -> float64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v1f64")]
+        fn vrndn_f64_(a: float64x1_t) -> float64x1_t;
+    }
+    vrndn_f64_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintn))]
+pub unsafe fn vrndnq_f64(a: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f64")]
+        fn vrndnq_f64_(a: float64x2_t) -> float64x2_t;
+    }
+    vrndnq_f64_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintn))]
+pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
+        fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
+    }
+    vrndn_f32_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintn))]
+pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
+        fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
+    }
+    vrndnq_f32_(a)
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
@@ -3449,4 +3501,36 @@ mod test {
         let r: f64x2 = transmute(vrecpeq_f64(transmute(a)));
         assert_eq!(r, e);
     }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndn_f64() {
+        let a: f64 = -1.5;
+        let e: f64 = -2.0;
+        let r: f64 = transmute(vrndn_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndnq_f64() {
+        let a: f64x2 = f64x2::new(-1.5, 2.5);
+        let e: f64x2 = f64x2::new(-2.0, 2.0);
+        let r: f64x2 = transmute(vrndnq_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndn_f32() {
+        let a: f32x2 = f32x2::new(-1.5, 2.5);
+        let e: f32x2 = f32x2::new(-2.0, 2.0);
+        let r: f32x2 = transmute(vrndn_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndnq_f32() {
+        let a: f32x4 = f32x4::new(-1.5, 2.5, -3.5, 4.5);
+        let e: f32x4 = f32x4::new(-2.0, 2.0, -4.0, 4.0);
+        let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
 }
diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec
@@ -1061,3 +1061,12 @@ generate float64x*_t
 arm = vrecpe
 link-arm = vrecpe._EXT_
 generate float*_t
+
+/// Floating-point round to integral, to nearest with ties to even
+name = vrndn
+a = -1.5, 2.5, -3.5, 4.5
+validate -2.0, 2.0, -4.0, 4.0
+
+aarch64 = frintn
+link-aarch64 = frintn._EXT_
+generate float64x*_t, float*_t