Add vcvtq_u32_f32 and vcvtq_s32_f32

jrmuizel · jrmuizel · commit e5e85f386d1b · 2020-09-10T10:07:11.000-04:00
These intrinsics are implemented differently for aarch64 and arm
in clang; specifically, aarch64 uses the llvm.aarch64.neon.fcvtzs.v4i32.v4f32
intrinsic. However, there didn't seem to be any advantage to using
that intrinsic instead of just sharing code.
diff --git a/crates/core_arch/src/arm/neon/mod.rs b/crates/core_arch/src/arm/neon/mod.rs
@@ -1813,6 +1813,28 @@ pub unsafe fn vld1q_dup_f32(addr: *const f32) -> float32x4_t {
     transmute(f32x4::new(v, v, v, v))
 }
 
+/// Converts each `f32` lane to a signed 32-bit integer, rounding toward zero.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt.s32.f32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
+pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
+    let lanes: crate::core_arch::simd::f32x4 = transmute(a);
+    transmute(simd_cast::<_, crate::core_arch::simd::i32x4>(lanes))
+}
+
+/// Converts each `f32` lane to an unsigned 32-bit integer, rounding toward zero.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt.u32.f32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
+pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
+    let lanes: crate::core_arch::simd::f32x4 = transmute(a);
+    transmute(simd_cast::<_, crate::core_arch::simd::u32x4>(lanes))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -1878,6 +1900,22 @@ mod tests {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcvtq_s32_f32() {
+        let e = i32x4::new(-1, 2, 3, 4);
+        let f = f32x4::new(-1.5, 2.9, 3.1, 4.); // fractional lanes check truncation toward zero
+        let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f))); // intrinsic from `super::*`
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcvtq_u32_f32() {
+        let e = u32x4::new(1, 2, 3, 4);
+        let f = f32x4::new(1.5, 2.9, 3.1, 4.); // fractional lanes check truncation toward zero
+        let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f))); // intrinsic from `super::*`
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vget_lane_u8() {
         let v = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);