Skip to content

Commit e5e85f3

Browse files
committed
Add vcvtq_u32_f32 and vcvtq_s32_f32
These intrinsics are implemented differently for aarch64 and arm in clang: for example, aarch64 uses the llvm.aarch64.neon.fcvtzs.v4i32.v4f32 intrinsic. However, there didn't seem to be any advantage to using that intrinsic instead of just sharing code.
1 parent 8503943 commit e5e85f3

File tree

1 file changed

+38
-0
lines changed
  • crates/core_arch/src/arm/neon

1 file changed

+38
-0
lines changed

crates/core_arch/src/arm/neon/mod.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1813,6 +1813,28 @@ pub unsafe fn vld1q_dup_f32(addr: *const f32) -> float32x4_t {
18131813
transmute(f32x4::new(v, v, v, v))
18141814
}
18151815

1816+
/// Floating-point Convert to Signed fixed-point, rounding toward Zero (vector)
1817+
#[inline]
1818+
#[target_feature(enable = "neon")]
1819+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1820+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt.s32.f32"))]
1821+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
1822+
pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
1823+
use crate::core_arch::simd::{f32x4, i32x4};
1824+
transmute(simd_cast::<_, i32x4>(transmute::<_, f32x4>(a)))
1825+
}
1826+
1827+
/// Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector)
1828+
#[inline]
1829+
#[target_feature(enable = "neon")]
1830+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1831+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt.u32.f32"))]
1832+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
1833+
pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
1834+
use crate::core_arch::simd::{f32x4, u32x4};
1835+
transmute(simd_cast::<_, u32x4>(transmute::<_, f32x4>(a)))
1836+
}
1837+
18161838
#[cfg(test)]
18171839
mod tests {
18181840
use super::*;
@@ -1878,6 +1900,22 @@ mod tests {
18781900
assert_eq!(r, e);
18791901
}
18801902

1903+
#[simd_test(enable = "neon")]
1904+
unsafe fn vcvtq_s32_f32() {
1905+
let e = i32x4::new(-1, 2, 3, 4);
1906+
let f = f32x4::new(-1., 2., 3., 4.);
1907+
let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f)));
1908+
assert_eq!(r, e);
1909+
}
1910+
1911+
#[simd_test(enable = "neon")]
1912+
unsafe fn vcvtq_u32_f32() {
1913+
let e = u32x4::new(1, 2, 3, 4);
1914+
let f = f32x4::new(1., 2., 3., 4.);
1915+
let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1916+
assert_eq!(r, e);
1917+
}
1918+
18811919
#[simd_test(enable = "neon")]
18821920
unsafe fn test_vget_lane_u8() {
18831921
let v = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);

0 commit comments

Comments
 (0)