Skip to content

Commit 08e4e5e

Browse files
committed
Split out a separate implementation for aarch64
The ARM implementation uses fptoi that has undefined behaviour for out of range data. Clang has the same problem: https://llvm.org/PR47510
1 parent 8da5575 commit 08e4e5e

File tree

2 files changed

+67
-26
lines changed

2 files changed

+67
-26
lines changed

crates/core_arch/src/aarch64/neon/mod.rs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,11 @@ extern "C" {
285285
b3: int8x16_t,
286286
c: uint8x16_t,
287287
) -> int8x16_t;
288+
289+
#[link_name = "llvm.aarch64.neon.fcvtzu.v4i32.v4f32"]
290+
fn vcvtq_u32_f32_(a: float32x4_t) -> uint32x4_t;
291+
#[link_name = "llvm.aarch64.neon.fcvtzs.v4i32.v4f32"]
292+
fn vcvtq_s32_f32_(a: float32x4_t) -> int32x4_t;
288293
}
289294

290295
/// Absolute Value (wrapping).
@@ -1838,6 +1843,21 @@ pub unsafe fn vld1q_u32(addr: *const u32) -> uint32x4_t {
18381843
))
18391844
}
18401845

1846+
#[inline]
1847+
#[target_feature(enable = "neon")]
1848+
#[cfg_attr(test, assert_instr(fcvtzs))]
1849+
pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
1850+
vcvtq_s32_f32_(a)
1851+
}
1852+
1853+
/// Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector)
1854+
#[inline]
1855+
#[target_feature(enable = "neon")]
1856+
#[cfg_attr(test, assert_instr(fcvtzu))]
1857+
pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
1858+
vcvtq_u32_f32_(a)
1859+
}
1860+
18411861
#[cfg(test)]
18421862
mod tests {
18431863
use crate::core_arch::aarch64::test_support::*;
@@ -1846,6 +1866,42 @@ mod tests {
18461866
use std::mem::transmute;
18471867
use stdarch_test::simd_test;
18481868

1869+
#[simd_test(enable = "neon")]
1870+
unsafe fn test_vcvtq_s32_f32() {
1871+
let f = f32x4::new(-1., 2., 3., 4.);
1872+
let e = i32x4::new(-1, 2, 3, 4);
1873+
let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f)));
1874+
assert_eq!(r, e);
1875+
1876+
let f = f32x4::new(10e37, 2., 3., 4.);
1877+
let e = i32x4::new(0x7fffffff, 2, 3, 4);
1878+
let r: i32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1879+
assert_eq!(r, e);
1880+
1881+
let f = f32x4::new(-10e37, 2., 3., 4.);
1882+
let e = i32x4::new(-0x80000000, 2, 3, 4);
1883+
let r: i32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1884+
assert_eq!(r, e);
1885+
}
1886+
1887+
#[simd_test(enable = "neon")]
1888+
unsafe fn test_vcvtq_u32_f32() {
1889+
let f = f32x4::new(1., 2., 3., 4.);
1890+
let e = u32x4::new(1, 2, 3, 4);
1891+
let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1892+
assert_eq!(r, e);
1893+
1894+
let f = f32x4::new(-1., 2., 3., 4.);
1895+
let e = u32x4::new(0, 2, 3, 4);
1896+
let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1897+
assert_eq!(r, e);
1898+
1899+
let f = f32x4::new(10e37, 2., 3., 4.);
1900+
let e = u32x4::new(0xffffffff, 2, 3, 4);
1901+
let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1902+
assert_eq!(r, e);
1903+
}
1904+
18491905
#[simd_test(enable = "neon")]
18501906
unsafe fn test_vld1q_f32() {
18511907
let e = f32x4::new(1., 2., 3., 4.);

crates/core_arch/src/arm/neon/mod.rs

Lines changed: 11 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1813,23 +1813,26 @@ pub unsafe fn vld1q_dup_f32(addr: *const f32) -> float32x4_t {
18131813
transmute(f32x4::new(v, v, v, v))
18141814
}
18151815

1816+
// These float-to-int implementations have undefined behaviour when `a` overflows
1817+
// the destination type. Clang has the same problem: https://llvm.org/PR47510
1818+
18161819
/// Floating-point Convert to Signed fixed-point, rounding toward Zero (vector)
18171820
#[inline]
1821+
#[cfg(target_arch = "arm")]
18181822
#[target_feature(enable = "neon")]
1819-
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1820-
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt.s32.f32"))]
1821-
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
1823+
#[target_feature(enable = "v7")]
1824+
#[cfg_attr(test, assert_instr("vcvt.s32.f32"))]
18221825
pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
18231826
use crate::core_arch::simd::{f32x4, i32x4};
18241827
transmute(simd_cast::<_, i32x4>(transmute::<_, f32x4>(a)))
18251828
}
18261829

18271830
/// Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector)
18281831
#[inline]
1832+
#[cfg(target_arch = "arm")]
18291833
#[target_feature(enable = "neon")]
1830-
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1831-
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcvt.u32.f32"))]
1832-
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
1834+
#[target_feature(enable = "v7")]
1835+
#[cfg_attr(test, assert_instr("vcvt.u32.f32"))]
18331836
pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
18341837
use crate::core_arch::simd::{f32x4, u32x4};
18351838
transmute(simd_cast::<_, u32x4>(transmute::<_, f32x4>(a)))
@@ -1900,40 +1903,22 @@ mod tests {
19001903
assert_eq!(r, e);
19011904
}
19021905

1906+
#[cfg(target_arch = "arm")]
19031907
#[simd_test(enable = "neon")]
19041908
unsafe fn test_vcvtq_s32_f32() {
19051909
let f = f32x4::new(-1., 2., 3., 4.);
19061910
let e = i32x4::new(-1, 2, 3, 4);
19071911
let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f)));
19081912
assert_eq!(r, e);
1909-
1910-
let f = f32x4::new(10e37, 2., 3., 4.);
1911-
let e = i32x4::new(0x7fffffff, 2, 3, 4);
1912-
let r: i32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1913-
assert_eq!(r, e);
1914-
1915-
let f = f32x4::new(-10e37, 2., 3., 4.);
1916-
let e = i32x4::new(-0x80000000, 2, 3, 4);
1917-
let r: i32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1918-
assert_eq!(r, e);
19191913
}
19201914

1915+
#[cfg(target_arch = "arm")]
19211916
#[simd_test(enable = "neon")]
19221917
unsafe fn test_vcvtq_u32_f32() {
19231918
let f = f32x4::new(1., 2., 3., 4.);
19241919
let e = u32x4::new(1, 2, 3, 4);
19251920
let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
19261921
assert_eq!(r, e);
1927-
1928-
let f = f32x4::new(-1., 2., 3., 4.);
1929-
let e = u32x4::new(0, 2, 3, 4);
1930-
let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1931-
assert_eq!(r, e);
1932-
1933-
let f = f32x4::new(10e37, 2., 3., 4.);
1934-
let e = u32x4::new(0xffffffff, 2, 3, 4);
1935-
let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
1936-
assert_eq!(r, e);
19371922
}
19381923

19391924
#[simd_test(enable = "neon")]

0 commit comments

Comments
 (0)