Skip to content

implement different types of parameters and double suffixes in code generator #1083

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 104 additions & 0 deletions crates/core_arch/src/aarch64/neon/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1099,6 +1099,60 @@ pub unsafe fn vcaleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
vcageq_f64(b, a)
}

/// Floating-point convert to higher precision long
///
/// Widens each `f32` lane of `a` to `f64`, returning a `float64x2_t`.
/// Must lower to a single AArch64 `FCVTL` instruction (enforced by
/// `assert_instr` in test builds).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtl))]
pub unsafe fn vcvt_f64_f32(a: float32x2_t) -> float64x2_t {
    // Lane-wise f32 -> f64 widening via the platform-intrinsic cast.
    simd_cast(a)
}

/// Floating-point convert to higher precision long
///
/// Widens the *upper* two `f32` lanes of `a` (indices 2 and 3) to `f64`.
/// The shuffle + cast pair is expected to lower to a single `FCVTL2`-style
/// `fcvtl` instruction (checked by `assert_instr`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtl))]
pub unsafe fn vcvt_high_f64_f32(a: float32x4_t) -> float64x2_t {
    // Extract lanes 2..4, then widen them lane-wise.
    let b: float32x2_t = simd_shuffle2(a, a, [2, 3]);
    simd_cast(b)
}

/// Floating-point convert to lower precision narrow
///
/// Narrows each `f64` lane of `a` to `f32` (round-to-nearest-even, the
/// default IEEE rounding of a plain cast). Must lower to a single AArch64
/// `FCVTN` instruction (enforced by `assert_instr` in test builds).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtn))]
pub unsafe fn vcvt_f32_f64(a: float64x2_t) -> float32x2_t {
    // Lane-wise f64 -> f32 narrowing via the platform-intrinsic cast.
    simd_cast(a)
}

/// Floating-point convert to lower precision narrow
///
/// Narrows the two `f64` lanes of `b` to `f32` and places them in the upper
/// half of the result; the lower half is `a` unchanged. Expected to lower to
/// an `FCVTN2`-style `fcvtn` instruction (checked by `assert_instr`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtn))]
pub unsafe fn vcvt_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
    // Result lanes: [a[0], a[1], narrow(b)[0], narrow(b)[1]].
    simd_shuffle4(a, simd_cast(b), [0, 1, 2, 3])
}

/// Floating-point convert to lower precision narrow, rounding to odd
///
/// Narrows each `f64` lane of `a` to `f32` using the round-to-odd mode of
/// the AArch64 `FCVTXN` instruction. Round-to-odd has no generic Rust cast,
/// so this binds the LLVM `llvm.aarch64.neon.fcvtxn` intrinsic directly.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtxn))]
pub unsafe fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t {
    // SIMD vector types are not technically FFI-safe; this pattern is how
    // stdarch binds LLVM intrinsics, hence the lint suppression.
    #[allow(improper_ctypes)]
    extern "C" {
        // Link name encodes result (v2f32) and argument (v2f64) vector types.
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtxn.v2f32.v2f64")]
        fn vcvtx_f32_f64_(a: float64x2_t) -> float32x2_t;
    }
    vcvtx_f32_f64_(a)
}

/// Floating-point convert to lower precision narrow, rounding to odd
///
/// Round-to-odd narrows the two `f64` lanes of `b` into the upper half of
/// the result; the lower half is `a` unchanged. Expected to lower to an
/// `FCVTXN2`-style `fcvtxn` instruction (checked by `assert_instr`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtxn))]
pub unsafe fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
    // Result lanes: [a[0], a[1], vcvtx(b)[0], vcvtx(b)[1]].
    simd_shuffle4(a, vcvtx_f32_f64(b), [0, 1, 2, 3])
}

/// Multiply
#[inline]
#[target_feature(enable = "neon")]
Expand Down Expand Up @@ -2366,6 +2420,56 @@ mod test {
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vcvt_f64_f32() {
    // Widen two f32 lanes to f64; expectation mirrors scalar `as` casts.
    let input: f32x2 = f32x2::new(-1.2, 1.2);
    let expected: f64x2 = f64x2::new(-1.2f32 as f64, 1.2f32 as f64);
    let actual: f64x2 = transmute(vcvt_f64_f32(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vcvt_high_f64_f32() {
    // Only the upper two f32 lanes (2.3, 3.4) are widened to f64.
    let input: f32x4 = f32x4::new(-1.2, 1.2, 2.3, 3.4);
    let expected: f64x2 = f64x2::new(2.3f32 as f64, 3.4f32 as f64);
    let actual: f64x2 = transmute(vcvt_high_f64_f32(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vcvt_f32_f64() {
    // Narrow two f64 lanes to f32; expectation mirrors scalar `as` casts.
    let input: f64x2 = f64x2::new(-1.2, 1.2);
    let expected: f32x2 = f32x2::new(-1.2f64 as f32, 1.2f64 as f32);
    let actual: f32x2 = transmute(vcvt_f32_f64(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vcvt_high_f32_f64() {
    // Lower half passes through unchanged; upper half is b narrowed to f32.
    let low: f32x2 = f32x2::new(-1.2, 1.2);
    let wide: f64x2 = f64x2::new(-2.3, 3.4);
    let expected: f32x4 = f32x4::new(-1.2, 1.2, -2.3f64 as f32, 3.4f64 as f32);
    let actual: f32x4 = transmute(vcvt_high_f32_f64(transmute(low), transmute(wide)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vcvtx_f32_f64() {
    // Inputs are exactly representable in f32, so round-to-odd is exact.
    let input: f64x2 = f64x2::new(-1.0, 2.0);
    let expected: f32x2 = f32x2::new(-1.0, 2.0);
    let actual: f32x2 = transmute(vcvtx_f32_f64(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vcvtx_high_f32_f64() {
    // Lower half passes through; upper half is b round-to-odd narrowed
    // (exact here, since the inputs are representable in f32).
    let low: f32x2 = f32x2::new(-1.0, 2.0);
    let wide: f64x2 = f64x2::new(-3.0, 4.0);
    let expected: f32x4 = f32x4::new(-1.0, 2.0, -3.0, 4.0);
    let actual: f32x4 = transmute(vcvtx_high_f32_f64(transmute(low), transmute(wide)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmul_f64() {
let a: f64 = 1.0;
Expand Down
65 changes: 64 additions & 1 deletion crates/stdarch-gen/neon.spec
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,7 @@ generate int*_t

/// Unsigned count leading sign bits
name = vclz
multi_fn = transmute, [self-signed-ext, transmute(a)]
multi_fn = transmute, {self-signed-ext, transmute(a)}
a = MIN, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
validate BITS, BITS, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 0

Expand Down Expand Up @@ -589,6 +589,69 @@ generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
arm = vacge.s
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Floating-point convert to higher precision long
name = vcvt
double-suffixes
fn = simd_cast
a = -1.2, 1.2
validate -1.2f32 as f64, 1.2f32 as f64

aarch64 = fcvtl
generate float32x2_t:float64x2_t

/// Floating-point convert to higher precision long
name = vcvt_high
double-suffixes
multi_fn = simd_shuffle2, b:float32x2_t, a, a, [2, 3]
multi_fn = simd_cast, b
a = -1.2, 1.2, 2.3, 3.4
validate 2.3f32 as f64, 3.4f32 as f64

aarch64 = fcvtl
generate float32x4_t:float64x2_t

/// Floating-point convert to lower precision narrow
name = vcvt
double-suffixes
fn = simd_cast
a = -1.2, 1.2
validate -1.2f64 as f32, 1.2f64 as f32

aarch64 = fcvtn
generate float64x2_t:float32x2_t

/// Floating-point convert to lower precision narrow
name = vcvt_high
double-suffixes
multi_fn = simd_shuffle4, a, {simd_cast, b}, [0, 1, 2, 3]
a = -1.2, 1.2
b = -2.3, 3.4
validate -1.2, 1.2, -2.3f64 as f32, 3.4f64 as f32

aarch64 = fcvtn
generate float32x2_t:float64x2_t:float32x4_t

/// Floating-point convert to lower precision narrow, rounding to odd
name = vcvtx
double-suffixes
a = -1.0, 2.0
validate -1.0, 2.0

aarch64 = fcvtxn
link-aarch64 = fcvtxn._EXT2_._EXT_
generate float64x2_t:float32x2_t

/// Floating-point convert to lower precision narrow, rounding to odd
name = vcvtx_high
double-suffixes
multi_fn = simd_shuffle4, a, {vcvtx-doubleself-noext, b}, [0, 1, 2, 3]
a = -1.0, 2.0
b = -3.0, 4.0
validate -1.0, 2.0, -3.0, 4.0

aarch64 = fcvtxn
generate float32x2_t:float64x2_t:float32x4_t

/// Saturating subtract
name = vqsub
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
Expand Down
Loading