Skip to content

Commit a63b0d4

Browse files
committed
Merge remote-tracking branch 'origin/master'
2 parents 4ae11b3 + daae8f8 commit a63b0d4

File tree

6 files changed

+1494
-286
lines changed

6 files changed

+1494
-286
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1664,6 +1664,34 @@ pub unsafe fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t {
16641664
vcvtpq_u64_f64_(a)
16651665
}
16661666

1667+
/// Extract vector from pair of vectors
1668+
#[inline]
1669+
#[target_feature(enable = "neon")]
1670+
#[cfg_attr(test, assert_instr(ext, N = 1))]
1671+
#[rustc_legacy_const_generics(2)]
1672+
pub unsafe fn vextq_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
1673+
static_assert_imm1!(N);
1674+
match N & 0b1 {
1675+
0 => simd_shuffle2(a, b, [0, 1]),
1676+
1 => simd_shuffle2(a, b, [1, 2]),
1677+
_ => unreachable_unchecked(),
1678+
}
1679+
}
1680+
1681+
/// Extract vector from pair of vectors
1682+
#[inline]
1683+
#[target_feature(enable = "neon")]
1684+
#[cfg_attr(test, assert_instr(ext, N = 1))]
1685+
#[rustc_legacy_const_generics(2)]
1686+
pub unsafe fn vextq_f64<const N: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
1687+
static_assert_imm1!(N);
1688+
match N & 0b1 {
1689+
0 => simd_shuffle2(a, b, [0, 1]),
1690+
1 => simd_shuffle2(a, b, [1, 2]),
1691+
_ => unreachable_unchecked(),
1692+
}
1693+
}
1694+
16671695
/// Floating-point multiply-add to accumulator
16681696
#[inline]
16691697
#[target_feature(enable = "neon")]
@@ -2510,6 +2538,71 @@ pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
25102538
vmaxq_f64_(a, b)
25112539
}
25122540

2541+
/// Floating-point Maximum Number (vector)
2542+
#[inline]
2543+
#[target_feature(enable = "neon")]
2544+
#[cfg_attr(test, assert_instr(fmaxnm))]
2545+
pub unsafe fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
2546+
#[allow(improper_ctypes)]
2547+
extern "C" {
2548+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v1f64")]
2549+
fn vmaxnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
2550+
}
2551+
vmaxnm_f64_(a, b)
2552+
}
2553+
2554+
/// Floating-point Maximum Number (vector)
2555+
#[inline]
2556+
#[target_feature(enable = "neon")]
2557+
#[cfg_attr(test, assert_instr(fmaxnm))]
2558+
pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
2559+
#[allow(improper_ctypes)]
2560+
extern "C" {
2561+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f64")]
2562+
fn vmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
2563+
}
2564+
vmaxnmq_f64_(a, b)
2565+
}
2566+
2567+
/// Floating-point Maximum Number Pairwise (vector).
2568+
#[inline]
2569+
#[target_feature(enable = "neon")]
2570+
#[cfg_attr(test, assert_instr(fmaxnmp))]
2571+
pub unsafe fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
2572+
#[allow(improper_ctypes)]
2573+
extern "C" {
2574+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f32")]
2575+
fn vpmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
2576+
}
2577+
vpmaxnm_f32_(a, b)
2578+
}
2579+
2580+
/// Floating-point Maximum Number Pairwise (vector).
2581+
#[inline]
2582+
#[target_feature(enable = "neon")]
2583+
#[cfg_attr(test, assert_instr(fmaxnmp))]
2584+
pub unsafe fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
2585+
#[allow(improper_ctypes)]
2586+
extern "C" {
2587+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f64")]
2588+
fn vpmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
2589+
}
2590+
vpmaxnmq_f64_(a, b)
2591+
}
2592+
2593+
/// Floating-point Maximum Number Pairwise (vector).
2594+
#[inline]
2595+
#[target_feature(enable = "neon")]
2596+
#[cfg_attr(test, assert_instr(fmaxnmp))]
2597+
pub unsafe fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
2598+
#[allow(improper_ctypes)]
2599+
extern "C" {
2600+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v4f32")]
2601+
fn vpmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
2602+
}
2603+
vpmaxnmq_f32_(a, b)
2604+
}
2605+
25132606
/// Minimum (vector)
25142607
#[inline]
25152608
#[target_feature(enable = "neon")]
@@ -2536,6 +2629,71 @@ pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
25362629
vminq_f64_(a, b)
25372630
}
25382631

2632+
/// Floating-point Minimum Number (vector)
2633+
#[inline]
2634+
#[target_feature(enable = "neon")]
2635+
#[cfg_attr(test, assert_instr(fminnm))]
2636+
pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
2637+
#[allow(improper_ctypes)]
2638+
extern "C" {
2639+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v1f64")]
2640+
fn vminnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
2641+
}
2642+
vminnm_f64_(a, b)
2643+
}
2644+
2645+
/// Floating-point Minimum Number (vector)
2646+
#[inline]
2647+
#[target_feature(enable = "neon")]
2648+
#[cfg_attr(test, assert_instr(fminnm))]
2649+
pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
2650+
#[allow(improper_ctypes)]
2651+
extern "C" {
2652+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f64")]
2653+
fn vminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
2654+
}
2655+
vminnmq_f64_(a, b)
2656+
}
2657+
2658+
/// Floating-point Minimum Number Pairwise (vector).
2659+
#[inline]
2660+
#[target_feature(enable = "neon")]
2661+
#[cfg_attr(test, assert_instr(fminnmp))]
2662+
pub unsafe fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
2663+
#[allow(improper_ctypes)]
2664+
extern "C" {
2665+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f32")]
2666+
fn vpminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
2667+
}
2668+
vpminnm_f32_(a, b)
2669+
}
2670+
2671+
/// Floating-point Minimum Number Pairwise (vector).
2672+
#[inline]
2673+
#[target_feature(enable = "neon")]
2674+
#[cfg_attr(test, assert_instr(fminnmp))]
2675+
pub unsafe fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
2676+
#[allow(improper_ctypes)]
2677+
extern "C" {
2678+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f64")]
2679+
fn vpminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
2680+
}
2681+
vpminnmq_f64_(a, b)
2682+
}
2683+
2684+
/// Floating-point Minimum Number Pairwise (vector).
2685+
#[inline]
2686+
#[target_feature(enable = "neon")]
2687+
#[cfg_attr(test, assert_instr(fminnmp))]
2688+
pub unsafe fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
2689+
#[allow(improper_ctypes)]
2690+
extern "C" {
2691+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v4f32")]
2692+
fn vpminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
2693+
}
2694+
vpminnmq_f32_(a, b)
2695+
}
2696+
25392697
/// Calculates the square root of each lane.
25402698
#[inline]
25412699
#[target_feature(enable = "neon")]
@@ -5614,6 +5772,24 @@ mod test {
56145772
assert_eq!(r, e);
56155773
}
56165774

5775+
#[simd_test(enable = "neon")]
5776+
unsafe fn test_vextq_p64() {
5777+
let a: i64x2 = i64x2::new(0, 8);
5778+
let b: i64x2 = i64x2::new(9, 11);
5779+
let e: i64x2 = i64x2::new(8, 9);
5780+
let r: i64x2 = transmute(vextq_p64::<1>(transmute(a), transmute(b)));
5781+
assert_eq!(r, e);
5782+
}
5783+
5784+
#[simd_test(enable = "neon")]
5785+
unsafe fn test_vextq_f64() {
5786+
let a: f64x2 = f64x2::new(0., 2.);
5787+
let b: f64x2 = f64x2::new(3., 4.);
5788+
let e: f64x2 = f64x2::new(2., 3.);
5789+
let r: f64x2 = transmute(vextq_f64::<1>(transmute(a), transmute(b)));
5790+
assert_eq!(r, e);
5791+
}
5792+
56175793
#[simd_test(enable = "neon")]
56185794
unsafe fn test_vmla_f64() {
56195795
let a: f64 = 0.;
@@ -6301,6 +6477,51 @@ mod test {
63016477
assert_eq!(r, e);
63026478
}
63036479

6480+
#[simd_test(enable = "neon")]
6481+
unsafe fn test_vmaxnm_f64() {
6482+
let a: f64 = 1.0;
6483+
let b: f64 = 8.0;
6484+
let e: f64 = 8.0;
6485+
let r: f64 = transmute(vmaxnm_f64(transmute(a), transmute(b)));
6486+
assert_eq!(r, e);
6487+
}
6488+
6489+
#[simd_test(enable = "neon")]
6490+
unsafe fn test_vmaxnmq_f64() {
6491+
let a: f64x2 = f64x2::new(1.0, 2.0);
6492+
let b: f64x2 = f64x2::new(8.0, 16.0);
6493+
let e: f64x2 = f64x2::new(8.0, 16.0);
6494+
let r: f64x2 = transmute(vmaxnmq_f64(transmute(a), transmute(b)));
6495+
assert_eq!(r, e);
6496+
}
6497+
6498+
#[simd_test(enable = "neon")]
6499+
unsafe fn test_vpmaxnm_f32() {
6500+
let a: f32x2 = f32x2::new(1.0, 2.0);
6501+
let b: f32x2 = f32x2::new(6.0, -3.0);
6502+
let e: f32x2 = f32x2::new(2.0, 6.0);
6503+
let r: f32x2 = transmute(vpmaxnm_f32(transmute(a), transmute(b)));
6504+
assert_eq!(r, e);
6505+
}
6506+
6507+
#[simd_test(enable = "neon")]
6508+
unsafe fn test_vpmaxnmq_f64() {
6509+
let a: f64x2 = f64x2::new(1.0, 2.0);
6510+
let b: f64x2 = f64x2::new(6.0, -3.0);
6511+
let e: f64x2 = f64x2::new(2.0, 6.0);
6512+
let r: f64x2 = transmute(vpmaxnmq_f64(transmute(a), transmute(b)));
6513+
assert_eq!(r, e);
6514+
}
6515+
6516+
#[simd_test(enable = "neon")]
6517+
unsafe fn test_vpmaxnmq_f32() {
6518+
let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
6519+
let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
6520+
let e: f32x4 = f32x4::new(2.0, 3.0, 16.0, 6.0);
6521+
let r: f32x4 = transmute(vpmaxnmq_f32(transmute(a), transmute(b)));
6522+
assert_eq!(r, e);
6523+
}
6524+
63046525
#[simd_test(enable = "neon")]
63056526
unsafe fn test_vmin_f64() {
63066527
let a: f64 = 1.0;
@@ -6319,6 +6540,51 @@ mod test {
63196540
assert_eq!(r, e);
63206541
}
63216542

6543+
#[simd_test(enable = "neon")]
6544+
unsafe fn test_vminnm_f64() {
6545+
let a: f64 = 1.0;
6546+
let b: f64 = 8.0;
6547+
let e: f64 = 1.0;
6548+
let r: f64 = transmute(vminnm_f64(transmute(a), transmute(b)));
6549+
assert_eq!(r, e);
6550+
}
6551+
6552+
#[simd_test(enable = "neon")]
6553+
unsafe fn test_vminnmq_f64() {
6554+
let a: f64x2 = f64x2::new(1.0, 2.0);
6555+
let b: f64x2 = f64x2::new(8.0, 16.0);
6556+
let e: f64x2 = f64x2::new(1.0, 2.0);
6557+
let r: f64x2 = transmute(vminnmq_f64(transmute(a), transmute(b)));
6558+
assert_eq!(r, e);
6559+
}
6560+
6561+
#[simd_test(enable = "neon")]
6562+
unsafe fn test_vpminnm_f32() {
6563+
let a: f32x2 = f32x2::new(1.0, 2.0);
6564+
let b: f32x2 = f32x2::new(6.0, -3.0);
6565+
let e: f32x2 = f32x2::new(1.0, -3.0);
6566+
let r: f32x2 = transmute(vpminnm_f32(transmute(a), transmute(b)));
6567+
assert_eq!(r, e);
6568+
}
6569+
6570+
#[simd_test(enable = "neon")]
6571+
unsafe fn test_vpminnmq_f64() {
6572+
let a: f64x2 = f64x2::new(1.0, 2.0);
6573+
let b: f64x2 = f64x2::new(6.0, -3.0);
6574+
let e: f64x2 = f64x2::new(1.0, -3.0);
6575+
let r: f64x2 = transmute(vpminnmq_f64(transmute(a), transmute(b)));
6576+
assert_eq!(r, e);
6577+
}
6578+
6579+
#[simd_test(enable = "neon")]
6580+
unsafe fn test_vpminnmq_f32() {
6581+
let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
6582+
let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
6583+
let e: f32x4 = f32x4::new(1.0, -4.0, 8.0, -1.0);
6584+
let r: f32x4 = transmute(vpminnmq_f32(transmute(a), transmute(b)));
6585+
assert_eq!(r, e);
6586+
}
6587+
63226588
#[simd_test(enable = "neon")]
63236589
unsafe fn test_vsqrt_f32() {
63246590
let a: f32x2 = f32x2::new(4.0, 9.0);

crates/core_arch/src/aarch64/neon/mod.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use crate::{
1515
};
1616
#[cfg(test)]
1717
use stdarch_test::assert_instr;
18+
use core::hint::unreachable_unchecked;
1819

1920
types! {
2021
/// ARM-specific 64-bit wide vector of one packed `f64`.
@@ -1427,6 +1428,29 @@ pub unsafe fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
14271428
vpmaxq_f64_(a, b)
14281429
}
14291430

1431+
/// Extract vector from pair of vectors
1432+
#[inline]
1433+
#[target_feature(enable = "neon")]
1434+
#[cfg_attr(test, assert_instr(str, N = 0))]
1435+
#[rustc_legacy_const_generics(2)]
1436+
pub unsafe fn vext_p64<const N: i32>(a: poly64x1_t, _b: poly64x1_t) -> poly64x1_t {
1437+
if N != 0 {
1438+
unreachable_unchecked()
1439+
}
1440+
a
1441+
}
1442+
1443+
/// Extract vector from pair of vectors
1444+
#[inline]
1445+
#[target_feature(enable = "neon")]
1446+
#[cfg_attr(test, assert_instr(str, N = 0))]
1447+
#[rustc_legacy_const_generics(2)]
1448+
pub unsafe fn vext_f64<const N: i32>(a: float64x1_t, _b: float64x1_t) -> float64x1_t {
1449+
if N != 0 {
1450+
unreachable_unchecked()
1451+
}
1452+
a
1453+
}
14301454
/// Vector combine
14311455
#[inline]
14321456
#[target_feature(enable = "neon")]
@@ -3470,6 +3494,24 @@ mod tests {
34703494
assert_eq!(r, e);
34713495
}
34723496

3497+
#[simd_test(enable = "neon")]
3498+
unsafe fn test_vext_p64() {
3499+
let a: i64x1 = i64x1::new(0);
3500+
let b: i64x1 = i64x1::new(1);
3501+
let e: i64x1 = i64x1::new(0);
3502+
let r: i64x1 = transmute(vext_p64::<0>(transmute(a), transmute(b)));
3503+
assert_eq!(r, e);
3504+
}
3505+
3506+
#[simd_test(enable = "neon")]
3507+
unsafe fn test_vext_f64() {
3508+
let a: f64x1 = f64x1::new(0.);
3509+
let b: f64x1 = f64x1::new(1.);
3510+
let e: f64x1 = f64x1::new(0.);
3511+
let r: f64x1 = transmute(vext_f64::<0>(transmute(a), transmute(b)));
3512+
assert_eq!(r, e);
3513+
}
3514+
34733515
macro_rules! test_vcombine {
34743516
($test_id:ident => $fn_id:ident ([$($a:expr),*], [$($b:expr),*])) => {
34753517
#[allow(unused_assignments)]

0 commit comments

Comments
 (0)