Skip to content

Commit ed04262

Browse files
committed
Merge remote-tracking branch 'origin/master'
2 parents f866c0a + 437b3f8 commit ed04262

File tree

5 files changed

+123
-3
lines changed

5 files changed

+123
-3
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2784,6 +2784,32 @@ pub unsafe fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
27842784
vmull_p8(a, b)
27852785
}
27862786

2787+
/// Floating-point fused Multiply-Add to accumulator (vector)
///
/// Thin wrapper: forwards `a`, `b`, `c` unchanged and in that order to the
/// external function bound to the link name `llvm.fma.v1f64`, and returns its
/// result. The accompanying `test_vfma_f64` expects `a * b + c`
/// (`2.0 * 6.0 + 8.0 == 20.0`).
///
/// NOTE(review): Arm's intrinsic reference appears to define `vfma` as
/// accumulating into the first operand (`a + b * c`), which differs from what
/// is forwarded here — confirm the intended operand order.
///
/// # Safety
///
/// Declared with `#[target_feature(enable = "neon")]`; the caller is expected
/// to ensure that feature is available on the executing CPU.
2788+
#[inline]
2789+
#[target_feature(enable = "neon")]
2790+
#[cfg_attr(test, assert_instr(fmadd))]
2791+
pub unsafe fn vfma_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
2792+
// Lint silenced for the declaration below — presumably because the vector
// types are not plain C types; TODO confirm.
#[allow(improper_ctypes)]
2793+
extern "C" {
2794+
// Only bound to a link name when compiling for aarch64.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v1f64")]
2795+
fn vfma_f64_(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t;
2796+
}
2797+
// Arguments are passed through in declaration order (a, b, c).
vfma_f64_(a, b, c)
2798+
}
2799+
2800+
/// Floating-point fused Multiply-Add to accumulator (vector)
///
/// Thin wrapper: forwards `a`, `b`, `c` unchanged and in that order to the
/// external function bound to the link name `llvm.fma.v2f64`, and returns its
/// result. The accompanying `test_vfmaq_f64` expects `a * b + c` in each of
/// the two lanes (`2.0 * 6.0 + 8.0 == 20.0`, `3.0 * 4.0 + 18.0 == 30.0`).
///
/// NOTE(review): Arm's intrinsic reference appears to define `vfmaq` as
/// accumulating into the first operand (`a + b * c`), which differs from what
/// is forwarded here — confirm the intended operand order.
///
/// # Safety
///
/// Declared with `#[target_feature(enable = "neon")]`; the caller is expected
/// to ensure that feature is available on the executing CPU.
2801+
#[inline]
2802+
#[target_feature(enable = "neon")]
2803+
#[cfg_attr(test, assert_instr(fmla))]
2804+
pub unsafe fn vfmaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
2805+
// Lint silenced for the declaration below — presumably because the vector
// types are not plain C types; TODO confirm.
#[allow(improper_ctypes)]
2806+
extern "C" {
2807+
// Only bound to a link name when compiling for aarch64.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v2f64")]
2808+
fn vfmaq_f64_(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t;
2809+
}
2810+
// Arguments are passed through in declaration order (a, b, c).
vfmaq_f64_(a, b, c)
2811+
}
2812+
27872813
/// Divide
27882814
#[inline]
27892815
#[target_feature(enable = "neon")]
@@ -7233,6 +7259,26 @@ mod test {
72337259
assert_eq!(r, e);
72347260
}
72357261

7262+
// Checks `vfma_f64` on a single f64 value: the expected result encodes
// a * b + c (2.0 * 6.0 + 8.0 == 20.0).
// NOTE(review): under an `a + b * c` interpretation these inputs would give
// 50.0, so this test pins the operand order — revisit if that order changes.
#[simd_test(enable = "neon")]
7263+
unsafe fn test_vfma_f64() {
7264+
let a: f64 = 2.0;
7265+
let b: f64 = 6.0;
7266+
let c: f64 = 8.0;
7267+
let e: f64 = 20.0;
7268+
// `transmute` reinterprets each scalar as the intrinsic's argument type and
// the returned vector back to a plain f64 for comparison.
let r: f64 = transmute(vfma_f64(transmute(a), transmute(b), transmute(c)));
7269+
assert_eq!(r, e);
7270+
}
7271+
7272+
// Checks `vfmaq_f64` on two lanes: each expected lane encodes a * b + c
// (2.0 * 6.0 + 8.0 == 20.0, 3.0 * 4.0 + 18.0 == 30.0).
// NOTE(review): under an `a + b * c` interpretation these inputs would give
// different results, so this test pins the operand order — revisit if that
// order changes.
#[simd_test(enable = "neon")]
7273+
unsafe fn test_vfmaq_f64() {
7274+
let a: f64x2 = f64x2::new(2.0, 3.0);
7275+
let b: f64x2 = f64x2::new(6.0, 4.0);
7276+
let c: f64x2 = f64x2::new(8.0, 18.0);
7277+
let e: f64x2 = f64x2::new(20.0, 30.0);
7278+
// `transmute` reinterprets the f64x2 helpers as the intrinsic's argument
// type and the returned vector back to f64x2 for comparison.
let r: f64x2 = transmute(vfmaq_f64(transmute(a), transmute(b), transmute(c)));
7279+
assert_eq!(r, e);
7280+
}
7281+
72367282
#[simd_test(enable = "neon")]
72377283
unsafe fn test_vdiv_f32() {
72387284
let a: f32x2 = f32x2::new(2.0, 6.0);

crates/core_arch/src/acle/neon/generated.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4706,6 +4706,38 @@ pub unsafe fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t {
47064706
vmull_p8_(a, b)
47074707
}
47084708

4709+
/// Floating-point fused Multiply-Add to accumulator (vector)
///
/// Thin wrapper: forwards `a`, `b`, `c` unchanged and in that order to the
/// external function bound to the link name `llvm.fma.v2f32` (the same name on
/// both `arm` and `aarch64`), and returns its result. The accompanying
/// `test_vfma_f32` expects `a * b + c` in each of the two lanes.
///
/// NOTE(review): Arm's intrinsic reference appears to define `vfma` as
/// accumulating into the first operand (`a + b * c`), which differs from what
/// is forwarded here — confirm the intended operand order.
///
/// # Safety
///
/// Declared with `#[target_feature(enable = "neon")]`, plus `fp-armv8,v8` when
/// compiling for `arm`; the caller is expected to ensure those features are
/// available on the executing CPU.
4710+
#[inline]
4711+
#[target_feature(enable = "neon")]
4712+
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
4713+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
4714+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))]
4715+
pub unsafe fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
4716+
// Lint silenced for the declaration below — presumably because the vector
// types are not plain C types; TODO confirm.
#[allow(improper_ctypes)]
4717+
extern "C" {
4718+
// Both architectures bind to the same link name.
#[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v2f32")]
4719+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v2f32")]
4720+
fn vfma_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
4721+
}
4722+
// Arguments are passed through in declaration order (a, b, c).
vfma_f32_(a, b, c)
4723+
}
4724+
4725+
/// Floating-point fused Multiply-Add to accumulator (vector)
///
/// Thin wrapper: forwards `a`, `b`, `c` unchanged and in that order to the
/// external function bound to the link name `llvm.fma.v4f32` (the same name on
/// both `arm` and `aarch64`), and returns its result. The accompanying
/// `test_vfmaq_f32` expects `a * b + c` in each of the four lanes.
///
/// NOTE(review): Arm's intrinsic reference appears to define `vfmaq` as
/// accumulating into the first operand (`a + b * c`), which differs from what
/// is forwarded here — confirm the intended operand order.
///
/// # Safety
///
/// Declared with `#[target_feature(enable = "neon")]`, plus `fp-armv8,v8` when
/// compiling for `arm`; the caller is expected to ensure those features are
/// available on the executing CPU.
4726+
#[inline]
4727+
#[target_feature(enable = "neon")]
4728+
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
4729+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
4730+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))]
4731+
pub unsafe fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
4732+
// Lint silenced for the declaration below — presumably because the vector
// types are not plain C types; TODO confirm.
#[allow(improper_ctypes)]
4733+
extern "C" {
4734+
// Both architectures bind to the same link name.
#[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v4f32")]
4735+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v4f32")]
4736+
fn vfmaq_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
4737+
}
4738+
// Arguments are passed through in declaration order (a, b, c).
vfmaq_f32_(a, b, c)
4739+
}
4740+
47094741
/// Subtract
47104742
#[inline]
47114743
#[target_feature(enable = "neon")]
@@ -12642,6 +12674,26 @@ mod test {
1264212674
assert_eq!(r, e);
1264312675
}
1264412676

12677+
// Checks `vfma_f32` on two lanes: each expected lane encodes a * b + c
// (2.0 * 6.0 + 8.0 == 20.0, 3.0 * 4.0 + 18.0 == 30.0).
// NOTE(review): under an `a + b * c` interpretation these inputs would give
// different results, so this test pins the operand order — revisit if that
// order changes.
#[simd_test(enable = "neon")]
12678+
unsafe fn test_vfma_f32() {
12679+
let a: f32x2 = f32x2::new(2.0, 3.0);
12680+
let b: f32x2 = f32x2::new(6.0, 4.0);
12681+
let c: f32x2 = f32x2::new(8.0, 18.0);
12682+
let e: f32x2 = f32x2::new(20.0, 30.0);
12683+
// `transmute` reinterprets the f32x2 helpers as the intrinsic's argument
// type and the returned vector back to f32x2 for comparison.
let r: f32x2 = transmute(vfma_f32(transmute(a), transmute(b), transmute(c)));
12684+
assert_eq!(r, e);
12685+
}
12686+
12687+
// Checks `vfmaq_f32` on four lanes: each expected lane encodes a * b + c
// (2*6+8 == 20, 3*4+18 == 30, 4*7+12 == 40, 5*8+10 == 50).
// NOTE(review): under an `a + b * c` interpretation these inputs would give
// different results, so this test pins the operand order — revisit if that
// order changes.
#[simd_test(enable = "neon")]
12688+
unsafe fn test_vfmaq_f32() {
12689+
let a: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0);
12690+
let b: f32x4 = f32x4::new(6.0, 4.0, 7.0, 8.0);
12691+
let c: f32x4 = f32x4::new(8.0, 18.0, 12.0, 10.0);
12692+
let e: f32x4 = f32x4::new(20.0, 30.0, 40.0, 50.0);
12693+
// `transmute` reinterprets the f32x4 helpers as the intrinsic's argument
// type and the returned vector back to f32x4 for comparison.
let r: f32x4 = transmute(vfmaq_f32(transmute(a), transmute(b), transmute(c)));
12694+
assert_eq!(r, e);
12695+
}
12696+
1264512697
#[simd_test(enable = "neon")]
1264612698
unsafe fn test_vsub_s8() {
1264712699
let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);

crates/core_arch/src/x86/sse.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,8 @@ pub unsafe fn _mm_rcp_ps(a: __m128) -> __m128 {
148148
rcpps(a)
149149
}
150150

151-
/// Returns the approximate reciprocal square root of the fist single-precision
152-
/// (32-bit) floating-point elements in `a`, the other elements are unchanged.
151+
/// Returns the approximate reciprocal square root of the first single-precision
152+
/// (32-bit) floating-point element in `a`, the other elements are unchanged.
153153
///
154154
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ss)
155155
#[inline]

crates/stdarch-gen/neon.spec

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1544,6 +1544,28 @@ validate 9, 30, 11, 20, 13, 18, 15, 48
15441544
aarch64 = pmull
15451545
generate poly8x16_t:poly8x16_t:poly16x8_t
15461546

1547+
/// Floating-point fused Multiply-Add to accumulator (vector)
1548+
name = vfma
1549+
a = 2.0, 3.0, 4.0, 5.0
1550+
b = 6.0, 4.0, 7.0, 8.0
1551+
c = 8.0, 18.0, 12.0, 10.0
1552+
validate 20.0, 30.0, 40.0, 50.0
1553+
1554+
aarch64 = fmadd
1555+
link-aarch64 = llvm.fma._EXT_
1556+
generate float64x1_t
1557+
1558+
aarch64 = fmla
1559+
link-aarch64 = llvm.fma._EXT_
1560+
generate float64x2_t
1561+
1562+
target = fp-armv8
1563+
arm = vfma
1564+
aarch64 = fmla
1565+
link-arm = llvm.fma._EXT_
1566+
link-aarch64 = llvm.fma._EXT_
1567+
generate float*_t
1568+
15471569
/// Divide
15481570
name = vdiv
15491571
fn = simd_div

crates/stdarch-gen/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1238,7 +1238,7 @@ fn gen_arm(
12381238
),
12391239
(0, 3, _) => format!(
12401240
r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{
1241-
{}{}(a, b)
1241+
{}{}(a, b, c)
12421242
}}"#,
12431243
name, const_declare, in_t[0], in_t[1], in_t[2], out_t, ext_c, current_fn,
12441244
),

0 commit comments

Comments
 (0)