Skip to content

Commit 53d89ef

Browse files
authored
[AArch64][Clang][NEON] Remove undefined vcmla intrinsics (#112575)
arm_neon.td currently generates the same 24 `vcmla` intrinsic prototypes for each of the f16, f32, and f64 base types. This is incorrect: the only valid `vcmla` intrinsics for the f64 base type are:
- `vcmlaq_f64`
- `vcmlaq_rot90_f64`
- `vcmlaq_rot180_f64`
- `vcmlaq_rot270_f64`

(see the ACLE: https://github.com/ARM-software/acle/blob/main/neon_intrinsics/advsimd.md)

This patch removes the incorrect intrinsic prototypes.
1 parent ad45eb4 commit 53d89ef

File tree

3 files changed

+38
-4
lines changed

3 files changed

+38
-4
lines changed

clang/include/clang/Basic/arm_neon.td

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1968,13 +1968,16 @@ let TargetGuard = "v8.3a,neon" in {
19681968
def VCADDQ_ROT90 : SInst<"vcaddq_rot90", "QQQ", "f">;
19691969
def VCADDQ_ROT270 : SInst<"vcaddq_rot270", "QQQ", "f">;
19701970

1971-
defm VCMLA_F32 : VCMLA_ROTS<"f", "uint64x1_t", "uint64x2_t">;
1971+
defm VCMLA_F32 : VCMLA_ROTS<"f", "uint64x1_t", "uint64x2_t">;
19721972
}
19731973
let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.3a,neon" in {
19741974
def VCADDQ_ROT90_FP64 : SInst<"vcaddq_rot90", "QQQ", "d">;
19751975
def VCADDQ_ROT270_FP64 : SInst<"vcaddq_rot270", "QQQ", "d">;
19761976

1977-
defm VCMLA_FP64 : VCMLA_ROTS<"d", "uint64x2_t", "uint64x2_t">;
1977+
def VCMLAQ_FP64 : SInst<"vcmlaq", "QQQQ", "d">;
1978+
def VCMLAQ_ROT90_FP64 : SInst<"vcmlaq_rot90", "QQQQ", "d">;
1979+
def VCMLAQ_ROT180_FP64 : SInst<"vcmlaq_rot180", "QQQQ", "d">;
1980+
def VCMLAQ_ROT270_FP64 : SInst<"vcmlaq_rot270", "QQQQ", "d">;
19781981
}
19791982

19801983
// V8.2-A BFloat intrinsics

clang/test/Sema/aarch64-neon-target.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ __attribute__((target("arch=armv8.3-a+fp16")))
5858
void test_v83(float32x4_t v4f32, float16x4_t v4f16, float64x2_t v2f64) {
5959
vcaddq_rot90_f32(v4f32, v4f32);
6060
vcmla_rot90_f16(v4f16, v4f16, v4f16);
61-
vcmlaq_rot270_laneq_f64(v2f64, v2f64, v2f64, 1);
61+
vcmlaq_rot270_f64(v2f64, v2f64, v2f64);
6262
}
6363

6464
__attribute__((target("arch=armv8.5-a")))
@@ -95,7 +95,7 @@ void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t
9595
// 8.3 - complex
9696
vcaddq_rot90_f32(v4f32, v4f32); // expected-error {{always_inline function 'vcaddq_rot90_f32' requires target feature 'v8.3a'}}
9797
vcmla_rot90_f16(v4f16, v4f16, v4f16); // expected-error {{always_inline function 'vcmla_rot90_f16' requires target feature 'v8.3a'}}
98-
vcmlaq_rot270_laneq_f64(v2f64, v2f64, v2f64, 1); // expected-error {{always_inline function 'vcmlaq_rot270_f64' requires target feature 'v8.3a'}}
98+
vcmlaq_rot270_f64(v2f64, v2f64, v2f64); // expected-error {{always_inline function 'vcmlaq_rot270_f64' requires target feature 'v8.3a'}}
9999
// 8.5 - frint
100100
vrnd32xq_f32(v4f32); // expected-error {{always_inline function 'vrnd32xq_f32' requires target feature 'v8.5a'}}
101101

clang/test/Sema/aarch64-vcmla-undef.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.3a -ffreestanding -fsyntax-only -verify -verify-ignore-unexpected=note %s
2+
3+
// REQUIRES: aarch64-registered-target
4+
5+
#include <arm_neon.h>
6+
7+
void test(float64x1_t v1f64, float64x2_t v2f64) {
8+
vcmla_f64(v1f64, v1f64, v1f64); // expected-error {{call to undeclared function 'vcmla_f64'}}
9+
vcmla_lane_f64(v1f64, v1f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmla_lane_f64'}}
10+
vcmla_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmla_laneq_f64'}}
11+
vcmlaq_lane_f64(v2f64, v2f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmlaq_lane_f64'}}
12+
vcmlaq_laneq_f64(v2f64, v2f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmlaq_laneq_f64'}}
13+
14+
vcmla_rot90_f64(v1f64, v1f64, v1f64); // expected-error {{call to undeclared function 'vcmla_rot90_f64'}}
15+
vcmla_rot90_lane_f64(v1f64, v1f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmla_rot90_lane_f64'}}
16+
vcmla_rot90_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmla_rot90_laneq_f64'}}
17+
vcmlaq_rot90_lane_f64(v2f64, v2f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot90_lane_f64'}}
18+
vcmlaq_rot90_laneq_f64(v2f64, v2f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot90_laneq_f64'}}
19+
20+
vcmla_rot180_f64(v1f64, v1f64, v1f64); // expected-error {{call to undeclared function 'vcmla_rot180_f64'}}
21+
vcmla_rot180_lane_f64(v1f64, v1f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmla_rot180_lane_f64'}}
22+
vcmla_rot180_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmla_rot180_laneq_f64'}}
23+
vcmlaq_rot180_lane_f64(v2f64, v2f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot180_lane_f64'}}
24+
vcmlaq_rot180_laneq_f64(v2f64, v2f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot180_laneq_f64'}}
25+
26+
vcmla_rot270_f64(v1f64, v1f64, v1f64); // expected-error {{call to undeclared function 'vcmla_rot270_f64'}}
27+
vcmla_rot270_lane_f64(v1f64, v1f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmla_rot270_lane_f64'}}
28+
vcmla_rot270_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmla_rot270_laneq_f64'}}
29+
vcmlaq_rot270_lane_f64(v2f64, v2f64, v1f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot270_lane_f64'}}
30+
vcmlaq_rot270_laneq_f64(v1f64, v1f64, v2f64, 0); // expected-error {{call to undeclared function 'vcmlaq_rot270_laneq_f64'}}
31+
}

0 commit comments

Comments
 (0)