Skip to content

Commit 87f1a5a

Browse files
[AArch64] Implement NEON vamin/vamax intrinsics
This patch implements the intrinsics of the form floatNxM_t vamin[q]_fN(floatNxM_t vn, floatNxM_t vm); floatNxM_t vamax[q]_fN(floatNxM_t vn, floatNxM_t vm); as defined in ARM-software/acle#324 Co-authored-by: Hassnaa Hamdi <[email protected]>
1 parent 2f0661c commit 87f1a5a

File tree

7 files changed

+257
-2
lines changed

7 files changed

+257
-2
lines changed

clang/include/clang/Basic/arm_neon.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2096,3 +2096,8 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "r
20962096
def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl">;
20972097
def VSTL1_LANE : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl">;
20982098
}
2099+
2100+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "faminmax" in {
2101+
def FAMIN : WInst<"vamin", "...", "fhQdQfQh">;
2102+
def FAMAX : WInst<"vamax", "...", "fhQdQfQh">;
2103+
}

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13481,6 +13481,23 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1348113481
Int = Intrinsic::aarch64_neon_suqadd;
1348213482
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
1348313483
}
13484+
13485+
case NEON::BI__builtin_neon_vamin_f16:
13486+
case NEON::BI__builtin_neon_vaminq_f16:
13487+
case NEON::BI__builtin_neon_vamin_f32:
13488+
case NEON::BI__builtin_neon_vaminq_f32:
13489+
case NEON::BI__builtin_neon_vaminq_f64: {
13490+
Int = Intrinsic::aarch64_neon_famin;
13491+
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
13492+
}
13493+
case NEON::BI__builtin_neon_vamax_f16:
13494+
case NEON::BI__builtin_neon_vamaxq_f16:
13495+
case NEON::BI__builtin_neon_vamax_f32:
13496+
case NEON::BI__builtin_neon_vamaxq_f32:
13497+
case NEON::BI__builtin_neon_vamaxq_f64: {
13498+
Int = Intrinsic::aarch64_neon_famax;
13499+
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
13500+
}
1348413501
}
1348513502
}
1348613503

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
#include <arm_neon.h>
3+
4+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +faminmax -O3 -emit-llvm -o - %s | FileCheck %s
5+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +faminmax -S -O3 -Werror -Wall -o /dev/null %s
6+
7+
// CHECK-LABEL: define dso_local <4 x half> @test_vamin_f16(
8+
// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <4 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
9+
// CHECK-NEXT: [[ENTRY:.*:]]
10+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.famin.v4f16(<4 x half> [[VN]], <4 x half> [[VM]])
11+
// CHECK-NEXT: ret <4 x half> [[FAMIN2_I]]
12+
//
13+
float16x4_t test_vamin_f16(float16x4_t vn, float16x4_t vm) {
14+
return vamin_f16(vn, vm);
15+
}
16+
17+
// CHECK-LABEL: define dso_local <8 x half> @test_vaminq_f16(
18+
// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
19+
// CHECK-NEXT: [[ENTRY:.*:]]
20+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.famin.v8f16(<8 x half> [[VN]], <8 x half> [[VM]])
21+
// CHECK-NEXT: ret <8 x half> [[FAMIN2_I]]
22+
//
23+
float16x8_t test_vaminq_f16(float16x8_t vn, float16x8_t vm) {
24+
return vaminq_f16(vn, vm);
25+
26+
}
27+
28+
// CHECK-LABEL: define dso_local <2 x float> @test_vamin_f32(
29+
// CHECK-SAME: <2 x float> noundef [[VN:%.*]], <2 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
30+
// CHECK-NEXT: [[ENTRY:.*:]]
31+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.famin.v2f32(<2 x float> [[VN]], <2 x float> [[VM]])
32+
// CHECK-NEXT: ret <2 x float> [[FAMIN2_I]]
33+
//
34+
float32x2_t test_vamin_f32(float32x2_t vn, float32x2_t vm) {
35+
return vamin_f32(vn, vm);
36+
37+
}
38+
39+
// CHECK-LABEL: define dso_local <4 x float> @test_vaminq_f32(
40+
// CHECK-SAME: <4 x float> noundef [[VN:%.*]], <4 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
41+
// CHECK-NEXT: [[ENTRY:.*:]]
42+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.famin.v4f32(<4 x float> [[VN]], <4 x float> [[VM]])
43+
// CHECK-NEXT: ret <4 x float> [[FAMIN2_I]]
44+
//
45+
float32x4_t test_vaminq_f32(float32x4_t vn, float32x4_t vm) {
46+
return vaminq_f32(vn, vm);
47+
48+
}
49+
50+
// CHECK-LABEL: define dso_local <2 x double> @test_vaminq_f64(
51+
// CHECK-SAME: <2 x double> noundef [[VN:%.*]], <2 x double> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
52+
// CHECK-NEXT: [[ENTRY:.*:]]
53+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.famin.v2f64(<2 x double> [[VN]], <2 x double> [[VM]])
54+
// CHECK-NEXT: ret <2 x double> [[FAMIN2_I]]
55+
//
56+
float64x2_t test_vaminq_f64(float64x2_t vn, float64x2_t vm) {
57+
return vaminq_f64(vn, vm);
58+
}
59+
60+
61+
// CHECK-LABEL: define dso_local <4 x half> @test_vamax_f16(
62+
// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <4 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
63+
// CHECK-NEXT: [[ENTRY:.*:]]
64+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.famax.v4f16(<4 x half> [[VN]], <4 x half> [[VM]])
65+
// CHECK-NEXT: ret <4 x half> [[FAMAX2_I]]
66+
//
67+
float16x4_t test_vamax_f16(float16x4_t vn, float16x4_t vm) {
68+
return vamax_f16(vn, vm);
69+
}
70+
71+
// CHECK-LABEL: define dso_local <8 x half> @test_vamaxq_f16(
72+
// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
73+
// CHECK-NEXT: [[ENTRY:.*:]]
74+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.famax.v8f16(<8 x half> [[VN]], <8 x half> [[VM]])
75+
// CHECK-NEXT: ret <8 x half> [[FAMAX2_I]]
76+
//
77+
float16x8_t test_vamaxq_f16(float16x8_t vn, float16x8_t vm) {
78+
return vamaxq_f16(vn, vm);
79+
80+
}
81+
82+
// CHECK-LABEL: define dso_local <2 x float> @test_vamax_f32(
83+
// CHECK-SAME: <2 x float> noundef [[VN:%.*]], <2 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
84+
// CHECK-NEXT: [[ENTRY:.*:]]
85+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.famax.v2f32(<2 x float> [[VN]], <2 x float> [[VM]])
86+
// CHECK-NEXT: ret <2 x float> [[FAMAX2_I]]
87+
//
88+
float32x2_t test_vamax_f32(float32x2_t vn, float32x2_t vm) {
89+
return vamax_f32(vn, vm);
90+
91+
}
92+
93+
// CHECK-LABEL: define dso_local <4 x float> @test_vamaxq_f32(
94+
// CHECK-SAME: <4 x float> noundef [[VN:%.*]], <4 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
95+
// CHECK-NEXT: [[ENTRY:.*:]]
96+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.famax.v4f32(<4 x float> [[VN]], <4 x float> [[VM]])
97+
// CHECK-NEXT: ret <4 x float> [[FAMAX2_I]]
98+
//
99+
float32x4_t test_vamaxq_f32(float32x4_t vn, float32x4_t vm) {
100+
return vamaxq_f32(vn, vm);
101+
102+
}
103+
104+
// CHECK-LABEL: define dso_local <2 x double> @test_vamaxq_f64(
105+
// CHECK-SAME: <2 x double> noundef [[VN:%.*]], <2 x double> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
106+
// CHECK-NEXT: [[ENTRY:.*:]]
107+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.famax.v2f64(<2 x double> [[VN]], <2 x double> [[VM]])
108+
// CHECK-NEXT: ret <2 x double> [[FAMAX2_I]]
109+
//
110+
float64x2_t test_vamaxq_f64(float64x2_t vn, float64x2_t vm) {
111+
return vamaxq_f64(vn, vm);
112+
}

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3730,3 +3730,6 @@ def int_aarch64_sve_pmov_to_vector_lane_zeroing : SVE2_Pred_1VectorArg_Intrinsic
37303730
def int_aarch64_sme_mopa_nonwide : SME_OuterProduct_Intrinsic;
37313731
def int_aarch64_sme_mops_nonwide : SME_OuterProduct_Intrinsic;
37323732

3733+
// Neon absolute maximum and minimum
3734+
def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic;
3735+
def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic;

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5992,6 +5992,26 @@ multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<3> opc,
59925992
[(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
59935993
}
59945994

5995+
let mayRaiseFPException = 1, Uses = [FPCR] in
5996+
multiclass SIMDThreeVectorFP<bit U, bit S, bits<3> opc,
5997+
string asm, SDPatternOperator OpNode> {
5998+
def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64,
5999+
asm, ".4h",
6000+
[(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4i16 V64:$Rm)))]>;
6001+
def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128,
6002+
asm, ".8h",
6003+
[(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8i16 V128:$Rm)))]>;
6004+
def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64,
6005+
asm, ".2s",
6006+
[(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2i32 V64:$Rm)))]>;
6007+
def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128,
6008+
asm, ".4s",
6009+
[(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4i32 V128:$Rm)))]>;
6010+
def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128,
6011+
asm, ".2d",
6012+
[(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2i64 V128:$Rm)))]>;
6013+
}
6014+
59956015
let mayRaiseFPException = 1, Uses = [FPCR] in
59966016
multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<3> opc,
59976017
string asm,

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10091,13 +10091,15 @@ let Uses = [FPMR, FPCR], Predicates = [HasFP8] in {
1009110091
// fminimum(abs(a), abs(b)) -> famin(a, b)
1009210092
// fminnum[nnan](abs(a), abs(b)) -> famin(a, b)
1009310093
def AArch64famin : PatFrags<(ops node:$Rn, node:$Rm),
10094-
[(fminimum (fabs node:$Rn), (fabs node:$Rm)),
10094+
[(int_aarch64_neon_famin node:$Rn, node:$Rm),
10095+
(fminimum (fabs node:$Rn), (fabs node:$Rm)),
1009510096
(fminnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;
1009610097

1009710098
// fmaximum(abs(a), abs(b)) -> famax(a, b)
1009810099
// fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b)
1009910100
def AArch64famax : PatFrags<(ops node:$Rn, node:$Rm),
10100-
[(fmaximum (fabs node:$Rn), (fabs node:$Rm)),
10101+
[(int_aarch64_neon_famax node:$Rn, node:$Rm),
10102+
(fmaximum (fabs node:$Rn), (fabs node:$Rm)),
1010110103
(fmaxnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;
1010210104

1010310105
let Predicates = [HasNEON, HasFAMINMAX] in {
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s | FileCheck %s
3+
4+
target triple = "aarch64-linux"
5+
6+
define <4 x half> @test_famin_f16(<4 x half> %vn, <4 x half> %vm) #0 {
7+
; CHECK-LABEL: test_famin_f16:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: famin v0.4h, v0.4h, v1.4h
10+
; CHECK-NEXT: ret
11+
%res = call <4 x half> @llvm.aarch64.neon.famin.v4f16(<4 x half> %vn, <4 x half> %vm)
12+
ret <4 x half> %res
13+
}
14+
15+
define <8 x half> @test_famin2_f16(<8 x half> %vn, <8 x half> %vm) #0 {
16+
; CHECK-LABEL: test_famin2_f16:
17+
; CHECK: // %bb.0:
18+
; CHECK-NEXT: famin v0.8h, v0.8h, v1.8h
19+
; CHECK-NEXT: ret
20+
%res = call <8 x half> @llvm.aarch64.neon.famin.v8f16(<8 x half> %vn, <8 x half> %vm)
21+
ret <8 x half> %res
22+
}
23+
24+
define <2 x float> @test_famin_f32(<2 x float> %vn, <2 x float> %vm) #0 {
25+
; CHECK-LABEL: test_famin_f32:
26+
; CHECK: // %bb.0:
27+
; CHECK-NEXT: famin v0.2s, v0.2s, v1.2s
28+
; CHECK-NEXT: ret
29+
%res = call <2 x float> @llvm.aarch64.neon.famin.v2f32(<2 x float> %vn, <2 x float> %vm)
30+
ret <2 x float> %res
31+
}
32+
33+
define <4 x float> @test_famin2_f32(<4 x float> %vn, <4 x float> %vm) #0 {
34+
; CHECK-LABEL: test_famin2_f32:
35+
; CHECK: // %bb.0:
36+
; CHECK-NEXT: famin v0.4s, v0.4s, v1.4s
37+
; CHECK-NEXT: ret
38+
%res = call <4 x float> @llvm.aarch64.neon.famin.v4f32(<4 x float> %vn, <4 x float> %vm)
39+
ret <4 x float> %res
40+
}
41+
42+
define <2 x double> @test_famin_f64(<2 x double> %vn, <2 x double> %vm) #0 {
43+
; CHECK-LABEL: test_famin_f64:
44+
; CHECK: // %bb.0:
45+
; CHECK-NEXT: famin v0.2d, v0.2d, v1.2d
46+
; CHECK-NEXT: ret
47+
%res = call <2 x double> @llvm.aarch64.neon.famin.v2f64(<2 x double> %vn, <2 x double> %vm)
48+
ret <2 x double> %res
49+
}
50+
51+
define <4 x half> @test_famax_f16(<4 x half> %vn, <4 x half> %vm) #0 {
52+
; CHECK-LABEL: test_famax_f16:
53+
; CHECK: // %bb.0:
54+
; CHECK-NEXT: famax v0.4h, v0.4h, v1.4h
55+
; CHECK-NEXT: ret
56+
%res = call <4 x half> @llvm.aarch64.neon.famax.v4f16(<4 x half> %vn, <4 x half> %vm)
57+
ret <4 x half> %res
58+
}
59+
60+
define <8 x half> @test_famax2_f16(<8 x half> %vn, <8 x half> %vm) #0 {
61+
; CHECK-LABEL: test_famax2_f16:
62+
; CHECK: // %bb.0:
63+
; CHECK-NEXT: famax v0.8h, v0.8h, v1.8h
64+
; CHECK-NEXT: ret
65+
%res = call <8 x half> @llvm.aarch64.neon.famax.v8f16(<8 x half> %vn, <8 x half> %vm)
66+
ret <8 x half> %res
67+
}
68+
69+
define <2 x float> @test_famax_f32(<2 x float> %vn, <2 x float> %vm) #0 {
70+
; CHECK-LABEL: test_famax_f32:
71+
; CHECK: // %bb.0:
72+
; CHECK-NEXT: famax v0.2s, v0.2s, v1.2s
73+
; CHECK-NEXT: ret
74+
%res = call <2 x float> @llvm.aarch64.neon.famax.v2f32(<2 x float> %vn, <2 x float> %vm)
75+
ret <2 x float> %res
76+
}
77+
78+
define <4 x float> @test_famax2_f32(<4 x float> %vn, <4 x float> %vm) #0 {
79+
; CHECK-LABEL: test_famax2_f32:
80+
; CHECK: // %bb.0:
81+
; CHECK-NEXT: famax v0.4s, v0.4s, v1.4s
82+
; CHECK-NEXT: ret
83+
%res = call <4 x float> @llvm.aarch64.neon.famax.v4f32(<4 x float> %vn, <4 x float> %vm)
84+
ret <4 x float> %res
85+
}
86+
87+
define <2 x double> @test_famax_f64(<2 x double> %vn, <2 x double> %vm) #0 {
88+
; CHECK-LABEL: test_famax_f64:
89+
; CHECK: // %bb.0:
90+
; CHECK-NEXT: famax v0.2d, v0.2d, v1.2d
91+
; CHECK-NEXT: ret
92+
%res = call <2 x double> @llvm.aarch64.neon.famax.v2f64(<2 x double> %vn, <2 x double> %vm)
93+
ret <2 x double> %res
94+
}
95+
96+
attributes #0 = { "target-features"="+neon,+faminmax" }

0 commit comments

Comments
 (0)