Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 398ab5f

Browse files
committed
[AArch64] Split the neon.addp intrinsic into integer and fp variants.
This is the result of discussions on the list about how to deal with intrinsics which require codegen to disambiguate them via only the integer/fp overloads. This ambiguity causes problems for GlobalISel, as some of that information is lost during translation, while with other operations like IR instructions the information is encoded into the instruction opcode.

This patch changes clang to emit the new faddp intrinsic if the vector operands to the builtin have FP element types. LLVM IR AutoUpgrade has been taught to upgrade existing calls to aarch64.neon.addp with fp vector arguments, and we remove the workarounds introduced for GlobalISel in r355865.

This is a more permanent solution to PR40968.

Differential Revision: https://reviews.llvm.org/D59655

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356722 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent e57e8cc commit 398ab5f

10 files changed

+35
-73
lines changed

include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in {
289289

290290
// Pairwise Add
291291
def int_aarch64_neon_addp : AdvSIMD_2VectorArg_Intrinsic;
292+
def int_aarch64_neon_faddp : AdvSIMD_2VectorArg_Intrinsic;
292293

293294
// Long Pairwise Add
294295
// FIXME: In theory, we shouldn't need intrinsics for saddlp or

lib/IR/AutoUpgrade.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,17 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
568568
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
569569
return true;
570570
}
571+
if (Name.startswith("aarch64.neon.addp")) {
572+
if (F->arg_size() != 2)
573+
break; // Invalid IR.
574+
auto fArgs = F->getFunctionType()->params();
575+
VectorType *ArgTy = dyn_cast<VectorType>(fArgs[0]);
576+
if (ArgTy && ArgTy->getElementType()->isFloatingPointTy()) {
577+
NewFn = Intrinsic::getDeclaration(F->getParent(),
578+
Intrinsic::aarch64_neon_faddp, fArgs);
579+
return true;
580+
}
581+
}
571582
break;
572583
}
573584

lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3499,7 +3499,7 @@ def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, V
34993499
}
35003500
defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
35013501
defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
3502-
defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>;
3502+
defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_faddp>;
35033503
defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>;
35043504
defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
35053505
defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;

lib/Target/AArch64/AArch64LegalizerInfo.cpp

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
6464
return std::make_pair(0, EltTy);
6565
});
6666

67-
// HACK: Check that the intrinsic isn't ambiguous.
68-
// (See: https://bugs.llvm.org/show_bug.cgi?id=40968)
69-
getActionDefinitionsBuilder(G_INTRINSIC)
70-
.custom();
71-
7267
getActionDefinitionsBuilder(G_PHI)
7368
.legalFor({p0, s16, s32, s64})
7469
.clampScalar(0, s16, s64)
@@ -517,30 +512,11 @@ bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
517512
return false;
518513
case TargetOpcode::G_VAARG:
519514
return legalizeVaArg(MI, MRI, MIRBuilder);
520-
case TargetOpcode::G_INTRINSIC:
521-
return legalizeIntrinsic(MI, MRI, MIRBuilder);
522515
}
523516

524517
llvm_unreachable("expected switch to return");
525518
}
526519

527-
bool AArch64LegalizerInfo::legalizeIntrinsic(
528-
MachineInstr &MI, MachineRegisterInfo &MRI,
529-
MachineIRBuilder &MIRBuilder) const {
530-
// HACK: Don't allow faddp/addp for now. We don't pass down the type info
531-
// necessary to get this right today.
532-
//
533-
// It looks like addp/faddp is the only intrinsic that's impacted by this.
534-
// All other intrinsics fully describe the required types in their names.
535-
//
536-
// (See: https://bugs.llvm.org/show_bug.cgi?id=40968)
537-
const MachineOperand &IntrinOp = MI.getOperand(1);
538-
if (IntrinOp.isIntrinsicID() &&
539-
IntrinOp.getIntrinsicID() == Intrinsic::aarch64_neon_addp)
540-
return false;
541-
return true;
542-
}
543-
544520
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
545521
MachineRegisterInfo &MRI,
546522
MachineIRBuilder &MIRBuilder) const {

lib/Target/AArch64/AArch64LegalizerInfo.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,6 @@ class AArch64LegalizerInfo : public LegalizerInfo {
3434
private:
3535
bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
3636
MachineIRBuilder &MIRBuilder) const;
37-
38-
bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
39-
MachineIRBuilder &MIRBuilder) const;
4037
};
4138
} // End llvm namespace.
4239
#endif

test/CodeGen/AArch64/GlobalISel/fallback-ambiguous-addp-intrinsic.mir

Lines changed: 0 additions & 32 deletions
This file was deleted.

test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@
151151
# DEBUG: .. the first uncovered type index: 1, OK
152152
#
153153
# DEBUG-NEXT: G_INTRINSIC (opcode {{[0-9]+}}): 0 type indices
154-
# DEBUG: .. type index coverage check SKIPPED: user-defined predicate detected
154+
# DEBUG: .. type index coverage check SKIPPED: no rules defined
155155
#
156156
# DEBUG-NEXT: G_INTRINSIC_W_SIDE_EFFECTS (opcode {{[0-9]+}}): 0 type indices
157157
# DEBUG: .. type index coverage check SKIPPED: no rules defined

test/CodeGen/AArch64/arm64-neon-add-pairwise.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,27 +65,27 @@ define <2 x i64> @test_addp_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
6565
ret <2 x i64> %val
6666
}
6767

68-
declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>)
69-
declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>)
70-
declare <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double>, <2 x double>)
68+
declare <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float>, <2 x float>)
69+
declare <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float>, <4 x float>)
70+
declare <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double>, <2 x double>)
7171

7272
define <2 x float> @test_faddp_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
7373
; CHECK: test_faddp_v2f32:
74-
%val = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
74+
%val = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %lhs, <2 x float> %rhs)
7575
; CHECK: faddp v0.2s, v0.2s, v1.2s
7676
ret <2 x float> %val
7777
}
7878

7979
define <4 x float> @test_faddp_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
8080
; CHECK: test_faddp_v4f32:
81-
%val = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
81+
%val = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %lhs, <4 x float> %rhs)
8282
; CHECK: faddp v0.4s, v0.4s, v1.4s
8383
ret <4 x float> %val
8484
}
8585

8686
define <2 x double> @test_faddp_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
8787
; CHECK: test_faddp_v2f64:
88-
%val = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
88+
%val = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %lhs, <2 x double> %rhs)
8989
; CHECK: faddp v0.2d, v0.2d, v1.2d
9090
ret <2 x double> %val
9191
}

test/CodeGen/AArch64/arm64-vadd.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -712,7 +712,7 @@ define <2 x float> @faddp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
712712
;CHECK: faddp.2s
713713
%tmp1 = load <2 x float>, <2 x float>* %A
714714
%tmp2 = load <2 x float>, <2 x float>* %B
715-
%tmp3 = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
715+
%tmp3 = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
716716
ret <2 x float> %tmp3
717717
}
718718

@@ -721,7 +721,7 @@ define <4 x float> @faddp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
721721
;CHECK: faddp.4s
722722
%tmp1 = load <4 x float>, <4 x float>* %A
723723
%tmp2 = load <4 x float>, <4 x float>* %B
724-
%tmp3 = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
724+
%tmp3 = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
725725
ret <4 x float> %tmp3
726726
}
727727

@@ -730,13 +730,13 @@ define <2 x double> @faddp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
730730
;CHECK: faddp.2d
731731
%tmp1 = load <2 x double>, <2 x double>* %A
732732
%tmp2 = load <2 x double>, <2 x double>* %B
733-
%tmp3 = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
733+
%tmp3 = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
734734
ret <2 x double> %tmp3
735735
}
736736

737-
declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>) nounwind readnone
738-
declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>) nounwind readnone
739-
declare <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double>, <2 x double>) nounwind readnone
737+
declare <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float>, <2 x float>) nounwind readnone
738+
declare <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float>, <4 x float>) nounwind readnone
739+
declare <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double>, <2 x double>) nounwind readnone
740740

741741
define <2 x i64> @uaddl_duprhs(<4 x i32> %lhs, i32 %rhs) {
742742
; CHECK-LABEL: uaddl_duprhs
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
; RUN: opt -S < %s -mtriple=arm64 | FileCheck %s
2+
declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>)
3+
4+
; CHECK: call <4 x float> @llvm.aarch64.neon.faddp.v4f32
5+
define <4 x float> @upgrade_aarch64_neon_addp_float(<4 x float> %a, <4 x float> %b) {
6+
%res = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %a, <4 x float> %b)
7+
ret <4 x float> %res
8+
}
9+

0 commit comments

Comments
 (0)