Skip to content

[AArch64] Add CodeGen support for FEAT_CPA #79569

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/include/llvm/Target/TargetMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,11 @@ class TargetMachine {
virtual unsigned getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
return 0;
}

/// Returns true if the target has some form of pointer arithmetic checking
/// for function \p F. Passes such as instruction selection can query this to
/// decide whether pointer arithmetic semantics should be preserved rather
/// than treated as plain integer arithmetic. This default implementation
/// returns false; a target overrides it to opt in.
virtual bool isPtrArithmeticChecked(const Function &F) const { return false; }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't see why this needs to be in TargetMachine. It seems to only depend on the subtarget, so can go directly there?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Plus the only use is in AArch64

};

/// This class describes a target machine that is implemented with the LLVM
Expand Down
5 changes: 3 additions & 2 deletions llvm/include/llvm/Target/TargetSelectionDAG.td
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def SDTOther : SDTypeProfile<1, 0, [SDTCisVT<0, OtherVT>]>; // for 'vt'.
def SDTUNDEF : SDTypeProfile<1, 0, []>; // for 'undef'.
def SDTUnaryOp : SDTypeProfile<1, 1, []>; // for bitconvert.

def SDTPtrAddOp : SDTypeProfile<1, 2, [ // ptradd
def SDTPtrAddSubOp : SDTypeProfile<1, 2, [ // ptradd, ptrsub
SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisPtrTy<1>
]>;
def SDTIntBinOp : SDTypeProfile<1, 2, [ // add, and, or, xor, udiv, etc.
Expand Down Expand Up @@ -384,8 +384,9 @@ def tblockaddress: SDNode<"ISD::TargetBlockAddress", SDTPtrLeaf, [],

def add : SDNode<"ISD::ADD" , SDTIntBinOp ,
[SDNPCommutative, SDNPAssociative]>;
def ptradd : SDNode<"ISD::ADD" , SDTPtrAddOp, []>;
def ptradd : SDNode<"ISD::ADD" , SDTPtrAddSubOp, []>;
def sub : SDNode<"ISD::SUB" , SDTIntBinOp>;
def ptrsub : SDNode<"ISD::SUB" , SDTPtrAddSubOp, []>;
def mul : SDNode<"ISD::MUL" , SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]>;
def mulhs : SDNode<"ISD::MULHS" , SDTIntBinOp, [SDNPCommutative]>;
Expand Down
20 changes: 20 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -9587,6 +9587,26 @@ let Predicates = [HasCPA] in {
// Scalar multiply-add/subtract
def MADDPT : MulAccumCPA<0, "maddpt">;
def MSUBPT : MulAccumCPA<1, "msubpt">;

// Rules to use CPA instructions in pointer arithmetic patterns which are not
// folded into loads/stores. The AddedComplexity serves to help supersede
// other simpler (non-CPA) patterns and make sure CPA is used instead.
let AddedComplexity = 20 in {
// pointer + register offset: ADDPT with a shift amount of 0.
def : Pat<(ptradd GPR64sp:$Rn, GPR64sp:$Rm),
(ADDPT_shift GPR64sp:$Rn, GPR64sp:$Rm, (i32 0))>;
// pointer + (offset << imm): fold the left shift (an imm0_7 operand) into
// ADDPT's shift field; trunc_imm converts the i64 shift amount to i32.
def : Pat<(ptradd GPR64sp:$Rn, (shl GPR64sp:$Rm, (i64 imm0_7:$imm))),
(ADDPT_shift GPR64sp:$Rn, GPR64sp:$Rm,
(i32 (trunc_imm imm0_7:$imm)))>;
// pointer - register offset: SUBPT with a shift amount of 0.
def : Pat<(ptrsub GPR64sp:$Rn, GPR64sp:$Rm),
(SUBPT_shift GPR64sp:$Rn, GPR64sp:$Rm, (i32 0))>;
// pointer - (offset << imm): fold the left shift into SUBPT's shift field.
def : Pat<(ptrsub GPR64sp:$Rn, (shl GPR64sp:$Rm, (i64 imm0_7:$imm))),
(SUBPT_shift GPR64sp:$Rn, GPR64sp:$Rm,
(i32 (trunc_imm imm0_7:$imm)))>;
// pointer + (Rn * Rm): select a single MADDPT with the pointer ($Ra) as the
// accumulator operand.
def : Pat<(ptradd GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)),
(MADDPT GPR64:$Rn, GPR64:$Rm, GPR64:$Ra)>;
// pointer + (Rn * (0 - Rm)), i.e. Ra - Rn * Rm: select MSUBPT and drop the
// explicit negation of $Rm.
def : Pat<(ptradd GPR64:$Ra, (mul GPR64:$Rn, (sub (i64 0), GPR64:$Rm))),
(MSUBPT GPR64:$Rn, GPR64:$Rm, GPR64:$Ra)>;
}
}

include "AArch64InstrAtomics.td"
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -892,3 +892,7 @@ bool AArch64TargetMachine::parseMachineFunctionInfo(
MF.getInfo<AArch64FunctionInfo>()->initializeBaseYamlFields(YamlMFI);
return false;
}

/// Reports whether pointer arithmetic is checked for \p F. On AArch64 this is
/// the case exactly when the subtarget selected for \p F has CPA.
bool AArch64TargetMachine::isPtrArithmeticChecked(const Function &F) const {
  // The answer is per-function because the subtarget is looked up from F.
  const auto *SubtargetForF = getSubtargetImpl(F);
  return SubtargetForF->hasCPA();
}
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/AArch64TargetMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ class AArch64TargetMachine : public LLVMTargetMachine {
return true;
}

/// AArch64 override: returns true if FEAT_CPA (Checked Pointer Arithmetic) is
/// present for \p F. Helps preserve pointer arithmetic semantics for
/// instruction selection.
bool isPtrArithmeticChecked(const Function &F) const override;

private:
bool isLittle;
};
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2075,6 +2075,10 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
return Changed;
}
case TargetOpcode::G_PTR_ADD:
// If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer
// arithmetic semantics instead of falling back to regular arithmetic.
if (TM.isPtrArithmeticChecked(MF.getFunction()))
return false;
Comment on lines +2078 to +2081
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is this supposed to work for SDAG?

return convertPtrAddToAdd(I, MRI);
case TargetOpcode::G_LOAD: {
// For scalar loads of pointers, we try to convert the dest type from p0
Expand Down
171 changes: 171 additions & 0 deletions llvm/test/CodeGen/AArch64/cpa-globalisel.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
; RUN: llc -mtriple=aarch64 -verify-machineinstrs --mattr=+cpa -O0 -global-isel=1 -global-isel-abort=1 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK-CPA-O0
; RUN: llc -mtriple=aarch64 -verify-machineinstrs --mattr=+cpa -O3 -global-isel=1 -global-isel-abort=1 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK-CPA-O3
; RUN: llc -mtriple=aarch64 -verify-machineinstrs --mattr=-cpa -O0 -global-isel=1 -global-isel-abort=1 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK-NOCPA-O0
; RUN: llc -mtriple=aarch64 -verify-machineinstrs --mattr=-cpa -O3 -global-isel=1 -global-isel-abort=1 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK-NOCPA-O3

%struct.my_type = type { i64, i64 }
%struct.my_type2 = type { i64, i64, i64, i64, i64, i64 }

@array = external dso_local global [10 x %struct.my_type], align 8
@array2 = external dso_local global [10 x %struct.my_type2], align 8

; addpt1: stores %arg into field 1 of @array[%index]. Elements of @array are
; 16 bytes (two i64), so the address is @array + (%index << 4) + 8.
; With +cpa the scaled index is expected to be added with `addpt ..., lsl #4`;
; with -cpa a plain `add ..., lsl #4` is expected. In every configuration the
; constant field offset is expected to stay in the store's immediate (#8).
define void @addpt1(i64 %index, i64 %arg) {
; CHECK-CPA-O0-LABEL: addpt1:
; CHECK-CPA-O0: addpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
; CHECK-CPA-O0: str x{{[0-9]+}}, [[[REG1]], #8]
;
; CHECK-CPA-O3-LABEL: addpt1:
; CHECK-CPA-O3: addpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
; CHECK-CPA-O3: str x{{[0-9]+}}, [[[REG1]], #8]
;
; CHECK-NOCPA-O0-LABEL: addpt1:
; CHECK-NOCPA-O0: add [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
; CHECK-NOCPA-O0: str x{{[0-9]+}}, [[[REG1]], #8]
;
; CHECK-NOCPA-O3-LABEL: addpt1:
; CHECK-NOCPA-O3: add [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
; CHECK-NOCPA-O3: str x{{[0-9]+}}, [[[REG1]], #8]
entry:
%e2 = getelementptr inbounds %struct.my_type, ptr @array, i64 %index, i32 1
store i64 %arg, ptr %e2, align 8
ret void
}

; maddpt1: copies 48 bytes from %val into @array2[sext(%pos)]. Elements of
; @array2 are 48 bytes (six i64), so the index has to be multiplied rather
; than shifted. With +cpa the multiply and pointer add are expected to be
; selected as one `maddpt`; with -cpa `smaddl` is expected instead.
; At -O0 the memcpy is expected to remain a tail call (`b memcpy`); at -O3 it
; is expected to become three q-register stores off the computed pointer.
define void @maddpt1(i32 %pos, ptr %val) {
; CHECK-CPA-O0-LABEL: maddpt1:
; CHECK-CPA-O0: maddpt x0, x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
; CHECK-CPA-O0: b memcpy
;
; CHECK-CPA-O3-LABEL: maddpt1:
; CHECK-CPA-O3: maddpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]]]
; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]], #16]
; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]], #32]
;
; CHECK-NOCPA-O0-LABEL: maddpt1:
; CHECK-NOCPA-O0: smaddl x0, w{{[0-9]+}}, w{{[0-9]+}}, x{{[0-9]+}}
; CHECK-NOCPA-O0: b memcpy
;
; CHECK-NOCPA-O3-LABEL: maddpt1:
; CHECK-NOCPA-O3: smaddl [[REG1:x[0-9]+]], w{{[0-9]+}}, w{{[0-9]+}}, x{{[0-9]+}}
; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG1]]]
; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG1]], #16]
; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG1]], #32]
entry:
%idxprom = sext i32 %pos to i64
%arrayidx = getelementptr inbounds [10 x %struct.my_type2], ptr @array2, i64 0, i64 %idxprom
tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 dereferenceable(48) %arrayidx, ptr align 8 dereferenceable(48) %val, i64 48, i1 false)
ret void
}

; msubpt1: copies 48 bytes from @array2[2] to the element sext(%index)
; positions before @array2[6]. The destination is a pointer plus a negated
; index times the 48-byte element size. With +cpa this is expected to be
; selected as `msubpt`; with -cpa the expected sequence is `mneg` followed by
; a plain `add`. %elem is unused.
; In the -cpa -O3 checks the stores must be based on the result of the `add`
; (REG2), not on the `mneg` temporary (REG1).
define void @msubpt1(i32 %index, i32 %elem) {
; CHECK-CPA-O0-LABEL: msubpt1:
; CHECK-CPA-O0: addpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}
; CHECK-CPA-O0: msubpt x0, x{{[0-9]+}}, x{{[0-9]+}}, [[REG1]]
; CHECK-CPA-O0: addpt x1, x{{[0-9]+}}, x{{[0-9]+}}
; CHECK-CPA-O0: b memcpy
;
; CHECK-CPA-O3-LABEL: msubpt1:
; CHECK-CPA-O3: msubpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]], #192]
; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]], #208]
; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]], #224]
;
; CHECK-NOCPA-O0-LABEL: msubpt1:
; CHECK-NOCPA-O0: mneg [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}
; CHECK-NOCPA-O0: add x0, x{{[0-9]+}}, [[REG1]]
; CHECK-NOCPA-O0: b memcpy
;
; CHECK-NOCPA-O3-LABEL: msubpt1:
; CHECK-NOCPA-O3: mneg [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}
; CHECK-NOCPA-O3: add [[REG2:x[0-9]+]], x{{[0-9]+}}, [[REG1]]
; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG2]], #192]
; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG2]], #208]
; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG2]], #224]
entry:
%idx.ext = sext i32 %index to i64
%idx.neg = sub nsw i64 0, %idx.ext
%add.ptr = getelementptr inbounds %struct.my_type2, ptr getelementptr inbounds ([10 x %struct.my_type2], ptr @array2, i64 0, i64 6), i64 %idx.neg
tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 dereferenceable(48) %add.ptr, ptr align 8 dereferenceable(48) getelementptr inbounds ([10 x %struct.my_type2], ptr @array2, i64 0, i64 2), i64 48, i1 false), !tbaa.struct !6
ret void
}

; subpt1: copies 16 bytes from @array[2] to the element at index
; (sext(%index) * -16) relative to @array[6]. Elements of @array are 16 bytes
; (two i64). With +cpa the pointer arithmetic is expected to use `addpt`;
; with -cpa a plain `add` is expected. %elem is unused.
; The memcpy moves one %struct.my_type, so its !tbaa.struct must be the
; two-field, 16-byte layout (!11), not the six-field, 48-byte layout (!6).
define void @subpt1(i32 %index, i32 %elem) {
; CHECK-CPA-O0-LABEL: subpt1:
; CHECK-CPA-O0: addpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}
; CHECK-CPA-O0: str q{{[0-9]+}}, [[[REG1]], x{{[0-9]+}}, lsl #4]
;
; CHECK-CPA-O3-LABEL: subpt1:
; CHECK-CPA-O3: addpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]], #64]
;
; CHECK-NOCPA-O0-LABEL: subpt1:
; CHECK-NOCPA-O0: add [[REG1:x[0-9]+]], x{{[0-9]+}}, #96
; CHECK-NOCPA-O0: str q{{[0-9]+}}, [[[REG1]], x{{[0-9]+}}, lsl #4]
;
; CHECK-NOCPA-O3-LABEL: subpt1:
; CHECK-NOCPA-O3: add [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG1]], #64]
entry:
%conv = sext i32 %index to i64
%mul.neg = mul nsw i64 %conv, -16
%add.ptr = getelementptr inbounds %struct.my_type, ptr getelementptr inbounds ([10 x %struct.my_type], ptr @array, i64 0, i64 6), i64 %mul.neg
tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %add.ptr, ptr noundef nonnull align 8 dereferenceable(16) getelementptr inbounds ([10 x %struct.my_type], ptr @array, i64 0, i64 2), i64 16, i1 false), !tbaa.struct !11
ret void
}

; subpt2: copies 16 bytes from @array[2] to the element sext(%index)
; positions before @array[6]. The index is negated with an explicit
; `sub 0, %idx.ext` before the getelementptr. With +cpa the pointer arithmetic
; is expected to use `addpt`; with -cpa a plain `add` is expected.
; %elem is unused.
define void @subpt2(i32 %index, i32 %elem) {
; CHECK-CPA-O0-LABEL: subpt2:
; CHECK-CPA-O0: addpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}
; CHECK-CPA-O0: str q{{[0-9]+}}, [[[REG1]], x{{[0-9]+}}, lsl #4]
;
; CHECK-CPA-O3-LABEL: subpt2:
; CHECK-CPA-O3: addpt [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
; CHECK-CPA-O3: str q{{[0-9]+}}, [[[REG1]], #64]
;
; CHECK-NOCPA-O0-LABEL: subpt2:
; CHECK-NOCPA-O0: add [[REG1:x[0-9]+]], x{{[0-9]+}}, #96
; CHECK-NOCPA-O0: str q{{[0-9]+}}, [[[REG1]], x{{[0-9]+}}, lsl #4]
;
; CHECK-NOCPA-O3-LABEL: subpt2:
; CHECK-NOCPA-O3: add [[REG1:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #4
; CHECK-NOCPA-O3: str q{{[0-9]+}}, [[[REG1]], #64]
entry:
%idx.ext = sext i32 %index to i64
%idx.neg = sub nsw i64 0, %idx.ext
%add.ptr = getelementptr inbounds %struct.my_type, ptr getelementptr inbounds ([10 x %struct.my_type], ptr @array, i64 0, i64 6), i64 %idx.neg
tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %add.ptr, ptr noundef nonnull align 8 dereferenceable(16) getelementptr inbounds ([10 x %struct.my_type], ptr @array, i64 0, i64 2), i64 16, i1 false), !tbaa.struct !11
ret void
}

; subpt3: returns %ptr moved back by one i64, a constant offset of -8 bytes.
; With +cpa the checks expect -8 to be materialized with `mov` and added with
; `addpt`. With -cpa an immediate subtract of 8 is expected: `subs` at -O0 and
; `sub` at -O3. %index is unused.
define ptr @subpt3(ptr %ptr, i32 %index) {
; CHECK-CPA-O0-LABEL: subpt3:
; CHECK-CPA-O0: mov [[REG1:x[0-9]+]], #-8
; CHECK-CPA-O0: addpt x0, x{{[0-9]+}}, [[REG1]]
; CHECK-CPA-O0: ret
;
; CHECK-CPA-O3-LABEL: subpt3:
; CHECK-CPA-O3: mov [[REG1:x[0-9]+]], #-8
; CHECK-CPA-O3: addpt x0, x{{[0-9]+}}, [[REG1]]
; CHECK-CPA-O3: ret
;
; CHECK-NOCPA-O0-LABEL: subpt3:
; CHECK-NOCPA-O0: subs x0, x{{[0-9]+}}, #8
; CHECK-NOCPA-O0: ret
;
; CHECK-NOCPA-O3-LABEL: subpt3:
; CHECK-NOCPA-O3: sub x0, x{{[0-9]+}}, #8
; CHECK-NOCPA-O3: ret
entry:
%incdec.ptr.i.i.i = getelementptr inbounds i64, ptr %ptr, i64 -1
ret ptr %incdec.ptr.i.i.i
}

declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)

!6 = !{i64 0, i64 8, !7, i64 8, i64 8, !7, i64 16, i64 8, !7, i64 24, i64 8, !7, i64 32, i64 8, !7, i64 40, i64 8, !7}
!7 = !{!8, !8, i64 0}
!8 = !{!"long", !9, i64 0}
!9 = !{!"omnipotent char", !10, i64 0}
!10 = !{!"Simple C++ TBAA"}
!11 = !{i64 0, i64 8, !7, i64 8, i64 8, !7}
Loading