Skip to content

[AMDGPU][SDAG] Handle ISD::PTRADD in various special cases #145330

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: users/ritter-x2a/06-17-_amdgpu_sdag_test_isd_ptradd_handling_in_various_special_cases
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8219,7 +8219,7 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
GlobalAddressSDNode *G = nullptr;
if (Src.getOpcode() == ISD::GlobalAddress)
G = cast<GlobalAddressSDNode>(Src);
else if (Src.getOpcode() == ISD::ADD &&
else if (Src->isAnyAdd() &&
Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
Src.getOperand(1).getOpcode() == ISD::Constant) {
G = cast<GlobalAddressSDNode>(Src.getOperand(0));
Expand Down
21 changes: 16 additions & 5 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -615,8 +615,14 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
// operands on the new node are also disjoint.
SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
: SDNodeFlags::None);
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::PTRADD) {
// It isn't a ptradd anymore if it doesn't operate on the entire
// pointer.
Opcode = ISD::ADD;
}
SDValue X = DAG.getNode(
Op.getOpcode(), dl, SmallVT,
Opcode, dl, SmallVT,
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
Expand Down Expand Up @@ -2851,6 +2857,11 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(Op, And1);
}
[[fallthrough]];
case ISD::PTRADD:
if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
break;
// PTRADD behaves like ADD if pointers are represented as integers.
[[fallthrough]];
case ISD::ADD:
case ISD::SUB: {
// Add, Sub, and Mul don't demand any bits in positions beyond that
Expand Down Expand Up @@ -2960,10 +2971,10 @@ bool TargetLowering::SimplifyDemandedBits(

if (Op.getOpcode() == ISD::MUL) {
Known = KnownBits::mul(KnownOp0, KnownOp1);
} else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
} else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
Known = KnownBits::computeForAddSub(
Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
Op->isAnyAdd(), Flags.hasNoSignedWrap(), Flags.hasNoUnsignedWrap(),
KnownOp0, KnownOp1);
}
break;
}
Expand Down Expand Up @@ -5606,7 +5617,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
return true;
}

if (N->getOpcode() == ISD::ADD) {
if (N->isAnyAdd()) {
SDValue N1 = N->getOperand(0);
SDValue N2 = N->getOperand(1);
if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1449,7 +1449,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
C1 = nullptr;
}

if (N0.getOpcode() == ISD::ADD) {
if (N0->isAnyAdd()) {
// (add N2, N3) -> addr64, or
// (add (add N2, N3), C1) -> addr64
SDValue N2 = N0.getOperand(0);
Expand Down Expand Up @@ -1899,7 +1899,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
}

// Match the variable offset.
if (Addr.getOpcode() == ISD::ADD) {
if (Addr->isAnyAdd()) {
LHS = Addr.getOperand(0);
RHS = Addr.getOperand(1);

Expand Down Expand Up @@ -2230,7 +2230,7 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,

SDValue N0, N1;
// Extract the base and offset if possible.
if (CurDAG->isBaseWithConstantOffset(Addr) || Addr.getOpcode() == ISD::ADD) {
if (CurDAG->isBaseWithConstantOffset(Addr) || Addr->isAnyAdd()) {
N0 = Addr.getOperand(0);
N1 = Addr.getOperand(1);
} else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10477,7 +10477,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDValue VOffset;
// Try to split SAddr and VOffset. Global and LDS pointers share the same
// immediate offset, so we cannot use a regular SelectGlobalSAddr().
if (Addr->isDivergent() && Addr.getOpcode() == ISD::ADD) {
if (Addr->isDivergent() && Addr->isAnyAdd()) {
SDValue LHS = Addr.getOperand(0);
SDValue RHS = Addr.getOperand(1);

Expand Down Expand Up @@ -12027,8 +12027,7 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N, unsigned AddrSpace,

// We only do this to handle cases where it's profitable when there are
// multiple uses of the add, so defer to the standard combine.
if ((N0.getOpcode() != ISD::ADD && N0.getOpcode() != ISD::OR) ||
N0->hasOneUse())
if ((!N0->isAnyAdd() && N0.getOpcode() != ISD::OR) || N0->hasOneUse())
return SDValue();

const ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N1);
Expand Down Expand Up @@ -12067,6 +12066,8 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N, unsigned AddrSpace,
N->getFlags().hasNoUnsignedWrap() &&
(N0.getOpcode() == ISD::OR || N0->getFlags().hasNoUnsignedWrap()));

// Use ISD::ADD even if the original operation was ISD::PTRADD, since we can't
// be sure that the new left operand is a proper base pointer.
return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset, Flags);
}

Expand Down
67 changes: 22 additions & 45 deletions llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,50 +5,26 @@
; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectMUBUF.

define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GFX6_PTRADD-LABEL: v_add_i32:
; GFX6_PTRADD: ; %bb.0:
; GFX6_PTRADD-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX6_PTRADD-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX6_PTRADD-NEXT: s_mov_b32 s7, 0x100f000
; GFX6_PTRADD-NEXT: s_mov_b32 s10, 0
; GFX6_PTRADD-NEXT: s_mov_b32 s11, s7
; GFX6_PTRADD-NEXT: s_waitcnt lgkmcnt(0)
; GFX6_PTRADD-NEXT: v_mov_b32_e32 v1, s3
; GFX6_PTRADD-NEXT: v_add_i32_e32 v0, vcc, s2, v0
; GFX6_PTRADD-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX6_PTRADD-NEXT: s_mov_b32 s8, s10
; GFX6_PTRADD-NEXT: s_mov_b32 s9, s10
; GFX6_PTRADD-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; GFX6_PTRADD-NEXT: s_waitcnt vmcnt(0)
; GFX6_PTRADD-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc
; GFX6_PTRADD-NEXT: s_waitcnt vmcnt(0)
; GFX6_PTRADD-NEXT: s_mov_b32 s6, -1
; GFX6_PTRADD-NEXT: s_mov_b32 s4, s0
; GFX6_PTRADD-NEXT: s_mov_b32 s5, s1
; GFX6_PTRADD-NEXT: v_add_i32_e32 v0, vcc, v2, v0
; GFX6_PTRADD-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6_PTRADD-NEXT: s_endpgm
;
; GFX6_LEGACY-LABEL: v_add_i32:
; GFX6_LEGACY: ; %bb.0:
; GFX6_LEGACY-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX6_LEGACY-NEXT: s_mov_b32 s7, 0x100f000
; GFX6_LEGACY-NEXT: s_mov_b32 s10, 0
; GFX6_LEGACY-NEXT: s_mov_b32 s11, s7
; GFX6_LEGACY-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX6_LEGACY-NEXT: s_waitcnt lgkmcnt(0)
; GFX6_LEGACY-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX6_LEGACY-NEXT: v_mov_b32_e32 v1, 0
; GFX6_LEGACY-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; GFX6_LEGACY-NEXT: s_waitcnt vmcnt(0)
; GFX6_LEGACY-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc
; GFX6_LEGACY-NEXT: s_waitcnt vmcnt(0)
; GFX6_LEGACY-NEXT: s_mov_b32 s6, -1
; GFX6_LEGACY-NEXT: s_mov_b32 s4, s0
; GFX6_LEGACY-NEXT: s_mov_b32 s5, s1
; GFX6_LEGACY-NEXT: v_add_i32_e32 v0, vcc, v2, v0
; GFX6_LEGACY-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6_LEGACY-NEXT: s_endpgm
; GFX6-LABEL: v_add_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX6-NEXT: s_mov_b32 s7, 0x100f000
; GFX6-NEXT: s_mov_b32 s10, 0
; GFX6-NEXT: s_mov_b32 s11, s7
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v1, 0
; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s4, s0
; GFX6-NEXT: s_mov_b32 s5, s1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v2, v0
; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX6-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
%b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1
Expand All @@ -60,4 +36,5 @@ define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX6: {{.*}}
; GFX6_LEGACY: {{.*}}
; GFX6_PTRADD: {{.*}}
Loading