Skip to content

Commit 1afe2b1

Browse files
committed
[AMDGPU][SDAG] Add ISD::PTRADD DAG combines
This patch focuses on generic DAG combines, plus an AMDGPU-target-specific one that is closely connected. The generic DAG combine is based on a part of PR #105669 by @rgwott, which was adapted from work by @jrtc27, @arichardson, @davidchisnall in the CHERI/Morello LLVM tree. I added some parts and removed several disjuncts from the reassociation condition: - `isNullConstant(X)`, since there are address spaces where 0 is a perfectly normal value that shouldn't be treated specially, - `(YIsConstant && ZOneUse)` and `(N0OneUse && ZOneUse && !ZIsConstant)`, since they cause regressions in AMDGPU. For SWDEV-516125.
1 parent d363847 commit 1afe2b1

File tree

4 files changed

+201
-134
lines changed

4 files changed

+201
-134
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,7 @@ namespace {
419419
SDValue visitADDLike(SDNode *N);
420420
SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
421421
SDNode *LocReference);
422+
SDValue visitPTRADD(SDNode *N);
422423
SDValue visitSUB(SDNode *N);
423424
SDValue visitADDSAT(SDNode *N);
424425
SDValue visitSUBSAT(SDNode *N);
@@ -1138,7 +1139,7 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
11381139
return true;
11391140
}
11401141

1141-
if (Opc != ISD::ADD)
1142+
if (Opc != ISD::ADD && Opc != ISD::PTRADD)
11421143
return false;
11431144

11441145
auto *C2 = dyn_cast<ConstantSDNode>(N1);
@@ -1858,6 +1859,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
18581859
case ISD::TokenFactor: return visitTokenFactor(N);
18591860
case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
18601861
case ISD::ADD: return visitADD(N);
1862+
case ISD::PTRADD: return visitPTRADD(N);
18611863
case ISD::SUB: return visitSUB(N);
18621864
case ISD::SADDSAT:
18631865
case ISD::UADDSAT: return visitADDSAT(N);
@@ -2623,6 +2625,93 @@ SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
26232625
return SDValue();
26242626
}
26252627

2628+
/// Try to fold a pointer arithmetic node.
2629+
/// This needs to be done separately from normal addition, because pointer
2630+
/// addition is not commutative.
2631+
SDValue DAGCombiner::visitPTRADD(SDNode *N) {
2632+
SDValue N0 = N->getOperand(0);
2633+
SDValue N1 = N->getOperand(1);
2634+
EVT PtrVT = N0.getValueType();
2635+
EVT IntVT = N1.getValueType();
2636+
SDLoc DL(N);
2637+
2638+
// This is already ensured by an assert in SelectionDAG::getNode(). Several
2639+
// combines here depend on this assumption.
2640+
assert(PtrVT == IntVT &&
2641+
"PTRADD with different operand types is not supported");
2642+
2643+
// fold (ptradd undef, y) -> undef
2644+
if (N0.isUndef())
2645+
return N0;
2646+
2647+
// fold (ptradd x, undef) -> undef
2648+
if (N1.isUndef())
2649+
return DAG.getUNDEF(PtrVT);
2650+
2651+
// fold (ptradd x, 0) -> x
2652+
if (isNullConstant(N1))
2653+
return N0;
2654+
2655+
// fold (ptradd 0, x) -> x
2656+
if (isNullConstant(N0))
2657+
return N1;
2658+
2659+
if (N0.getOpcode() == ISD::PTRADD &&
2660+
!reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) {
2661+
SDValue X = N0.getOperand(0);
2662+
SDValue Y = N0.getOperand(1);
2663+
SDValue Z = N1;
2664+
bool N0OneUse = N0.hasOneUse();
2665+
bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
2666+
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
2667+
2668+
// (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
2669+
// * y is a constant and (ptradd x, y) has one use; or
2670+
// * y and z are both constants.
2671+
if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) {
2672+
SDNodeFlags Flags;
2673+
// If both additions in the original were NUW, the new ones are as well.
2674+
if (N->getFlags().hasNoUnsignedWrap() &&
2675+
N0->getFlags().hasNoUnsignedWrap())
2676+
Flags |= SDNodeFlags::NoUnsignedWrap;
2677+
SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
2678+
AddToWorklist(Add.getNode());
2679+
return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
2680+
}
2681+
2682+
// TODO: There is another possible fold here that was proven useful.
2683+
// It would be this:
2684+
//
2685+
// (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
2686+
// * (ptradd x, y) has one use; and
2687+
// * y is a constant; and
2688+
// * z is not a constant.
2689+
//
2690+
// In some cases, specifically in AArch64's FEAT_CPA, it exposes the
2691+
// opportunity to select more complex instructions such as SUBPT and
2692+
// MSUBPT. However, a hypothetical corner case has been found that we could
2693+
// not avoid. Consider this (pseudo-POSIX C):
2694+
//
2695+
// char *foo(char *x, int z) {return (x + LARGE_CONSTANT) + z;}
2696+
// char *p = mmap(LARGE_CONSTANT);
2697+
// char *q = foo(p, -LARGE_CONSTANT);
2698+
//
2699+
// Then x + LARGE_CONSTANT is one-past-the-end, so valid, and a
2700+
// further + z takes it back to the start of the mapping, so valid,
2701+
// regardless of the address mmap gave back. However, if mmap gives you an
2702+
// address < LARGE_CONSTANT (ignoring high bits), x - LARGE_CONSTANT will
2703+
// borrow from the high bits (with the subsequent + z carrying back into
2704+
// the high bits to give you a well-defined pointer) and thus trip
2705+
// FEAT_CPA's pointer corruption checks.
2706+
//
2707+
// We leave this fold as an opportunity for future work, addressing the
2708+
// corner case for FEAT_CPA, as well as reconciling the solution with the
2709+
// more general application of pointer arithmetic in other future targets.
2710+
}
2711+
2712+
return SDValue();
2713+
}
2714+
26262715
/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
26272716
/// a shift and add with a different constant.
26282717
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
@@ -14940,6 +15029,7 @@ SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
1494015029
default:
1494115030
break;
1494215031
case ISD::ADD:
15032+
case ISD::PTRADD:
1494315033
case ISD::SUB: {
1494415034
unsigned AlignShift = Log2(AL);
1494515035
SDValue LHS = N0.getOperand(0);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
931931
setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
932932

933933
setTargetDAGCombine({ISD::ADD,
934+
ISD::PTRADD,
934935
ISD::UADDO_CARRY,
935936
ISD::SUB,
936937
ISD::USUBO_CARRY,
@@ -14833,6 +14834,52 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
1483314834
return SDValue();
1483414835
}
1483514836

14837+
SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
14838+
DAGCombinerInfo &DCI) const {
14839+
SelectionDAG &DAG = DCI.DAG;
14840+
EVT VT = N->getValueType(0);
14841+
SDLoc DL(N);
14842+
SDValue N0 = N->getOperand(0);
14843+
SDValue N1 = N->getOperand(1);
14844+
14845+
if (N1.getOpcode() == ISD::ADD) {
14846+
// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
14847+
// y is not, and (add y, z) is used only once.
14848+
// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
14849+
// z is not, and (add y, z) is used only once.
14850+
// The goal is to move constant offsets to the outermost ptradd, to create
14851+
// more opportunities to fold offsets into memory instructions.
14852+
// Together with the generic combines in DAGCombiner.cpp, this also
14853+
// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)).
14854+
//
14855+
// This transform is here instead of in the general DAGCombiner as it can
14856+
// turn in-bounds pointer arithmetic out-of-bounds, which is problematic for
14857+
// AArch64's CPA.
14858+
SDValue X = N0;
14859+
SDValue Y = N1.getOperand(0);
14860+
SDValue Z = N1.getOperand(1);
14861+
bool N1OneUse = N1.hasOneUse();
14862+
bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y);
14863+
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
14864+
if ((ZIsConstant != YIsConstant) && N1OneUse) {
14865+
SDNodeFlags Flags;
14866+
// If both additions in the original were NUW, the new ones are as well.
14867+
if (N->getFlags().hasNoUnsignedWrap() &&
14868+
N1->getFlags().hasNoUnsignedWrap())
14869+
Flags |= SDNodeFlags::NoUnsignedWrap;
14870+
14871+
if (YIsConstant)
14872+
std::swap(Y, Z);
14873+
14874+
SDValue Inner = DAG.getMemBasePlusOffset(X, Y, DL, Flags);
14875+
DCI.AddToWorklist(Inner.getNode());
14876+
return DAG.getMemBasePlusOffset(Inner, Z, DL, Flags);
14877+
}
14878+
}
14879+
14880+
return SDValue();
14881+
}
14882+
1483614883
SDValue SITargetLowering::performSubCombine(SDNode *N,
1483714884
DAGCombinerInfo &DCI) const {
1483814885
SelectionDAG &DAG = DCI.DAG;
@@ -15365,6 +15412,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
1536515412
switch (N->getOpcode()) {
1536615413
case ISD::ADD:
1536715414
return performAddCombine(N, DCI);
15415+
case ISD::PTRADD:
15416+
return performPtrAddCombine(N, DCI);
1536815417
case ISD::SUB:
1536915418
return performSubCombine(N, DCI);
1537015419
case ISD::UADDO_CARRY:

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
217217
DAGCombinerInfo &DCI) const;
218218

219219
SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
220+
SDValue performPtrAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
220221
SDValue performAddCarrySubCarryCombine(SDNode *N, DAGCombinerInfo &DCI) const;
221222
SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
222223
SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;

0 commit comments

Comments
 (0)