llvm · ritter-x2a · Jun 4, 2025 · Jun 5, 2025 · Jun 6, 2025 · Jun 10, 2025
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -421,6 +421,7 @@ namespace {
     SDValue visitADDLike(SDNode *N);
     SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
                                     SDNode *LocReference);
+    SDValue visitPTRADD(SDNode *N);
     SDValue visitSUB(SDNode *N);
     SDValue visitADDSAT(SDNode *N);
     SDValue visitSUBSAT(SDNode *N);
@@ -1140,7 +1141,7 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
       return true;
   }
 
-  if (Opc != ISD::ADD)
+  if (Opc != ISD::ADD && Opc != ISD::PTRADD)
     return false;
 
   auto *C2 = dyn_cast<ConstantSDNode>(N1);
@@ -1860,6 +1861,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::TokenFactor:        return visitTokenFactor(N);
   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
   case ISD::ADD:                return visitADD(N);
+  case ISD::PTRADD:             return visitPTRADD(N);
   case ISD::SUB:                return visitSUB(N);
   case ISD::SADDSAT:
   case ISD::UADDSAT:            return visitADDSAT(N);
@@ -2630,6 +2632,86 @@ SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
   return SDValue();
 }
 
+/// Combine a pointer-arithmetic (ISD::PTRADD) node.
+/// PTRADD cannot reuse the ordinary ADD combines: pointer addition is not
+/// commutative, so the two operands must never be treated as interchangeable.
+SDValue DAGCombiner::visitPTRADD(SDNode *N) {
+  SDLoc DL(N);
+  SDValue Ptr = N->getOperand(0);
+  SDValue Offset = N->getOperand(1);
+  EVT PtrVT = Ptr.getValueType();
+  EVT IntVT = Offset.getValueType();
+
+  // SelectionDAG::getNode() already asserts this; it is repeated here because
+  // several of the combines below depend on it.
+  assert(PtrVT == IntVT &&
+         "PTRADD with different operand types is not supported");
+
+  // fold (ptradd x, 0) -> x
+  if (isNullConstant(Offset))
+    return Ptr;
+
+  // fold (ptradd 0, x) -> x
+  if (PtrVT == IntVT && isNullConstant(Ptr))
+    return Offset;
+
+  // The remaining combine reassociates a chain of two PTRADDs. It is skipped
+  // when reassociationCanBreakAddressingModePattern() objects.
+  if (Ptr.getOpcode() == ISD::PTRADD &&
+      !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, Ptr,
+                                                  Offset)) {
+    SDValue X = Ptr.getOperand(0);
+    SDValue Y = Ptr.getOperand(1);
+    SDValue Z = Offset;
+
+    // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if:
+    //   * y is a constant and (ptradd x, y) has one use; or
+    //   * y and z are both constants.
+    // Both cases need a constant y, so that is tested first.
+    if (DAG.isConstantIntBuildVectorOrConstantInt(Y) &&
+        (Ptr.hasOneUse() || DAG.isConstantIntBuildVectorOrConstantInt(Z))) {
+      // NUW is kept only when both original additions carried it.
+      SDNodeFlags Flags =
+          (N->getFlags() & Ptr->getFlags()) & SDNodeFlags::NoUnsignedWrap;
+      SDValue Sum = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
+      AddToWorklist(Sum.getNode());
+      return DAG.getMemBasePlusOffset(X, Sum, DL, Flags);
+    }
+  }
+
+  // TODO: One further reassociation has been shown to be profitable:
+  //
+  // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y) if:
+  //   * (ptradd x, y) has one use; and
+  //   * y is a constant; and
+  //   * z is not a constant.
+  //
+  // On some targets, notably AArch64 with FEAT_CPA, it lets instruction
+  // selection pick richer instructions such as SUBPT and MSUBPT. It is not
+  // done here because of a hypothetical corner case that could not be ruled
+  // out. Consider this (pseudo-POSIX C):
+  //
+  // char *foo(char *x, int z) {return (x + LARGE_CONSTANT) + z;}
+  // char *p = mmap(LARGE_CONSTANT);
+  // char *q = foo(p, -LARGE_CONSTANT);
+  //
+  // x + LARGE_CONSTANT is one-past-the-end and therefore valid, and adding z
+  // returns to the start of the mapping, which is also valid, whatever
+  // address mmap returned. After the fold, however, x + z is computed first:
+  // if mmap returned an address < LARGE_CONSTANT (ignoring high bits),
+  // x - LARGE_CONSTANT borrows from the high bits. The following
+  // + LARGE_CONSTANT carries back into them and yields a well-defined
+  // pointer, but the intermediate value trips FEAT_CPA's pointer corruption
+  // checks.
+  //
+  // The fold is left as future work: the FEAT_CPA corner case needs solving,
+  // and the solution must fit pointer arithmetic on other future targets.
+  // Until then, each target that wants the fold implements it in its
+  // target-specific code (see e.g. SITargetLowering::performPtrAddCombine).
+
+  return SDValue();
+}
+
 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
 /// a shift and add with a different constant.
 static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
@@ -15061,6 +15143,7 @@ SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
   default:
     break;
   case ISD::ADD:
+  case ISD::PTRADD:
   case ISD::SUB: {
     unsigned AlignShift = Log2(AL);
     SDValue LHS = N0.getOperand(0);

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -945,6 +945,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   }
 
   setTargetDAGCombine({ISD::ADD,
+                       ISD::PTRADD,
                        ISD::UADDO_CARRY,
                        ISD::SUB,
                        ISD::USUBO_CARRY,
@@ -15095,6 +15096,49 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
   return SDValue();
 }
 
+SDValue SITargetLowering::performPtrAddCombine(SDNode *N,
+                                               DAGCombinerInfo &DCI) const {
+  // Reassociate a ptradd whose offset is an add with one constant operand:
+  //   (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant,
+  //      y is not, and (add y, z) is used only once.
+  //   (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant,
+  //      z is not, and (add y, z) is used only once.
+  // Moving the constant offset to the outermost ptradd creates more
+  // opportunities to fold offsets into memory instructions. Together with the
+  // generic combines in DAGCombiner.cpp, this also implements
+  // (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y).
+  //
+  // The transform lives here rather than in the generic DAGCombiner because it
+  // can turn in-bounds pointer arithmetic out-of-bounds, which is problematic
+  // for AArch64's CPA.
+  SDValue Offset = N->getOperand(1);
+  if (Offset.getOpcode() != ISD::ADD || !Offset.hasOneUse())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue InnerOff = Offset.getOperand(0);
+  SDValue OuterOff = Offset.getOperand(1);
+  bool InnerIsConst = DAG.isConstantIntBuildVectorOrConstantInt(InnerOff);
+  bool OuterIsConst = DAG.isConstantIntBuildVectorOrConstantInt(OuterOff);
+
+  // Exactly one of the two addends must be a constant.
+  if (InnerIsConst == OuterIsConst)
+    return SDValue();
+
+  // The constant addend becomes the offset of the outermost ptradd.
+  if (InnerIsConst)
+    std::swap(InnerOff, OuterOff);
+
+  // If both additions in the original were NUW, the new ones are as well.
+  SDNodeFlags Flags =
+      (N->getFlags() & Offset->getFlags()) & SDNodeFlags::NoUnsignedWrap;
+  SDLoc DL(N);
+  SDValue Base = N->getOperand(0);
+  SDValue Inner = DAG.getMemBasePlusOffset(Base, InnerOff, DL, Flags);
+  DCI.AddToWorklist(Inner.getNode());
+  return DAG.getMemBasePlusOffset(Inner, OuterOff, DL, Flags);
+}
+
 SDValue SITargetLowering::performSubCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -15633,6 +15677,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
   switch (N->getOpcode()) {
   case ISD::ADD:
     return performAddCombine(N, DCI);
+  case ISD::PTRADD:
+    return performPtrAddCombine(N, DCI);
   case ISD::SUB:
     return performSubCombine(N, DCI);
   case ISD::UADDO_CARRY:

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -220,6 +220,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
                                           DAGCombinerInfo &DCI) const;
 
   SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue performPtrAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performAddCarrySubCarryCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;

diff --git a/llvm/test/CodeGen/AArch64/cpa-selectiondag.ll b/llvm/test/CodeGen/AArch64/cpa-selectiondag.ll
@@ -119,23 +119,17 @@ define void @msubpt1(i32 %index, i32 %elem) {
 ; CHECK-CPA-O0:       // %bb.0: // %entry
 ; CHECK-CPA-O0-NEXT:    // implicit-def: $x8
 ; CHECK-CPA-O0-NEXT:    mov w8, w0
-; CHECK-CPA-O0-NEXT:    sxtw x9, w8
-; CHECK-CPA-O0-NEXT:    mov x8, xzr
-; CHECK-CPA-O0-NEXT:    subs x8, x8, x9
-; CHECK-CPA-O0-NEXT:    lsl x8, x8, #1
-; CHECK-CPA-O0-NEXT:    subs x10, x8, x9
+; CHECK-CPA-O0-NEXT:    sxtw x8, w8
+; CHECK-CPA-O0-NEXT:    mov w9, #48 // =0x30
+; CHECK-CPA-O0-NEXT:    // kill: def $x9 killed $w9
+; CHECK-CPA-O0-NEXT:    mneg x8, x8, x9
+; CHECK-CPA-O0-NEXT:    add x8, x8, #288
 ; CHECK-CPA-O0-NEXT:    adrp x9, array2
 ; CHECK-CPA-O0-NEXT:    add x9, x9, :lo12:array2
-; CHECK-CPA-O0-NEXT:    mov w8, #288 // =0x120
-; CHECK-CPA-O0-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-CPA-O0-NEXT:    addpt x8, x9, x8
-; CHECK-CPA-O0-NEXT:    addpt x8, x8, x10, lsl #4
-; CHECK-CPA-O0-NEXT:    mov w10, #96 // =0x60
-; CHECK-CPA-O0-NEXT:    // kill: def $x10 killed $w10
-; CHECK-CPA-O0-NEXT:    addpt x10, x9, x10
-; CHECK-CPA-O0-NEXT:    ldr q1, [x10, #16]
-; CHECK-CPA-O0-NEXT:    ldr q2, [x10, #32]
 ; CHECK-CPA-O0-NEXT:    ldr q0, [x9, #96]
+; CHECK-CPA-O0-NEXT:    ldr q1, [x9, #112]
+; CHECK-CPA-O0-NEXT:    ldr q2, [x9, #128]
 ; CHECK-CPA-O0-NEXT:    str q2, [x8, #32]
 ; CHECK-CPA-O0-NEXT:    str q1, [x8, #16]
 ; CHECK-CPA-O0-NEXT:    str q0, [x8]
@@ -144,21 +138,17 @@ define void @msubpt1(i32 %index, i32 %elem) {
 ; CHECK-CPA-O3-LABEL: msubpt1:
 ; CHECK-CPA-O3:       // %bb.0: // %entry
 ; CHECK-CPA-O3-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-CPA-O3-NEXT:    sxtw x9, w0
-; CHECK-CPA-O3-NEXT:    adrp x8, array2
-; CHECK-CPA-O3-NEXT:    add x8, x8, :lo12:array2
-; CHECK-CPA-O3-NEXT:    mov w11, #96 // =0x60
-; CHECK-CPA-O3-NEXT:    mov w12, #288 // =0x120
-; CHECK-CPA-O3-NEXT:    ldr q2, [x8, #96]
-; CHECK-CPA-O3-NEXT:    neg x10, x9
-; CHECK-CPA-O3-NEXT:    addpt x11, x8, x11
-; CHECK-CPA-O3-NEXT:    lsl x10, x10, #1
-; CHECK-CPA-O3-NEXT:    ldp q1, q0, [x11, #16]
-; CHECK-CPA-O3-NEXT:    sub x9, x10, x9
-; CHECK-CPA-O3-NEXT:    addpt x10, x8, x12
-; CHECK-CPA-O3-NEXT:    addpt x9, x10, x9, lsl #4
-; CHECK-CPA-O3-NEXT:    stp q1, q0, [x9, #16]
-; CHECK-CPA-O3-NEXT:    str q2, [x9]
+; CHECK-CPA-O3-NEXT:    sxtw x8, w0
+; CHECK-CPA-O3-NEXT:    mov w9, #48 // =0x30
+; CHECK-CPA-O3-NEXT:    mneg x8, x8, x9
+; CHECK-CPA-O3-NEXT:    adrp x9, array2
+; CHECK-CPA-O3-NEXT:    add x9, x9, :lo12:array2
+; CHECK-CPA-O3-NEXT:    ldp q1, q0, [x9, #112]
+; CHECK-CPA-O3-NEXT:    ldr q2, [x9, #96]
+; CHECK-CPA-O3-NEXT:    add x8, x8, #288
+; CHECK-CPA-O3-NEXT:    addpt x8, x9, x8
+; CHECK-CPA-O3-NEXT:    stp q1, q0, [x8, #16]
+; CHECK-CPA-O3-NEXT:    str q2, [x8]
 ; CHECK-CPA-O3-NEXT:    ret
 ;
 ; CHECK-NOCPA-O0-LABEL: msubpt1:
@@ -205,29 +195,29 @@ entry:
 define void @subpt1(i32 %index, i32 %elem) {
 ; CHECK-CPA-O0-LABEL: subpt1:
 ; CHECK-CPA-O0:       // %bb.0: // %entry
-; CHECK-CPA-O0-NEXT:    adrp x9, array
-; CHECK-CPA-O0-NEXT:    add x9, x9, :lo12:array
+; CHECK-CPA-O0-NEXT:    // implicit-def: $x8
+; CHECK-CPA-O0-NEXT:    mov w8, w0
+; CHECK-CPA-O0-NEXT:    sxtw x9, w8
 ; CHECK-CPA-O0-NEXT:    mov w8, #96 // =0x60
 ; CHECK-CPA-O0-NEXT:    // kill: def $x8 killed $w8
+; CHECK-CPA-O0-NEXT:    subs x8, x8, x9, lsl #8
+; CHECK-CPA-O0-NEXT:    adrp x9, array
+; CHECK-CPA-O0-NEXT:    add x9, x9, :lo12:array
 ; CHECK-CPA-O0-NEXT:    addpt x8, x9, x8
-; CHECK-CPA-O0-NEXT:    // implicit-def: $x10
-; CHECK-CPA-O0-NEXT:    mov w10, w0
-; CHECK-CPA-O0-NEXT:    sbfiz x10, x10, #8, #32
-; CHECK-CPA-O0-NEXT:    subpt x8, x8, x10
 ; CHECK-CPA-O0-NEXT:    ldr q0, [x9, #32]
 ; CHECK-CPA-O0-NEXT:    str q0, [x8]
 ; CHECK-CPA-O0-NEXT:    ret
 ;
 ; CHECK-CPA-O3-LABEL: subpt1:
 ; CHECK-CPA-O3:       // %bb.0: // %entry
 ; CHECK-CPA-O3-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-CPA-O3-NEXT:    adrp x8, array
-; CHECK-CPA-O3-NEXT:    add x8, x8, :lo12:array
+; CHECK-CPA-O3-NEXT:    sxtw x8, w0
 ; CHECK-CPA-O3-NEXT:    mov w9, #96 // =0x60
-; CHECK-CPA-O3-NEXT:    sbfiz x10, x0, #8, #32
-; CHECK-CPA-O3-NEXT:    addpt x9, x8, x9
-; CHECK-CPA-O3-NEXT:    ldr q0, [x8, #32]
-; CHECK-CPA-O3-NEXT:    subpt x8, x9, x10
+; CHECK-CPA-O3-NEXT:    sub x8, x9, x8, lsl #8
+; CHECK-CPA-O3-NEXT:    adrp x9, array
+; CHECK-CPA-O3-NEXT:    add x9, x9, :lo12:array
+; CHECK-CPA-O3-NEXT:    ldr q0, [x9, #32]
+; CHECK-CPA-O3-NEXT:    addpt x8, x9, x8
 ; CHECK-CPA-O3-NEXT:    str q0, [x8]
 ; CHECK-CPA-O3-NEXT:    ret
 ;
@@ -264,28 +254,24 @@ entry:
 define void @subpt2(i32 %index, i32 %elem) {
 ; CHECK-CPA-O0-LABEL: subpt2:
 ; CHECK-CPA-O0:       // %bb.0: // %entry
-; CHECK-CPA-O0-NEXT:    mov x8, xzr
-; CHECK-CPA-O0-NEXT:    subs x10, x8, w0, sxtw
-; CHECK-CPA-O0-NEXT:    adrp x9, array
-; CHECK-CPA-O0-NEXT:    add x9, x9, :lo12:array
 ; CHECK-CPA-O0-NEXT:    mov w8, #96 // =0x60
 ; CHECK-CPA-O0-NEXT:    // kill: def $x8 killed $w8
+; CHECK-CPA-O0-NEXT:    subs x8, x8, w0, sxtw #4
+; CHECK-CPA-O0-NEXT:    adrp x9, array
+; CHECK-CPA-O0-NEXT:    add x9, x9, :lo12:array
 ; CHECK-CPA-O0-NEXT:    addpt x8, x9, x8
-; CHECK-CPA-O0-NEXT:    addpt x8, x8, x10, lsl #4
 ; CHECK-CPA-O0-NEXT:    ldr q0, [x9, #32]
 ; CHECK-CPA-O0-NEXT:    str q0, [x8]
 ; CHECK-CPA-O0-NEXT:    ret
 ;
 ; CHECK-CPA-O3-LABEL: subpt2:
 ; CHECK-CPA-O3:       // %bb.0: // %entry
-; CHECK-CPA-O3-NEXT:    mov x8, xzr
-; CHECK-CPA-O3-NEXT:    mov w9, #96 // =0x60
-; CHECK-CPA-O3-NEXT:    adrp x10, array
-; CHECK-CPA-O3-NEXT:    add x10, x10, :lo12:array
-; CHECK-CPA-O3-NEXT:    sub x8, x8, w0, sxtw
-; CHECK-CPA-O3-NEXT:    addpt x9, x10, x9
-; CHECK-CPA-O3-NEXT:    ldr q0, [x10, #32]
-; CHECK-CPA-O3-NEXT:    addpt x8, x9, x8, lsl #4
+; CHECK-CPA-O3-NEXT:    mov w8, #96 // =0x60
+; CHECK-CPA-O3-NEXT:    adrp x9, array
+; CHECK-CPA-O3-NEXT:    add x9, x9, :lo12:array
+; CHECK-CPA-O3-NEXT:    sub x8, x8, w0, sxtw #4
+; CHECK-CPA-O3-NEXT:    ldr q0, [x9, #32]
+; CHECK-CPA-O3-NEXT:    addpt x8, x9, x8
 ; CHECK-CPA-O3-NEXT:    str q0, [x8]
 ; CHECK-CPA-O3-NEXT:    ret
 ;
@@ -670,14 +656,13 @@ define hidden void @multidim() {
 ; CHECK-CPA-O0-NEXT:    .cfi_offset w30, -16
 ; CHECK-CPA-O0-NEXT:    adrp x8, b
 ; CHECK-CPA-O0-NEXT:    ldrh w9, [x8, :lo12:b]
+; CHECK-CPA-O0-NEXT:    // implicit-def: $x8
 ; CHECK-CPA-O0-NEXT:    mov w8, w9
-; CHECK-CPA-O0-NEXT:    mov w10, w8
+; CHECK-CPA-O0-NEXT:    ubfiz x8, x8, #1, #32
+; CHECK-CPA-O0-NEXT:    add x10, x8, #2
 ; CHECK-CPA-O0-NEXT:    adrp x8, a
 ; CHECK-CPA-O0-NEXT:    add x8, x8, :lo12:a
-; CHECK-CPA-O0-NEXT:    mov w11, #2 // =0x2
-; CHECK-CPA-O0-NEXT:    // kill: def $x11 killed $w11
-; CHECK-CPA-O0-NEXT:    addpt x8, x8, x11
-; CHECK-CPA-O0-NEXT:    addpt x8, x8, x10, lsl #1
+; CHECK-CPA-O0-NEXT:    addpt x8, x8, x10
 ; CHECK-CPA-O0-NEXT:    add w9, w9, #1
 ; CHECK-CPA-O0-NEXT:    mov w9, w9
 ; CHECK-CPA-O0-NEXT:    // kill: def $x9 killed $w9
@@ -697,13 +682,13 @@ define hidden void @multidim() {
 ; CHECK-CPA-O3-LABEL: multidim:
 ; CHECK-CPA-O3:       // %bb.0: // %entry
 ; CHECK-CPA-O3-NEXT:    adrp x8, b
-; CHECK-CPA-O3-NEXT:    mov w9, #2 // =0x2
 ; CHECK-CPA-O3-NEXT:    adrp x10, a
 ; CHECK-CPA-O3-NEXT:    add x10, x10, :lo12:a
 ; CHECK-CPA-O3-NEXT:    ldrh w8, [x8, :lo12:b]
-; CHECK-CPA-O3-NEXT:    addpt x9, x10, x9
-; CHECK-CPA-O3-NEXT:    addpt x9, x9, x8, lsl #1
+; CHECK-CPA-O3-NEXT:    lsl x9, x8, #1
 ; CHECK-CPA-O3-NEXT:    add x8, x8, #1
+; CHECK-CPA-O3-NEXT:    add x9, x9, #2
+; CHECK-CPA-O3-NEXT:    addpt x9, x10, x9
 ; CHECK-CPA-O3-NEXT:    addpt x8, x9, x8
 ; CHECK-CPA-O3-NEXT:    ldrb w8, [x8]
 ; CHECK-CPA-O3-NEXT:    cbz w8, .LBB14_2