[SelectOpt] Support ADD and SUB with zext operands. #115489
Conversation
@llvm/pr-subscribers-backend-aarch64

Author: Florian Hahn (fhahn)

Changes

Extend the support for implicit selects in the form of OR with a ZExt operand to also cover the ADD and SUB binops. They can similarly form implicit selects, which can be profitable to convert back to branches.

Full diff: https://github.com/llvm/llvm-project/pull/115489.diff

3 Files Affected:
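For illustration, a rough LLVM IR sketch of the equivalence being described (value names here are made up, not taken from the patch or its tests):

```llvm
; Minimal sketch: with %c an i1 condition,
;   add %y, (zext %c)  behaves as  select %c, (%y + 1), %y
;   sub %y, (zext %c)  behaves as  select %c, (%y - 1), %y
; analogous to the already-handled  or %y, (zext %c)  ==  select %c, (%y | 1), %y.
define i64 @add_as_select(i64 %y, i1 %c) {
  %c.ext = zext i1 %c to i64
  %r = add i64 %y, %c.ext   ; implicit select between %y + 1 (true) and %y (false)
  ret i64 %r
}
```

SelectOptimize can then turn such a binop into an explicit conditional branch when the cost model says that is profitable, as the test diffs below show.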
diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp
index 9587534d1170fc..ce3c90d43391d9 100644
--- a/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -145,11 +145,20 @@ class SelectOptimizeImpl {
// An Or(zext(i1 X), Y) can also be treated like a select, with condition
// C and values Y|1 and Y.
- Value *X;
- if (PatternMatch::match(
- I, m_c_Or(m_OneUse(m_ZExt(m_Value(X))), m_Value())) &&
- X->getType()->isIntegerTy(1))
- return SelectLike(I);
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Or:
+ case Instruction::Sub: {
+ Value *X;
+ if ((PatternMatch::match(I->getOperand(0),
+ m_OneUse(m_ZExt(m_Value(X)))) ||
+ PatternMatch::match(I->getOperand(1),
+ m_OneUse(m_ZExt(m_Value(X))))) &&
+ X->getType()->isIntegerTy(1))
+ return SelectLike(I);
+ break;
+ }
+ }
return SelectLike(nullptr);
}
@@ -250,19 +259,22 @@ class SelectOptimizeImpl {
: Scaled64::getZero();
}
- // Or case - add the cost of an extra Or to the cost of the False case.
- if (isa<BinaryOperator>(I))
- if (auto I = dyn_cast<Instruction>(getFalseValue())) {
+ // BinaryOp case - add the cost of an extra BinOp to the cost of the False
+ // case.
+ if (isa<BinaryOperator>(I)) {
+ if (auto OpI = dyn_cast<Instruction>(getFalseValue())) {
auto It = InstCostMap.find(I);
if (It != InstCostMap.end()) {
InstructionCost OrCost = TTI->getArithmeticInstrCost(
- Instruction::Or, I->getType(), TargetTransformInfo::TCK_Latency,
+ I->getOpcode(), OpI->getType(),
+ TargetTransformInfo::TCK_Latency,
{TargetTransformInfo::OK_AnyValue,
TargetTransformInfo::OP_None},
{TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});
return It->second.NonPredCost + Scaled64::get(*OrCost.getValue());
}
}
+ }
return Scaled64::getZero();
}
@@ -548,12 +560,16 @@ getTrueOrFalseValue(SelectOptimizeImpl::SelectLike SI, bool isTrue,
V = (!isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
}
- if (isa<BinaryOperator>(SI.getI())) {
- assert(SI.getI()->getOpcode() == Instruction::Or &&
- "Only currently handling Or instructions.");
+ if (auto *BinOp = dyn_cast<BinaryOperator>(SI.getI())) {
+ assert((BinOp->getOpcode() == Instruction::Add ||
+ BinOp->getOpcode() == Instruction::Or ||
+ BinOp->getOpcode() == Instruction::Sub) &&
+ "Only currently handling Add, Or and Sub instructions.");
V = SI.getFalseValue();
- if (isTrue)
- V = IB.CreateOr(V, ConstantInt::get(V->getType(), 1));
+ if (isTrue) {
+ Constant *CI = ConstantInt::get(V->getType(), 1);
+ V = IB.CreateBinOp(BinOp->getOpcode(), V, CI);
+ }
}
assert(V && "Failed to get select true/false value");
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 71f9bbbbc35041..75eea291896ef7 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4678,14 +4678,20 @@ AArch64TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
}
bool AArch64TTIImpl::shouldTreatInstructionLikeSelect(const Instruction *I) {
- // For the binary operators (e.g. or) we need to be more careful than
- // selects, here we only transform them if they are already at a natural
- // break point in the code - the end of a block with an unconditional
- // terminator.
- if (EnableOrLikeSelectOpt && I->getOpcode() == Instruction::Or &&
- isa<BranchInst>(I->getNextNode()) &&
- cast<BranchInst>(I->getNextNode())->isUnconditional())
- return true;
+ if (EnableOrLikeSelectOpt) {
+ // For the binary operators (e.g. or) we need to be more careful than
+ // selects, here we only transform them if they are already at a natural
+ // break point in the code - the end of a block with an unconditional
+ // terminator.
+ if (I->getOpcode() == Instruction::Or &&
+ isa<BranchInst>(I->getNextNode()) &&
+ cast<BranchInst>(I->getNextNode())->isUnconditional())
+ return true;
+
+ if (I->getOpcode() == Instruction::Add ||
+ I->getOpcode() == Instruction::Sub)
+ return true;
+ }
return BaseT::shouldTreatInstructionLikeSelect(I);
}
diff --git a/llvm/test/CodeGen/AArch64/selectopt-cast.ll b/llvm/test/CodeGen/AArch64/selectopt-cast.ll
index b1804a02ec1c82..3dfb72b299c2ea 100644
--- a/llvm/test/CodeGen/AArch64/selectopt-cast.ll
+++ b/llvm/test/CodeGen/AArch64/selectopt-cast.ll
@@ -7,16 +7,22 @@ define void @test_add_zext(ptr %dst, ptr %src, i64 %j.start, i64 %p, i64 %i.star
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[SELECT_END:%.*]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[SELECT_END]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[SELECT_END]] ]
; CHECK-NEXT: [[GEP_I:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
; CHECK-NEXT: [[L_I:%.*]] = load ptr, ptr [[GEP_I]], align 8
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
; CHECK-NEXT: [[L_J:%.*]] = load ptr, ptr [[GEP_J]], align 8
; CHECK-NEXT: [[CMP3:%.*]] = icmp ult ptr [[L_I]], [[L_J]]
; CHECK-NEXT: [[DEC:%.*]] = zext i1 [[CMP3]] to i64
-; CHECK-NEXT: [[J_NEXT]] = add nsw i64 [[J]], [[DEC]]
+; CHECK-NEXT: [[CMP3_FROZEN:%.*]] = freeze i1 [[CMP3]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[J]], 1
+; CHECK-NEXT: br i1 [[CMP3_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]]
+; CHECK: select.false:
+; CHECK-NEXT: br label [[SELECT_END]]
+; CHECK: select.end:
+; CHECK-NEXT: [[J_NEXT]] = phi i64 [ [[TMP0]], [[LOOP]] ], [ [[J]], [[SELECT_FALSE]] ]
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[IV]]
; CHECK-NEXT: store i64 [[J_NEXT]], ptr [[GEP_DST]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
@@ -54,9 +60,9 @@ define void @test_add_zext_not(ptr %dst, ptr %src, i64 %j.start, i64 %p, i64 %i.
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[SELECT_END:%.*]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[SELECT_END]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[SELECT_END]] ]
; CHECK-NEXT: [[GEP_I:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
; CHECK-NEXT: [[L_I:%.*]] = load ptr, ptr [[GEP_I]], align 8
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
@@ -64,7 +70,13 @@ define void @test_add_zext_not(ptr %dst, ptr %src, i64 %j.start, i64 %p, i64 %i.
; CHECK-NEXT: [[CMP3:%.*]] = icmp ult ptr [[L_I]], [[L_J]]
; CHECK-NEXT: [[NOT_CMP3:%.*]] = xor i1 [[CMP3]], true
; CHECK-NEXT: [[DEC:%.*]] = zext i1 [[NOT_CMP3]] to i64
-; CHECK-NEXT: [[J_NEXT]] = add nsw i64 [[J]], [[DEC]]
+; CHECK-NEXT: [[NOT_CMP3_FROZEN:%.*]] = freeze i1 [[NOT_CMP3]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[J]], 1
+; CHECK-NEXT: br i1 [[NOT_CMP3_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]]
+; CHECK: select.false:
+; CHECK-NEXT: br label [[SELECT_END]]
+; CHECK: select.end:
+; CHECK-NEXT: [[J_NEXT]] = phi i64 [ [[TMP0]], [[LOOP]] ], [ [[J]], [[SELECT_FALSE]] ]
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[IV]]
; CHECK-NEXT: store i64 [[J_NEXT]], ptr [[GEP_DST]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
@@ -308,16 +320,22 @@ define void @test_sub_zext(ptr %dst, ptr %src, i64 %j.start, i64 %p, i64 %i.star
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[SELECT_END:%.*]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[SELECT_END]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[SELECT_END]] ]
; CHECK-NEXT: [[GEP_I:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
; CHECK-NEXT: [[L_I:%.*]] = load ptr, ptr [[GEP_I]], align 8
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
; CHECK-NEXT: [[L_J:%.*]] = load ptr, ptr [[GEP_J]], align 8
; CHECK-NEXT: [[CMP3:%.*]] = icmp ult ptr [[L_I]], [[L_J]]
; CHECK-NEXT: [[DEC:%.*]] = zext i1 [[CMP3]] to i64
-; CHECK-NEXT: [[J_NEXT]] = sub nsw i64 [[J]], [[DEC]]
+; CHECK-NEXT: [[CMP3_FROZEN:%.*]] = freeze i1 [[CMP3]]
+; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[J]], 1
+; CHECK-NEXT: br i1 [[CMP3_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]]
+; CHECK: select.false:
+; CHECK-NEXT: br label [[SELECT_END]]
+; CHECK: select.end:
+; CHECK-NEXT: [[J_NEXT]] = phi i64 [ [[TMP0]], [[LOOP]] ], [ [[J]], [[SELECT_FALSE]] ]
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[IV]]
; CHECK-NEXT: store i64 [[J_NEXT]], ptr [[GEP_DST]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
@@ -355,9 +373,9 @@ define void @test_sub_zext_not(ptr %dst, ptr %src, i64 %j.start, i64 %p, i64 %i.
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[SELECT_END:%.*]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[HIGH:%.*]], [[ENTRY]] ], [ [[J_NEXT:%.*]], [[SELECT_END]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_START:%.*]], [[ENTRY]] ], [ [[J_NEXT]], [[SELECT_END]] ]
; CHECK-NEXT: [[GEP_I:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
; CHECK-NEXT: [[L_I:%.*]] = load ptr, ptr [[GEP_I]], align 8
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
@@ -365,7 +383,13 @@ define void @test_sub_zext_not(ptr %dst, ptr %src, i64 %j.start, i64 %p, i64 %i.
; CHECK-NEXT: [[CMP3:%.*]] = icmp ult ptr [[L_I]], [[L_J]]
; CHECK-NEXT: [[NOT_CMP3:%.*]] = xor i1 [[CMP3]], true
; CHECK-NEXT: [[DEC:%.*]] = zext i1 [[NOT_CMP3]] to i64
-; CHECK-NEXT: [[J_NEXT]] = sub nsw i64 [[J]], [[DEC]]
+; CHECK-NEXT: [[NOT_CMP3_FROZEN:%.*]] = freeze i1 [[NOT_CMP3]]
+; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[J]], 1
+; CHECK-NEXT: br i1 [[NOT_CMP3_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]]
+; CHECK: select.false:
+; CHECK-NEXT: br label [[SELECT_END]]
+; CHECK: select.end:
+; CHECK-NEXT: [[J_NEXT]] = phi i64 [ [[TMP0]], [[LOOP]] ], [ [[J]], [[SELECT_FALSE]] ]
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[IV]]
; CHECK-NEXT: store i64 [[J_NEXT]], ptr [[GEP_DST]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
Good idea. LGTM.
This only enables it for AArch64. @sapostolakis not sure if you would like to look into enabling this for other architectures (I think the OR support is only enabled for AArch64 as well).
Do you have performance results for this change? In the past I tried to keep this relatively conservative, as depending on the predictability of the branch it can make things better or worse (we found a case where it was worse recently). We have been looking at some other selects that could benefit from the transform, but I had expected they would only be generally profitable once considered as part of a larger select group. My understanding was that the Apple CPUs did not use this option either. Do you intend to change that?
Oh and theoretically a sub is not commutative like an add or or, so we would need the zext to be the second operand.
I am currently working on enabling it for Apple CPUs, and for some motivating cases we need support for add and sub here. Overall, enabling it with this change is neutral on SPEC and some other proprietary benchmarks. I agree that there is the risk that if we get it wrong the result can be pretty bad, but I don't think the add/sub cases are fundamentally different from the or case in that regard. I expect that as we support more cases, we are likely to hit additional cases where the cost model gets things wrong. What do you think is the best way to make progress here?
That's good to hear. I was under the impression (without knowing any of the details) that they were at least less susceptible to the issues we see. Maybe they are present everywhere to some extent.
Yeah the add and or are very similar. We ruled out logical and/or in ca78b56, and recently prevented it from folding selects with two constant operands in 1789534. I also tried to make the or relatively constrained when I added it, only adding them where a branch already existed (a2d68b4). I am a bit suspicious of things that look like logic or arithmetic, as they are sometimes quicker as logic/arithmetic. The problem with the cost model is that it doesn't know how predictable the branch is, and so doesn't have full info available. My understanding is that well-predictable loops can benefit greatly from converting the selects, but in larger codebases that are already heavy on the number of branches it can introduce extra overhead.
Honestly, my hope was that they would fix it in the hardware :) I ran this and it didn't look too bad, but I didn't look into any specific case deeply. Do you have tests for commuted sub operands, where the first operand is the condition?
As an example of something I thought of with adds: an i128 addition using i64 values has a zext and add that we would usually hope to transform into an adc. https://godbolt.org/z/nY1xfzE5M. That will not transform due to the cost model, and would need more going on to transform, but it is the kind of thing we might need to worry about.
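For context, a rough IR sketch of the kind of pattern the godbolt link points at (names and exact shape are assumptions, not taken from the link): the carry of the low 64-bit addition shows up as zext(icmp) feeding an add, which backends normally want to lower to adds/adc rather than a branch.

```llvm
; Hypothetical i128-as-two-i64 addition: the carry is materialised as
; zext(icmp) and added into the high half. The final add-of-zext looks
; "select-like" to SelectOptimize, but turning it into a branch would
; defeat the usual adds + adc lowering.
define { i64, i64 } @add_i128_parts(i64 %a.lo, i64 %a.hi, i64 %b.lo, i64 %b.hi) {
  %lo = add i64 %a.lo, %b.lo
  %carry = icmp ult i64 %lo, %a.lo          ; carry out of the low addition
  %carry.ext = zext i1 %carry to i64
  %hi.nocarry = add i64 %a.hi, %b.hi
  %hi = add i64 %hi.nocarry, %carry.ext
  %r0 = insertvalue { i64, i64 } poison, i64 %lo, 0
  %r1 = insertvalue { i64, i64 } %r0, i64 %hi, 1
  ret { i64, i64 } %r1
}
```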
I agree with @fhahn's point that the Add case is no different from the Or case, and if there is a regression, the cost modeling would probably need to be adjusted. @davemgreen you are right that the predictability of the branch is a missing aspect here that could lead to incorrect cost modeling. The cost model will skip converting to branches if the select is annotated as unpredictable (either from user annotations or populated by other passes; e.g., recently there was a discussion of feeding back perf data for mispredictions), but there is no similar heuristic for highly predictable branches and, if I am not mistaken, no equivalent annotation (maybe we could add a predictable metadata and populate it based on user hints such as ABSL_PREDICT_TRUE or SPGO mispredict data). The default misprediction rate in the cost model is meant to be conservative and assumes some level of misprediction, given that converting to a highly mispredicting branch is a worse outcome than keeping a predictable select.
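For reference, a minimal sketch of the existing annotation mentioned above (the function and values are hypothetical): a select carrying !unpredictable metadata is left alone, but there is currently no opposite hint marking a select as highly predictable.

```llvm
; Hypothetical example: the !unpredictable annotation tells the pass not to
; turn this select into a branch; no equivalent "predictable" hint exists
; today, as discussed above.
define i64 @annotated_select(i1 %c, i64 %a, i64 %b) {
  %r = select i1 %c, i64 %a, i64 %b, !unpredictable !0
  ret i64 %r
}

!0 = !{}
```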
Hi - Yeah, the heuristic is trying to do the best it can with the information it has; it just won't always be perfect. @igogo-x86 mentioned that this was something he was planning to add anyway, so it sounds OK as it will likely be added either here or in another patch. If it was me I might make it more conservative (maybe only do it if it was part of a larger select group), but I don't know of any cases where this will directly cause problems. The example above with i128 addition doesn't trigger on its own. The sub issue still remains, as I believe subs will be treated like a commutative operation, so we would need to make sure it is the second operand that is the zext/sext.
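To spell out the commuted-sub concern with a small sketch (hypothetical values): only a sub whose second operand is the zext forms an implicit select on %y; with the operands swapped the two outcomes are 1 - %y and -%y, which is not the Y-versus-Y-op-1 shape the pass looks for.

```llvm
; Hypothetical illustration of the operand-order issue for sub.
define i64 @sub_operand_order(i64 %y, i1 %c) {
  %z1 = zext i1 %c to i64
  %ok = sub i64 %y, %z1   ; == select %c, (%y - 1), %y       -> select-like
  %z2 = zext i1 %c to i64
  %no = sub i64 %z2, %y   ; == select %c, (1 - %y), (0 - %y) -> not select-like
  %r = add i64 %ok, %no
  ret i64 %r
}
```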
Extra tests for #115489 with different operand order. Also fixes the target triple.
Extend the support for implicit selects in the form of OR with a ZExt operand to support ADD and SUB binops as well. They similarly can form implicit selects which can be profitable to convert back the branches.
Thanks, updated the patch again after @igogo-x86's latest patch landed, and I think the operands are now added in the correct order (e.g. as in …).
Building on top of llvm#115489 extend support for binops with SExt operand.
Heads up: we see a number of test failures on aarch64 after this commit. Might be a miscompilation. A reduced test case is in the works.
Thanks for the fix! It resolves the issues we're seeing.
Extra tests for llvm#115489 with different operand order. Also fixes the target triple. (cherry picked from commit f8f238d)
Extend the support for implicit selects in the form of OR with a ZExt operand to support ADD and SUB binops as well. They similarly can form implicit selects which can be profitable to convert back the branches. PR: llvm#115489 (cherry picked from commit 9a0f251)
Building on top of llvm#115489 extend support for binops with SExt operand. PR: llvm#115879 (cherry picked from commit c1f5937)