-
Notifications
You must be signed in to change notification settings - Fork 14.4k
GVN: strip bad TODO, FIXME (NFC) #111416
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GVN: strip bad TODO, FIXME (NFC) #111416
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Ramkumar Ramachandra (artagnon) ChangesimpliesEquivalenceIfTrue and impliesEquivalenceIfFalse can be extended to aggregates easily, although as the added tests show, the codepath cannot be exercised: GVN does not propogate values through a vector-select. Generalize the code anyway, as it opens up opportunities for optimizing GVN. While at it, note that the optimization is invalid for the no-signed-zeros case, and strip the bad FIXME. Alive2 proof: https://alive2.llvm.org/ce/z/vEaK8M -- 8< -- Patch is 214.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/111416.diff 60 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 2ba600497e00d3..315ffe4c041a00 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -1998,19 +1998,17 @@ static bool impliesEquivalanceIfTrue(CmpInst* Cmp) {
// +0.0 vs 0.0 for all operators
if (Cmp->getPredicate() == CmpInst::Predicate::FCMP_OEQ ||
(Cmp->getPredicate() == CmpInst::Predicate::FCMP_UEQ &&
- Cmp->getFastMathFlags().noNaNs())) {
+ Cmp->hasNoNaNs())) {
Value *LHS = Cmp->getOperand(0);
Value *RHS = Cmp->getOperand(1);
// If we can prove either side non-zero, then equality must imply
// equivalence.
- // FIXME: We should do this optimization if 'no signed zeros' is
- // applicable via an instruction-level fast-math-flag or some other
- // indicator that relaxed FP semantics are being used.
- if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
- return true;
- if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
- return true;
- // TODO: Handle vector floating point constants
+ auto *ConstLHS = dyn_cast<Constant>(LHS),
+ *ConstRHS = dyn_cast<Constant>(RHS);
+ if (auto *Const = ConstLHS ? ConstLHS : ConstRHS) {
+ if (!Const->isZeroValue())
+ return true;
+ }
}
return false;
}
@@ -2023,20 +2021,18 @@ static bool impliesEquivalanceIfFalse(CmpInst* Cmp) {
// NaNs for unordered operators
// +0.0 vs 0.0 for all operators
if ((Cmp->getPredicate() == CmpInst::Predicate::FCMP_ONE &&
- Cmp->getFastMathFlags().noNaNs()) ||
+ Cmp->hasNoNaNs()) ||
Cmp->getPredicate() == CmpInst::Predicate::FCMP_UNE) {
Value *LHS = Cmp->getOperand(0);
Value *RHS = Cmp->getOperand(1);
// If we can prove either side non-zero, then equality must imply
// equivalence.
- // FIXME: We should do this optimization if 'no signed zeros' is
- // applicable via an instruction-level fast-math-flag or some other
- // indicator that relaxed FP semantics are being used.
- if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
- return true;
- if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
- return true;
- // TODO: Handle vector floating point constants
+ auto *ConstLHS = dyn_cast<Constant>(LHS),
+ *ConstRHS = dyn_cast<Constant>(RHS);
+ if (auto *Const = ConstLHS ? ConstLHS : ConstRHS) {
+ if (!Const->isZeroValue())
+ return true;
+ }
}
return false;
}
diff --git a/llvm/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll b/llvm/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
index 9c720049bd84a0..9b196eb0a3350e 100644
--- a/llvm/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
+++ b/llvm/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
@@ -1,15 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=gvn -S | FileCheck %s
- %struct.INT2 = type { i32, i32 }
+%struct.INT2 = type { i32, i32 }
@blkshifts = external global ptr ; <ptr> [#uses=2]
define i32 @xcompact() {
+; CHECK-LABEL: define i32 @xcompact() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: store ptr null, ptr @blkshifts, align 4
+; CHECK-NEXT: br label %[[BB:.*]]
+; CHECK: [[BB]]:
+; CHECK-NEXT: br label %[[BB]]
+;
entry:
- store ptr null, ptr @blkshifts, align 4
- br label %bb
+ store ptr null, ptr @blkshifts, align 4
+ br label %bb
bb: ; preds = %bb, %entry
- %tmp10 = load ptr, ptr @blkshifts, align 4 ; <ptr> [#uses=0]
-; CHECK-NOT: %tmp10
- br label %bb
+ %tmp10 = load ptr, ptr @blkshifts, align 4 ; <ptr> [#uses=0]
+ br label %bb
}
diff --git a/llvm/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll b/llvm/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
index e8fb8f1087cce9..4c264c1a76d4a6 100644
--- a/llvm/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
+++ b/llvm/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
@@ -1,40 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=gvn -S | FileCheck %s
@last = external global [65 x ptr]
define i32 @NextRootMove(i32 %wtm, i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: define i32 @NextRootMove(
+; CHECK-SAME: i32 [[WTM:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[TMP17618:%.*]] = load ptr, ptr getelementptr ([65 x ptr], ptr @last, i32 0, i32 1), align 4
+; CHECK-NEXT: store ptr [[TMP17618]], ptr [[A]], align 8
+; CHECK-NEXT: br label %[[COND_TRUE116:.*]]
+; CHECK: [[COND_TRUE116]]:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[COND_TRUE128:.*]], label %[[COND_TRUE145:.*]]
+; CHECK: [[COND_TRUE128]]:
+; CHECK-NEXT: store ptr [[TMP17618]], ptr [[A]], align 8
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], [[Z]]
+; CHECK-NEXT: br i1 [[CMP1]], label %[[BB98_BACKEDGE:.*]], label %[[RETURN_LOOPEXIT:.*]]
+; CHECK: [[BB98_BACKEDGE]]:
+; CHECK-NEXT: br label %[[COND_TRUE116]]
+; CHECK: [[COND_TRUE145]]:
+; CHECK-NEXT: store ptr [[TMP17618]], ptr [[A]], align 8
+; CHECK-NEXT: br i1 false, label %[[COND_TRUE145_BB98_BACKEDGE_CRIT_EDGE:.*]], label %[[RETURN_LOOPEXIT]]
+; CHECK: [[COND_TRUE145_BB98_BACKEDGE_CRIT_EDGE]]:
+; CHECK-NEXT: br label %[[BB98_BACKEDGE]]
+; CHECK: [[RETURN_LOOPEXIT]]:
+; CHECK-NEXT: ret i32 0
+;
entry:
- %A = alloca ptr
- %tmp17618 = load ptr, ptr getelementptr ([65 x ptr], ptr @last, i32 0, i32 1), align 4
- store ptr %tmp17618, ptr %A
-; CHECK: entry:
-; CHECK-NEXT: alloca ptr
-; CHECK-NEXT: %tmp17618 = load
-; CHECK-NOT: load
-; CHECK-NOT: phi
- br label %cond_true116
+ %A = alloca ptr
+ %tmp17618 = load ptr, ptr getelementptr ([65 x ptr], ptr @last, i32 0, i32 1), align 4
+ store ptr %tmp17618, ptr %A
+ br label %cond_true116
cond_true116:
- %cmp = icmp eq i32 %x, %y
- br i1 %cmp, label %cond_true128, label %cond_true145
+ %cmp = icmp eq i32 %x, %y
+ br i1 %cmp, label %cond_true128, label %cond_true145
cond_true128:
- %tmp17625 = load ptr, ptr getelementptr ([65 x ptr], ptr @last, i32 0, i32 1), align 4
- store ptr %tmp17625, ptr %A
- %cmp1 = icmp eq i32 %x, %z
- br i1 %cmp1 , label %bb98.backedge, label %return.loopexit
+ %tmp17625 = load ptr, ptr getelementptr ([65 x ptr], ptr @last, i32 0, i32 1), align 4
+ store ptr %tmp17625, ptr %A
+ %cmp1 = icmp eq i32 %x, %z
+ br i1 %cmp1 , label %bb98.backedge, label %return.loopexit
bb98.backedge:
- br label %cond_true116
+ br label %cond_true116
cond_true145:
- %tmp17631 = load ptr, ptr getelementptr ([65 x ptr], ptr @last, i32 0, i32 1), align 4
- store ptr %tmp17631, ptr %A
- br i1 false, label %bb98.backedge, label %return.loopexit
+ %tmp17631 = load ptr, ptr getelementptr ([65 x ptr], ptr @last, i32 0, i32 1), align 4
+ store ptr %tmp17631, ptr %A
+ br i1 false, label %bb98.backedge, label %return.loopexit
return.loopexit:
- br label %return
+ br label %return
return:
- ret i32 0
+ ret i32 0
}
diff --git a/llvm/test/Transforms/GVN/2007-07-26-PhiErasure.ll b/llvm/test/Transforms/GVN/2007-07-26-PhiErasure.ll
index 6abdc122cd45ff..9fcbd581271a83 100644
--- a/llvm/test/Transforms/GVN/2007-07-26-PhiErasure.ll
+++ b/llvm/test/Transforms/GVN/2007-07-26-PhiErasure.ll
@@ -1,44 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=gvn -S | FileCheck %s
- %struct..0anon = type { i32 }
- %struct.FILE = type { ptr, i32, i32, i16, i16, %struct.__sbuf, i32, ptr, ptr, ptr, ptr, ptr, %struct.__sbuf, ptr, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
- %struct.__sFILEX = type opaque
- %struct.__sbuf = type { ptr, i32 }
- %struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] }
+%struct..0anon = type { i32 }
+%struct.FILE = type { ptr, i32, i32, i16, i16, %struct.__sbuf, i32, ptr, ptr, ptr, ptr, ptr, %struct.__sbuf, ptr, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+%struct.__sFILEX = type opaque
+%struct.__sbuf = type { ptr, i32 }
+%struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] }
@n_spills = external global i32 ; <ptr> [#uses=2]
define i32 @reload(ptr %first, i32 %global, ptr %dumpfile) {
+; CHECK-LABEL: define i32 @reload(
+; CHECK-SAME: ptr [[FIRST:%.*]], i32 [[GLOBAL:%.*]], ptr [[DUMPFILE:%.*]]) {
+; CHECK-NEXT: [[COND_NEXT2835_1:.*:]]
+; CHECK-NEXT: br label %[[BB2928:.*]]
+; CHECK: [[BB2928]]:
+; CHECK-NEXT: br i1 false, label %[[BB2928_COND_NEXT2943_CRIT_EDGE:.*]], label %[[COND_TRUE2935:.*]]
+; CHECK: [[BB2928_COND_NEXT2943_CRIT_EDGE]]:
+; CHECK-NEXT: br label %[[COND_NEXT2943:.*]]
+; CHECK: [[COND_TRUE2935]]:
+; CHECK-NEXT: br label %[[COND_NEXT2943]]
+; CHECK: [[COND_NEXT2943]]:
+; CHECK-NEXT: br i1 false, label %[[BB2982_PREHEADER:.*]], label %[[BB2928]]
+; CHECK: [[BB2982_PREHEADER]]:
+; CHECK-NEXT: [[TMP298316:%.*]] = load i32, ptr @n_spills, align 4
+; CHECK-NEXT: ret i32 [[TMP298316]]
+;
cond_next2835.1: ; preds = %cond_next2861
- %tmp2922 = load i32, ptr @n_spills, align 4 ; <i32> [#uses=0]
- br label %bb2928
+ %tmp2922 = load i32, ptr @n_spills, align 4 ; <i32> [#uses=0]
+ br label %bb2928
bb2928: ; preds = %cond_next2835.1, %cond_next2943
- br i1 false, label %cond_next2943, label %cond_true2935
+ br i1 false, label %cond_next2943, label %cond_true2935
cond_true2935: ; preds = %bb2928
- br label %cond_next2943
+ br label %cond_next2943
cond_next2943: ; preds = %cond_true2935, %bb2928
- br i1 false, label %bb2982.preheader, label %bb2928
+ br i1 false, label %bb2982.preheader, label %bb2928
bb2982.preheader: ; preds = %cond_next2943
- %tmp298316 = load i32, ptr @n_spills, align 4 ; <i32> [#uses=0]
- ret i32 %tmp298316
-
+ %tmp298316 = load i32, ptr @n_spills, align 4 ; <i32> [#uses=0]
+ ret i32 %tmp298316
}
-; CHECK: define i32 @reload(ptr %first, i32 %global, ptr %dumpfile) {
-; CHECK-NEXT: cond_next2835.1:
-; CHECK-NEXT: br label %bb2928
-; CHECK: bb2928:
-; CHECK-NEXT: br i1 false, label %bb2928.cond_next2943_crit_edge, label %cond_true2935
-; CHECK: bb2928.cond_next2943_crit_edge:
-; CHECK-NEXT: br label %cond_next2943
-; CHECK: cond_true2935:
-; CHECK-NEXT: br label %cond_next2943
-; CHECK: cond_next2943:
-; CHECK-NEXT: br i1 false, label %bb2982.preheader, label %bb2928
-; CHECK: bb2982.preheader:
-; CHECK-NEXT: %tmp298316 = load i32, ptr @n_spills, align 4
-; CHECK-NEXT: ret i32 %tmp298316
-; CHECK-NEXT: }
diff --git a/llvm/test/Transforms/GVN/2007-07-31-NoDomInherit.ll b/llvm/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
index b56ccc128bd6bf..dd94f09a11be23 100644
--- a/llvm/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
+++ b/llvm/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
@@ -1,6 +1,7 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=gvn -S | FileCheck %s
- %struct.anon = type { ptr, i32, i32, [3 x i32], ptr, ptr, ptr }
+%struct.anon = type { ptr, i32, i32, [3 x i32], ptr, ptr, ptr }
@debug = external constant i32 ; <ptr> [#uses=0]
@counters = external constant i32 ; <ptr> [#uses=1]
@trialx = external global [17 x i32] ; <ptr> [#uses=1]
@@ -133,182 +134,294 @@ declare i32 @increment()
declare i32 @search()
define i32 @main(i32 %argc, ptr %argv) {
+; CHECK-LABEL: define i32 @main(
+; CHECK-SAME: i32 [[ARGC:%.*]], ptr [[ARGV:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[NUM_SOL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TOTAL:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
+; CHECK-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
+; CHECK-NEXT: store i32 0, ptr [[NUM_SOL]], align 4
+; CHECK-NEXT: store i32 1, ptr @numi, align 4
+; CHECK-NEXT: br label %[[BB91:.*]]
+; CHECK: [[BB:.*]]:
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 (ptr, ...) @printf(ptr @.str43, i32 [[TMP1:%.*]])
+; CHECK-NEXT: store i32 0, ptr [[I]], align 4
+; CHECK-NEXT: br label %[[BB13:.*]]
+; CHECK: [[BB4:.*]]:
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr [17 x i32], ptr @trialx, i32 0, i32 [[TMP11:%.*]]
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = call i32 @userfun(i32 [[TMP8]])
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [17 x i32], ptr @correct_result, i32 0, i32 [[TMP11]]
+; CHECK-NEXT: store i32 [[TMP9]], ptr [[TMP10]], align 4
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1
+; CHECK-NEXT: store i32 [[TMP12]], ptr [[I]], align 4
+; CHECK-NEXT: br label %[[BB13]]
+; CHECK: [[BB13]]:
+; CHECK-NEXT: [[TMP11]] = phi i32 [ [[TMP12]], %[[BB4]] ], [ 0, %[[BB]] ]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp sle i32 [[TMP11]], 16
+; CHECK-NEXT: [[TMP1516:%.*]] = zext i1 [[TMP15]] to i32
+; CHECK-NEXT: br i1 [[TMP15]], label %[[BB4]], label %[[BB17:.*]]
+; CHECK: [[BB17]]:
+; CHECK-NEXT: store i32 0, ptr [[I]], align 4
+; CHECK-NEXT: br label %[[BB49:.*]]
+; CHECK: [[BB18:.*]]:
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr [5 x { i32, [3 x i32] }], ptr @pgm, i32 0, i32 [[TMP47:%.*]]
+; CHECK-NEXT: store i32 0, ptr [[TMP20]], align 4
+; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr getelementptr inbounds (i8, ptr @isa, i64 16), align 4
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr { i32, [3 x i32] }, ptr [[TMP20]], i32 0, i32 1
+; CHECK-NEXT: store i32 [[TMP26]], ptr [[TMP28]], align 4
+; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr getelementptr inbounds (i8, ptr @isa, i64 20), align 4
+; CHECK-NEXT: [[TMP37:%.*]] = getelementptr [3 x i32], ptr [[TMP28]], i32 0, i32 1
+; CHECK-NEXT: store i32 [[TMP34]], ptr [[TMP37]], align 4
+; CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr getelementptr inbounds (i8, ptr @isa, i64 24), align 4
+; CHECK-NEXT: [[TMP45:%.*]] = getelementptr [3 x i32], ptr [[TMP28]], i32 0, i32 2
+; CHECK-NEXT: store i32 [[TMP42]], ptr [[TMP45]], align 4
+; CHECK-NEXT: call void @fix_operands(i32 [[TMP47]])
+; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], 1
+; CHECK-NEXT: store i32 [[TMP48]], ptr [[I]], align 4
+; CHECK-NEXT: br label %[[BB49]]
+; CHECK: [[BB49]]:
+; CHECK-NEXT: [[TMP47]] = phi i32 [ [[TMP48]], %[[BB18]] ], [ 0, %[[BB17]] ]
+; CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr @numi, align 4
+; CHECK-NEXT: [[TMP52:%.*]] = icmp slt i32 [[TMP47]], [[TMP50]]
+; CHECK-NEXT: [[TMP5253:%.*]] = zext i1 [[TMP52]] to i32
+; CHECK-NEXT: br i1 [[TMP52]], label %[[BB18]], label %[[BB55:.*]]
+; CHECK: [[BB55]]:
+; CHECK-NEXT: [[TMP56:%.*]] = call i32 @search()
+; CHECK-NEXT: store i32 [[TMP56]], ptr [[NUM_SOL]], align 4
+; CHECK-NEXT: [[TMP59:%.*]] = call i32 (ptr, ...) @printf(ptr @.str44, i32 [[TMP56]])
+; CHECK-NEXT: [[TMP60:%.*]] = load i32, ptr @counters, align 4
+; CHECK-NEXT: [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0
+; CHECK-NEXT: [[TMP6162:%.*]] = zext i1 [[TMP61]] to i32
+; CHECK-NEXT: br i1 [[TMP61]], label %[[COND_TRUE:.*]], label %[[COND_NEXT:.*]]
+; CHECK: [[COND_TRUE]]:
+; CHECK-NEXT: store i32 0, ptr [[TOTAL]], align 4
+; CHECK-NEXT: [[TMP65:%.*]] = call i32 (ptr, ...) @printf(ptr @.str45)
+; CHECK-NEXT: store i32 0, ptr [[I]], align 4
+; CHECK-NEXT: br label %[[BB79:.*]]
+; CHECK: [[BB66:.*]]:
+; CHECK-NEXT: [[TMP68:%.*]] = getelementptr [5 x i32], ptr @counter, i32 0, i32 [[TMP77:%.*]]
+; CHECK-NEXT: [[TMP69:%.*]] = load i32, ptr [[TMP68]], align 4
+; CHECK-NEXT: [[TMP71:%.*]] = call i32 (ptr, ...) @printf(ptr @.str46, i32 [[TMP69]])
+; CHECK-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP68]], align 4
+; CHECK-NEXT: [[TMP76:%.*]] = add i32 [[TMP74]], [[TMP75:%.*]]
+; CHECK-NEXT: store i32 [[TMP76]], ptr [[TOTAL]], align 4
+; CHECK-NEXT: [[TMP78:%.*]] = add i32 [[TMP77]], 1
+; CHECK-NEXT: store i32 [[TMP78]], ptr [[I]], align 4
+; CHECK-NEXT: br label %[[BB79]]
+; CHECK: [[BB79]]:
+; CHECK-NEXT: [[TMP75]] = phi i32 [ [[TMP76]], %[[BB66]] ], [ 0, %[[COND_TRUE]] ]
+; CHECK-NEXT: [[TMP77]] = phi i32 [ [[TMP78]], %[[BB66]] ], [ 0, %[[COND_TRUE]] ]
+; CHECK-NEXT: [[TMP80:%.*]] = load i32, ptr @numi, align 4
+; CHECK-NEXT: [[TMP82:%.*]] = icmp slt i32 [[TMP77]], [[TMP80]]
+; CHECK-NEXT: [[TMP8283:%.*]] = zext i1 [[TMP82]] to i32
+; CHECK-NEXT: br i1 [[TMP82]], label %[[BB66]], label %[[BB85:.*]]
+; CHECK: [[BB85]]:
+; CHECK-NEXT: [[TMP88:%.*]] = call i32 (ptr, ...) @printf(ptr @.str47, i32 [[TMP75]])
+; CHECK-NEXT: br label %[[COND_NEXT]]
+; CHECK: [[COND_NEXT]]:
+; CHECK-NEXT: [[TMP89:%.*]] = load i32, ptr @numi, align 4
+; CHECK-NEXT: [[TMP90:%.*]] = add i32 [[TMP89]], 1
+; CHECK-NEXT: store i32 [[TMP90]], ptr @numi, align 4
+; CHECK-NEXT: br label %[[BB91]]
+; CHECK: [[BB91]]:
+; CHECK-NEXT: [[TMP98:%.*]] = phi i32 [ [[TMP56]], %[[COND_NEXT]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP1]] = phi i32 [ [[TMP90]], %[[COND_NEXT]] ], [ 1, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP93:%.*]] = icmp sgt i32 [[TMP1]], 5
+; CHECK-NEXT: [[TMP9394:%.*]] = zext i1 [[TMP93]] to i32
+; CHECK-NEXT: br i1 [[TMP93]], label %[[COND_TRUE96:.*]], label %[[COND_NEXT97:.*]]
+; CHECK: [[COND_TRUE96]]:
+; CHECK-NEXT: br label %[[BB102:.*]]
+; CHECK: [[COND_NEXT97]]:
+; CHECK-NEXT: [[TMP99:%.*]] = icmp eq i32 [[TMP98]], 0
+; CHECK-NEXT: [[TMP99100:%.*]] = zext i1 [[TMP99]] to i32
+; CHECK-NEXT: br i1 [[TMP99]], label %[[BB]], label %[[BB102]]
+; CHECK: [[BB102]]:
+; CHECK-NEXT: store i32 0, ptr [[TMP]], align 4
+; CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4
+; CHECK-NEXT: ret i32 0
+;
entry:
- %argc_addr = alloca i32 ; <ptr> [#uses=1]
- %argv_addr = alloca ptr ; <ptr> [#uses=1]
- %retval = alloca i32, align 4 ; <ptr> [#uses=2]
- %tmp = alloca i32, align 4 ; <ptr> [#uses=2]
- %i = alloca i32, align 4 ; <ptr> [#uses=21]
- %num_sol = alloca i32, align 4 ; <ptr> [#uses=4]
- %total = alloca i32, align 4 ; <ptr> [#uses=4]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store i32 %argc, ptr %argc_addr
- store ptr %argv, ptr %argv_addr
- store i32 0, ptr %num_sol
- store i32 1, ptr @numi
- br label %bb91
+ %argc_addr = alloca i32 ; <ptr> [#uses=1]
+ %argv_addr = alloca ptr ; <ptr> [#uses=1]
+ %retval = alloca i32, align 4 ; <ptr> [#uses=2]
+ %tmp = alloca i32, align 4 ; <ptr> [#uses=2]
+ %i = alloca i32, align 4 ; <ptr> [#uses=21]
+ %num_sol = alloca i32, align 4 ; <ptr> [#uses=4]
+ %total = alloca i32, align 4 ; <ptr> [#uses=4]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store i32 %argc, ptr %argc_addr
+ store ptr %argv, ptr %argv_addr
+ store i32 0, ptr %num_sol
+ store i32 1, ptr @numi
+ br label %bb91
bb: ; preds = %cond_next97
- %tmp1 = load i32, ptr @numi ; <i32> [#uses=1]
- %tmp2 = getelementptr [44 x i8], ptr @.str43, i32 0, i32 0 ; <ptr> [#uses=1]
- %tmp3 = call i32 (ptr, ...) @printf( ptr %tmp2, i32 %tmp1 ) ; <i32> [#uses=0]
- store i32 0, ptr %i
- br label %bb13
+ %tmp1 = load i32, ptr @numi ; <i32> [#uses=1]
+ %tmp2 = getelementptr [44 x i8], ptr @.str43, i32 0, i32 0 ; <ptr> [#uses=1]
+ %tmp3 = call i32 (ptr, ...) @printf( ptr %tmp2, i32 %tmp1 ) ; <i32> [#uses=0]
+ store i32 0, ptr %i
+ br label %bb13
bb4: ; preds = %bb13
- %tmp5 = load i32, ptr %i ; <i32> [#uses=1]
- %tmp6 = load i32, ptr %i ; <i32> [#uses=1]
- %tmp7 = getelementptr [17 x i32], ptr @trialx, i32 0, i32 %tmp6 ; <ptr> [#uses=1]
- %tmp8 = load i32, ptr %tmp7 ; <i32> [#uses=1]
- %tmp9 = call i32 @userfun( i32 %tmp8 ) ; <i32> [#uses=1]
- %tmp10 = getelementptr [17 x i32], ptr @correct_result, i32 0, i32 %tmp5 ; <ptr> [#uses=1]
- store i32 %tmp9, ptr %tmp10
- %tmp11 = load i32, ptr %i ; <i32> [#uses=1]
- %tmp12 = add i32 %tmp11, 1 ; <i32> [#uses=1]
- store i32 %tmp12, ptr %i
- br label %bb13
+ %tmp5 = load i32, ptr ...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
a02cbcf
to
6b9f044
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree that doing the transform with nsz wouldn't be valid, given its peculiar semantics.
However, !isZeroValue
does not imply that the value isn't zero. It could either be a vector that has zero elements (but is not entirely zero) or a constant expression that evaluates to zero.
Additionally, when it comes to replacements based on vector comparisons you have to be quite careful, because they are only legal if no cross-lane operations are involved.
If we wanted to do the select fold, we'd do this in InstCombine rather than GVN. InstCombine already does this for integer comparisons, but not float comparisons.
In impliesEquivalenceIfTrue and impliesEquivalenceIfFalse, note that the optimization is invalid for the no-signed-zeros case: strip the bad FIXME. Also note that, a ConstantVector would be handled by select(fcmp()) pattern in InstCombine, and GVN is not the right place for the optimization: strip the bad TODO. Alive2 proof: https://alive2.llvm.org/ce/z/vEaK8M
Not sure what you mean, because InstCombine doesn't seem to work here: https://godbolt.org/z/5b36ccK9j. Proof: https://alive2.llvm.org/ce/z/Lr6jqc. If we change the |
See
|
6b9f044
to
791468d
Compare
I hope I've addressed the points correctly now. Currently working on a patch for InstCombine. |
In impliesEquivalenceIfTrue and impliesEquivalenceIfFalse, note that the optimization is invalid for the no-signed-zeros case: strip the bad FIXME. Also note that, a ConstantVector would be handled by select(fcmp()) pattern in InstCombine, and GVN is not the right place for the optimization: strip the bad TODO.
Alive2 proof: https://alive2.llvm.org/ce/z/vEaK8M