Skip to content

Commit 2228b35

Browse files
committed
Revert "Revert "[InstCombine] Add oneuse checks to shr + cmp constant folds.""
This reverts commit d37b283. There was a simple logic bug in the else path. Tests codegen is different with the fix.
1 parent 46cb7e4 commit 2228b35

File tree

5 files changed

+11
-16
lines changed

5 files changed

+11
-16
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2451,7 +2451,7 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
24512451
// constant-value-based preconditions in the folds below, then we could assert
24522452
// those conditions rather than checking them. This is difficult because of
24532453
// undef/poison (PR34838).
2454-
if (IsAShr) {
2454+
if (IsAShr && Shr->hasOneUse()) {
24552455
if (IsExact || Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT) {
24562456
// When ShAmtC can be shifted losslessly:
24572457
// icmp PRED (ashr exact X, ShAmtC), C --> icmp PRED X, (C << ShAmtC)
@@ -2491,7 +2491,7 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
24912491
ConstantInt::getAllOnesValue(ShrTy));
24922492
}
24932493
}
2494-
} else {
2494+
} else if (!IsAShr) {
24952495
if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) {
24962496
// icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC)
24972497
// icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC)

llvm/test/Transforms/InstCombine/ashr-icmp-minmax-idiom-break.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
22
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
33

4-
; This test is pre-committed to show sub-optimal codegen due to
5-
; min/max idiom breakage. On AArch64, these constants are also expensive to materialize,
4+
; Check we don't have sub-optimal codegen due to min/max idiom breakage.
5+
; On AArch64, these constants are also expensive to materialize,
66
; and therefore generate poor code vs maintaining the min/max idiom.
77

88
define i64 @dont_break_minmax_i64(i64 %conv, i64 %conv2) {
99
; CHECK-LABEL: define i64 @dont_break_minmax_i64
1010
; CHECK-SAME: (i64 [[CONV:%.*]], i64 [[CONV2:%.*]]) {
1111
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV2]]
1212
; CHECK-NEXT: [[SHR:%.*]] = ashr i64 [[MUL]], 4
13-
; CHECK-NEXT: [[CMP4_I:%.*]] = icmp slt i64 [[MUL]], 5579712
14-
; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[CMP4_I]], i64 [[SHR]], i64 348731
13+
; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = call i64 @llvm.smin.i64(i64 [[SHR]], i64 348731)
1514
; CHECK-NEXT: ret i64 [[SPEC_SELECT_I]]
1615
;
1716
%mul = mul nsw i64 %conv, %conv2

llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -900,7 +900,7 @@ define i1 @ashrsgt_01_00(i4 %x) {
900900
define i1 @ashrsgt_01_00_multiuse(i4 %x, ptr %p) {
901901
; CHECK-LABEL: @ashrsgt_01_00_multiuse(
902902
; CHECK-NEXT: [[S:%.*]] = ashr i4 [[X:%.*]], 1
903-
; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X]], 1
903+
; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], 0
904904
; CHECK-NEXT: store i4 [[S]], ptr [[P:%.*]], align 1
905905
; CHECK-NEXT: ret i1 [[C]]
906906
;
@@ -2442,7 +2442,7 @@ define i1 @ashr_sle_noexact(i8 %x) {
24422442
define i1 @ashr_00_00_ashr_extra_use(i8 %x, ptr %ptr) {
24432443
; CHECK-LABEL: @ashr_00_00_ashr_extra_use(
24442444
; CHECK-NEXT: [[S:%.*]] = ashr exact i8 [[X:%.*]], 3
2445-
; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X]], 88
2445+
; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[S]], 11
24462446
; CHECK-NEXT: store i8 [[S]], ptr [[PTR:%.*]], align 1
24472447
; CHECK-NEXT: ret i1 [[C]]
24482448
;

llvm/test/Transforms/InstCombine/icmp-shr.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -780,7 +780,7 @@ define i1 @ashr_ult_2(i4 %x) {
780780
define i1 @ashr_ult_2_multiuse(i4 %x, ptr %p) {
781781
; CHECK-LABEL: @ashr_ult_2_multiuse(
782782
; CHECK-NEXT: [[S:%.*]] = ashr i4 [[X:%.*]], 1
783-
; CHECK-NEXT: [[R:%.*]] = icmp ult i4 [[X]], 4
783+
; CHECK-NEXT: [[R:%.*]] = icmp ult i4 [[S]], 2
784784
; CHECK-NEXT: store i4 [[S]], ptr [[P:%.*]], align 1
785785
; CHECK-NEXT: ret i1 [[R]]
786786
;

llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@ define i32 @testa(i32 %mul) {
55
; CHECK-LABEL: define i32 @testa(
66
; CHECK-SAME: i32 [[MUL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
77
; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[MUL]], 15
8-
; CHECK-NEXT: [[CMP4_I:%.*]] = icmp slt i32 [[MUL]], 1073741824
9-
; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[CMP4_I]], i32 [[SHR]], i32 32767
8+
; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = tail call i32 @llvm.smin.i32(i32 [[SHR]], i32 32767)
109
; CHECK-NEXT: ret i32 [[SPEC_SELECT_I]]
1110
;
1211
%shr = ashr i32 %mul, 15
@@ -20,11 +19,8 @@ define i32 @testb(i32 %mul) {
2019
; CHECK-LABEL: define i32 @testb(
2120
; CHECK-SAME: i32 [[MUL:%.*]]) local_unnamed_addr #[[ATTR0]] {
2221
; CHECK-NEXT: [[SHR102:%.*]] = ashr i32 [[MUL]], 7
23-
; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[MUL]], 16383
24-
; CHECK-NEXT: [[RETVAL_0_I:%.*]] = select i1 [[CMP4_I]], i32 127, i32 -128
25-
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[MUL]], 16384
26-
; CHECK-NEXT: [[CLEANUP_DEST_SLOT_0_I:%.*]] = icmp ult i32 [[TMP1]], 32768
27-
; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[CLEANUP_DEST_SLOT_0_I]], i32 [[SHR102]], i32 [[RETVAL_0_I]]
22+
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[SHR102]], i32 -128)
23+
; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = tail call i32 @llvm.smin.i32(i32 [[TMP1]], i32 127)
2824
; CHECK-NEXT: ret i32 [[SPEC_SELECT_I]]
2925
;
3026
%shr102 = ashr i32 %mul, 7

0 commit comments

Comments
 (0)