Skip to content

Commit de2b6cb

Browse files
authored
[InstCombine] Fold icmp over select of cmp more aggressively (#105536)
When folding an icmp into a select, treat an icmp of a constant with a one-use ucmp/scmp intrinsic as a simplification. These comparisons will reduce down to an icmp. This addresses a regression seen in Rust and also in llvm-opt-benchmark.
1 parent 67d3ef7 commit de2b6cb

File tree

2 files changed

+50
-14
lines changed

2 files changed

+50
-14
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4209,6 +4209,14 @@ Instruction *InstCombinerImpl::foldSelectICmp(ICmpInst::Predicate Pred,
42094209
if (Op2)
42104210
CI = dyn_cast<ConstantInt>(Op2);
42114211

4212+
auto Simplifies = [&](Value *Op, unsigned Idx) {
4213+
// A comparison of ucmp/scmp with a constant will fold into an icmp.
4214+
const APInt *Dummy;
4215+
return Op ||
4216+
(isa<CmpIntrinsic>(SI->getOperand(Idx)) &&
4217+
SI->getOperand(Idx)->hasOneUse() && match(RHS, m_APInt(Dummy)));
4218+
};
4219+
42124220
// We only want to perform this transformation if it will not lead to
42134221
// additional code. This is true if either both sides of the select
42144222
// fold to a constant (in which case the icmp is replaced with a select
@@ -4219,7 +4227,7 @@ Instruction *InstCombinerImpl::foldSelectICmp(ICmpInst::Predicate Pred,
42194227
bool Transform = false;
42204228
if (Op1 && Op2)
42214229
Transform = true;
4222-
else if (Op1 || Op2) {
4230+
else if (Simplifies(Op1, 1) || Simplifies(Op2, 2)) {
42234231
// Local case
42244232
if (SI->hasOneUse())
42254233
Transform = true;

llvm/test/Transforms/InstCombine/select-cmp.ll

Lines changed: 41 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -482,10 +482,9 @@ define i1 @test_select_inverse_nonconst4(i64 %x, i64 %y, i64 %z, i1 %cond) {
482482

483483
define i1 @sel_icmp_two_cmp(i1 %c, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
484484
; CHECK-LABEL: @sel_icmp_two_cmp(
485-
; CHECK-NEXT: [[V1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[A1:%.*]], i32 [[A2:%.*]])
486-
; CHECK-NEXT: [[V2:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[A3:%.*]], i32 [[A4:%.*]])
487-
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i8 [[V1]], i8 [[V2]]
488-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SEL]], 1
485+
; CHECK-NEXT: [[CMP1:%.*]] = icmp ule i32 [[A1:%.*]], [[A2:%.*]]
486+
; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[A3:%.*]], [[A4:%.*]]
487+
; CHECK-NEXT: [[CMP:%.*]] = select i1 [[C:%.*]], i1 [[CMP1]], i1 [[CMP2]]
489488
; CHECK-NEXT: ret i1 [[CMP]]
490489
;
491490
%v1 = call i8 @llvm.ucmp(i32 %a1, i32 %a2)
@@ -498,10 +497,10 @@ define i1 @sel_icmp_two_cmp(i1 %c, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
498497
define i1 @sel_icmp_two_cmp_extra_use1(i1 %c, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
499498
; CHECK-LABEL: @sel_icmp_two_cmp_extra_use1(
500499
; CHECK-NEXT: [[V1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[A1:%.*]], i32 [[A2:%.*]])
501-
; CHECK-NEXT: [[V2:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[A3:%.*]], i32 [[A4:%.*]])
502500
; CHECK-NEXT: call void @use.i8(i8 [[V1]])
503-
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i8 [[V1]], i8 [[V2]]
504-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SEL]], 1
501+
; CHECK-NEXT: [[CMP1:%.*]] = icmp ule i32 [[A1]], [[A2]]
502+
; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[A3:%.*]], [[A4:%.*]]
503+
; CHECK-NEXT: [[CMP:%.*]] = select i1 [[C:%.*]], i1 [[CMP1]], i1 [[CMP2]]
505504
; CHECK-NEXT: ret i1 [[CMP]]
506505
;
507506
%v1 = call i8 @llvm.ucmp(i32 %a1, i32 %a2)
@@ -544,6 +543,35 @@ define i1 @sel_icmp_two_cmp_not_const(i1 %c, i32 %a1, i32 %a2, i32 %a3, i32 %a4,
544543
ret i1 %cmp
545544
}
546545

546+
define <2 x i1> @sel_icmp_two_cmp_vec(i1 %c, <2 x i32> %a1, <2 x i32> %a2, <2 x i32> %a3, <2 x i32> %a4) {
547+
; CHECK-LABEL: @sel_icmp_two_cmp_vec(
548+
; CHECK-NEXT: [[CMP1:%.*]] = icmp ule <2 x i32> [[A1:%.*]], [[A2:%.*]]
549+
; CHECK-NEXT: [[CMP2:%.*]] = icmp sle <2 x i32> [[A3:%.*]], [[A4:%.*]]
550+
; CHECK-NEXT: [[CMP:%.*]] = select i1 [[C:%.*]], <2 x i1> [[CMP1]], <2 x i1> [[CMP2]]
551+
; CHECK-NEXT: ret <2 x i1> [[CMP]]
552+
;
553+
%v1 = call <2 x i8> @llvm.ucmp(<2 x i32> %a1, <2 x i32> %a2)
554+
%v2 = call <2 x i8> @llvm.scmp(<2 x i32> %a3, <2 x i32> %a4)
555+
%sel = select i1 %c, <2 x i8> %v1, <2 x i8> %v2
556+
%cmp = icmp sle <2 x i8> %sel, zeroinitializer
557+
ret <2 x i1> %cmp
558+
}
559+
560+
define <2 x i1> @sel_icmp_two_cmp_vec_nonsplat(i1 %c, <2 x i32> %a1, <2 x i32> %a2, <2 x i32> %a3, <2 x i32> %a4) {
561+
; CHECK-LABEL: @sel_icmp_two_cmp_vec_nonsplat(
562+
; CHECK-NEXT: [[V1:%.*]] = call <2 x i8> @llvm.ucmp.v2i8.v2i32(<2 x i32> [[A1:%.*]], <2 x i32> [[A2:%.*]])
563+
; CHECK-NEXT: [[V2:%.*]] = call <2 x i8> @llvm.scmp.v2i8.v2i32(<2 x i32> [[A3:%.*]], <2 x i32> [[A4:%.*]])
564+
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], <2 x i8> [[V1]], <2 x i8> [[V2]]
565+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[SEL]], <i8 1, i8 2>
566+
; CHECK-NEXT: ret <2 x i1> [[CMP]]
567+
;
568+
%v1 = call <2 x i8> @llvm.ucmp(<2 x i32> %a1, <2 x i32> %a2)
569+
%v2 = call <2 x i8> @llvm.scmp(<2 x i32> %a3, <2 x i32> %a4)
570+
%sel = select i1 %c, <2 x i8> %v1, <2 x i8> %v2
571+
%cmp = icmp sle <2 x i8> %sel, <i8 0, i8 1>
572+
ret <2 x i1> %cmp
573+
}
574+
547575
define i1 @sel_icmp_cmp_and_simplify(i1 %c, i32 %a1, i32 %a2) {
548576
; CHECK-LABEL: @sel_icmp_cmp_and_simplify(
549577
; CHECK-NEXT: [[CMP1:%.*]] = icmp ule i32 [[A1:%.*]], [[A2:%.*]]
@@ -559,9 +587,9 @@ define i1 @sel_icmp_cmp_and_simplify(i1 %c, i32 %a1, i32 %a2) {
559587

560588
define i1 @sel_icmp_cmp_and_no_simplify(i1 %c, i32 %a1, i32 %a2, i8 %b) {
561589
; CHECK-LABEL: @sel_icmp_cmp_and_no_simplify(
562-
; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[A1:%.*]], i32 [[A2:%.*]])
563-
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i8 [[V]], i8 [[B:%.*]]
564-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SEL]], 1
590+
; CHECK-NEXT: [[CMP1:%.*]] = icmp ule i32 [[A1:%.*]], [[A2:%.*]]
591+
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i8 [[B:%.*]], 1
592+
; CHECK-NEXT: [[CMP:%.*]] = select i1 [[C:%.*]], i1 [[CMP1]], i1 [[CMP2]]
565593
; CHECK-NEXT: ret i1 [[CMP]]
566594
;
567595
%v = call i8 @llvm.ucmp(i32 %a1, i32 %a2)
@@ -572,9 +600,9 @@ define i1 @sel_icmp_cmp_and_no_simplify(i1 %c, i32 %a1, i32 %a2, i8 %b) {
572600

573601
define i1 @sel_icmp_cmp_and_no_simplify_comm(i1 %c, i32 %a1, i32 %a2, i8 %b) {
574602
; CHECK-LABEL: @sel_icmp_cmp_and_no_simplify_comm(
575-
; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[A1:%.*]], i32 [[A2:%.*]])
576-
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i8 [[B:%.*]], i8 [[V]]
577-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SEL]], 1
603+
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i8 [[B:%.*]], 1
604+
; CHECK-NEXT: [[CMP2:%.*]] = icmp ule i32 [[A1:%.*]], [[A2:%.*]]
605+
; CHECK-NEXT: [[CMP:%.*]] = select i1 [[C:%.*]], i1 [[CMP1]], i1 [[CMP2]]
578606
; CHECK-NEXT: ret i1 [[CMP]]
579607
;
580608
%v = call i8 @llvm.ucmp(i32 %a1, i32 %a2)

0 commit comments

Comments
 (0)