Skip to content

Commit cc2fafa

Browse files
authored
[InstSimplify] Add constant folding support for ucmp/scmp intrinsics (#93730)
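This PR adds support for constant-folding calls to the `ucmp`/`scmp` intrinsics when their arguments are constants (scalar or vector): a poison operand folds to poison, an undef operand folds to 0, and otherwise the call folds to -1, 0, or 1 according to the unsigned (`ucmp`) or signed (`scmp`) comparison of the two operands.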
1 parent a8e03ae commit cc2fafa

File tree

2 files changed

+115
-0
lines changed

2 files changed

+115
-0
lines changed

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1504,6 +1504,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
15041504
case Intrinsic::smin:
15051505
case Intrinsic::umax:
15061506
case Intrinsic::umin:
1507+
case Intrinsic::scmp:
1508+
case Intrinsic::ucmp:
15071509
case Intrinsic::sadd_with_overflow:
15081510
case Intrinsic::uadd_with_overflow:
15091511
case Intrinsic::ssub_with_overflow:
@@ -2773,6 +2775,21 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
27732775
? *C0
27742776
: *C1);
27752777

2778+
case Intrinsic::scmp:
2779+
case Intrinsic::ucmp:
2780+
if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
2781+
return PoisonValue::get(Ty);
2782+
2783+
if (!C0 || !C1)
2784+
return ConstantInt::get(Ty, 0);
2785+
2786+
int Res;
2787+
if (IntrinsicID == Intrinsic::scmp)
2788+
Res = C0->sgt(*C1) ? 1 : C0->slt(*C1) ? -1 : 0;
2789+
else
2790+
Res = C0->ugt(*C1) ? 1 : C0->ult(*C1) ? -1 : 0;
2791+
return ConstantInt::get(Ty, Res, /*IsSigned=*/true);
2792+
27762793
case Intrinsic::usub_with_overflow:
27772794
case Intrinsic::ssub_with_overflow:
27782795
// X - undef -> { 0, false }
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
3+
4+
define i8 @scmp_lt() {
5+
; CHECK-LABEL: define i8 @scmp_lt() {
6+
; CHECK-NEXT: ret i8 -1
7+
;
8+
%1 = call i8 @llvm.scmp(i32 -7, i32 3)
9+
ret i8 %1
10+
}
11+
12+
define i8 @scmp_eq() {
13+
; CHECK-LABEL: define i8 @scmp_eq() {
14+
; CHECK-NEXT: ret i8 0
15+
;
16+
%1 = call i8 @llvm.scmp(i32 2, i32 2)
17+
ret i8 %1
18+
}
19+
20+
define i8 @scmp_gt() {
21+
; CHECK-LABEL: define i8 @scmp_gt() {
22+
; CHECK-NEXT: ret i8 1
23+
;
24+
%1 = call i8 @llvm.scmp(i32 2, i32 -7)
25+
ret i8 %1
26+
}
27+
28+
define i8 @ucmp_lt() {
29+
; CHECK-LABEL: define i8 @ucmp_lt() {
30+
; CHECK-NEXT: ret i8 -1
31+
;
32+
%1 = call i8 @llvm.ucmp(i32 7, i32 12)
33+
ret i8 %1
34+
}
35+
36+
define i2 @ucmp_eq() {
37+
; CHECK-LABEL: define i2 @ucmp_eq() {
38+
; CHECK-NEXT: ret i2 0
39+
;
40+
%1 = call i2 @llvm.ucmp(i32 12, i32 12)
41+
ret i2 %1
42+
}
43+
44+
define i100 @ucmp_gt() {
45+
; CHECK-LABEL: define i100 @ucmp_gt() {
46+
; CHECK-NEXT: ret i100 1
47+
;
48+
%1 = call i100 @llvm.ucmp(i32 7, i32 3)
49+
ret i100 %1
50+
}
51+
52+
define i8 @ucmp_poison() {
53+
; CHECK-LABEL: define i8 @ucmp_poison() {
54+
; CHECK-NEXT: ret i8 poison
55+
;
56+
%1 = call i8 @llvm.ucmp(i32 poison, i32 5)
57+
ret i8 %1
58+
}
59+
60+
define i8 @scmp_poison() {
61+
; CHECK-LABEL: define i8 @scmp_poison() {
62+
; CHECK-NEXT: ret i8 poison
63+
;
64+
%1 = call i8 @llvm.scmp(i32 0, i32 poison)
65+
ret i8 %1
66+
}
67+
68+
define i8 @scmp_undef() {
69+
; CHECK-LABEL: define i8 @scmp_undef() {
70+
; CHECK-NEXT: ret i8 0
71+
;
72+
%1 = call i8 @llvm.scmp(i32 undef, i32 -12)
73+
ret i8 %1
74+
}
75+
76+
define i8 @ucmp_undef() {
77+
; CHECK-LABEL: define i8 @ucmp_undef() {
78+
; CHECK-NEXT: ret i8 0
79+
;
80+
%1 = call i8 @llvm.ucmp(i32 2, i32 undef)
81+
ret i8 %1
82+
}
83+
84+
define <4 x i8> @ucmp_lt_splat() {
85+
; CHECK-LABEL: define <4 x i8> @ucmp_lt_splat() {
86+
; CHECK-NEXT: ret <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>
87+
;
88+
%1 = call <4 x i8> @llvm.ucmp(<4 x i32> splat(i32 1), <4 x i32> splat(i32 3))
89+
ret <4 x i8> %1
90+
}
91+
92+
define <4 x i8> @scmp_nonsplat() {
93+
; CHECK-LABEL: define <4 x i8> @scmp_nonsplat() {
94+
; CHECK-NEXT: ret <4 x i8> <i8 1, i8 0, i8 1, i8 -1>
95+
;
96+
%1 = call <4 x i8> @llvm.scmp(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 -1, i32 1, i32 -2, i32 4>)
97+
ret <4 x i8> %1
98+
}

0 commit comments

Comments
 (0)