-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[InstSimplify] Add constant folding support for ucmp/scmp intrinsics
#93730
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms
Author: None (Poseydon42)
Changes: This PR adds support for folding calls to ucmp/scmp intrinsics with constant arguments.
Full diff: https://github.com/llvm/llvm-project/pull/93730.diff
2 Files Affected:
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 046a769453808..c11d3032020f4 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1506,6 +1506,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::smin:
case Intrinsic::umax:
case Intrinsic::umin:
+ case Intrinsic::scmp:
+ case Intrinsic::ucmp:
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
@@ -2764,6 +2766,32 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
? *C0
: *C1);
+ case Intrinsic::scmp:
+ case Intrinsic::ucmp:
+ if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
+ return PoisonValue::get(Ty);
+
+ if (!C0 || !C1)
+ return UndefValue::get(Ty);
+
+ if (ICmpInst::compare(*C0, *C1,
+ IntrinsicID == Intrinsic::scmp
+ ? ICmpInst::ICMP_SLT
+ : ICmpInst::ICMP_ULT)) {
+ return ConstantInt::get(Ty, -1, true);
+ }
+ if (ICmpInst::compare(*C0, *C1, ICmpInst::ICMP_EQ)) {
+ return ConstantInt::get(Ty, 0);
+ }
+ if (ICmpInst::compare(*C0, *C1,
+ IntrinsicID == Intrinsic::scmp
+ ? ICmpInst::ICMP_SGT
+ : ICmpInst::ICMP_UGT)) {
+ return ConstantInt::get(Ty, 1);
+ }
+
+ assert(false && "Integer values must compare as equal, or one must be less than the other");
+
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
// X - undef -> { 0, false }
diff --git a/llvm/test/Transforms/InstSimplify/uscmp.ll b/llvm/test/Transforms/InstSimplify/uscmp.ll
new file mode 100644
index 0000000000000..c4e9f7bd54fd7
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/uscmp.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
+define i8 @scmp_lt() {
+; CHECK-LABEL: define i8 @scmp_lt() {
+; CHECK-NEXT: ret i8 -1
+;
+ %1 = call i8 @llvm.scmp(i32 -7, i32 3)
+ ret i8 %1
+}
+
+define i2 @ucmp_eq() {
+; CHECK-LABEL: define i2 @ucmp_eq() {
+; CHECK-NEXT: ret i2 0
+;
+ %1 = call i2 @llvm.ucmp(i32 12, i32 12)
+ ret i2 %1
+}
+
+define i100 @ucmp_gt() {
+; CHECK-LABEL: define i100 @ucmp_gt() {
+; CHECK-NEXT: ret i100 1
+;
+ %1 = call i100 @llvm.ucmp(i32 7, i32 3)
+ ret i100 %1
+}
+
+define <4 x i8> @ucmp_lt_splat() {
+; CHECK-LABEL: define <4 x i8> @ucmp_lt_splat() {
+; CHECK-NEXT: ret <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>
+;
+ %1 = call <4 x i8> @llvm.ucmp(<4 x i32> splat(i32 1), <4 x i32> splat(i32 3))
+ ret <4 x i8> %1
+}
+
+define <4 x i8> @scmp_nonsplat() {
+; CHECK-LABEL: define <4 x i8> @scmp_nonsplat() {
+; CHECK-NEXT: ret <4 x i8> <i8 1, i8 0, i8 1, i8 -1>
+;
+ %1 = call <4 x i8> @llvm.scmp(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 -1, i32 1, i32 -2, i32 4>)
+ ret <4 x i8> %1
+}
|
@llvm/pr-subscribers-llvm-analysis
Author: None (Poseydon42)
Changes: This PR adds support for folding calls to ucmp/scmp intrinsics with constant arguments.
Full diff: https://github.com/llvm/llvm-project/pull/93730.diff
2 Files Affected:
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 046a769453808..c11d3032020f4 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1506,6 +1506,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::smin:
case Intrinsic::umax:
case Intrinsic::umin:
+ case Intrinsic::scmp:
+ case Intrinsic::ucmp:
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
@@ -2764,6 +2766,32 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
? *C0
: *C1);
+ case Intrinsic::scmp:
+ case Intrinsic::ucmp:
+ if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
+ return PoisonValue::get(Ty);
+
+ if (!C0 || !C1)
+ return UndefValue::get(Ty);
+
+ if (ICmpInst::compare(*C0, *C1,
+ IntrinsicID == Intrinsic::scmp
+ ? ICmpInst::ICMP_SLT
+ : ICmpInst::ICMP_ULT)) {
+ return ConstantInt::get(Ty, -1, true);
+ }
+ if (ICmpInst::compare(*C0, *C1, ICmpInst::ICMP_EQ)) {
+ return ConstantInt::get(Ty, 0);
+ }
+ if (ICmpInst::compare(*C0, *C1,
+ IntrinsicID == Intrinsic::scmp
+ ? ICmpInst::ICMP_SGT
+ : ICmpInst::ICMP_UGT)) {
+ return ConstantInt::get(Ty, 1);
+ }
+
+ assert(false && "Integer values must compare as equal, or one must be less than the other");
+
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
// X - undef -> { 0, false }
diff --git a/llvm/test/Transforms/InstSimplify/uscmp.ll b/llvm/test/Transforms/InstSimplify/uscmp.ll
new file mode 100644
index 0000000000000..c4e9f7bd54fd7
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/uscmp.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
+define i8 @scmp_lt() {
+; CHECK-LABEL: define i8 @scmp_lt() {
+; CHECK-NEXT: ret i8 -1
+;
+ %1 = call i8 @llvm.scmp(i32 -7, i32 3)
+ ret i8 %1
+}
+
+define i2 @ucmp_eq() {
+; CHECK-LABEL: define i2 @ucmp_eq() {
+; CHECK-NEXT: ret i2 0
+;
+ %1 = call i2 @llvm.ucmp(i32 12, i32 12)
+ ret i2 %1
+}
+
+define i100 @ucmp_gt() {
+; CHECK-LABEL: define i100 @ucmp_gt() {
+; CHECK-NEXT: ret i100 1
+;
+ %1 = call i100 @llvm.ucmp(i32 7, i32 3)
+ ret i100 %1
+}
+
+define <4 x i8> @ucmp_lt_splat() {
+; CHECK-LABEL: define <4 x i8> @ucmp_lt_splat() {
+; CHECK-NEXT: ret <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>
+;
+ %1 = call <4 x i8> @llvm.ucmp(<4 x i32> splat(i32 1), <4 x i32> splat(i32 3))
+ ret <4 x i8> %1
+}
+
+define <4 x i8> @scmp_nonsplat() {
+; CHECK-LABEL: define <4 x i8> @scmp_nonsplat() {
+; CHECK-NEXT: ret <4 x i8> <i8 1, i8 0, i8 1, i8 -1>
+;
+ %1 = call <4 x i8> @llvm.scmp(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 -1, i32 1, i32 -2, i32 4>)
+ ret <4 x i8> %1
+}
|
b7ef565
to
28f7512
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
28f7512
to
d926bc1
Compare
d926bc1
to
0da0092
Compare
This PR adds support for folding calls to ucmp/scmp intrinsics with constant arguments.