-
Notifications
You must be signed in to change notification settings - Fork 14.3k
ValueTracking: handle more ops in isNotCrossLaneOperation #112183
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Reuse llvm::isTriviallyVectorizable in llvm::isNotCrossLaneOperation, in order to get it to handle more intrinsics. Alive2 proofs for changed tests: https://alive2.llvm.org/ce/z/XSV_GT
@llvm/pr-subscribers-llvm-analysis Author: Ramkumar Ramachandra (artagnon) Changes: Reuse llvm::isTriviallyVectorizable in llvm::isNotCrossLaneOperation, in order to get it to handle more intrinsics. Alive2 proofs for changed tests: https://alive2.llvm.org/ce/z/XSV_GT Full diff: https://github.com/llvm/llvm-project/pull/112183.diff 3 Files Affected:
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 4e76f35266534b..f83347e7cd2bba 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -6948,25 +6948,8 @@ bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) {
}
bool llvm::isNotCrossLaneOperation(const Instruction *I) {
- if (auto *II = dyn_cast<IntrinsicInst>(I)) {
- switch (II->getIntrinsicID()) {
- // TODO: expand this list.
- case Intrinsic::ctlz:
- case Intrinsic::cttz:
- case Intrinsic::ctpop:
- case Intrinsic::umin:
- case Intrinsic::umax:
- case Intrinsic::smin:
- case Intrinsic::smax:
- case Intrinsic::usub_sat:
- case Intrinsic::uadd_sat:
- case Intrinsic::ssub_sat:
- case Intrinsic::sadd_sat:
- return true;
- default:
- return false;
- }
- }
+ if (auto *II = dyn_cast<IntrinsicInst>(I))
+ return isTriviallyVectorizable(II->getIntrinsicID());
return !isa<CallBase, BitCastInst, ShuffleVectorInst, ExtractElementInst>(I);
}
diff --git a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll
index f1f1708333901a..65faf0974b503f 100644
--- a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll
@@ -998,8 +998,8 @@ define nofpclass(nan inf) float @pow_f32(float nofpclass(nan inf) %arg, float no
; CHECK-NEXT: [[I12:%.*]] = select i1 [[I11]], float [[ARG]], float 1.000000e+00
; CHECK-NEXT: [[I13:%.*]] = tail call noundef float @llvm.copysign.f32(float noundef [[I4]], float noundef [[I12]])
; CHECK-NEXT: [[I17:%.*]] = fcmp oeq float [[ARG]], 0.000000e+00
-; CHECK-NEXT: [[I21:%.*]] = select i1 [[I11]], float [[ARG]], float 0.000000e+00
-; CHECK-NEXT: [[I22:%.*]] = tail call noundef nofpclass(nan sub norm) float @llvm.copysign.f32(float noundef 0.000000e+00, float noundef [[I21]])
+; CHECK-NEXT: [[TMP0:%.*]] = tail call nofpclass(nan sub norm) float @llvm.copysign.f32(float 0.000000e+00, float [[ARG]])
+; CHECK-NEXT: [[I22:%.*]] = select i1 [[I11]], float [[TMP0]], float 0.000000e+00
; CHECK-NEXT: [[I23:%.*]] = select i1 [[I17]], float [[I22]], float [[I13]]
; CHECK-NEXT: [[I24:%.*]] = fcmp oeq float [[ARG]], 1.000000e+00
; CHECK-NEXT: [[I25:%.*]] = fcmp oeq float [[ARG1]], 0.000000e+00
diff --git a/llvm/test/Transforms/InstSimplify/select-abs.ll b/llvm/test/Transforms/InstSimplify/select-abs.ll
index 6b4708443fb261..102a5b660a3dec 100644
--- a/llvm/test/Transforms/InstSimplify/select-abs.ll
+++ b/llvm/test/Transforms/InstSimplify/select-abs.ll
@@ -236,10 +236,8 @@ entry:
define <4 x i16> @select_v4i16_eq0_abs_t(<4 x i16> %a) {
; CHECK-LABEL: @select_v4i16_eq0_abs_t(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp eq <4 x i16> [[A:%.*]], zeroinitializer
-; CHECK-NEXT: [[ABS:%.*]] = tail call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[A]], i1 true)
-; CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[COND]], <4 x i16> zeroinitializer, <4 x i16> [[ABS]]
-; CHECK-NEXT: ret <4 x i16> [[RES]]
+; CHECK-NEXT: [[ABS:%.*]] = tail call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[A:%.*]], i1 true)
+; CHECK-NEXT: ret <4 x i16> [[ABS]]
;
entry:
%cond = icmp eq <4 x i16> %a, <i16 0, i16 0, i16 0, i16 0>
@@ -251,10 +249,8 @@ entry:
define <4 x i16> @select_v4i16_ne0_abs_t(<4 x i16> %a) {
; CHECK-LABEL: @select_v4i16_ne0_abs_t(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp ne <4 x i16> [[A:%.*]], zeroinitializer
-; CHECK-NEXT: [[ABS:%.*]] = tail call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[A]], i1 true)
-; CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[COND]], <4 x i16> [[ABS]], <4 x i16> zeroinitializer
-; CHECK-NEXT: ret <4 x i16> [[RES]]
+; CHECK-NEXT: [[ABS:%.*]] = tail call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[A:%.*]], i1 true)
+; CHECK-NEXT: ret <4 x i16> [[ABS]]
;
entry:
%cond = icmp ne <4 x i16> %a, <i16 0, i16 0, i16 0, i16 0>
@@ -305,10 +301,8 @@ entry:
define <4 x i16> @badsplat1_select_v4i16_ne0_abs(<4 x i16> %a) {
; CHECK-LABEL: @badsplat1_select_v4i16_ne0_abs(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp ne <4 x i16> [[A:%.*]], <i16 0, i16 1, i16 0, i16 0>
-; CHECK-NEXT: [[ABS:%.*]] = tail call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[A]], i1 true)
-; CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[COND]], <4 x i16> [[ABS]], <4 x i16> <i16 0, i16 1, i16 0, i16 0>
-; CHECK-NEXT: ret <4 x i16> [[RES]]
+; CHECK-NEXT: [[ABS:%.*]] = tail call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[A:%.*]], i1 true)
+; CHECK-NEXT: ret <4 x i16> [[ABS]]
;
entry:
%cond = icmp ne <4 x i16> %a, <i16 0, i16 1, i16 0, i16 0>
|
@llvm/pr-subscribers-llvm-transforms Author: Ramkumar Ramachandra (artagnon) Changes: Reuse llvm::isTriviallyVectorizable in llvm::isNotCrossLaneOperation, in order to get it to handle more intrinsics. Alive2 proofs for changed tests: https://alive2.llvm.org/ce/z/XSV_GT Full diff: https://github.com/llvm/llvm-project/pull/112183.diff 3 Files Affected:
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 4e76f35266534b..f83347e7cd2bba 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -6948,25 +6948,8 @@ bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) {
}
bool llvm::isNotCrossLaneOperation(const Instruction *I) {
- if (auto *II = dyn_cast<IntrinsicInst>(I)) {
- switch (II->getIntrinsicID()) {
- // TODO: expand this list.
- case Intrinsic::ctlz:
- case Intrinsic::cttz:
- case Intrinsic::ctpop:
- case Intrinsic::umin:
- case Intrinsic::umax:
- case Intrinsic::smin:
- case Intrinsic::smax:
- case Intrinsic::usub_sat:
- case Intrinsic::uadd_sat:
- case Intrinsic::ssub_sat:
- case Intrinsic::sadd_sat:
- return true;
- default:
- return false;
- }
- }
+ if (auto *II = dyn_cast<IntrinsicInst>(I))
+ return isTriviallyVectorizable(II->getIntrinsicID());
return !isa<CallBase, BitCastInst, ShuffleVectorInst, ExtractElementInst>(I);
}
diff --git a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll
index f1f1708333901a..65faf0974b503f 100644
--- a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll
@@ -998,8 +998,8 @@ define nofpclass(nan inf) float @pow_f32(float nofpclass(nan inf) %arg, float no
; CHECK-NEXT: [[I12:%.*]] = select i1 [[I11]], float [[ARG]], float 1.000000e+00
; CHECK-NEXT: [[I13:%.*]] = tail call noundef float @llvm.copysign.f32(float noundef [[I4]], float noundef [[I12]])
; CHECK-NEXT: [[I17:%.*]] = fcmp oeq float [[ARG]], 0.000000e+00
-; CHECK-NEXT: [[I21:%.*]] = select i1 [[I11]], float [[ARG]], float 0.000000e+00
-; CHECK-NEXT: [[I22:%.*]] = tail call noundef nofpclass(nan sub norm) float @llvm.copysign.f32(float noundef 0.000000e+00, float noundef [[I21]])
+; CHECK-NEXT: [[TMP0:%.*]] = tail call nofpclass(nan sub norm) float @llvm.copysign.f32(float 0.000000e+00, float [[ARG]])
+; CHECK-NEXT: [[I22:%.*]] = select i1 [[I11]], float [[TMP0]], float 0.000000e+00
; CHECK-NEXT: [[I23:%.*]] = select i1 [[I17]], float [[I22]], float [[I13]]
; CHECK-NEXT: [[I24:%.*]] = fcmp oeq float [[ARG]], 1.000000e+00
; CHECK-NEXT: [[I25:%.*]] = fcmp oeq float [[ARG1]], 0.000000e+00
diff --git a/llvm/test/Transforms/InstSimplify/select-abs.ll b/llvm/test/Transforms/InstSimplify/select-abs.ll
index 6b4708443fb261..102a5b660a3dec 100644
--- a/llvm/test/Transforms/InstSimplify/select-abs.ll
+++ b/llvm/test/Transforms/InstSimplify/select-abs.ll
@@ -236,10 +236,8 @@ entry:
define <4 x i16> @select_v4i16_eq0_abs_t(<4 x i16> %a) {
; CHECK-LABEL: @select_v4i16_eq0_abs_t(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp eq <4 x i16> [[A:%.*]], zeroinitializer
-; CHECK-NEXT: [[ABS:%.*]] = tail call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[A]], i1 true)
-; CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[COND]], <4 x i16> zeroinitializer, <4 x i16> [[ABS]]
-; CHECK-NEXT: ret <4 x i16> [[RES]]
+; CHECK-NEXT: [[ABS:%.*]] = tail call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[A:%.*]], i1 true)
+; CHECK-NEXT: ret <4 x i16> [[ABS]]
;
entry:
%cond = icmp eq <4 x i16> %a, <i16 0, i16 0, i16 0, i16 0>
@@ -251,10 +249,8 @@ entry:
define <4 x i16> @select_v4i16_ne0_abs_t(<4 x i16> %a) {
; CHECK-LABEL: @select_v4i16_ne0_abs_t(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp ne <4 x i16> [[A:%.*]], zeroinitializer
-; CHECK-NEXT: [[ABS:%.*]] = tail call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[A]], i1 true)
-; CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[COND]], <4 x i16> [[ABS]], <4 x i16> zeroinitializer
-; CHECK-NEXT: ret <4 x i16> [[RES]]
+; CHECK-NEXT: [[ABS:%.*]] = tail call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[A:%.*]], i1 true)
+; CHECK-NEXT: ret <4 x i16> [[ABS]]
;
entry:
%cond = icmp ne <4 x i16> %a, <i16 0, i16 0, i16 0, i16 0>
@@ -305,10 +301,8 @@ entry:
define <4 x i16> @badsplat1_select_v4i16_ne0_abs(<4 x i16> %a) {
; CHECK-LABEL: @badsplat1_select_v4i16_ne0_abs(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp ne <4 x i16> [[A:%.*]], <i16 0, i16 1, i16 0, i16 0>
-; CHECK-NEXT: [[ABS:%.*]] = tail call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[A]], i1 true)
-; CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[COND]], <4 x i16> [[ABS]], <4 x i16> <i16 0, i16 1, i16 0, i16 0>
-; CHECK-NEXT: ret <4 x i16> [[RES]]
+; CHECK-NEXT: [[ABS:%.*]] = tail call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[A:%.*]], i1 true)
+; CHECK-NEXT: ret <4 x i16> [[ABS]]
;
entry:
%cond = icmp ne <4 x i16> %a, <i16 0, i16 1, i16 0, i16 0>
|
The CodeGen tests are broken on the main branch that this patch is based on. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Reuse llvm::isTriviallyVectorizable in llvm::isNotCrossLaneOperation, in order to get it to handle more intrinsics. Alive2 proofs for changed tests: https://alive2.llvm.org/ce/z/XSV_GT
Reuse llvm::isTriviallyVectorizable in llvm::isNotCrossLaneOperation, in order to get it to handle more intrinsics.
Alive2 proofs for changed tests: https://alive2.llvm.org/ce/z/XSV_GT