-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[CostModel] improveShuffleKindFromMask - recognise a SK_PermuteSingleSrc incorrectly tagged as SK_PermuteTwoSrc #145352
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: Simon Pilgrim (RKSimon) ChangesIf a SK_PermuteTwoSrc shuffle kind's mask only references the first operand, then treat this as SK_PermuteSingleSrc Part of #145335 Full diff: https://github.com/llvm/llvm-project/pull/145352.diff 3 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 0477c1b6f1a6f..cabecbec175b3 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1122,6 +1122,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
break;
}
case TTI::SK_PermuteTwoSrc: {
+ if (all_of(Mask, [NumSrcElts](int M) { return M < NumSrcElts; }))
+ return improveShuffleKindFromMask(TTI::SK_PermuteSingleSrc, Mask, SrcTy,
+ Index, SubTy);
int NumSubElts;
if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask(
Mask, NumSrcElts, NumSubElts, Index)) {
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll
index a1d3f250b8a83..9f9e9d84108e6 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll
@@ -7,20 +7,17 @@ target triple = "aarch64-unknown-linux-gnu"
define void @foo(ptr %0) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: vector.scevcheck:
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP0:%.*]], i64 4
-; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr null, i64 4
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[SCEVGEP]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x ptr> [[TMP2]], <4 x ptr> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <4 x ptr> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> <ptr poison, ptr null>, ptr [[TMP0:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i64> splat (i64 4)
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x ptr> [[TMP2]], <2 x ptr> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[SCEVGEP3]], i32 1
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x ptr> [[TMP7]], <4 x ptr> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <4 x ptr> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP9]], zeroinitializer
-; CHECK-NEXT: [[RDX_OP:%.*]] = or <4 x i1> [[TMP5]], [[TMP10]]
-; CHECK-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_OP]])
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = call <8 x ptr> @llvm.vector.insert.v8p0.v4p0(<8 x ptr> poison, <4 x ptr> [[TMP3]], i64 0)
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x ptr> @llvm.vector.insert.v8p0.v4p0(<8 x ptr> [[TMP11]], <4 x ptr> [[TMP5]], i64 4)
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x ptr> [[TMP7]], <8 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <8 x ptr> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = and <8 x i1> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP10]])
; CHECK-NEXT: br i1 [[OP_RDX]], label [[DOTLR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/VectorCombine/X86/permute-of-binops.ll b/llvm/test/Transforms/VectorCombine/X86/permute-of-binops.ll
index 862c07c3d2270..b3de3b8f1ca62 100644
--- a/llvm/test/Transforms/VectorCombine/X86/permute-of-binops.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/permute-of-binops.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE
-; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
; Fold "shuffle (binop (shuffle, shuffle)), undef" --> "binop (shuffle), (shuffle)"
@@ -83,25 +83,15 @@ define <4 x double> @fadd_v4f64_multiuse_op(<4 x double> %a, <4 x double> %b) {
ret <4 x double> %post
}
-; Negative test - multiple use of inner shuffle (only fold if the moved shuffle is cheaper).
define <4 x double> @fadd_v4f64_multiuse_shuffle(<4 x double> %a, <4 x double> %b) {
-; SSE-LABEL: define <4 x double> @fadd_v4f64_multiuse_shuffle(
-; SSE-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0]] {
-; SSE-NEXT: [[A1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
-; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-; SSE-NEXT: [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
-; SSE-NEXT: call void @use_v4f64(<4 x double> [[A1]])
-; SSE-NEXT: ret <4 x double> [[POST]]
-;
-; AVX-LABEL: define <4 x double> @fadd_v4f64_multiuse_shuffle(
-; AVX-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0]] {
-; AVX-NEXT: [[A1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX-NEXT: [[B1:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
-; AVX-NEXT: [[OP:%.*]] = fadd <4 x double> [[A1]], [[B1]]
-; AVX-NEXT: [[POST:%.*]] = shufflevector <4 x double> [[OP]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; AVX-NEXT: call void @use_v4f64(<4 x double> [[A1]])
-; AVX-NEXT: ret <4 x double> [[POST]]
+; CHECK-LABEL: define <4 x double> @fadd_v4f64_multiuse_shuffle(
+; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT: [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: call void @use_v4f64(<4 x double> [[A1]])
+; CHECK-NEXT: ret <4 x double> [[POST]]
;
%a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
%b1 = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
|
…Src incorrectly tagged as SK_PermuteTwoSrc If a SK_PermuteTwoSrc shuffle kind's mask only references the first operand, then treat this as SK_PermuteSingleSrc Part of llvm#145335
a378d4f
to
d937ecc
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
If a SK_PermuteTwoSrc shuffle kind's mask only references the first operand, then treat this as SK_PermuteSingleSrc
Part of #145335