Skip to content

Commit 1f919aa

Browse files
authored
VectorCombine: lift one-use limitation in foldExtractedCmps (llvm#110902)
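foldExtractedCmps artificially required each compare and each extractelement it matched to have a single use. Strip the m_OneUse matchers, and adjust the cost model to account for multi-use: an extract that has other uses must be kept after the transform, so its cost is added to the cost of the new vector sequence. This lifts the one-use limitations.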
1 parent 36a0d44 commit 1f919aa

File tree

2 files changed

+67
-12
lines changed

2 files changed

+67
-12
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1038,23 +1038,20 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
10381038

10391039
// The compare predicates should match, and each compare should have a
10401040
// constant operand.
1041-
// TODO: Relax the one-use constraints.
10421041
Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
10431042
Instruction *I0, *I1;
10441043
Constant *C0, *C1;
10451044
CmpInst::Predicate P0, P1;
1046-
if (!match(B0, m_OneUse(m_Cmp(P0, m_Instruction(I0), m_Constant(C0)))) ||
1047-
!match(B1, m_OneUse(m_Cmp(P1, m_Instruction(I1), m_Constant(C1)))) ||
1048-
P0 != P1)
1045+
if (!match(B0, m_Cmp(P0, m_Instruction(I0), m_Constant(C0))) ||
1046+
!match(B1, m_Cmp(P1, m_Instruction(I1), m_Constant(C1))) || P0 != P1)
10491047
return false;
10501048

10511049
// The compare operands must be extracts of the same vector with constant
10521050
// extract indexes.
1053-
// TODO: Relax the one-use constraints.
10541051
Value *X;
10551052
uint64_t Index0, Index1;
1056-
if (!match(I0, m_OneUse(m_ExtractElt(m_Value(X), m_ConstantInt(Index0)))) ||
1057-
!match(I1, m_OneUse(m_ExtractElt(m_Specific(X), m_ConstantInt(Index1)))))
1053+
if (!match(I0, m_ExtractElt(m_Value(X), m_ConstantInt(Index0))) ||
1054+
!match(I1, m_ExtractElt(m_Specific(X), m_ConstantInt(Index1))))
10581055
return false;
10591056

10601057
auto *Ext0 = cast<ExtractElementInst>(I0);
@@ -1073,14 +1070,16 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
10731070
return false;
10741071

10751072
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1073+
InstructionCost Ext0Cost =
1074+
TTI.getVectorInstrCost(*Ext0, VecTy, CostKind, Index0),
1075+
Ext1Cost =
1076+
TTI.getVectorInstrCost(*Ext1, VecTy, CostKind, Index1);
10761077
InstructionCost OldCost =
1077-
TTI.getVectorInstrCost(*Ext0, VecTy, CostKind, Index0);
1078-
OldCost += TTI.getVectorInstrCost(*Ext1, VecTy, CostKind, Index1);
1079-
OldCost +=
1078+
Ext0Cost + Ext1Cost +
10801079
TTI.getCmpSelInstrCost(CmpOpcode, I0->getType(),
10811080
CmpInst::makeCmpResultType(I0->getType()), Pred) *
1082-
2;
1083-
OldCost += TTI.getArithmeticInstrCost(I.getOpcode(), I.getType());
1081+
2 +
1082+
TTI.getArithmeticInstrCost(I.getOpcode(), I.getType());
10841083

10851084
// The proposed vector pattern is:
10861085
// vcmp = cmp Pred X, VecC
@@ -1096,6 +1095,8 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
10961095
ShufMask);
10971096
NewCost += TTI.getArithmeticInstrCost(I.getOpcode(), CmpTy);
10981097
NewCost += TTI.getVectorInstrCost(*Ext0, CmpTy, CostKind, CheapIndex);
1098+
NewCost += Ext0->hasOneUse() ? 0 : Ext0Cost;
1099+
NewCost += Ext1->hasOneUse() ? 0 : Ext1Cost;
10991100

11001101
// Aggressively form vector ops if the cost is equal because the transform
11011102
// may enable further optimization.

llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,60 @@ define i1 @icmp_add_v8i32(<8 x i32> %a) {
9292
ret i1 %r
9393
}
9494

95+
declare void @use()
96+
97+
define i1 @fcmp_and_v2f64_multiuse(<2 x double> %a) {
98+
; SSE-LABEL: @fcmp_and_v2f64_multiuse(
99+
; SSE-NEXT: [[E1:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
100+
; SSE-NEXT: call void @use(double [[E1]])
101+
; SSE-NEXT: [[E2:%.*]] = extractelement <2 x double> [[A]], i32 1
102+
; SSE-NEXT: [[CMP1:%.*]] = fcmp olt double [[E1]], 4.200000e+01
103+
; SSE-NEXT: [[CMP2:%.*]] = fcmp olt double [[E2]], -8.000000e+00
104+
; SSE-NEXT: [[R:%.*]] = and i1 [[CMP1]], [[CMP2]]
105+
; SSE-NEXT: call void @use(i1 [[R]])
106+
; SSE-NEXT: ret i1 [[R]]
107+
;
108+
; AVX-LABEL: @fcmp_and_v2f64_multiuse(
109+
; AVX-NEXT: [[E1:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
110+
; AVX-NEXT: call void @use(double [[E1]])
111+
; AVX-NEXT: [[TMP1:%.*]] = fcmp olt <2 x double> [[A]], <double 4.200000e+01, double -8.000000e+00>
112+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <2 x i32> <i32 1, i32 poison>
113+
; AVX-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[SHIFT]]
114+
; AVX-NEXT: [[R:%.*]] = extractelement <2 x i1> [[TMP2]], i64 0
115+
; AVX-NEXT: call void @use(i1 [[R]])
116+
; AVX-NEXT: ret i1 [[R]]
117+
;
118+
%e1 = extractelement <2 x double> %a, i32 0
119+
call void @use(double %e1)
120+
%e2 = extractelement <2 x double> %a, i32 1
121+
%cmp1 = fcmp olt double %e1, 42.0
122+
%cmp2 = fcmp olt double %e2, -8.0
123+
%r = and i1 %cmp1, %cmp2
124+
call void @use(i1 %r)
125+
ret i1 %r
126+
}
127+
128+
define i1 @icmp_xor_v4i32_multiuse(<4 x i32> %a) {
129+
; CHECK-LABEL: @icmp_xor_v4i32_multiuse(
130+
; CHECK-NEXT: [[E2:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 1
131+
; CHECK-NEXT: call void @use(i32 [[E2]])
132+
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], <i32 poison, i32 -8, i32 poison, i32 42>
133+
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
134+
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], [[SHIFT]]
135+
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
136+
; CHECK-NEXT: call void @use(i1 [[R]])
137+
; CHECK-NEXT: ret i1 [[R]]
138+
;
139+
%e1 = extractelement <4 x i32> %a, i32 3
140+
%e2 = extractelement <4 x i32> %a, i32 1
141+
call void @use(i32 %e2)
142+
%cmp1 = icmp sgt i32 %e1, 42
143+
%cmp2 = icmp sgt i32 %e2, -8
144+
%r = xor i1 %cmp1, %cmp2
145+
call void @use(i1 %r)
146+
ret i1 %r
147+
}
148+
95149
; Negative test - this could CSE/simplify.
96150

97151
define i1 @same_extract_index(<4 x i32> %a) {

0 commit comments

Comments
 (0)