Skip to content

Commit 5f730b6

Browse files
committed
[VectorCombine] account for extra uses in scalarization cost
Follow-up to D79452. Mimics the extra use cost formula for the inverse transform with extracts.
1 parent 7c480c4 commit 5f730b6

File tree

2 files changed: 34 additions and 19 deletions

2 files changed: 34 additions and 19 deletions

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -316,15 +316,14 @@ static bool scalarizeBinop(Instruction &I, const TargetTransformInfo &TTI) {
316316
if (!match(&I, m_BinOp(m_Instruction(Ins0), m_Instruction(Ins1))))
317317
return false;
318318

319-
// TODO: Loosen restriction for one-use by adjusting cost equation.
320319
// TODO: Deal with mismatched index constants and variable indexes?
321320
Constant *VecC0, *VecC1;
322321
Value *V0, *V1;
323322
uint64_t Index;
324-
if (!match(Ins0, m_OneUse(m_InsertElement(m_Constant(VecC0), m_Value(V0),
325-
m_ConstantInt(Index)))) ||
326-
!match(Ins1, m_OneUse(m_InsertElement(m_Constant(VecC1), m_Value(V1),
327-
m_SpecificInt(Index)))))
323+
if (!match(Ins0, m_InsertElement(m_Constant(VecC0), m_Value(V0),
324+
m_ConstantInt(Index))) ||
325+
!match(Ins1, m_InsertElement(m_Constant(VecC1), m_Value(V1),
326+
m_SpecificInt(Index))))
328327
return false;
329328

330329
Type *ScalarTy = V0->getType();
@@ -342,7 +341,9 @@ static bool scalarizeBinop(Instruction &I, const TargetTransformInfo &TTI) {
342341
int InsertCost =
343342
TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
344343
int OldCost = InsertCost + InsertCost + VectorOpCost;
345-
int NewCost = ScalarOpCost + InsertCost;
344+
int NewCost = ScalarOpCost + InsertCost +
345+
!Ins0->hasOneUse() * InsertCost +
346+
!Ins1->hasOneUse() * InsertCost;
346347

347348
// We want to scalarize unless the vector variant actually has lower cost.
348349
if (OldCost < NewCost)

llvm/test/Transforms/VectorCombine/X86/insert-binop.ll

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -134,15 +134,14 @@ define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) {
134134
ret <2 x i64> %r
135135
}
136136

137-
; Negative test
138-
; TODO: extra use can be accounted for in cost calculation.
137+
; Extra use is accounted for in cost calculation.
139138

140139
define <4 x i32> @ins0_ins0_xor(i32 %x, i32 %y) {
141140
; CHECK-LABEL: @ins0_ins0_xor(
142141
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
143142
; CHECK-NEXT: call void @use(<4 x i32> [[I0]])
144-
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0
145-
; CHECK-NEXT: [[R:%.*]] = xor <4 x i32> [[I0]], [[I1]]
143+
; CHECK-NEXT: [[R_SCALAR:%.*]] = xor i32 [[X]], [[Y:%.*]]
144+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[R_SCALAR]], i64 0
146145
; CHECK-NEXT: ret <4 x i32> [[R]]
147146
;
148147
%i0 = insertelement <4 x i32> undef, i32 %x, i32 0
@@ -152,12 +151,14 @@ define <4 x i32> @ins0_ins0_xor(i32 %x, i32 %y) {
152151
ret <4 x i32> %r
153152
}
154153

154+
; Extra use is accounted for in cost calculation.
155+
155156
define <4 x float> @ins1_ins1_fmul(float %x, float %y) {
156157
; CHECK-LABEL: @ins1_ins1_fmul(
157-
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 1
158158
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 1
159159
; CHECK-NEXT: call void @usef(<4 x float> [[I1]])
160-
; CHECK-NEXT: [[R:%.*]] = fmul <4 x float> [[I0]], [[I1]]
160+
; CHECK-NEXT: [[R_SCALAR:%.*]] = fmul float [[X:%.*]], [[Y]]
161+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[R_SCALAR]], i64 1
161162
; CHECK-NEXT: ret <4 x float> [[R]]
162163
;
163164
%i0 = insertelement <4 x float> undef, float %x, i32 1
@@ -167,6 +168,8 @@ define <4 x float> @ins1_ins1_fmul(float %x, float %y) {
167168
ret <4 x float> %r
168169
}
169170

171+
; If the scalar binop is not cheaper than the vector binop, extra uses can prevent the transform.
172+
170173
define <4 x float> @ins2_ins2_fsub(float %x, float %y) {
171174
; CHECK-LABEL: @ins2_ins2_fsub(
172175
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 2
@@ -184,14 +187,25 @@ define <4 x float> @ins2_ins2_fsub(float %x, float %y) {
184187
ret <4 x float> %r
185188
}
186189

190+
; It may be worth scalarizing an expensive binop even if both inserts have extra uses.
191+
187192
define <4 x float> @ins3_ins3_fdiv(float %x, float %y) {
188-
; CHECK-LABEL: @ins3_ins3_fdiv(
189-
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 3
190-
; CHECK-NEXT: call void @usef(<4 x float> [[I0]])
191-
; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 3
192-
; CHECK-NEXT: call void @usef(<4 x float> [[I1]])
193-
; CHECK-NEXT: [[R:%.*]] = fdiv <4 x float> [[I0]], [[I1]]
194-
; CHECK-NEXT: ret <4 x float> [[R]]
193+
; SSE-LABEL: @ins3_ins3_fdiv(
194+
; SSE-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 3
195+
; SSE-NEXT: call void @usef(<4 x float> [[I0]])
196+
; SSE-NEXT: [[I1:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 3
197+
; SSE-NEXT: call void @usef(<4 x float> [[I1]])
198+
; SSE-NEXT: [[R_SCALAR:%.*]] = fdiv float [[X]], [[Y]]
199+
; SSE-NEXT: [[R:%.*]] = insertelement <4 x float> undef, float [[R_SCALAR]], i64 3
200+
; SSE-NEXT: ret <4 x float> [[R]]
201+
;
202+
; AVX-LABEL: @ins3_ins3_fdiv(
203+
; AVX-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 3
204+
; AVX-NEXT: call void @usef(<4 x float> [[I0]])
205+
; AVX-NEXT: [[I1:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 3
206+
; AVX-NEXT: call void @usef(<4 x float> [[I1]])
207+
; AVX-NEXT: [[R:%.*]] = fdiv <4 x float> [[I0]], [[I1]]
208+
; AVX-NEXT: ret <4 x float> [[R]]
195209
;
196210
%i0 = insertelement <4 x float> undef, float %x, i32 3
197211
call void @usef(<4 x float> %i0)

0 commit comments

Comments
 (0)