Skip to content

Commit ea83e1c

Browse files
committed
[LV] Assign cost to all interleave members when not interleaving.
At the moment, the full cost of all interleave group members is assigned to the instruction at the group's insert position, even if the decision was to not form an interleave group. This can lead to inaccurate cost estimates, e.g. if the instruction at the insert position is dead. If the decision is to not vectorize but scalarize or scather/gather, then the cost will be to total cost for all members. In those cases, assign individual the cost per member, to more closely reflect to choice per instruction. This fixes a divergence between legacy and VPlan-based cost model. Fixes #108098.
1 parent b8239e1 commit ea83e1c

36 files changed

+1141
-1039
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1182,13 +1182,23 @@ class LoopVectorizationCostModel {
11821182
InstructionCost Cost) {
11831183
assert(VF.isVector() && "Expected VF >=2");
11841184
/// Broadcast this decicion to all instructions inside the group.
1185-
/// But the cost will be assigned to one instruction only.
1185+
/// When interleaving, the cost will only be assigned one instruction, the
1186+
/// insert position. For other cases, add the appropriate fraction of the
1187+
/// total cost to each instruction. This ensures accurate costs are used,
1188+
/// even if the insert position instruction is not used.
1189+
InstructionCost InsertPosCost = Cost;
1190+
InstructionCost OtherMemberCost = 0;
1191+
if (W != CM_Interleave)
1192+
OtherMemberCost = InsertPosCost = Cost / Grp->getNumMembers();
1193+
;
11861194
for (unsigned Idx = 0; Idx < Grp->getFactor(); ++Idx) {
11871195
if (auto *I = Grp->getMember(Idx)) {
11881196
if (Grp->getInsertPos() == I)
1189-
WideningDecisions[std::make_pair(I, VF)] = std::make_pair(W, Cost);
1197+
WideningDecisions[std::make_pair(I, VF)] =
1198+
std::make_pair(W, InsertPosCost);
11901199
else
1191-
WideningDecisions[std::make_pair(I, VF)] = std::make_pair(W, 0);
1200+
WideningDecisions[std::make_pair(I, VF)] =
1201+
std::make_pair(W, OtherMemberCost);
11921202
}
11931203
}
11941204
}

llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -82,26 +82,26 @@ define void @test() {
8282
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
8383
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
8484
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
85-
; AVX2: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
86-
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
87-
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
88-
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
89-
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
90-
; AVX2: LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
91-
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
92-
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
93-
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
94-
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
95-
; AVX2: LV: Found an estimated cost of 75 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
96-
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
97-
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
98-
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
99-
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
100-
; AVX2: LV: Found an estimated cost of 150 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
101-
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
102-
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
103-
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
104-
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
85+
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
86+
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
87+
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
88+
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
89+
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
90+
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
91+
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
92+
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
93+
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
94+
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
95+
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
96+
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
97+
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
98+
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
99+
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
100+
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
101+
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
102+
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
103+
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
104+
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
105105
;
106106
; AVX512-LABEL: 'test'
107107
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
@@ -139,11 +139,11 @@ define void @test() {
139139
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4
140140
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4
141141
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4
142-
; AVX512: LV: Found an estimated cost of 400 for VF 64 For instruction: %v0 = load float, ptr %in0, align 4
143-
; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load float, ptr %in1, align 4
144-
; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load float, ptr %in2, align 4
145-
; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load float, ptr %in3, align 4
146-
; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load float, ptr %in4, align 4
142+
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load float, ptr %in0, align 4
143+
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load float, ptr %in1, align 4
144+
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v2 = load float, ptr %in2, align 4
145+
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v3 = load float, ptr %in3, align 4
146+
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v4 = load float, ptr %in4, align 4
147147
;
148148
entry:
149149
br label %for.body

llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -108,34 +108,34 @@ define void @test() {
108108
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
109109
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
110110
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
111-
; AVX2: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
112-
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
113-
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
114-
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
115-
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
116-
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
117-
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
118-
; AVX2: LV: Found an estimated cost of 49 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
119-
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
120-
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
121-
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
122-
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
123-
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
124-
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
125-
; AVX2: LV: Found an estimated cost of 105 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
126-
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
127-
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
128-
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
129-
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
130-
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
131-
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
132-
; AVX2: LV: Found an estimated cost of 210 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
133-
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
134-
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
135-
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
136-
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
137-
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
138-
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
111+
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
112+
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
113+
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
114+
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
115+
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
116+
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
117+
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
118+
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
119+
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
120+
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
121+
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
122+
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
123+
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
124+
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
125+
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
126+
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
127+
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
128+
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
129+
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
130+
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
131+
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
132+
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
133+
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
134+
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
135+
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
136+
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
137+
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
138+
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
139139
;
140140
; AVX512-LABEL: 'test'
141141
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
@@ -180,13 +180,13 @@ define void @test() {
180180
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
181181
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
182182
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
183-
; AVX512: LV: Found an estimated cost of 280 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
184-
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4
185-
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4
186-
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4
187-
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4
188-
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load float, ptr %in5, align 4
189-
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load float, ptr %in6, align 4
183+
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
184+
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4
185+
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4
186+
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4
187+
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4
188+
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load float, ptr %in5, align 4
189+
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v6 = load float, ptr %in6, align 4
190190
;
191191
entry:
192192
br label %for.body

0 commit comments

Comments
 (0)