Skip to content

Commit 11289e4

Browse files
committed
Fixups
Change-Id: I687099076728ddb73466cfd7cf1a8307f064b4f7
1 parent c0a18e9 commit 11289e4

File tree

3 files changed

+60
-21
lines changed

3 files changed

+60
-21
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5500,15 +5500,18 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
55005500
// overhead.
55015501
for (Use &U : I->operands())
55025502
if (auto *J = dyn_cast<Instruction>(U.get())) {
5503-
assert(VectorType::isValidElementType(J->getType()) &&
5503+
assert(canVectorizeTy(J->getType()) &&
55045504
"Instruction has non-scalar type");
55055505
if (CanBeScalarized(J))
55065506
Worklist.push_back(J);
55075507
else if (needsExtract(J, VF)) {
5508-
ScalarCost += TTI.getScalarizationOverhead(
5509-
cast<VectorType>(toVectorTy(J->getType(), VF)),
5510-
APInt::getAllOnes(VF.getFixedValue()), /*Insert*/ false,
5511-
/*Extract*/ true, CostKind);
5508+
Type *WideTy = toVectorizedTy(J->getType(), VF);
5509+
for (Type *VectorTy : getContainedTypes(WideTy)) {
5510+
ScalarCost += TTI.getScalarizationOverhead(
5511+
cast<VectorType>(VectorTy),
5512+
APInt::getAllOnes(VF.getFixedValue()), /*Insert*/ false,
5513+
/*Extract*/ true, CostKind);
5514+
}
55125515
}
55135516
}
55145517

llvm/test/Transforms/LoopVectorize/AArch64/struct-return-cost.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ define void @struct_return_widen(ptr noalias %in, ptr noalias writeonly %out_a,
2020
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
2121
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[IN]], i64 [[INDEX]]
2222
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
23-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, ptr [[TMP0]], align 4
24-
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x half>, ptr [[TMP1]], align 4
23+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, ptr [[TMP0]], align 2
24+
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x half>, ptr [[TMP1]], align 2
2525
; CHECK-NEXT: [[TMP2:%.*]] = call { <2 x half>, <2 x half> } @fixed_vec_foo(<2 x half> [[WIDE_LOAD]])
2626
; CHECK-NEXT: [[TMP3:%.*]] = call { <2 x half>, <2 x half> } @fixed_vec_foo(<2 x half> [[WIDE_LOAD1]])
2727
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x half>, <2 x half> } [[TMP2]], 0
@@ -30,12 +30,12 @@ define void @struct_return_widen(ptr noalias %in, ptr noalias writeonly %out_a,
3030
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x half>, <2 x half> } [[TMP3]], 1
3131
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds half, ptr [[OUT_A]], i64 [[INDEX]]
3232
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 4
33-
; CHECK-NEXT: store <2 x half> [[TMP4]], ptr [[TMP8]], align 4
34-
; CHECK-NEXT: store <2 x half> [[TMP5]], ptr [[TMP9]], align 4
33+
; CHECK-NEXT: store <2 x half> [[TMP4]], ptr [[TMP8]], align 2
34+
; CHECK-NEXT: store <2 x half> [[TMP5]], ptr [[TMP9]], align 2
3535
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds half, ptr [[OUT_B]], i64 [[INDEX]]
3636
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP10]], i64 4
37-
; CHECK-NEXT: store <2 x half> [[TMP6]], ptr [[TMP10]], align 4
38-
; CHECK-NEXT: store <2 x half> [[TMP7]], ptr [[TMP11]], align 4
37+
; CHECK-NEXT: store <2 x half> [[TMP6]], ptr [[TMP10]], align 2
38+
; CHECK-NEXT: store <2 x half> [[TMP7]], ptr [[TMP11]], align 2
3939
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
4040
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
4141
; CHECK-NEXT: br i1 [[TMP12]], label %[[EXIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -48,14 +48,14 @@ entry:
4848
for.body:
4949
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
5050
%arrayidx = getelementptr inbounds half, ptr %in, i64 %iv
51-
%in_val = load half, ptr %arrayidx, align 4
51+
%in_val = load half, ptr %arrayidx, align 2
5252
%call = tail call { half, half } @foo(half %in_val) #0
5353
%extract_a = extractvalue { half, half } %call, 0
5454
%extract_b = extractvalue { half, half } %call, 1
5555
%arrayidx2 = getelementptr inbounds half, ptr %out_a, i64 %iv
56-
store half %extract_a, ptr %arrayidx2, align 4
56+
store half %extract_a, ptr %arrayidx2, align 2
5757
%arrayidx4 = getelementptr inbounds half, ptr %out_b, i64 %iv
58-
store half %extract_b, ptr %arrayidx4, align 4
58+
store half %extract_b, ptr %arrayidx4, align 2
5959
%iv.next = add nuw nsw i64 %iv, 1
6060
%exitcond.not = icmp eq i64 %iv.next, 1024
6161
br i1 %exitcond.not, label %exit, label %for.body
@@ -78,7 +78,7 @@ define void @struct_return_replicate(ptr noalias %in, ptr noalias writeonly %out
7878
; CHECK: [[VECTOR_BODY]]:
7979
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
8080
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[IN]], i64 [[INDEX]]
81-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, ptr [[TMP0]], align 4
81+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, ptr [[TMP0]], align 2
8282
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[WIDE_LOAD]], i64 0
8383
; CHECK-NEXT: [[TMP2:%.*]] = tail call { half, half } @foo(half [[TMP1]]) #[[ATTR0:[0-9]+]]
8484
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x half> [[WIDE_LOAD]], i64 1
@@ -92,9 +92,9 @@ define void @struct_return_replicate(ptr noalias %in, ptr noalias writeonly %out
9292
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { half, half } [[TMP4]], 1
9393
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x half> [[TMP8]], half [[TMP11]], i64 1
9494
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds half, ptr [[OUT_A]], i64 [[INDEX]]
95-
; CHECK-NEXT: store <2 x half> [[TMP10]], ptr [[TMP13]], align 4
95+
; CHECK-NEXT: store <2 x half> [[TMP10]], ptr [[TMP13]], align 2
9696
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds half, ptr [[OUT_B]], i64 [[INDEX]]
97-
; CHECK-NEXT: store <2 x half> [[TMP12]], ptr [[TMP14]], align 4
97+
; CHECK-NEXT: store <2 x half> [[TMP12]], ptr [[TMP14]], align 2
9898
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
9999
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
100100
; CHECK-NEXT: br i1 [[TMP15]], label %[[EXIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
@@ -107,15 +107,15 @@ entry:
107107
for.body:
108108
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
109109
%arrayidx = getelementptr inbounds half, ptr %in, i64 %iv
110-
%in_val = load half, ptr %arrayidx, align 4
110+
%in_val = load half, ptr %arrayidx, align 2
111111
; #1 does not have a fixed-size vector mapping (so replication is used)
112112
%call = tail call { half, half } @foo(half %in_val) #1
113113
%extract_a = extractvalue { half, half } %call, 0
114114
%extract_b = extractvalue { half, half } %call, 1
115115
%arrayidx2 = getelementptr inbounds half, ptr %out_a, i64 %iv
116-
store half %extract_a, ptr %arrayidx2, align 4
116+
store half %extract_a, ptr %arrayidx2, align 2
117117
%arrayidx4 = getelementptr inbounds half, ptr %out_b, i64 %iv
118-
store half %extract_b, ptr %arrayidx4, align 4
118+
store half %extract_b, ptr %arrayidx4, align 2
119119
%iv.next = add nuw nsw i64 %iv, 1
120120
%exitcond.not = icmp eq i64 %iv.next, 1024
121121
br i1 %exitcond.not, label %exit, label %for.body

llvm/test/Transforms/LoopVectorize/struct-return.ll

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,40 @@ exit:
208208
ret void
209209
}
210210

211+
; Test crafted to exercise computePredInstDiscount with struct results
212+
; (mainly to check that it does not crash).
213+
; CHECK-REMARKS: remark: {{.*}} vectorized loop
214+
define void @scalarized_predicated_struct_return(ptr %a) optsize {
215+
; CHECK-LABEL: define void @scalarized_predicated_struct_return
216+
; CHECK: vector.body:
217+
; CHECK: pred.store.if:
218+
; CHECK: tail call { i64, i64 } @bar_i64(i64 %5)
219+
entry:
220+
br label %for.body
221+
222+
for.body:
223+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
224+
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
225+
%in_val = load i64, ptr %arrayidx, align 8
226+
%sgt_zero = icmp sgt i64 %in_val, 0
227+
br i1 %sgt_zero, label %if.then, label %for.inc
228+
229+
if.then:
230+
%call = tail call { i64, i64 } @bar_i64(i64 %in_val) #6
231+
%extract_a = extractvalue { i64, i64 } %call, 0
232+
%div = udiv i64 %extract_a, %in_val
233+
store i64 %div, ptr %arrayidx, align 8
234+
br label %for.inc
235+
236+
for.inc:
237+
%iv.next = add nuw nsw i64 %iv, 1
238+
%exitcond.not = icmp eq i64 %iv.next, 1024
239+
br i1 %exitcond.not, label %exit, label %for.body
240+
241+
exit:
242+
ret void
243+
}
244+
211245
; Negative test. Widening structs of vectors is not supported.
212246
; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
213247
define void @negative_struct_of_vectors(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
@@ -431,17 +465,19 @@ declare { [2 x float] } @foo_arrays(float)
431465
declare { float, [1 x float] } @foo_one_non_widenable_element(float)
432466
declare { <1 x float>, <1 x float> } @foo_vectors(<1 x float>)
433467
declare { i32, i32, i32 } @qux(i32)
468+
declare { i64, i64 } @bar_i64(i64)
434469

435470
declare { <2 x float>, <2 x float> } @fixed_vec_foo(<2 x float>)
436471
declare { <2 x double>, <2 x double> } @fixed_vec_bar(<2 x double>)
437472
declare { <2 x float>, <2 x i32> } @fixed_vec_baz(<2 x float>)
438473
declare { <2 x i32>, <2 x i32>, <2 x i32> } @fixed_vec_qux(<2 x i32>)
439-
440474
declare { <vscale x 4 x float>, <vscale x 4 x float> } @scalable_vec_masked_foo(<vscale x 4 x float>, <vscale x 4 x i1>)
475+
declare { <vscale x 4 x i64>, <vscale x 4 x i64> } @scalable_vec_masked_bar_i64(<vscale x 4 x i64>, <vscale x 4 x i1>)
441476

442477
attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_foo(fixed_vec_foo)" }
443478
attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_bar(fixed_vec_bar)" }
444479
attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_baz(fixed_vec_baz)" }
445480
attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_foo(scalable_vec_masked_foo)" }
446481
attributes #4 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_bar_named(fixed_vec_bar)" }
447482
attributes #5 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_qux(fixed_vec_qux)" }
483+
attributes #6 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_bar_i64(scalable_vec_masked_bar_i64)" }

0 commit comments

Comments
 (0)