Skip to content

Commit 07cb536

Browse files
committed
Fixups
1 parent 27d0cec commit 07cb536

File tree

4 files changed

+142
-8
lines changed

4 files changed

+142
-8
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3725,10 +3725,13 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
37253725

37263726
// ExtractValue instructions must be uniform, because the operands are
37273727
// known to be loop-invariant.
3728-
auto *EVI = dyn_cast<ExtractValueInst>(&I);
3729-
if (EVI && IsOutOfScope(EVI->getAggregateOperand())) {
3730-
AddToWorklistIfAllowed(EVI);
3731-
continue;
3728+
if (auto *EVI = dyn_cast<ExtractValueInst>(&I)) {
3729+
if (IsOutOfScope(EVI->getAggregateOperand())) {
3730+
AddToWorklistIfAllowed(EVI);
3731+
continue;
3732+
}
3733+
assert(isa<CallInst>(EVI->getAggregateOperand()) &&
3734+
"Expected aggregate value to be call return value");
37323735
}
37333736

37343737
// If there's no pointer operand, there's nothing to do.

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) {
128128
assert(R->getNumOperands() == 2 && "expected single level extractvalue");
129129
auto *StructTy = cast<StructType>(inferScalarType(R->getOperand(0)));
130130
auto *CI = cast<ConstantInt>(R->getOperand(1)->getLiveInIRValue());
131-
unsigned Idx = CI->getZExtValue();
132-
return StructTy->getTypeAtIndex(Idx);
131+
return StructTy->getTypeAtIndex(CI->getZExtValue());
133132
}
134133
default:
135134
break;

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1409,8 +1409,7 @@ void VPWidenRecipe::execute(VPTransformState &State) {
14091409
assert(getNumOperands() == 2 && "expected single level extractvalue");
14101410
Value *Op = State.get(getOperand(0));
14111411
auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
1412-
unsigned Idx = CI->getZExtValue();
1413-
Value *Extract = Builder.CreateExtractValue(Op, Idx);
1412+
Value *Extract = Builder.CreateExtractValue(Op, CI->getZExtValue());
14141413
State.set(this, Extract);
14151414
break;
14161415
}
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
; RUN: opt -passes=loop-vectorize,instcombine,simplifycfg < %s -S -o - | FileCheck %s --check-prefix=CHECK
2+
; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=CHECK-COST
3+
; REQUIRES: asserts
4+
5+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
6+
target triple = "aarch64--linux-gnu"
7+
8+
; CHECK-COST-LABEL: struct_return_widen
9+
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { half, half } @foo(half %in_val) #0
10+
; CHECK-COST: Cost of 10 for VF 2: WIDEN-CALL ir<%call> = call @foo(ir<%in_val>) (using library function: fixed_vec_foo)
11+
; CHECK-COST: Cost of 58 for VF 4: REPLICATE ir<%call> = call @foo(ir<%in_val>)
12+
; CHECK-COST: Cost of 122 for VF 8: REPLICATE ir<%call> = call @foo(ir<%in_val>)
13+
14+
define void @struct_return_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
15+
; CHECK-LABEL: define void @struct_return_widen(
16+
; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) {
17+
; CHECK-NEXT: [[ENTRY:.*]]:
18+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
19+
; CHECK: [[VECTOR_BODY]]:
20+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
21+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[IN]], i64 [[INDEX]]
22+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
23+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, ptr [[TMP0]], align 4
24+
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x half>, ptr [[TMP1]], align 4
25+
; CHECK-NEXT: [[TMP2:%.*]] = call { <2 x half>, <2 x half> } @fixed_vec_foo(<2 x half> [[WIDE_LOAD]])
26+
; CHECK-NEXT: [[TMP3:%.*]] = call { <2 x half>, <2 x half> } @fixed_vec_foo(<2 x half> [[WIDE_LOAD1]])
27+
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x half>, <2 x half> } [[TMP2]], 0
28+
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x half>, <2 x half> } [[TMP3]], 0
29+
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <2 x half>, <2 x half> } [[TMP2]], 1
30+
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x half>, <2 x half> } [[TMP3]], 1
31+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds half, ptr [[OUT_A]], i64 [[INDEX]]
32+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 4
33+
; CHECK-NEXT: store <2 x half> [[TMP4]], ptr [[TMP8]], align 4
34+
; CHECK-NEXT: store <2 x half> [[TMP5]], ptr [[TMP9]], align 4
35+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds half, ptr [[OUT_B]], i64 [[INDEX]]
36+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP10]], i64 4
37+
; CHECK-NEXT: store <2 x half> [[TMP6]], ptr [[TMP10]], align 4
38+
; CHECK-NEXT: store <2 x half> [[TMP7]], ptr [[TMP11]], align 4
39+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
40+
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
41+
; CHECK-NEXT: br i1 [[TMP12]], label %[[EXIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
42+
; CHECK: [[EXIT]]:
43+
; CHECK-NEXT: ret void
44+
;
45+
entry:
46+
br label %for.body
47+
48+
for.body:
49+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
50+
%arrayidx = getelementptr inbounds half, ptr %in, i64 %iv
51+
%in_val = load half, ptr %arrayidx, align 4
52+
%call = tail call { half, half } @foo(half %in_val) #0
53+
%extract_a = extractvalue { half, half } %call, 0
54+
%extract_b = extractvalue { half, half } %call, 1
55+
%arrayidx2 = getelementptr inbounds half, ptr %out_a, i64 %iv
56+
store half %extract_a, ptr %arrayidx2, align 4
57+
%arrayidx4 = getelementptr inbounds half, ptr %out_b, i64 %iv
58+
store half %extract_b, ptr %arrayidx4, align 4
59+
%iv.next = add nuw nsw i64 %iv, 1
60+
%exitcond.not = icmp eq i64 %iv.next, 1024
61+
br i1 %exitcond.not, label %exit, label %for.body
62+
63+
exit:
64+
ret void
65+
}
66+
67+
; CHECK-COST-LABEL: struct_return_replicate
68+
; CHECK-COST: LV: Found an estimated cost of 10 for VF 1 For instruction: %call = tail call { half, half } @foo(half %in_val) #0
69+
; CHECK-COST: Cost of 26 for VF 2: REPLICATE ir<%call> = call @foo(ir<%in_val>)
70+
; CHECK-COST: Cost of 58 for VF 4: REPLICATE ir<%call> = call @foo(ir<%in_val>)
71+
; CHECK-COST: Cost of 122 for VF 8: REPLICATE ir<%call> = call @foo(ir<%in_val>)
72+
73+
define void @struct_return_replicate(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
74+
; CHECK-LABEL: define void @struct_return_replicate(
75+
; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) {
76+
; CHECK-NEXT: [[ENTRY:.*]]:
77+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
78+
; CHECK: [[VECTOR_BODY]]:
79+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
80+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[IN]], i64 [[INDEX]]
81+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x half>, ptr [[TMP0]], align 4
82+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[WIDE_LOAD]], i64 0
83+
; CHECK-NEXT: [[TMP2:%.*]] = tail call { half, half } @foo(half [[TMP1]]) #[[ATTR0:[0-9]+]]
84+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x half> [[WIDE_LOAD]], i64 1
85+
; CHECK-NEXT: [[TMP4:%.*]] = tail call { half, half } @foo(half [[TMP3]]) #[[ATTR0]]
86+
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { half, half } [[TMP2]], 0
87+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x half> poison, half [[TMP5]], i64 0
88+
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { half, half } [[TMP2]], 1
89+
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x half> poison, half [[TMP7]], i64 0
90+
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { half, half } [[TMP4]], 0
91+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x half> [[TMP6]], half [[TMP9]], i64 1
92+
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { half, half } [[TMP4]], 1
93+
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x half> [[TMP8]], half [[TMP11]], i64 1
94+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds half, ptr [[OUT_A]], i64 [[INDEX]]
95+
; CHECK-NEXT: store <2 x half> [[TMP10]], ptr [[TMP13]], align 4
96+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds half, ptr [[OUT_B]], i64 [[INDEX]]
97+
; CHECK-NEXT: store <2 x half> [[TMP12]], ptr [[TMP14]], align 4
98+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
99+
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
100+
; CHECK-NEXT: br i1 [[TMP15]], label %[[EXIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
101+
; CHECK: [[EXIT]]:
102+
; CHECK-NEXT: ret void
103+
;
104+
entry:
105+
br label %for.body
106+
107+
for.body:
108+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
109+
%arrayidx = getelementptr inbounds half, ptr %in, i64 %iv
110+
%in_val = load half, ptr %arrayidx, align 4
111+
; #1 does not have a fixed-size vector mapping (so replication is used)
112+
%call = tail call { half, half } @foo(half %in_val) #1
113+
%extract_a = extractvalue { half, half } %call, 0
114+
%extract_b = extractvalue { half, half } %call, 1
115+
%arrayidx2 = getelementptr inbounds half, ptr %out_a, i64 %iv
116+
store half %extract_a, ptr %arrayidx2, align 4
117+
%arrayidx4 = getelementptr inbounds half, ptr %out_b, i64 %iv
118+
store half %extract_b, ptr %arrayidx4, align 4
119+
%iv.next = add nuw nsw i64 %iv, 1
120+
%exitcond.not = icmp eq i64 %iv.next, 1024
121+
br i1 %exitcond.not, label %exit, label %for.body
122+
123+
exit:
124+
ret void
125+
}
126+
127+
declare { half, half } @foo(half)
128+
129+
declare { <2 x half>, <2 x half> } @fixed_vec_foo(<2 x half>)
130+
declare { <vscale x 4 x half>, <vscale x 4 x half> } @scalable_vec_masked_foo(<vscale x 4 x half>, <vscale x 4 x i1>)
131+
132+
attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_foo(fixed_vec_foo)" }
133+
attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_foo(scalable_vec_masked_foo)" }

0 commit comments

Comments
 (0)