
Commit e127e33

Merge branch 'vplan-narrow-interleave-mem-only2' into vplan-narrow-interleave

2 parents: 3fd2b8d + 89d4f13

7 files changed: +577, -621 lines

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 1 deletion

@@ -7697,7 +7697,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
                               OrigLoop->getHeader()->getContext());
   VPlanTransforms::materializeBroadcasts(BestVPlan);
   VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
-  VPlanTransforms::narrowInterleaveGroups(BestVPlan, BestVF);
+  VPlanTransforms::narrowInterleaveGroups(
+      BestVPlan, BestVF,
+      TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
   VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
   VPlanTransforms::removeDeadRecipes(BestVPlan);
   VPlanTransforms::convertToConcreteRecipes(BestVPlan);
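
The transform now receives the target's fixed-width vector register size in bits, as reported by TTI.getRegisterBitWidth for the RGK_FixedWidthVector register class. A minimal standalone sketch of the width arithmetic this parameter enables; the 128-bit value is an assumed stand-in for an AArch64/NEON-like target (the triple used by the new test below), and nothing here queries a real TTI:

    #include <cassert>

    int main() {
      // Assumed result of TTI.getRegisterBitWidth(RGK_FixedWidthVector)
      // on an AArch64/NEON-like target.
      unsigned VectorRegWidth = 128;
      // A {double, double} interleave group at VF = 2 fills the register
      // exactly (2 x 64 bits) ...
      assert(64u * 2u == VectorRegWidth);
      // ... as would a group of four float members at VF = 4 (4 x 32 bits).
      assert(32u * 4u == VectorRegWidth);
      return 0;
    }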

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 35 additions & 6 deletions

@@ -2248,21 +2248,49 @@ static bool supportedLoad(VPWidenRecipe *R0, VPValue *V, unsigned Idx) {
 }
 
 /// Returns true if \p IR is a full interleave group with factor and number of
-/// members both equal to \p VF.
+/// members both equal to \p VF. The interleave group must also access the full
+/// vector width \p VectorRegWidth.
 static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR,
-                                         unsigned VF) {
+                                         unsigned VF, VPTypeAnalysis &TypeInfo,
+                                         unsigned VectorRegWidth) {
   if (!InterleaveR)
     return false;
+
+  Type *GroupElementTy = nullptr;
+  if (InterleaveR->getStoredValues().empty()) {
+    GroupElementTy = TypeInfo.inferScalarType(InterleaveR->getVPValue(0));
+    if (!all_of(InterleaveR->definedValues(),
+                [&TypeInfo, GroupElementTy](VPValue *Op) {
+                  return TypeInfo.inferScalarType(Op) == GroupElementTy;
+                }))
+      return false;
+  } else {
+    GroupElementTy =
+        TypeInfo.inferScalarType(InterleaveR->getStoredValues()[0]);
+    if (!all_of(InterleaveR->getStoredValues(),
+                [&TypeInfo, GroupElementTy](VPValue *Op) {
+                  return TypeInfo.inferScalarType(Op) == GroupElementTy;
+                }))
+      return false;
+  }
+
+  unsigned GroupSize = GroupElementTy->getScalarSizeInBits() * VF;
+
   auto IG = InterleaveR->getInterleaveGroup();
-  return IG->getFactor() == VF && IG->getNumMembers() == VF;
+  return IG->getFactor() == VF && IG->getNumMembers() == VF &&
+         GroupSize == VectorRegWidth;
 }
 
-void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF) {
+void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
+                                             unsigned VectorRegWidth) {
   using namespace llvm::VPlanPatternMatch;
   VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion();
   if (VF.isScalable() || !VectorLoop)
     return;
 
+  VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
+  Type *CanonicalIVType = CanonicalIV->getScalarType();
+  VPTypeAnalysis TypeInfo(CanonicalIVType);
+
   unsigned FixedVF = VF.getFixedValue();
   SmallVector<VPInterleaveRecipe *> StoreGroups;
   for (auto &R : *VectorLoop->getEntryBasicBlock()) {
@@ -2285,7 +2313,8 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF) {
       continue;
 
     // Bail out on non-consecutive interleave groups.
-    if (!isConsecutiveInterleaveGroup(InterleaveR, FixedVF))
+    if (!isConsecutiveInterleaveGroup(InterleaveR, FixedVF, TypeInfo,
+                                      VectorRegWidth))
       return;
 
     // Skip read interleave groups.
@@ -2326,6 +2355,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF) {
   if (StoreGroups.empty())
     return;
 
+  // Convert InterleaveGroup \p R to a single VPWidenLoadRecipe.
   auto Narrow = [](VPRecipeBase *R) -> VPValue * {
     if (auto *LoadGroup = dyn_cast<VPInterleaveRecipe>(R)) {
       // Narrow interleave group to wide load, as transformed VPlan will only
@@ -2376,5 +2406,4 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF) {
   Inc->setOperand(
       1, Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
   removeDeadRecipes(Plan);
-  LLVM_DEBUG(dbgs() << "Narrowed interleave\n");
 }
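
The two added bail-outs are self-contained enough to restate in isolation. A hedged sketch, with plain C++ standing in for the VPlan types (each member's element type is reduced to its bit width); this is illustrative only, not the pass's API:

    #include <algorithm>
    #include <vector>

    // Mirrors the checks added to isConsecutiveInterleaveGroup: all group
    // members must share one element type, and the group as a whole must
    // fill exactly one vector register.
    bool groupFillsRegister(const std::vector<unsigned> &MemberElementBits,
                            unsigned VF, unsigned VectorRegWidth) {
      if (MemberElementBits.empty())
        return false;
      // Homogeneity: every member has the same element type.
      unsigned ElementBits = MemberElementBits.front();
      if (!std::all_of(MemberElementBits.begin(), MemberElementBits.end(),
                       [ElementBits](unsigned B) { return B == ElementBits; }))
        return false;
      // GroupSize == VectorRegWidth in the real check.
      return ElementBits * VF == VectorRegWidth;
    }

For example, groupFillsRegister({64, 64}, /*VF=*/2, /*VectorRegWidth=*/128) is true, matching the {double, double} test added below, while a group mixing i32 and i64 members fails the homogeneity check regardless of width. The real check additionally requires the group's factor and number of members to equal VF, which this sketch omits.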

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 6 additions & 3 deletions

@@ -194,9 +194,12 @@ struct VPlanTransforms {
 
   /// Try to convert a plan with interleave groups with VF elements to a plan
   /// with the interleave groups replaced by wide loads and stores processing VF
-  /// elements. This effectively is a very simple form of loop-aware SLP, where
-  /// we use interleave groups to identify candidates.
-  static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF);
+  /// elements, if all transformed interleave groups access the full vector
+  /// width (checked via \p VectorRegWidth). This effectively is a very simple
+  /// form of loop-aware SLP, where we use interleave groups to identify
+  /// candidates.
+  static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
+                                     unsigned VectorRegWidth);
 };
 
 } // namespace llvm
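
Under the new contract, narrowing additionally requires that each interleave group's members together occupy exactly one fixed-width vector register; the pre-existing requirement that factor and number of members equal VF still applies. The new AArch64 test below exercises both a group that qualifies ({double, double} at VF 2) and one that does not ({float, float} at factor 2, VF 4).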
Lines changed: 191 additions & 0 deletions

@@ -0,0 +1,191 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -p loop-vectorize -S %s | FileCheck %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx"
+
+define void @test_complex_add_float(ptr %res, ptr noalias %A, ptr noalias %B, i64 %N) {
+; CHECK-LABEL: define void @test_complex_add_float(
+; CHECK-SAME: ptr [[RES:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x float>, ptr [[GEP_A_0]], align 4
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x float> [[WIDE_VEC]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x float> [[WIDE_VEC]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <8 x float>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x float> [[WIDE_VEC2]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x float> [[WIDE_VEC2]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[WIDE_VEC5:%.*]] = load <8 x float>, ptr [[GEP_B_0]], align 4
+; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <8 x float> [[WIDE_VEC5]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <8 x float> [[WIDE_VEC5]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[WIDE_VEC8:%.*]] = load <8 x float>, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[STRIDED_VEC9:%.*]] = shufflevector <8 x float> [[WIDE_VEC8]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[STRIDED_VEC10:%.*]] = shufflevector <8 x float> [[WIDE_VEC8]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[STRIDED_VEC]], [[STRIDED_VEC6]]
+; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[STRIDED_VEC3]], [[STRIDED_VEC9]]
+; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[STRIDED_VEC1]], [[STRIDED_VEC7]]
+; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[STRIDED_VEC4]], [[STRIDED_VEC10]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[RES]], i64 [[IV]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[RES]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC]], ptr [[TMP10]], align 4
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[INTERLEAVED_VEC11:%.*]] = shufflevector <8 x float> [[TMP13]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT: store <8 x float> [[INTERLEAVED_VEC11]], ptr [[TMP11]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[A]], i64 [[IV1]]
+; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[B]], i64 [[IV1]]
+; CHECK-NEXT: [[L_A_0:%.*]] = load float, ptr [[GEP_A_2]], align 4
+; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_A_2]], i64 4
+; CHECK-NEXT: [[L_A_1:%.*]] = load float, ptr [[GEP_A_1]], align 4
+; CHECK-NEXT: [[L_B_0:%.*]] = load float, ptr [[GEP_B_2]], align 4
+; CHECK-NEXT: [[ADD_0:%.*]] = fadd float [[L_A_0]], [[L_B_0]]
+; CHECK-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_B_2]], i64 4
+; CHECK-NEXT: [[L_B_1:%.*]] = load float, ptr [[GEP_B_1]], align 4
+; CHECK-NEXT: [[ADD_1:%.*]] = fadd float [[L_A_1]], [[L_B_1]]
+; CHECK-NEXT: [[GEP_RES_0:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[RES]], i64 [[IV1]]
+; CHECK-NEXT: store float [[ADD_0]], ptr [[GEP_RES_0]], align 4
+; CHECK-NEXT: [[GEP_RES_1:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_RES_0]], i64 4
+; CHECK-NEXT: store float [[ADD_1]], ptr [[GEP_RES_1]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.A.0 = getelementptr inbounds nuw { float, float }, ptr %A, i64 %iv
+  %gep.B.0 = getelementptr inbounds nuw { float, float }, ptr %B, i64 %iv
+  %l.A.0 = load float, ptr %gep.A.0, align 4
+  %gep.A.1 = getelementptr inbounds nuw i8, ptr %gep.A.0, i64 4
+  %l.A.1 = load float, ptr %gep.A.1, align 4
+  %l.B.0 = load float, ptr %gep.B.0, align 4
+  %add.0 = fadd float %l.A.0, %l.B.0
+  %gep.B.1 = getelementptr inbounds nuw i8, ptr %gep.B.0, i64 4
+  %l.B.1 = load float, ptr %gep.B.1, align 4
+  %add.1 = fadd float %l.A.1, %l.B.1
+  %gep.res.0 = getelementptr inbounds nuw { float, float }, ptr %res, i64 %iv
+  store float %add.0, ptr %gep.res.0, align 4
+  %gep.res.1 = getelementptr inbounds nuw i8, ptr %gep.res.0, i64 4
+  store float %add.1, ptr %gep.res.1, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %ec = icmp eq i64 %iv.next, %N
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
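
Note that @test_complex_add_float is not narrowed: its interleave groups have factor 2 while VF is 4, so the getFactor() == VF condition in isConsecutiveInterleaveGroup fails and the interleaved load/shufflevector sequences in the CHECK lines above remain.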
+
+define void @test_complex_add_double(ptr %res, ptr noalias %A, ptr noalias %B, i64 %N) {
+; CHECK-LABEL: define void @test_complex_add_double(
+; CHECK-SAME: ptr [[RES:%.*]], ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = load <2 x double>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = load <2 x double>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[STRIDED_VEC7:%.*]] = load <2 x double>, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[STRIDED_VEC10:%.*]] = load <2 x double>, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[STRIDED_VEC1]], [[STRIDED_VEC7]]
+; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[STRIDED_VEC4]], [[STRIDED_VEC10]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[TMP1]]
+; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[TMP10]], align 4
+; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[TMP11]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: [[L_A_0:%.*]] = load double, ptr [[GEP_A_0]], align 4
+; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_A_0]], i64 8
+; CHECK-NEXT: [[L_A_1:%.*]] = load double, ptr [[GEP_A_1]], align 4
+; CHECK-NEXT: [[L_B_0:%.*]] = load double, ptr [[GEP_B_0]], align 4
+; CHECK-NEXT: [[ADD_0:%.*]] = fadd double [[L_A_0]], [[L_B_0]]
+; CHECK-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_B_0]], i64 8
+; CHECK-NEXT: [[L_B_1:%.*]] = load double, ptr [[GEP_B_1]], align 4
+; CHECK-NEXT: [[ADD_1:%.*]] = fadd double [[L_A_1]], [[L_B_1]]
+; CHECK-NEXT: [[GEP_RES_0:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[IV]]
+; CHECK-NEXT: store double [[ADD_0]], ptr [[GEP_RES_0]], align 4
+; CHECK-NEXT: [[GEP_RES_1:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_RES_0]], i64 8
+; CHECK-NEXT: store double [[ADD_1]], ptr [[GEP_RES_1]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.A.0 = getelementptr inbounds nuw { double, double }, ptr %A, i64 %iv
+  %gep.B.0 = getelementptr inbounds nuw { double, double }, ptr %B, i64 %iv
+  %l.A.0 = load double, ptr %gep.A.0, align 4
+  %gep.A.1 = getelementptr inbounds nuw i8, ptr %gep.A.0, i64 8
+  %l.A.1 = load double, ptr %gep.A.1, align 4
+  %l.B.0 = load double, ptr %gep.B.0, align 4
+  %add.0 = fadd double %l.A.0, %l.B.0
+  %gep.B.1 = getelementptr inbounds nuw i8, ptr %gep.B.0, i64 8
+  %l.B.1 = load double, ptr %gep.B.1, align 4
+  %add.1 = fadd double %l.A.1, %l.B.1
+  %gep.res.0 = getelementptr inbounds nuw { double, double }, ptr %res, i64 %iv
+  store double %add.0, ptr %gep.res.0, align 4
+  %gep.res.1 = getelementptr inbounds nuw i8, ptr %gep.res.0, i64 8
+  store double %add.1, ptr %gep.res.1, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %ec = icmp eq i64 %iv.next, %N
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
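
Here the transform does apply: with VF = 2 the {double, double} groups have factor == number of members == VF, and 2 x 64 bits fills the 128-bit NEON register, so the interleave groups are rewritten as plain <2 x double> loads and stores with no shufflevector, and the canonical IV steps by 1, as set up at the end of narrowInterleaveGroups.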
