Skip to content

Commit e85fd3c

Browse files
committed
Revert "[LV] Complete load groups and release store groups in presence of dependency"
This reverts commit eaf6117 (D155520). There's an ASAN build failure that needs investigation.
1 parent 334bbc0 commit e85fd3c

File tree

3 files changed

+88
-125
lines changed

3 files changed

+88
-125
lines changed

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 37 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1158,11 +1158,14 @@ void InterleavedAccessInfo::analyzeInterleaving(
11581158
LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
11591159
<< '\n');
11601160
GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
1161-
if (B->mayWriteToMemory())
1162-
StoreGroups.insert(GroupB);
1163-
else
1164-
LoadGroups.insert(GroupB);
1161+
} else if (CompletedLoadGroups.contains(GroupB)) {
1162+
// Skip B if no new instructions can be added to its load group.
1163+
continue;
11651164
}
1165+
if (B->mayWriteToMemory())
1166+
StoreGroups.insert(GroupB);
1167+
else
1168+
LoadGroups.insert(GroupB);
11661169
}
11671170

11681171
for (auto AI = std::next(BI); AI != E; ++AI) {
@@ -1188,59 +1191,38 @@ void InterleavedAccessInfo::analyzeInterleaving(
11881191
// Because accesses (2) and (3) are dependent, we can group (2) with (1)
11891192
// but not with (4). If we did, the dependent access (3) would be within
11901193
// the boundaries of the (2, 4) group.
1191-
//
1192-
auto DependentMember = [&](InterleaveGroup<Instruction> *Group,
1193-
StrideEntry *A) -> Instruction * {
1194-
for (uint32_t Index = 0; Index < Group->getFactor(); ++Index) {
1195-
Instruction *MemberOfGroupB = Group->getMember(Index);
1196-
if (MemberOfGroupB && !canReorderMemAccessesForInterleavedGroups(
1197-
A, &*AccessStrideInfo.find(MemberOfGroupB)))
1198-
return MemberOfGroupB;
1194+
if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
1195+
// If a dependence exists and A is already in a group, we know that A
1196+
// must be a store since A precedes B and WAR dependences are allowed.
1197+
// Thus, A would be sunk below B. We release A's group to prevent this
1198+
// illegal code motion. A will then be free to form another group with
1199+
// instructions that precede it.
1200+
if (isInterleaved(A)) {
1201+
InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
1202+
1203+
LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
1204+
"dependence between " << *A << " and "<< *B << '\n');
1205+
1206+
StoreGroups.remove(StoreGroup);
1207+
releaseGroup(StoreGroup);
11991208
}
1200-
return nullptr;
1201-
};
1202-
1203-
if (A->mayWriteToMemory()) { // Otherwise dependencies are tolerable.
1204-
Instruction *DependentInst = nullptr;
1205-
// If GroupB is a load group, we have to compare AI against all
1206-
// members of GroupB because if any load within GroupB has a dependency
1207-
// on AI, we need to mark GroupB as complete and also release the
1208-
// store GroupA (if A belongs to one). The former prevents incorrect
1209-
// hoisting of load B above store A while the latter prevents incorrect
1210-
// sinking of store A below load B.
1211-
if (GroupB && LoadGroups.contains(GroupB))
1212-
DependentInst = DependentMember(GroupB, &*AI);
1213-
else if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI))
1214-
DependentInst = B;
1215-
1216-
if (DependentInst) {
1217-
auto GroupA = getInterleaveGroup(A);
1218-
// A has a store dependence on B (or on some load within GroupB) and
1219-
// is part of a store group. Release A's group to prevent illegal
1220-
// sinking of A below B. A will then be free to form another group
1221-
// with instructions that precede it.
1222-
if (GroupA && StoreGroups.contains(GroupA)) {
1223-
LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
1224-
"dependence between "
1225-
<< *A << " and " << *DependentInst << '\n');
1226-
StoreGroups.remove(GroupA);
1227-
releaseGroup(GroupA);
1228-
}
1229-
// If B is a load and part of an interleave group, no earlier loads
1230-
// can be added to B's interleave group, because this would mean the
1231-
// DependentInst would move across store A. Mark the interleave group
1232-
// as complete.
1233-
if (GroupB && LoadGroups.contains(GroupB)) {
1234-
LLVM_DEBUG(dbgs() << "LV: Marking interleave group for " << *B
1235-
<< " as complete.\n");
1236-
CompletedLoadGroups.insert(GroupB);
1237-
}
1209+
// If B is a load and part of an interleave group, no earlier loads can
1210+
// be added to B's interleave group, because this would mean the load B
1211+
// would need to be moved across store A. Mark the interleave group as
1212+
// complete.
1213+
if (GroupB && isa<LoadInst>(B)) {
1214+
LLVM_DEBUG(dbgs() << "LV: Marking interleave group for " << *B
1215+
<< " as complete.\n");
1216+
1217+
CompletedLoadGroups.insert(GroupB);
12381218
}
1239-
}
1240-
if (CompletedLoadGroups.contains(GroupB)) {
1241-
// Skip trying to add A to B, continue to look for other conflicting A's
1242-
// in groups to be released.
1243-
continue;
1219+
1220+
// If a dependence exists and A is not already in a group (or it was
1221+
// and we just released it), B might be hoisted above A (if B is a
1222+
// load) or another store might be sunk below A (if B is a store). In
1223+
// either case, we can't add additional instructions to B's group. B
1224+
// will only form a group with instructions that it precedes.
1225+
break;
12441226
}
12451227

12461228
// At this point, we've checked for illegal code motion. If either A or B

llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll

Lines changed: 32 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ exit:
121121
; compare against the obstructing stores (%l2 versus the store) there is no
122122
; dependency. However, the other load in %l2's interleave group (%l3) does
123123
; obstruct with the store.
124+
; FIXME: The test case is currently mis-compiled.
124125
define void @pr63602_2(ptr %arr) {
125126
; CHECK-LABEL: define void @pr63602_2
126127
; CHECK-SAME: (ptr [[ARR:%.*]]) {
@@ -139,64 +140,40 @@ define void @pr63602_2(ptr %arr) {
139140
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 3
140141
; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = add i64 1, [[TMP5]]
141142
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX2]], 0
142-
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX2]], 3
143-
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX2]], 6
144-
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX2]], 9
145-
; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 4
146-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP10]]
147-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
148-
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP12]], align 4
143+
; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], 4
144+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP7]]
145+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -2
146+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP9]], align 4
149147
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
150-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP1]]
151-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP2]]
152-
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP3]]
153-
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP4]]
154-
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 0
148+
; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
149+
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
150+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP1]]
151+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP2]]
152+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP3]]
153+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP4]]
154+
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[STRIDED_VEC4]], i32 0
155+
; CHECK-NEXT: store i32 [[TMP14]], ptr [[TMP10]], align 4
156+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[STRIDED_VEC4]], i32 1
157+
; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP11]], align 4
158+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[STRIDED_VEC4]], i32 2
159+
; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP12]], align 4
160+
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[STRIDED_VEC4]], i32 3
155161
; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP13]], align 4
156-
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 1
157-
; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP14]], align 4
158-
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 2
159-
; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP15]], align 4
160-
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[STRIDED_VEC]], i32 3
161-
; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP16]], align 4
162-
; CHECK-NEXT: [[TMP21:%.*]] = add nuw nsw i64 [[TMP6]], 2
163-
; CHECK-NEXT: [[TMP22:%.*]] = add nuw nsw i64 [[TMP7]], 2
164-
; CHECK-NEXT: [[TMP23:%.*]] = add nuw nsw i64 [[TMP8]], 2
165-
; CHECK-NEXT: [[TMP24:%.*]] = add nuw nsw i64 [[TMP9]], 2
166-
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP21]]
167-
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP22]]
168-
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP23]]
169-
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP24]]
170-
; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP13]], align 4
171-
; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP14]], align 4
172-
; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP15]], align 4
173-
; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP16]], align 4
174-
; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> poison, i32 [[TMP29]], i32 0
175-
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP30]], i32 1
176-
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x i32> [[TMP34]], i32 [[TMP31]], i32 2
177-
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i32> [[TMP35]], i32 [[TMP32]], i32 3
178-
; CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP25]], align 4
179-
; CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP26]], align 4
180-
; CHECK-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP27]], align 4
181-
; CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP28]], align 4
182-
; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> poison, i32 [[TMP37]], i32 0
183-
; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP41]], i32 [[TMP38]], i32 1
184-
; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> [[TMP42]], i32 [[TMP39]], i32 2
185-
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <4 x i32> [[TMP43]], i32 [[TMP40]], i32 3
186-
; CHECK-NEXT: [[TMP45:%.*]] = add <4 x i32> [[TMP36]], [[TMP44]]
187-
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <4 x i32> [[TMP45]], i32 0
188-
; CHECK-NEXT: store i32 [[TMP46]], ptr [[TMP13]], align 4
189-
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x i32> [[TMP45]], i32 1
190-
; CHECK-NEXT: store i32 [[TMP47]], ptr [[TMP14]], align 4
191-
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <4 x i32> [[TMP45]], i32 2
192-
; CHECK-NEXT: store i32 [[TMP48]], ptr [[TMP15]], align 4
193-
; CHECK-NEXT: [[TMP49:%.*]] = extractelement <4 x i32> [[TMP45]], i32 3
194-
; CHECK-NEXT: store i32 [[TMP49]], ptr [[TMP16]], align 4
162+
; CHECK-NEXT: [[TMP18:%.*]] = add <4 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC]]
163+
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[TMP18]], i32 0
164+
; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP10]], align 4
165+
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP18]], i32 1
166+
; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP11]], align 4
167+
; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP18]], i32 2
168+
; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP12]], align 4
169+
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP18]], i32 3
170+
; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP13]], align 4
195171
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
196-
; CHECK-NEXT: [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
197-
; CHECK-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
172+
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
173+
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
198174
; CHECK: middle.block:
199-
; CHECK-NEXT: br label [[SCALAR_PH]]
175+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 17, 16
176+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
200177
; CHECK: scalar.ph:
201178
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 49, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
202179
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 52, [[MIDDLE_BLOCK]] ], [ 4, [[ENTRY]] ]
@@ -218,7 +195,7 @@ define void @pr63602_2(ptr %arr) {
218195
; CHECK-NEXT: store i32 [[ADD]], ptr [[GEP_IV_2]], align 4
219196
; CHECK-NEXT: [[IV_2_NEXT]] = add nuw nsw i64 [[IV_2]], 3
220197
; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i64 [[IV_2]], 50
221-
; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
198+
; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
222199
; CHECK: exit:
223200
; CHECK-NEXT: ret void
224201
;

llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,15 @@
33
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
44
target triple = "x86_64-apple-macos"
55

6-
; %l2 load and the preceeding store has a dependency. We should not sink
6+
; This is currently miscompiled.
7+
; The %l2 load and the preceding store have a dependency. However, we currently sink
78
; that store into the last store (by creating an interleaved store group). This
8-
; means the loaded %l2 will have incorrect value.
9+
; means the loaded %l2 has an incorrect value.
10+
; We do not release this store group correctly because the next interleave group
11+
; chosen compares only the memory access of the last load in program order (%l3) against the dependent store location
12+
; (%gep.iv.1.plus.2) and they are different, thereby incorrectly assuming no
13+
; dependency. We need to compare against all loads in that interleaved group
14+
; (%l2 is part of it).
915
define void @avoid_sinking_store_across_load(ptr %arr) {
1016
; CHECK-LABEL: define void @avoid_sinking_store_across_load
1117
; CHECK-SAME: (ptr [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -22,28 +28,26 @@ define void @avoid_sinking_store_across_load(ptr %arr) {
2228
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
2329
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 4
2430
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP2]]
25-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
31+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 -2
2632
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP4]], align 4
2733
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
34+
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
35+
; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
2836
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[ARR]], <4 x i64> [[VEC_IND2]]
2937
; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
3038
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[ARR]], <4 x i64> [[TMP6]]
31-
; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[STRIDED_VEC]], <i32 25, i32 25, i32 25, i32 25>
39+
; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[STRIDED_VEC5]], <i32 25, i32 25, i32 25, i32 25>
3240
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP8]], <4 x ptr> [[TMP7]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
33-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x ptr> [[TMP7]], i32 0
34-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 0
35-
; CHECK-NEXT: [[WIDE_VEC4:%.*]] = load <12 x i32>, ptr [[TMP10]], align 4
36-
; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> [[WIDE_VEC4]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
37-
; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <12 x i32> [[WIDE_VEC4]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
38-
; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> [[STRIDED_VEC6]], [[STRIDED_VEC5]]
39-
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP11]], <4 x ptr> [[TMP5]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
41+
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC]]
42+
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP9]], <4 x ptr> [[TMP5]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
4043
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
4144
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
4245
; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 12, i64 12, i64 12, i64 12>
43-
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
44-
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
46+
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
47+
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4548
; CHECK: middle.block:
46-
; CHECK-NEXT: br label [[SCALAR_PH]]
49+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 17, 16
50+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4751
; CHECK: scalar.ph:
4852
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 49, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
4953
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ 52, [[MIDDLE_BLOCK]] ], [ 4, [[ENTRY]] ]
@@ -66,7 +70,7 @@ define void @avoid_sinking_store_across_load(ptr %arr) {
6670
; CHECK-NEXT: store i32 [[ADD]], ptr [[GEP_IV_2]], align 4
6771
; CHECK-NEXT: [[IV_2_NEXT]] = add nuw nsw i64 [[IV_2]], 3
6872
; CHECK-NEXT: [[ICMP:%.*]] = icmp ugt i64 [[IV_2]], 50
69-
; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
73+
; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
7074
; CHECK: exit:
7175
; CHECK-NEXT: ret void
7276
;

0 commit comments

Comments
 (0)