Skip to content

Commit 1472d3c

Browse files
committed
Fix memory error in Vectorizer::mergeEquivalenceClasses()
The issue is resolved by reversing the order of two lines, which now read: `const auto &VecTo = EQClasses[KeyTo];` followed by `const auto &VecFrom = EQClasses[KeyFrom];`. Unlike the entry for KeyFrom, the entry for KeyTo may not exist yet when the subscript operator is applied; in that case the operator inserts a new entry, which can invalidate a reference obtained earlier from the same map. Therefore, the vector of instructions for KeyTo must be requested first to avoid invalidating the reference to the instructions vector for KeyFrom. This commit also slightly adjusts the debug output and adds one more LIT test case.
1 parent aba407e commit 1472d3c

File tree

2 files changed

+47
-13
lines changed

2 files changed

+47
-13
lines changed

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1345,20 +1345,17 @@ void Vectorizer::mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const {
13451345
LLVM_DEBUG({
13461346
dbgs() << "LSV: mergeEquivalenceClasses: before merging:\n";
13471347
for (const auto &EC : EQClasses) {
1348-
dbgs() << " Key: ([" << std::get<0>(EC.first)
1349-
<< "]: " << *std::get<0>(EC.first) << ", " << std::get<1>(EC.first)
1350-
<< ", " << std::get<2>(EC.first) << ", "
1351-
<< static_cast<int>(std::get<3>(EC.first)) << ")\n";
1348+
dbgs() << " Key: {" << EC.first << "}\n";
13521349
for (const auto &Inst : EC.second)
1353-
dbgs() << "\tInst: " << *Inst << '\n';
1350+
dbgs() << " Inst: " << *Inst << '\n';
13541351
}
13551352
});
13561353
LLVM_DEBUG({
13571354
dbgs() << "LSV: mergeEquivalenceClasses: RedKeyToUOMap:\n";
13581355
for (const auto &RedKeyToUO : RedKeyToUOMap) {
1359-
dbgs() << " Reduced key: (" << std::get<0>(RedKeyToUO.first) << ", "
1356+
dbgs() << " Reduced key: {" << std::get<0>(RedKeyToUO.first) << ", "
13601357
<< std::get<1>(RedKeyToUO.first) << ", "
1361-
<< static_cast<int>(std::get<2>(RedKeyToUO.first)) << ") --> "
1358+
<< static_cast<int>(std::get<2>(RedKeyToUO.first)) << "} --> "
13621359
<< RedKeyToUO.second.size() << " underlying objects:\n";
13631360
for (auto UObject : RedKeyToUO.second)
13641361
dbgs() << " [" << UObject << "]: " << *UObject << '\n';
@@ -1402,8 +1399,10 @@ void Vectorizer::mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const {
14021399
std::get<2>(RedKey)};
14031400
EqClassKey KeyTo{UltimateTarget, std::get<0>(RedKey), std::get<1>(RedKey),
14041401
std::get<2>(RedKey)};
1405-
const auto &VecFrom = EQClasses[KeyFrom];
1402+
// The entry for KeyFrom is guaranteed to exist, unlike KeyTo. Thus,
1403+
// request the reference to the instructions vector for KeyTo first.
14061404
const auto &VecTo = EQClasses[KeyTo];
1405+
const auto &VecFrom = EQClasses[KeyFrom];
14071406
SmallVector<Instruction *, 8> MergedVec;
14081407
std::merge(VecFrom.begin(), VecFrom.end(), VecTo.begin(), VecTo.end(),
14091408
std::back_inserter(MergedVec),
@@ -1417,12 +1416,9 @@ void Vectorizer::mergeEquivalenceClasses(EquivalenceClassMap &EQClasses) const {
14171416
LLVM_DEBUG({
14181417
dbgs() << "LSV: mergeEquivalenceClasses: after merging:\n";
14191418
for (const auto &EC : EQClasses) {
1420-
dbgs() << " Key: ([" << std::get<0>(EC.first)
1421-
<< "]: " << *std::get<0>(EC.first) << ", " << std::get<1>(EC.first)
1422-
<< ", " << std::get<2>(EC.first) << ", "
1423-
<< static_cast<int>(std::get<3>(EC.first)) << ")\n";
1419+
dbgs() << " Key: {" << EC.first << "}\n";
14241420
for (const auto &Inst : EC.second)
1425-
dbgs() << "\tInst: " << *Inst << '\n';
1421+
dbgs() << " Inst: " << *Inst << '\n';
14261422
}
14271423
});
14281424
}

llvm/test/Transforms/LoadStoreVectorizer/X86/massive_indirection.ll

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,3 +140,41 @@ define void @v1_4_4_4_2_1_to_v8_8_levels_6_7(i32 %arg0, ptr addrspace(3) align 1
140140
.exit_point:
141141
ret void
142142
}
143+
144+
; The regression test for merging equivalence classes. It is reduced and adapted
145+
; for LSV from llvm/test/CodeGen/NVPTX/variadics-backend.ll, which failed at
146+
; post-commit checks with memory sanitizer on the initial attempt to implement
147+
; the merging of the equivalence classes.
148+
define void @variadics1(ptr %vlist) {
149+
; CHECK-LABEL: define void @variadics1(
150+
; CHECK-SAME: ptr [[VLIST:%.*]]) #[[ATTR0]] {
151+
; CHECK-NEXT: [[ARGP_CUR7_ALIGNED2:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[VLIST]], i64 0)
152+
; CHECK-NEXT: [[ARGP_NEXT8:%.*]] = getelementptr i8, ptr [[ARGP_CUR7_ALIGNED2]], i64 8
153+
; CHECK-NEXT: [[X0:%.*]] = getelementptr i8, ptr [[ARGP_NEXT8]], i32 7
154+
; CHECK-NEXT: [[ARGP_CUR11_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[X0]], i64 0)
155+
; CHECK-NEXT: [[ARGP_NEXT12:%.*]] = getelementptr i8, ptr [[ARGP_CUR11_ALIGNED]], i64 8
156+
; CHECK-NEXT: [[X2:%.*]] = getelementptr i8, ptr [[ARGP_NEXT12]], i32 7
157+
; CHECK-NEXT: [[ARGP_CUR16_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[X2]], i64 0)
158+
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARGP_CUR16_ALIGNED]], align 4294967296
159+
; CHECK-NEXT: [[X31:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
160+
; CHECK-NEXT: [[X42:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
161+
; CHECK-NEXT: [[X5:%.*]] = fadd double [[X42]], [[X31]]
162+
; CHECK-NEXT: store double [[X5]], ptr null, align 8
163+
; CHECK-NEXT: ret void
164+
;
165+
%argp.cur7.aligned2 = call ptr @llvm.ptrmask.p0.i64(ptr %vlist, i64 0)
166+
%argp.next8 = getelementptr i8, ptr %argp.cur7.aligned2, i64 8
167+
%x0 = getelementptr i8, ptr %argp.next8, i32 7
168+
%argp.cur11.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %x0, i64 0)
169+
%argp.next12 = getelementptr i8, ptr %argp.cur11.aligned, i64 8
170+
%x2 = getelementptr i8, ptr %argp.next12, i32 7
171+
%argp.cur16.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %x2, i64 0)
172+
%x3 = load double, ptr %argp.cur16.aligned, align 8
173+
%argp.cur16.aligned_off8 = getelementptr i8, ptr %argp.cur16.aligned, i32 8
174+
%x4 = load double, ptr %argp.cur16.aligned_off8, align 8
175+
%x5 = fadd double %x4, %x3
176+
store double %x5, ptr null, align 8
177+
ret void
178+
}
179+
180+
declare ptr @llvm.ptrmask.p0.i64(ptr, i64)

0 commit comments

Comments
 (0)