Skip to content

Commit 9f3c559

Browse files
[SLP] Fix loads sorting for loads from different basic blocks
This patch fixes the lookup for loads from different basic blocks. Originally, the code checked whether the main key (combined with the parent basic block) had been created, but did not include that key in LoadsMap. When the code looked up the load pointer in LoadsMap, it skipped the check for the parent basic block and could mix loads from different basic blocks (but with the same underlying pointer). Currently, this does not lead to any issues, since the code later compares parent basic blocks and sorts the loads properly. However, it increases compile time. Reviewers: RKSimon Reviewed By: RKSimon Pull Request: #111521
1 parent a65a5fe commit 9f3c559

File tree

2 files changed

+13
-13
lines changed

2 files changed

+13
-13
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18588,14 +18588,14 @@ class HorizontalReduction {
1858818588
8>
1858918589
PossibleReducedVals;
1859018590
initReductionOps(Root);
18591-
DenseMap<Value *, SmallVector<LoadInst *>> LoadsMap;
18591+
DenseMap<std::pair<size_t, Value *>, SmallVector<LoadInst *>> LoadsMap;
1859218592
SmallSet<size_t, 2> LoadKeyUsed;
1859318593

1859418594
auto GenerateLoadsSubkey = [&](size_t Key, LoadInst *LI) {
1859518595
Key = hash_combine(hash_value(LI->getParent()), Key);
1859618596
Value *Ptr = getUnderlyingObject(LI->getPointerOperand());
18597-
if (LoadKeyUsed.contains(Key)) {
18598-
auto LIt = LoadsMap.find(Ptr);
18597+
if (!LoadKeyUsed.insert(Key).second) {
18598+
auto LIt = LoadsMap.find(std::make_pair(Key, Ptr));
1859918599
if (LIt != LoadsMap.end()) {
1860018600
for (LoadInst *RLI : LIt->second) {
1860118601
if (getPointersDiff(RLI->getType(), RLI->getPointerOperand(),
@@ -18617,8 +18617,8 @@ class HorizontalReduction {
1861718617
}
1861818618
}
1861918619
}
18620-
LoadKeyUsed.insert(Key);
18621-
LoadsMap.try_emplace(Ptr).first->second.push_back(LI);
18620+
LoadsMap.try_emplace(std::make_pair(Key, Ptr))
18621+
.first->second.push_back(LI);
1862218622
return hash_value(LI->getPointerOperand());
1862318623
};
1862418624

llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -980,10 +980,10 @@ define i32 @maxi8_wrong_parent(i32) {
980980
; SSE4-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
981981
; SSE4-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
982982
; SSE4-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
983-
; SSE4-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP7]], [[TMP2]]
984-
; SSE4-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP7]], i32 [[TMP2]]
985-
; SSE4-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
986-
; SSE4-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP5]], i32 [[TMP6]]
983+
; SSE4-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP7]], [[TMP5]]
984+
; SSE4-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP7]], i32 [[TMP5]]
985+
; SSE4-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP6]], [[TMP2]]
986+
; SSE4-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP6]], i32 [[TMP2]]
987987
; SSE4-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
988988
; SSE4-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
989989
; SSE4-NEXT: [[OP_RDX6:%.*]] = icmp sgt i32 [[OP_RDX5]], [[TMP3]]
@@ -999,10 +999,10 @@ define i32 @maxi8_wrong_parent(i32) {
999999
; AVX-NEXT: [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8
10001000
; AVX-NEXT: [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 7), align 4
10011001
; AVX-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
1002-
; AVX-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP7]], [[TMP2]]
1003-
; AVX-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP7]], i32 [[TMP2]]
1004-
; AVX-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]]
1005-
; AVX-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP5]], i32 [[TMP6]]
1002+
; AVX-NEXT: [[OP_RDX:%.*]] = icmp sgt i32 [[TMP7]], [[TMP5]]
1003+
; AVX-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 [[TMP7]], i32 [[TMP5]]
1004+
; AVX-NEXT: [[OP_RDX2:%.*]] = icmp sgt i32 [[TMP6]], [[TMP2]]
1005+
; AVX-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[TMP6]], i32 [[TMP2]]
10061006
; AVX-NEXT: [[OP_RDX4:%.*]] = icmp sgt i32 [[OP_RDX1]], [[OP_RDX3]]
10071007
; AVX-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]]
10081008
; AVX-NEXT: [[OP_RDX6:%.*]] = icmp sgt i32 [[OP_RDX5]], [[TMP3]]

0 commit comments

Comments
 (0)