Skip to content

Commit e051b2d

Browse files
committed
[SLP] Cluster SortedBases before sorting.
To enforce a strict weak ordering, this patch clusters the bases being sorted according to their root, i.e. the first value in a GEP chain. The sorting is then performed within each cluster.
1 parent 8425aa2 commit e051b2d

File tree

2 files changed

+34
-19
lines changed

2 files changed

+34
-19
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4843,25 +4843,40 @@ static bool clusterSortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
48434843
return false;
48444844

48454845
// If we have a better order, also sort the base pointers by increasing
4846-
// (variable) values if possible, to try and keep the order more regular.
4847-
SmallVector<std::pair<Value *, Value *>> SortedBases;
4848-
for (auto &Base : Bases)
4849-
SortedBases.emplace_back(Base.first,
4850-
Base.first->stripInBoundsConstantOffsets());
4851-
llvm::stable_sort(SortedBases, [](std::pair<Value *, Value *> V1,
4852-
std::pair<Value *, Value *> V2) {
4853-
const Value *V = V2.second;
4854-
while (auto *Gep = dyn_cast<GetElementPtrInst>(V)) {
4855-
if (Gep->getOperand(0) == V1.second)
4856-
return true;
4857-
V = Gep->getOperand(0);
4846+
// (variable) values if possible, to try and keep the order more regular. In
4847+
// order to create a valid strict-weak order we cluster by the Root of gep
4848+
// chains and sort within each.
4849+
SmallVector<std::tuple<Value *, Value *, Value *>> SortedBases;
4850+
for (auto &Base : Bases) {
4851+
Value *Strip = Base.first->stripInBoundsConstantOffsets();
4852+
Value *Root = Strip;
4853+
while (auto *Gep = dyn_cast<GetElementPtrInst>(Root))
4854+
Root = Gep->getOperand(0);
4855+
SortedBases.emplace_back(Base.first, Strip, Root);
4856+
}
4857+
if (SortedBases.size() <= 16) {
4858+
auto Begin = SortedBases.begin();
4859+
auto End = SortedBases.end();
4860+
while (Begin != End) {
4861+
Value *Root = std::get<2>(*Begin);
4862+
auto Mid = std::stable_partition(
4863+
Begin, End, [&Root](auto V) { return std::get<2>(V) == Root; });
4864+
std::stable_sort(Begin, Mid, [](auto V1, auto V2) {
4865+
const Value *V = std::get<1>(V2);
4866+
while (auto *Gep = dyn_cast<GetElementPtrInst>(V)) {
4867+
if (Gep->getOperand(0) == std::get<1>(V1))
4868+
return true;
4869+
V = Gep->getOperand(0);
4870+
}
4871+
return false;
4872+
});
4873+
Begin = Mid;
48584874
}
4859-
return false;
4860-
});
4875+
}
48614876

48624877
// Collect the final order of sorted indices
48634878
for (auto Base : SortedBases)
4864-
for (auto &T : Bases[Base.first])
4879+
for (auto &T : Bases[std::get<0>(Base)])
48654880
SortedIndices.push_back(std::get<2>(T));
48664881

48674882
assert(SortedIndices.size() == VL.size() &&

llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -428,14 +428,14 @@ define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound
428428
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1
429429
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1
430430
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1
431-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
432-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
431+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
432+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
433433
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i8> [[TMP8]], <16 x i8> [[TMP9]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
434434
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
435435
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
436436
; CHECK-NEXT: [[TMP13:%.*]] = zext <16 x i8> [[TMP12]] to <16 x i32>
437-
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
438-
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
437+
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
438+
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
439439
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
440440
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
441441
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x i8> [[TMP16]], <16 x i8> [[TMP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>

0 commit comments

Comments
 (0)