Skip to content

Commit 915ee0b

Browse files
authored
UTC: support debug output from LDist (#93208)
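Tweak the LoopDistribute debug output so that every message is prefixed with "LDist: " and the output is deterministic (partitions are printed in insertion order and without pointer addresses), and trivially extend update_analyze_test_checks.py to support this output.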
1 parent edbbc83 commit 915ee0b

File tree

7 files changed

+275
-46
lines changed

7 files changed

+275
-46
lines changed

llvm/lib/Transforms/Scalar/LoopDistribute.cpp

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
#include "llvm/ADT/DepthFirstIterator.h"
2727
#include "llvm/ADT/EquivalenceClasses.h"
2828
#include "llvm/ADT/STLExtras.h"
29-
#include "llvm/ADT/SmallPtrSet.h"
29+
#include "llvm/ADT/SetVector.h"
3030
#include "llvm/ADT/SmallVector.h"
3131
#include "llvm/ADT/Statistic.h"
3232
#include "llvm/ADT/StringRef.h"
@@ -120,7 +120,7 @@ namespace {
120120
/// Maintains the set of instructions of the loop for a partition before
121121
/// cloning. After cloning, it hosts the new loop.
122122
class InstPartition {
123-
using InstructionSet = SmallPtrSet<Instruction *, 8>;
123+
using InstructionSet = SmallSetVector<Instruction *, 8>;
124124

125125
public:
126126
InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
@@ -166,7 +166,7 @@ class InstPartition {
166166
// Insert instructions from the loop that we depend on.
167167
for (Value *V : I->operand_values()) {
168168
auto *I = dyn_cast<Instruction>(V);
169-
if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
169+
if (I && OrigLoop->contains(I->getParent()) && Set.insert(I))
170170
Worklist.push_back(I);
171171
}
172172
}
@@ -231,17 +231,16 @@ class InstPartition {
231231
}
232232
}
233233

234-
void print() const {
235-
if (DepCycle)
236-
dbgs() << " (cycle)\n";
234+
void print(raw_ostream &OS) const {
235+
OS << (DepCycle ? " (cycle)\n" : "\n");
237236
for (auto *I : Set)
238237
// Prefix with the block name.
239-
dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
238+
OS << " " << I->getParent()->getName() << ":" << *I << "\n";
240239
}
241240

242-
void printBlocks() const {
241+
void printBlocks(raw_ostream &OS) const {
243242
for (auto *BB : getDistributedLoop()->getBlocks())
244-
dbgs() << *BB;
243+
OS << *BB;
245244
}
246245

247246
private:
@@ -368,11 +367,11 @@ class InstPartitionContainer {
368367
std::tie(LoadToPart, NewElt) =
369368
LoadToPartition.insert(std::make_pair(Inst, PartI));
370369
if (!NewElt) {
371-
LLVM_DEBUG(dbgs()
372-
<< "Merging partitions due to this load in multiple "
373-
<< "partitions: " << PartI << ", " << LoadToPart->second
374-
<< "\n"
375-
<< *Inst << "\n");
370+
LLVM_DEBUG(
371+
dbgs()
372+
<< "LDist: Merging partitions due to this load in multiple "
373+
<< "partitions: " << PartI << ", " << LoadToPart->second << "\n"
374+
<< *Inst << "\n");
376375

377376
auto PartJ = I;
378377
do {
@@ -530,8 +529,8 @@ class InstPartitionContainer {
530529
void print(raw_ostream &OS) const {
531530
unsigned Index = 0;
532531
for (const auto &P : PartitionContainer) {
533-
OS << "Partition " << Index++ << " (" << &P << "):\n";
534-
P.print();
532+
OS << "LDist: Partition " << Index++ << ":";
533+
P.print(OS);
535534
}
536535
}
537536

@@ -545,11 +544,11 @@ class InstPartitionContainer {
545544
}
546545
#endif
547546

548-
void printBlocks() const {
547+
void printBlocks(raw_ostream &OS) const {
549548
unsigned Index = 0;
550549
for (const auto &P : PartitionContainer) {
551-
dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
552-
P.printBlocks();
550+
OS << "LDist: Partition " << Index++ << ":";
551+
P.printBlocks(OS);
553552
}
554553
}
555554

@@ -628,7 +627,7 @@ class MemoryInstructionDependences {
628627
const SmallVectorImpl<Dependence> &Dependences) {
629628
Accesses.append(Instructions.begin(), Instructions.end());
630629

631-
LLVM_DEBUG(dbgs() << "Backward dependences:\n");
630+
LLVM_DEBUG(dbgs() << "LDist: Backward dependences:\n");
632631
for (const auto &Dep : Dependences)
633632
if (Dep.isPossiblyBackward()) {
634633
// Note that the designations source and destination follow the program
@@ -738,27 +737,27 @@ class LoopDistributeForLoop {
738737
for (auto *Inst : DefsUsedOutside)
739738
Partitions.addToNewNonCyclicPartition(Inst);
740739

741-
LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
740+
LLVM_DEBUG(dbgs() << "LDist: Seeded partitions:\n" << Partitions);
742741
if (Partitions.getSize() < 2)
743742
return fail("CantIsolateUnsafeDeps",
744743
"cannot isolate unsafe dependencies");
745744

746745
// Run the merge heuristics: Merge non-cyclic adjacent partitions since we
747746
// should be able to vectorize these together.
748747
Partitions.mergeBeforePopulating();
749-
LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
748+
LLVM_DEBUG(dbgs() << "LDist: Merged partitions:\n" << Partitions);
750749
if (Partitions.getSize() < 2)
751750
return fail("CantIsolateUnsafeDeps",
752751
"cannot isolate unsafe dependencies");
753752

754753
// Now, populate the partitions with non-memory operations.
755754
Partitions.populateUsedSet();
756-
LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
755+
LLVM_DEBUG(dbgs() << "LDist: Populated partitions:\n" << Partitions);
757756

758757
// In order to preserve original lexical order for loads, keep them in the
759758
// partition that we set up in the MemoryInstructionDependences loop.
760759
if (Partitions.mergeToAvoidDuplicatedLoads()) {
761-
LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
760+
LLVM_DEBUG(dbgs() << "LDist: Partitions merged to ensure unique loads:\n"
762761
<< Partitions);
763762
if (Partitions.getSize() < 2)
764763
return fail("CantIsolateUnsafeDeps",
@@ -782,7 +781,8 @@ class LoopDistributeForLoop {
782781
if (!IsForced.value_or(false) && hasDisableAllTransformsHint(L))
783782
return fail("HeuristicDisabled", "distribution heuristic disabled");
784783

785-
LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
784+
LLVM_DEBUG(dbgs() << "LDist: Distributing loop: "
785+
<< L->getHeader()->getName() << "\n");
786786
// We're done forming the partitions set up the reverse mapping from
787787
// instructions to partitions.
788788
Partitions.setupPartitionIdOnInstructions();
@@ -810,7 +810,7 @@ class LoopDistributeForLoop {
810810

811811
MDNode *OrigLoopID = L->getLoopID();
812812

813-
LLVM_DEBUG(dbgs() << "\nPointers:\n");
813+
LLVM_DEBUG(dbgs() << "LDist: Pointers:\n");
814814
LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
815815
LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
816816
LVer.versionLoop(DefsUsedOutside);
@@ -833,8 +833,8 @@ class LoopDistributeForLoop {
833833
// Now, we remove the instruction from each loop that don't belong to that
834834
// partition.
835835
Partitions.removeUnusedInsts();
836-
LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
837-
LLVM_DEBUG(Partitions.printBlocks());
836+
LLVM_DEBUG(dbgs() << "LDist: After removing unused Instrs:\n");
837+
LLVM_DEBUG(Partitions.printBlocks(dbgs()));
838838

839839
if (LDistVerify) {
840840
LI->verify(*DT);
@@ -856,7 +856,7 @@ class LoopDistributeForLoop {
856856
LLVMContext &Ctx = F->getContext();
857857
bool Forced = isForced().value_or(false);
858858

859-
LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
859+
LLVM_DEBUG(dbgs() << "LDist: Skipping; " << Message << "\n");
860860

861861
// With Rpass-missed report that distribution failed.
862862
ORE->emit([&]() {

llvm/test/Transforms/LoopDistribute/debug-print.ll

Lines changed: 83 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,92 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
12
; REQUIRES: asserts
23
; RUN: opt -passes=loop-distribute -enable-loop-distribute \
34
; RUN: -debug-only=loop-distribute -disable-output 2>&1 %s | FileCheck %s
45

56
define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, i64 %stride) {
67
; CHECK-LABEL: 'f'
7-
; CHECK: LDist: Found a candidate loop: for.body
8-
; CHECK: Backward dependences:
9-
; CHECK-NEXT: Backward:
10-
; CHECK-NEXT: %load.a = load i32, ptr %gep.a, align 4 ->
11-
; CHECK-NEXT: store i32 %mul.a, ptr %gep.a.plus4, align 4
12-
; CHECK: Seeded partitions:
13-
; CHECK: Partition 0
14-
; CHECK: Partition 1
15-
; CHECK: Partition 2
16-
; CHECK: Partition 3
17-
; CHECK: Distributing loop
8+
; CHECK-NEXT: LDist: Found a candidate loop: for.body
9+
; CHECK-NEXT: LDist: Backward dependences:
10+
; CHECK-NEXT: Backward:
11+
; CHECK-NEXT: %load.a = load i32, ptr %gep.a, align 4 ->
12+
; CHECK-NEXT: store i32 %mul.a, ptr %gep.a.plus4, align 4
13+
; CHECK-NEXT: LDist: Seeded partitions:
14+
; CHECK-NEXT: LDist: Partition 0: (cycle)
15+
; CHECK-NEXT: for.body: %load.a = load i32, ptr %gep.a, align 4
16+
; CHECK-NEXT: for.body: %load.b = load i32, ptr %gep.b, align 4
17+
; CHECK-NEXT: for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
18+
; CHECK-NEXT: LDist: Partition 1:
19+
; CHECK-NEXT: for.body: %loadD = load i32, ptr %gep.d, align 4
20+
; CHECK-NEXT: LDist: Partition 2:
21+
; CHECK-NEXT: for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
22+
; CHECK-NEXT: LDist: Partition 3:
23+
; CHECK-NEXT: for.body: store i32 %mul.c, ptr %gep.c, align 4
24+
; CHECK-NEXT: LDist: Merged partitions:
25+
; CHECK-NEXT: LDist: Partition 0: (cycle)
26+
; CHECK-NEXT: for.body: %load.a = load i32, ptr %gep.a, align 4
27+
; CHECK-NEXT: for.body: %load.b = load i32, ptr %gep.b, align 4
28+
; CHECK-NEXT: for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
29+
; CHECK-NEXT: LDist: Partition 1:
30+
; CHECK-NEXT: for.body: %loadD = load i32, ptr %gep.d, align 4
31+
; CHECK-NEXT: for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
32+
; CHECK-NEXT: for.body: store i32 %mul.c, ptr %gep.c, align 4
33+
; CHECK-NEXT: LDist: Populated partitions:
34+
; CHECK-NEXT: LDist: Partition 0: (cycle)
35+
; CHECK-NEXT: for.body: %load.a = load i32, ptr %gep.a, align 4
36+
; CHECK-NEXT: for.body: %load.b = load i32, ptr %gep.b, align 4
37+
; CHECK-NEXT: for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
38+
; CHECK-NEXT: for.body: br i1 %exitcond, label %exit, label %for.body
39+
; CHECK-NEXT: for.body: %exitcond = icmp eq i64 %add, 20
40+
; CHECK-NEXT: for.body: %add = add nuw nsw i64 %ind, 1
41+
; CHECK-NEXT: for.body: %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
42+
; CHECK-NEXT: for.body: %mul.a = mul i32 %load.b, %load.a
43+
; CHECK-NEXT: for.body: %gep.a.plus4 = getelementptr inbounds i32, ptr %a, i64 %add
44+
; CHECK-NEXT: for.body: %gep.b = getelementptr inbounds i32, ptr %b, i64 %ind
45+
; CHECK-NEXT: for.body: %gep.a = getelementptr inbounds i32, ptr %a, i64 %ind
46+
; CHECK-NEXT: LDist: Partition 1:
47+
; CHECK-NEXT: for.body: %loadD = load i32, ptr %gep.d, align 4
48+
; CHECK-NEXT: for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
49+
; CHECK-NEXT: for.body: store i32 %mul.c, ptr %gep.c, align 4
50+
; CHECK-NEXT: for.body: br i1 %exitcond, label %exit, label %for.body
51+
; CHECK-NEXT: for.body: %exitcond = icmp eq i64 %add, 20
52+
; CHECK-NEXT: for.body: %add = add nuw nsw i64 %ind, 1
53+
; CHECK-NEXT: for.body: %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
54+
; CHECK-NEXT: for.body: %mul.c = mul i32 %loadD, %load.strided.a
55+
; CHECK-NEXT: for.body: %gep.c = getelementptr inbounds i32, ptr %c, i64 %ind
56+
; CHECK-NEXT: for.body: %gep.strided.a = getelementptr inbounds i32, ptr %a, i64 %mul
57+
; CHECK-NEXT: for.body: %mul = mul i64 %ind, %stride
58+
; CHECK-NEXT: for.body: %gep.d = getelementptr inbounds i32, ptr %d, i64 %ind
59+
; CHECK-NEXT: LDist: Distributing loop: for.body
60+
; CHECK-NEXT: LDist: Pointers:
61+
; CHECK-NEXT: LDist: After removing unused Instrs:
62+
; CHECK-NEXT: LDist: Partition 0:
63+
; CHECK-NEXT: for.body.ldist1: ; preds = %for.body.ldist1, %for.body.ph.ldist1
64+
; CHECK-NEXT: %ind.ldist1 = phi i64 [ 0, %for.body.ph.ldist1 ], [ %add.ldist1, %for.body.ldist1 ]
65+
; CHECK-NEXT: %gep.a.ldist1 = getelementptr inbounds i32, ptr %a, i64 %ind.ldist1
66+
; CHECK-NEXT: %load.a.ldist1 = load i32, ptr %gep.a.ldist1, align 4
67+
; CHECK-NEXT: %gep.b.ldist1 = getelementptr inbounds i32, ptr %b, i64 %ind.ldist1
68+
; CHECK-NEXT: %load.b.ldist1 = load i32, ptr %gep.b.ldist1, align 4
69+
; CHECK-NEXT: %mul.a.ldist1 = mul i32 %load.b.ldist1, %load.a.ldist1
70+
; CHECK-NEXT: %add.ldist1 = add nuw nsw i64 %ind.ldist1, 1
71+
; CHECK-NEXT: %gep.a.plus4.ldist1 = getelementptr inbounds i32, ptr %a, i64 %add.ldist1
72+
; CHECK-NEXT: store i32 %mul.a.ldist1, ptr %gep.a.plus4.ldist1, align 4
73+
; CHECK-NEXT: %exitcond.ldist1 = icmp eq i64 %add.ldist1, 20
74+
; CHECK-NEXT: br i1 %exitcond.ldist1, label %for.body.ph, label %for.body.ldist1
75+
; CHECK-NEXT: LDist: Partition 1:
76+
; CHECK-NEXT: for.body: ; preds = %for.body, %for.body.ph
77+
; CHECK-NEXT: %ind = phi i64 [ 0, %for.body.ph ], [ %add, %for.body ]
78+
; CHECK-NEXT: %add = add nuw nsw i64 %ind, 1
79+
; CHECK-NEXT: %gep.d = getelementptr inbounds i32, ptr %d, i64 %ind
80+
; CHECK-NEXT: %loadD = load i32, ptr %gep.d, align 4
81+
; CHECK-NEXT: %mul = mul i64 %ind, %stride
82+
; CHECK-NEXT: %gep.strided.a = getelementptr inbounds i32, ptr %a, i64 %mul
83+
; CHECK-NEXT: %load.strided.a = load i32, ptr %gep.strided.a, align 4
84+
; CHECK-NEXT: %mul.c = mul i32 %loadD, %load.strided.a
85+
; CHECK-NEXT: %gep.c = getelementptr inbounds i32, ptr %c, i64 %ind
86+
; CHECK-NEXT: store i32 %mul.c, ptr %gep.c, align 4
87+
; CHECK-NEXT: %exitcond = icmp eq i64 %add, 20
88+
; CHECK-NEXT: br i1 %exitcond, label %exit.loopexit1, label %for.body
89+
;
1890
entry:
1991
br label %for.body
2092

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
; RUN: opt -passes=loop-distribute -enable-loop-distribute \
2+
; RUN: -debug-only=loop-distribute -disable-output 2>&1 %s | FileCheck %s
3+
4+
define void @ldist(i1 %c, ptr %A, ptr %B, ptr %C) {
5+
entry:
6+
br label %for.body
7+
8+
for.body: ; preds = %if.end, %entry
9+
%iv = phi i16 [ 0, %entry ], [ %iv.next, %if.end ]
10+
%lv = load i16, ptr %A, align 1
11+
store i16 %lv, ptr %A, align 1
12+
br i1 %c, label %if.then, label %if.end
13+
14+
if.then: ; preds = %for.body
15+
%lv2 = load i16, ptr %A, align 1
16+
br label %if.end
17+
18+
if.end: ; preds = %if.then, %for.body
19+
%c.sink = phi ptr [ %B, %if.then ], [ %C, %for.body ]
20+
%lv3 = load i16, ptr %c.sink
21+
%iv.next = add nuw nsw i16 %iv, 1
22+
%tobool.not = icmp eq i16 %iv.next, 1000
23+
br i1 %tobool.not, label %for.end.loopexit, label %for.body
24+
25+
for.end.loopexit: ; preds = %if.end
26+
ret void
27+
}

0 commit comments

Comments
 (0)