Skip to content

UTC: support debug output from LDist #93208

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 29 additions & 29 deletions llvm/lib/Transforms/Scalar/LoopDistribute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
Expand Down Expand Up @@ -120,7 +120,7 @@ namespace {
/// Maintains the set of instructions of the loop for a partition before
/// cloning. After cloning, it hosts the new loop.
class InstPartition {
using InstructionSet = SmallPtrSet<Instruction *, 8>;
using InstructionSet = SmallSetVector<Instruction *, 8>;

public:
InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
Expand Down Expand Up @@ -166,7 +166,7 @@ class InstPartition {
// Insert instructions from the loop that we depend on.
for (Value *V : I->operand_values()) {
auto *I = dyn_cast<Instruction>(V);
if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
if (I && OrigLoop->contains(I->getParent()) && Set.insert(I))
Worklist.push_back(I);
}
}
Expand Down Expand Up @@ -231,17 +231,16 @@ class InstPartition {
}
}

void print() const {
if (DepCycle)
dbgs() << " (cycle)\n";
void print(raw_ostream &OS) const {
OS << (DepCycle ? " (cycle)\n" : "\n");
for (auto *I : Set)
// Prefix with the block name.
dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
OS << " " << I->getParent()->getName() << ":" << *I << "\n";
}

void printBlocks() const {
void printBlocks(raw_ostream &OS) const {
for (auto *BB : getDistributedLoop()->getBlocks())
dbgs() << *BB;
OS << *BB;
}

private:
Expand Down Expand Up @@ -368,11 +367,11 @@ class InstPartitionContainer {
std::tie(LoadToPart, NewElt) =
LoadToPartition.insert(std::make_pair(Inst, PartI));
if (!NewElt) {
LLVM_DEBUG(dbgs()
<< "Merging partitions due to this load in multiple "
<< "partitions: " << PartI << ", " << LoadToPart->second
<< "\n"
<< *Inst << "\n");
LLVM_DEBUG(
dbgs()
<< "LDist: Merging partitions due to this load in multiple "
<< "partitions: " << PartI << ", " << LoadToPart->second << "\n"
<< *Inst << "\n");

auto PartJ = I;
do {
Expand Down Expand Up @@ -530,8 +529,8 @@ class InstPartitionContainer {
void print(raw_ostream &OS) const {
unsigned Index = 0;
for (const auto &P : PartitionContainer) {
OS << "Partition " << Index++ << " (" << &P << "):\n";
P.print();
OS << "LDist: Partition " << Index++ << ":";
P.print(OS);
}
}

Expand All @@ -545,11 +544,11 @@ class InstPartitionContainer {
}
#endif

void printBlocks() const {
void printBlocks(raw_ostream &OS) const {
unsigned Index = 0;
for (const auto &P : PartitionContainer) {
dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
P.printBlocks();
OS << "LDist: Partition " << Index++ << ":";
P.printBlocks(OS);
}
}

Expand Down Expand Up @@ -628,7 +627,7 @@ class MemoryInstructionDependences {
const SmallVectorImpl<Dependence> &Dependences) {
Accesses.append(Instructions.begin(), Instructions.end());

LLVM_DEBUG(dbgs() << "Backward dependences:\n");
LLVM_DEBUG(dbgs() << "LDist: Backward dependences:\n");
for (const auto &Dep : Dependences)
if (Dep.isPossiblyBackward()) {
// Note that the designations source and destination follow the program
Expand Down Expand Up @@ -738,27 +737,27 @@ class LoopDistributeForLoop {
for (auto *Inst : DefsUsedOutside)
Partitions.addToNewNonCyclicPartition(Inst);

LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
LLVM_DEBUG(dbgs() << "LDist: Seeded partitions:\n" << Partitions);
if (Partitions.getSize() < 2)
return fail("CantIsolateUnsafeDeps",
"cannot isolate unsafe dependencies");

// Run the merge heuristics: Merge non-cyclic adjacent partitions since we
// should be able to vectorize these together.
Partitions.mergeBeforePopulating();
LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
LLVM_DEBUG(dbgs() << "LDist: Merged partitions:\n" << Partitions);
if (Partitions.getSize() < 2)
return fail("CantIsolateUnsafeDeps",
"cannot isolate unsafe dependencies");

// Now, populate the partitions with non-memory operations.
Partitions.populateUsedSet();
LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
LLVM_DEBUG(dbgs() << "LDist: Populated partitions:\n" << Partitions);

// In order to preserve original lexical order for loads, keep them in the
// partition that we set up in the MemoryInstructionDependences loop.
if (Partitions.mergeToAvoidDuplicatedLoads()) {
LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
LLVM_DEBUG(dbgs() << "LDist: Partitions merged to ensure unique loads:\n"
<< Partitions);
if (Partitions.getSize() < 2)
return fail("CantIsolateUnsafeDeps",
Expand All @@ -782,7 +781,8 @@ class LoopDistributeForLoop {
if (!IsForced.value_or(false) && hasDisableAllTransformsHint(L))
return fail("HeuristicDisabled", "distribution heuristic disabled");

LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
LLVM_DEBUG(dbgs() << "LDist: Distributing loop: "
<< L->getHeader()->getName() << "\n");
// We're done forming the partitions; set up the reverse mapping from
// instructions to partitions.
Partitions.setupPartitionIdOnInstructions();
Expand Down Expand Up @@ -810,7 +810,7 @@ class LoopDistributeForLoop {

MDNode *OrigLoopID = L->getLoopID();

LLVM_DEBUG(dbgs() << "\nPointers:\n");
LLVM_DEBUG(dbgs() << "LDist: Pointers:\n");
LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
LVer.versionLoop(DefsUsedOutside);
Expand All @@ -833,8 +833,8 @@ class LoopDistributeForLoop {
// Now, we remove the instructions from each loop that don't belong to that
// partition.
Partitions.removeUnusedInsts();
LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
LLVM_DEBUG(Partitions.printBlocks());
LLVM_DEBUG(dbgs() << "LDist: After removing unused Instrs:\n");
LLVM_DEBUG(Partitions.printBlocks(dbgs()));

if (LDistVerify) {
LI->verify(*DT);
Expand All @@ -856,7 +856,7 @@ class LoopDistributeForLoop {
LLVMContext &Ctx = F->getContext();
bool Forced = isForced().value_or(false);

LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
LLVM_DEBUG(dbgs() << "LDist: Skipping; " << Message << "\n");

// With Rpass-missed, report that distribution failed.
ORE->emit([&]() {
Expand Down
94 changes: 83 additions & 11 deletions llvm/test/Transforms/LoopDistribute/debug-print.ll
Original file line number Diff line number Diff line change
@@ -1,20 +1,92 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; REQUIRES: asserts
; RUN: opt -passes=loop-distribute -enable-loop-distribute \
; RUN: -debug-only=loop-distribute -disable-output 2>&1 %s | FileCheck %s

define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, i64 %stride) {
; CHECK-LABEL: 'f'
; CHECK: LDist: Found a candidate loop: for.body
; CHECK: Backward dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: %load.a = load i32, ptr %gep.a, align 4 ->
; CHECK-NEXT: store i32 %mul.a, ptr %gep.a.plus4, align 4
; CHECK: Seeded partitions:
; CHECK: Partition 0
; CHECK: Partition 1
; CHECK: Partition 2
; CHECK: Partition 3
; CHECK: Distributing loop
; CHECK-NEXT: LDist: Found a candidate loop: for.body
; CHECK-NEXT: LDist: Backward dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: %load.a = load i32, ptr %gep.a, align 4 ->
; CHECK-NEXT: store i32 %mul.a, ptr %gep.a.plus4, align 4
; CHECK-NEXT: LDist: Seeded partitions:
; CHECK-NEXT: LDist: Partition 0: (cycle)
; CHECK-NEXT: for.body: %load.a = load i32, ptr %gep.a, align 4
; CHECK-NEXT: for.body: %load.b = load i32, ptr %gep.b, align 4
; CHECK-NEXT: for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
; CHECK-NEXT: LDist: Partition 1:
; CHECK-NEXT: for.body: %loadD = load i32, ptr %gep.d, align 4
; CHECK-NEXT: LDist: Partition 2:
; CHECK-NEXT: for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
; CHECK-NEXT: LDist: Partition 3:
; CHECK-NEXT: for.body: store i32 %mul.c, ptr %gep.c, align 4
; CHECK-NEXT: LDist: Merged partitions:
; CHECK-NEXT: LDist: Partition 0: (cycle)
; CHECK-NEXT: for.body: %load.a = load i32, ptr %gep.a, align 4
; CHECK-NEXT: for.body: %load.b = load i32, ptr %gep.b, align 4
; CHECK-NEXT: for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
; CHECK-NEXT: LDist: Partition 1:
; CHECK-NEXT: for.body: %loadD = load i32, ptr %gep.d, align 4
; CHECK-NEXT: for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
; CHECK-NEXT: for.body: store i32 %mul.c, ptr %gep.c, align 4
; CHECK-NEXT: LDist: Populated partitions:
; CHECK-NEXT: LDist: Partition 0: (cycle)
; CHECK-NEXT: for.body: %load.a = load i32, ptr %gep.a, align 4
; CHECK-NEXT: for.body: %load.b = load i32, ptr %gep.b, align 4
; CHECK-NEXT: for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
; CHECK-NEXT: for.body: br i1 %exitcond, label %exit, label %for.body
; CHECK-NEXT: for.body: %exitcond = icmp eq i64 %add, 20
; CHECK-NEXT: for.body: %add = add nuw nsw i64 %ind, 1
; CHECK-NEXT: for.body: %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
; CHECK-NEXT: for.body: %mul.a = mul i32 %load.b, %load.a
; CHECK-NEXT: for.body: %gep.a.plus4 = getelementptr inbounds i32, ptr %a, i64 %add
; CHECK-NEXT: for.body: %gep.b = getelementptr inbounds i32, ptr %b, i64 %ind
; CHECK-NEXT: for.body: %gep.a = getelementptr inbounds i32, ptr %a, i64 %ind
; CHECK-NEXT: LDist: Partition 1:
; CHECK-NEXT: for.body: %loadD = load i32, ptr %gep.d, align 4
; CHECK-NEXT: for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
; CHECK-NEXT: for.body: store i32 %mul.c, ptr %gep.c, align 4
; CHECK-NEXT: for.body: br i1 %exitcond, label %exit, label %for.body
; CHECK-NEXT: for.body: %exitcond = icmp eq i64 %add, 20
; CHECK-NEXT: for.body: %add = add nuw nsw i64 %ind, 1
; CHECK-NEXT: for.body: %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
; CHECK-NEXT: for.body: %mul.c = mul i32 %loadD, %load.strided.a
; CHECK-NEXT: for.body: %gep.c = getelementptr inbounds i32, ptr %c, i64 %ind
; CHECK-NEXT: for.body: %gep.strided.a = getelementptr inbounds i32, ptr %a, i64 %mul
; CHECK-NEXT: for.body: %mul = mul i64 %ind, %stride
; CHECK-NEXT: for.body: %gep.d = getelementptr inbounds i32, ptr %d, i64 %ind
; CHECK-NEXT: LDist: Distributing loop: for.body
; CHECK-NEXT: LDist: Pointers:
; CHECK-NEXT: LDist: After removing unused Instrs:
; CHECK-NEXT: LDist: Partition 0:
; CHECK-NEXT: for.body.ldist1: ; preds = %for.body.ldist1, %for.body.ph.ldist1
; CHECK-NEXT: %ind.ldist1 = phi i64 [ 0, %for.body.ph.ldist1 ], [ %add.ldist1, %for.body.ldist1 ]
; CHECK-NEXT: %gep.a.ldist1 = getelementptr inbounds i32, ptr %a, i64 %ind.ldist1
; CHECK-NEXT: %load.a.ldist1 = load i32, ptr %gep.a.ldist1, align 4
; CHECK-NEXT: %gep.b.ldist1 = getelementptr inbounds i32, ptr %b, i64 %ind.ldist1
; CHECK-NEXT: %load.b.ldist1 = load i32, ptr %gep.b.ldist1, align 4
; CHECK-NEXT: %mul.a.ldist1 = mul i32 %load.b.ldist1, %load.a.ldist1
; CHECK-NEXT: %add.ldist1 = add nuw nsw i64 %ind.ldist1, 1
; CHECK-NEXT: %gep.a.plus4.ldist1 = getelementptr inbounds i32, ptr %a, i64 %add.ldist1
; CHECK-NEXT: store i32 %mul.a.ldist1, ptr %gep.a.plus4.ldist1, align 4
; CHECK-NEXT: %exitcond.ldist1 = icmp eq i64 %add.ldist1, 20
; CHECK-NEXT: br i1 %exitcond.ldist1, label %for.body.ph, label %for.body.ldist1
; CHECK-NEXT: LDist: Partition 1:
; CHECK-NEXT: for.body: ; preds = %for.body, %for.body.ph
; CHECK-NEXT: %ind = phi i64 [ 0, %for.body.ph ], [ %add, %for.body ]
; CHECK-NEXT: %add = add nuw nsw i64 %ind, 1
; CHECK-NEXT: %gep.d = getelementptr inbounds i32, ptr %d, i64 %ind
; CHECK-NEXT: %loadD = load i32, ptr %gep.d, align 4
; CHECK-NEXT: %mul = mul i64 %ind, %stride
; CHECK-NEXT: %gep.strided.a = getelementptr inbounds i32, ptr %a, i64 %mul
; CHECK-NEXT: %load.strided.a = load i32, ptr %gep.strided.a, align 4
; CHECK-NEXT: %mul.c = mul i32 %loadD, %load.strided.a
; CHECK-NEXT: %gep.c = getelementptr inbounds i32, ptr %c, i64 %ind
; CHECK-NEXT: store i32 %mul.c, ptr %gep.c, align 4
; CHECK-NEXT: %exitcond = icmp eq i64 %add, 20
; CHECK-NEXT: br i1 %exitcond, label %exit.loopexit1, label %for.body
;
entry:
br label %for.body

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
; Runs loop-distribute with its debug output enabled on a loop that contains
; conditional control flow and a load through a pointer phi.
;
; -debug-only is only available in builds with assertions enabled, so the test
; must be restricted to those builds.
; NOTE(review): no FileCheck directives are visible in this file; FileCheck
; rejects a check file without any -- confirm the intended expectations.
; REQUIRES: asserts
; RUN: opt -passes=loop-distribute -enable-loop-distribute \
; RUN: -debug-only=loop-distribute -disable-output 2>&1 %s | FileCheck %s

; Loop over an i16 induction variable that exits once %iv.next reaches 1000.
; %c is loop-invariant and decides whether %if.then runs on each iteration.
define void @ldist(i1 %c, ptr %A, ptr %B, ptr %C) {
entry:
br label %for.body

; Loop header: loads an i16 from %A and stores the same value back to %A.
for.body: ; preds = %if.end, %entry
%iv = phi i16 [ 0, %entry ], [ %iv.next, %if.end ]
%lv = load i16, ptr %A, align 1
store i16 %lv, ptr %A, align 1
br i1 %c, label %if.then, label %if.end

; Conditional block: reloads %A; the loaded value %lv2 has no users.
if.then: ; preds = %for.body
%lv2 = load i16, ptr %A, align 1
br label %if.end

; Latch: %c.sink is %B when coming from %if.then and %C when coming straight
; from %for.body; the load through it (%lv3) has no users.
if.end: ; preds = %if.then, %for.body
%c.sink = phi ptr [ %B, %if.then ], [ %C, %for.body ]
%lv3 = load i16, ptr %c.sink
%iv.next = add nuw nsw i16 %iv, 1
%tobool.not = icmp eq i16 %iv.next, 1000
br i1 %tobool.not, label %for.end.loopexit, label %for.body

for.end.loopexit: ; preds = %if.end
ret void
}
Loading
Loading