-
Notifications
You must be signed in to change notification settings - Fork 14.3k
UTC: support debug output from LDist #93208
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: Ramkumar Ramachandra (artagnon) Changes: Tweak the LoopDistribute debug output to be stable, and extend update_analyze_test_checks.py trivially to support this output. -- 8< -- Patch is 38.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93208.diff 15 Files Affected:
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 52084630560c5..269a9efeac642 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -385,6 +385,11 @@ class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase<BasicBlock, Loop> {
/// Return the source code span of the loop.
LocRange getLocRange() const;
+ /// Return a string containing the location of the loop (file name + line
+ /// number if present, otherwise module name). Meant to be used for debug
+ /// printing within LLVM_DEBUG.
+ std::string getLocStr() const;
+
StringRef getName() const {
if (BasicBlock *Header = getHeader())
if (Header->hasName())
@@ -690,7 +695,6 @@ llvm::MDNode *
makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID,
llvm::ArrayRef<llvm::StringRef> RemovePrefixes,
llvm::ArrayRef<llvm::MDNode *> AddAttrs);
-
} // namespace llvm
#endif
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index bc8b9b8479e4f..a537805bfd8b0 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2374,9 +2374,9 @@ void MemoryDepChecker::Dependence::print(
bool LoopAccessInfo::canAnalyzeLoop() {
// We need to have a loop header.
- LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
- << TheLoop->getHeader()->getParent()->getName() << ": "
- << TheLoop->getHeader()->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "\nLAA: Checking a loop in '"
+ << TheLoop->getHeader()->getParent()->getName() << "' from "
+ << TheLoop->getLocStr() << "\n");
// We can only analyze innermost loops.
if (!TheLoop->isInnermost()) {
@@ -2403,6 +2403,8 @@ bool LoopAccessInfo::canAnalyzeLoop() {
return false;
}
+ LLVM_DEBUG(dbgs() << "LAA: Found a loop: " << TheLoop->getHeader()->getName()
+ << "\n");
return true;
}
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 369ab087ffc0f..d7bddb0bae1b6 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -663,6 +663,17 @@ Loop::LocRange Loop::getLocRange() const {
return LocRange();
}
+std::string Loop::getLocStr() const {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ if (const DebugLoc LoopDbgLoc = getStartLoc())
+ LoopDbgLoc.print(OS);
+ else
+ // Just print the module name.
+ OS << getHeader()->getParent()->getParent()->getModuleIdentifier();
+ return Result;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void Loop::dump() const { print(dbgs()); }
diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 626888c74bad8..abd46dc6c1b63 100644
--- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -26,7 +26,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
@@ -120,7 +120,7 @@ namespace {
/// Maintains the set of instructions of the loop for a partition before
/// cloning. After cloning, it hosts the new loop.
class InstPartition {
- using InstructionSet = SmallPtrSet<Instruction *, 8>;
+ using InstructionSet = SetVector<Instruction *>;
public:
InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
@@ -166,7 +166,7 @@ class InstPartition {
// Insert instructions from the loop that we depend on.
for (Value *V : I->operand_values()) {
auto *I = dyn_cast<Instruction>(V);
- if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
+ if (I && OrigLoop->contains(I->getParent()) && Set.insert(I))
Worklist.push_back(I);
}
}
@@ -231,17 +231,16 @@ class InstPartition {
}
}
- void print() const {
- if (DepCycle)
- dbgs() << " (cycle)\n";
+ void print(raw_ostream &OS) const {
+ OS << (DepCycle ? " (cycle)\n" : "\n");
for (auto *I : Set)
// Prefix with the block name.
- dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
+ OS << " " << I->getParent()->getName() << ":" << *I << "\n";
}
- void printBlocks() const {
+ void printBlocks(raw_ostream &OS) const {
for (auto *BB : getDistributedLoop()->getBlocks())
- dbgs() << *BB;
+ OS << *BB;
}
private:
@@ -368,11 +367,11 @@ class InstPartitionContainer {
std::tie(LoadToPart, NewElt) =
LoadToPartition.insert(std::make_pair(Inst, PartI));
if (!NewElt) {
- LLVM_DEBUG(dbgs()
- << "Merging partitions due to this load in multiple "
- << "partitions: " << PartI << ", " << LoadToPart->second
- << "\n"
- << *Inst << "\n");
+ LLVM_DEBUG(
+ dbgs()
+ << "LDist: Merging partitions due to this load in multiple "
+ << "partitions: " << PartI << ", " << LoadToPart->second << "\n"
+ << *Inst << "\n");
auto PartJ = I;
do {
@@ -530,8 +529,8 @@ class InstPartitionContainer {
void print(raw_ostream &OS) const {
unsigned Index = 0;
for (const auto &P : PartitionContainer) {
- OS << "Partition " << Index++ << " (" << &P << "):\n";
- P.print();
+ OS << "LDist: Partition " << Index++ << ":";
+ P.print(OS);
}
}
@@ -545,11 +544,11 @@ class InstPartitionContainer {
}
#endif
- void printBlocks() const {
+ void printBlocks(raw_ostream &OS) const {
unsigned Index = 0;
for (const auto &P : PartitionContainer) {
- dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
- P.printBlocks();
+ OS << "LDist: Partition " << Index++ << ":";
+ P.printBlocks(OS);
}
}
@@ -628,7 +627,7 @@ class MemoryInstructionDependences {
const SmallVectorImpl<Dependence> &Dependences) {
Accesses.append(Instructions.begin(), Instructions.end());
- LLVM_DEBUG(dbgs() << "Backward dependences:\n");
+ LLVM_DEBUG(dbgs() << "LDist: Backward dependences:\n");
for (const auto &Dep : Dependences)
if (Dep.isPossiblyBackward()) {
// Note that the designations source and destination follow the program
@@ -659,9 +658,9 @@ class LoopDistributeForLoop {
bool processLoop() {
assert(L->isInnermost() && "Only process inner loops.");
- LLVM_DEBUG(dbgs() << "\nLDist: In \""
- << L->getHeader()->getParent()->getName()
- << "\" checking " << *L << "\n");
+ LLVM_DEBUG(dbgs() << "\nLDist: Checking a loop in '"
+ << L->getHeader()->getParent()->getName() << "' from "
+ << L->getLocStr() << "\n");
// Having a single exit block implies there's also one exiting block.
if (!L->getExitBlock())
@@ -686,6 +685,9 @@ class LoopDistributeForLoop {
if (!Dependences || Dependences->empty())
return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
+ LLVM_DEBUG(dbgs() << "LDist: Found a loop: " << L->getHeader()->getName()
+ << "\n");
+
InstPartitionContainer Partitions(L, LI, DT);
// First, go through each memory operation and assign them to consecutive
@@ -735,7 +737,7 @@ class LoopDistributeForLoop {
for (auto *Inst : DefsUsedOutside)
Partitions.addToNewNonCyclicPartition(Inst);
- LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
+ LLVM_DEBUG(dbgs() << "LDist: Seeded partitions:\n" << Partitions);
if (Partitions.getSize() < 2)
return fail("CantIsolateUnsafeDeps",
"cannot isolate unsafe dependencies");
@@ -743,19 +745,19 @@ class LoopDistributeForLoop {
// Run the merge heuristics: Merge non-cyclic adjacent partitions since we
// should be able to vectorize these together.
Partitions.mergeBeforePopulating();
- LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
+ LLVM_DEBUG(dbgs() << "LDist: Merged partitions:\n" << Partitions);
if (Partitions.getSize() < 2)
return fail("CantIsolateUnsafeDeps",
"cannot isolate unsafe dependencies");
// Now, populate the partitions with non-memory operations.
Partitions.populateUsedSet();
- LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
+ LLVM_DEBUG(dbgs() << "LDist: Populated partitions:\n" << Partitions);
// In order to preserve original lexical order for loads, keep them in the
// partition that we set up in the MemoryInstructionDependences loop.
if (Partitions.mergeToAvoidDuplicatedLoads()) {
- LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
+ LLVM_DEBUG(dbgs() << "LDist: Partitions merged to ensure unique loads:\n"
<< Partitions);
if (Partitions.getSize() < 2)
return fail("CantIsolateUnsafeDeps",
@@ -779,7 +781,8 @@ class LoopDistributeForLoop {
if (!IsForced.value_or(false) && hasDisableAllTransformsHint(L))
return fail("HeuristicDisabled", "distribution heuristic disabled");
- LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
+ LLVM_DEBUG(dbgs() << "LDist: Distributing loop: "
+ << L->getHeader()->getName() << "\n");
// We're done forming the partitions set up the reverse mapping from
// instructions to partitions.
Partitions.setupPartitionIdOnInstructions();
@@ -807,7 +810,7 @@ class LoopDistributeForLoop {
MDNode *OrigLoopID = L->getLoopID();
- LLVM_DEBUG(dbgs() << "\nPointers:\n");
+ LLVM_DEBUG(dbgs() << "LDist: Pointers:\n");
LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
LVer.versionLoop(DefsUsedOutside);
@@ -830,8 +833,8 @@ class LoopDistributeForLoop {
// Now, we remove the instruction from each loop that don't belong to that
// partition.
Partitions.removeUnusedInsts();
- LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
- LLVM_DEBUG(Partitions.printBlocks());
+ LLVM_DEBUG(dbgs() << "LDist: After removing unused Instrs:\n");
+ LLVM_DEBUG(Partitions.printBlocks(dbgs()));
if (LDistVerify) {
LI->verify(*DT);
@@ -853,7 +856,7 @@ class LoopDistributeForLoop {
LLVMContext &Ctx = F->getContext();
bool Forced = isForced().value_or(false);
- LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
+ LLVM_DEBUG(dbgs() << "LDist: Skipping; " << Message << "\n");
// With Rpass-missed report that distribution failed.
ORE->emit([&]() {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6d64aaa75922b..2726df59f464d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1027,23 +1027,6 @@ static void reportVectorization(OptimizationRemarkEmitter *ORE, Loop *TheLoop,
} // end namespace llvm
-#ifndef NDEBUG
-/// \return string containing a file name and a line # for the given loop.
-static std::string getDebugLocString(const Loop *L) {
- std::string Result;
- if (L) {
- raw_string_ostream OS(Result);
- if (const DebugLoc LoopDbgLoc = L->getStartLoc())
- LoopDbgLoc.print(OS);
- else
- // Just print the module name.
- OS << L->getHeader()->getParent()->getParent()->getModuleIdentifier();
- OS.flush();
- }
- return Result;
-}
-#endif
-
namespace llvm {
// Loop vectorization cost-model hints how the scalar epilogue loop should be
@@ -9836,13 +9819,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
assert((EnableVPlanNativePath || L->isInnermost()) &&
"VPlan-native path is not enabled. Only process inner loops.");
-#ifndef NDEBUG
- const std::string DebugLocStr = getDebugLocString(L);
-#endif /* NDEBUG */
-
LLVM_DEBUG(dbgs() << "\nLV: Checking a loop in '"
<< L->getHeader()->getParent()->getName() << "' from "
- << DebugLocStr << "\n");
+ << L->getLocStr() << "\n");
LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE, TTI);
@@ -10112,7 +10091,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
});
} else if (VectorizeLoop && !InterleaveLoop) {
LLVM_DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width
- << ") in " << DebugLocStr << '\n');
+ << ") in " << L->getLocStr() << '\n');
ORE->emit([&]() {
return OptimizationRemarkAnalysis(LV_NAME, IntDiagMsg.first,
L->getStartLoc(), L->getHeader())
@@ -10120,7 +10099,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
});
} else if (VectorizeLoop && InterleaveLoop) {
LLVM_DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width
- << ") in " << DebugLocStr << '\n');
+ << ") in " << L->getLocStr() << '\n');
LLVM_DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll b/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
index 65f94a7d8fdb4..1e53eda68b1a2 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
@@ -6,7 +6,8 @@
; A[i+1] = A[i] + 1;
; }
-; CHECK: LAA: Found a loop in negative_step: loop
+; CHECK-LABEL: 'negative_step'
+; CHECK: LAA: Found a loop: loop
; CHECK: LAA: Checking memory dependencies
; CHECK-NEXT: LAA: Src Scev: {(4092 + %A),+,-4}<nw><%loop>Sink Scev: {(4088 + %A)<nuw>,+,-4}<nw><%loop>(Induction step: -1)
; CHECK-NEXT: LAA: Distance for store i32 %add, ptr %gep.A.plus.1, align 4 to %l = load i32, ptr %gep.A, align 4: -4
@@ -37,7 +38,8 @@ exit:
; A[i-1] = A[i] + 1;
; }
-; CHECK: LAA: Found a loop in positive_step: loop
+; CHECK-LABEL: 'positive_step'
+; CHECK: LAA: Found a loop: loop
; CHECK: LAA: Checking memory dependencies
; CHECK-NEXT: LAA: Src Scev: {(4 + %A)<nuw>,+,4}<nuw><%loop>Sink Scev: {%A,+,4}<nw><%loop>(Induction step: 1)
; CHECK-NEXT: LAA: Distance for %l = load i32, ptr %gep.A, align 4 to store i32 %add, ptr %gep.A.minus.1, align 4: -4
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll b/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
index 82a884a637259..8019bc76d2f0f 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
@@ -24,7 +24,7 @@ loop.end:
ret void
}
-; CHECK-LABEL: LAA: Found a loop in regression_test_loop_access_scalable_typesize
+; CHECK-LABEL: 'regression_test_loop_access_scalable_typesize'
; CHECK: LAA: Bad stride - Scalable object:
define void @regression_test_loop_access_scalable_typesize(ptr %input_ptr) {
entry:
@@ -42,7 +42,7 @@ end:
ret void
}
-; CHECK-LABEL: LAA: Found a loop in regression_test_loop_access_scalable_typesize_nonscalable_object
+; CHECK-LABEL: 'regression_test_loop_access_scalable_typesize_nonscalable_object'
; CHECK: LAA: Bad stride - Scalable object:
define void @regression_test_loop_access_scalable_typesize_nonscalable_object(ptr %input_ptr) {
entry:
diff --git a/llvm/test/Transforms/LoopDistribute/debug-print.ll b/llvm/test/Transforms/LoopDistribute/debug-print.ll
new file mode 100644
index 0000000000000..3727c78b17c92
--- /dev/null
+++ b/llvm/test/Transforms/LoopDistribute/debug-print.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-distribute -enable-loop-distribute \
+; RUN: -debug-only=loop-distribute -disable-output 2>&1 %s | FileCheck %s
+
+define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, i64 %stride) {
+; CHECK-LABEL: 'f'
+; CHECK-NEXT: LDist: Found a loop: for.body
+; CHECK-NEXT: LDist: Backward dependences:
+; CHECK-NEXT: Backward:
+; CHECK-NEXT: %load.a = load i32, ptr %gep.a, align 4 ->
+; CHECK-NEXT: store i32 %mul.a, ptr %gep.a.plus4, align 4
+; CHECK-NEXT: LDist: Seeded partitions:
+; CHECK-NEXT: LDist: Partition 0: (cycle)
+; CHECK-NEXT: for.body: %load.a = load i32, ptr %gep.a, align 4
+; CHECK-NEXT: for.body: %load.b = load i32, ptr %gep.b, align 4
+; CHECK-NEXT: for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
+; CHECK-NEXT: LDist: Partition 1:
+; CHECK-NEXT: for.body: %loadD = load i32, ptr %gep.d, align 4
+; CHECK-NEXT: LDist: Partition 2:
+; CHECK-NEXT: for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
+; CHECK-NEXT: LDist: Partition 3:
+; CHECK-NEXT: for.body: store i32 %mul.c, ptr %gep.c, align 4
+; CHECK-NEXT: LDist: Merged partitions:
+; CHECK-NEXT: LDist: Partition 0: (cycle)
+; CHECK-NEXT: for.body: %load.a = load i32, ptr %gep.a, align 4
+; CHECK-NEXT: for.body: %load.b = load i32, ptr %gep.b, align 4
+; CHECK-NEXT: for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
+; CHECK-NEXT: LDist: Partition 1:
+; CHECK-NEXT: for.body: %loadD = load i32, ptr %gep.d, align 4
+; CHECK-NEXT: for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
+; CHECK-NEXT: for.body: store i32 %mul.c, ptr %gep.c, align 4
+; CHECK-NEXT: LDist: Populated partitions:
+; CHECK-NEXT: LDist: Partition 0: (cycle)
+; CHECK-NEXT: for.body: %load.a = load i32, ptr %gep.a, align 4
+; CHECK-NEXT: for.body: %load.b = load i32, ptr %gep.b, align 4
+; CHECK-NEXT: for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
+; CHECK-NEXT: for.body: br i1 %exitcond, label %exit, label %for.body
+; CHECK-NEXT: for.body: %exitcond = icmp eq i64 %add, 20
+; CHECK-NEXT: for.body: %add = add nuw nsw i64 %ind, 1
+; CHECK-NEXT: for.body: %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+; CHECK-NEXT: for.body: %mul.a = mul i32 %load.b, %load.a
+; CHECK-NEXT: for.body: %gep.a.plus4 = getelementptr inbounds i32, ptr %a, i64 %add
+; CHECK-NEXT: for.body: %gep.b = getelementptr inbounds i32, ptr %b, i64 %ind
+; CHECK-NEXT: for.body: %gep.a = getelementptr inbounds i32, ptr %a, i64 %ind
+; CHECK-NEXT: LDist: Partition 1:
+; CHECK-NEXT: for.body: %loadD = load i32, ptr %gep.d, align 4
+; CHECK-NEXT: for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
+; CHECK-NEXT: for.body: store i32 %mul.c, ptr %gep.c, align 4
+; CHECK-NEXT: for.body: br i1 %exitcond, label %exit, label %for.body
+; CHECK-NEXT: for.body: %exitcond = icmp eq i64 %add, 20
+; CHECK-NEXT: for.body: %add = add nuw nsw i64 %ind, 1
+; CHECK-NEXT: for.body: %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+; CHECK-NEXT: for.body: %mul.c = mul i32 %loadD, %load.strided.a
+; CHECK-NEXT: for.body: %gep.c = getelementptr inbounds i32, ptr %c, i64 %ind
+; CHECK-NEXT: for.body: %gep.strided.a = getelementptr inbounds i32, ptr %a, i64 %mul
+; CHECK-NEXT: for.body: %mul = mul i64 %ind, %stride
+; CHECK-NEXT: for.body: %gep.d = getelementptr inbounds i32, ptr %d, i64 %ind
+; CHECK-NEXT: LDist: Distributing loop: for.body
+; CHECK-NEXT: LDist: Pointers:
+; CHECK-NEXT: LDist: After removing unused Instrs:
+; CHECK-NEXT: LDist: Partition 0:
+; CHECK-NEXT: for.body.ldist1: ; preds = %for.body.ldist1, %for.body.ph.ldist1
+; CHECK-NEXT: %ind.ldist1 = phi i64 [ 0, %for.body.ph.ldist1 ], [ %add.ldist1, %for.body.ldist1 ]
+; CHECK-NEXT: %gep.a.ldist1 = getelementptr inbounds i32, ptr %a, i64 %ind.ldist1
+; CHECK-NEXT: %load.a.ldist1 = load i32, ptr %gep.a.ldist1, align 4
+; CHECK-NEXT: %gep.b.ldist1 = getelementptr inbounds i32, ptr %b, i64 %ind.ldist1
+; CHECK-NEXT: %load.b.ldist1 = load i32, ptr %gep.b.ldist1, align 4
+; CHECK-NEXT: %mul.a.ldist1 = mul i32 %load.b.ldist1, %load.a.ldist1
+; CHECK-NEXT: %add.ldist1 = add nuw nsw i64 %ind.ldist1, 1
+; CHECK-NEXT: %gep.a.plus4.ldist1 = getelementptr inbounds i32, ptr...
[truncated]
|
@llvm/pr-subscribers-testing-tools Author: Ramkumar Ramachandra (artagnon) Changes: Tweak the LoopDistribute debug output to be stable, and extend update_analyze_test_checks.py trivially to support this output. -- 8< -- Patch is 38.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93208.diff 15 Files Affected:
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 52084630560c5..269a9efeac642 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -385,6 +385,11 @@ class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase<BasicBlock, Loop> {
/// Return the source code span of the loop.
LocRange getLocRange() const;
+ /// Return a string containing the location of the loop (file name + line
+ /// number if present, otherwise module name). Meant to be used for debug
+ /// printing within LLVM_DEBUG.
+ std::string getLocStr() const;
+
StringRef getName() const {
if (BasicBlock *Header = getHeader())
if (Header->hasName())
@@ -690,7 +695,6 @@ llvm::MDNode *
makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID,
llvm::ArrayRef<llvm::StringRef> RemovePrefixes,
llvm::ArrayRef<llvm::MDNode *> AddAttrs);
-
} // namespace llvm
#endif
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index bc8b9b8479e4f..a537805bfd8b0 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2374,9 +2374,9 @@ void MemoryDepChecker::Dependence::print(
bool LoopAccessInfo::canAnalyzeLoop() {
// We need to have a loop header.
- LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
- << TheLoop->getHeader()->getParent()->getName() << ": "
- << TheLoop->getHeader()->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "\nLAA: Checking a loop in '"
+ << TheLoop->getHeader()->getParent()->getName() << "' from "
+ << TheLoop->getLocStr() << "\n");
// We can only analyze innermost loops.
if (!TheLoop->isInnermost()) {
@@ -2403,6 +2403,8 @@ bool LoopAccessInfo::canAnalyzeLoop() {
return false;
}
+ LLVM_DEBUG(dbgs() << "LAA: Found a loop: " << TheLoop->getHeader()->getName()
+ << "\n");
return true;
}
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 369ab087ffc0f..d7bddb0bae1b6 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -663,6 +663,17 @@ Loop::LocRange Loop::getLocRange() const {
return LocRange();
}
+std::string Loop::getLocStr() const {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ if (const DebugLoc LoopDbgLoc = getStartLoc())
+ LoopDbgLoc.print(OS);
+ else
+ // Just print the module name.
+ OS << getHeader()->getParent()->getParent()->getModuleIdentifier();
+ return Result;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void Loop::dump() const { print(dbgs()); }
diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 626888c74bad8..abd46dc6c1b63 100644
--- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -26,7 +26,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
@@ -120,7 +120,7 @@ namespace {
/// Maintains the set of instructions of the loop for a partition before
/// cloning. After cloning, it hosts the new loop.
class InstPartition {
- using InstructionSet = SmallPtrSet<Instruction *, 8>;
+ using InstructionSet = SetVector<Instruction *>;
public:
InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
@@ -166,7 +166,7 @@ class InstPartition {
// Insert instructions from the loop that we depend on.
for (Value *V : I->operand_values()) {
auto *I = dyn_cast<Instruction>(V);
- if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
+ if (I && OrigLoop->contains(I->getParent()) && Set.insert(I))
Worklist.push_back(I);
}
}
@@ -231,17 +231,16 @@ class InstPartition {
}
}
- void print() const {
- if (DepCycle)
- dbgs() << " (cycle)\n";
+ void print(raw_ostream &OS) const {
+ OS << (DepCycle ? " (cycle)\n" : "\n");
for (auto *I : Set)
// Prefix with the block name.
- dbgs() << " " << I->getParent()->getName() << ":" << *I << "\n";
+ OS << " " << I->getParent()->getName() << ":" << *I << "\n";
}
- void printBlocks() const {
+ void printBlocks(raw_ostream &OS) const {
for (auto *BB : getDistributedLoop()->getBlocks())
- dbgs() << *BB;
+ OS << *BB;
}
private:
@@ -368,11 +367,11 @@ class InstPartitionContainer {
std::tie(LoadToPart, NewElt) =
LoadToPartition.insert(std::make_pair(Inst, PartI));
if (!NewElt) {
- LLVM_DEBUG(dbgs()
- << "Merging partitions due to this load in multiple "
- << "partitions: " << PartI << ", " << LoadToPart->second
- << "\n"
- << *Inst << "\n");
+ LLVM_DEBUG(
+ dbgs()
+ << "LDist: Merging partitions due to this load in multiple "
+ << "partitions: " << PartI << ", " << LoadToPart->second << "\n"
+ << *Inst << "\n");
auto PartJ = I;
do {
@@ -530,8 +529,8 @@ class InstPartitionContainer {
void print(raw_ostream &OS) const {
unsigned Index = 0;
for (const auto &P : PartitionContainer) {
- OS << "Partition " << Index++ << " (" << &P << "):\n";
- P.print();
+ OS << "LDist: Partition " << Index++ << ":";
+ P.print(OS);
}
}
@@ -545,11 +544,11 @@ class InstPartitionContainer {
}
#endif
- void printBlocks() const {
+ void printBlocks(raw_ostream &OS) const {
unsigned Index = 0;
for (const auto &P : PartitionContainer) {
- dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
- P.printBlocks();
+ OS << "LDist: Partition " << Index++ << ":";
+ P.printBlocks(OS);
}
}
@@ -628,7 +627,7 @@ class MemoryInstructionDependences {
const SmallVectorImpl<Dependence> &Dependences) {
Accesses.append(Instructions.begin(), Instructions.end());
- LLVM_DEBUG(dbgs() << "Backward dependences:\n");
+ LLVM_DEBUG(dbgs() << "LDist: Backward dependences:\n");
for (const auto &Dep : Dependences)
if (Dep.isPossiblyBackward()) {
// Note that the designations source and destination follow the program
@@ -659,9 +658,9 @@ class LoopDistributeForLoop {
bool processLoop() {
assert(L->isInnermost() && "Only process inner loops.");
- LLVM_DEBUG(dbgs() << "\nLDist: In \""
- << L->getHeader()->getParent()->getName()
- << "\" checking " << *L << "\n");
+ LLVM_DEBUG(dbgs() << "\nLDist: Checking a loop in '"
+ << L->getHeader()->getParent()->getName() << "' from "
+ << L->getLocStr() << "\n");
// Having a single exit block implies there's also one exiting block.
if (!L->getExitBlock())
@@ -686,6 +685,9 @@ class LoopDistributeForLoop {
if (!Dependences || Dependences->empty())
return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
+ LLVM_DEBUG(dbgs() << "LDist: Found a loop: " << L->getHeader()->getName()
+ << "\n");
+
InstPartitionContainer Partitions(L, LI, DT);
// First, go through each memory operation and assign them to consecutive
@@ -735,7 +737,7 @@ class LoopDistributeForLoop {
for (auto *Inst : DefsUsedOutside)
Partitions.addToNewNonCyclicPartition(Inst);
- LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
+ LLVM_DEBUG(dbgs() << "LDist: Seeded partitions:\n" << Partitions);
if (Partitions.getSize() < 2)
return fail("CantIsolateUnsafeDeps",
"cannot isolate unsafe dependencies");
@@ -743,19 +745,19 @@ class LoopDistributeForLoop {
// Run the merge heuristics: Merge non-cyclic adjacent partitions since we
// should be able to vectorize these together.
Partitions.mergeBeforePopulating();
- LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
+ LLVM_DEBUG(dbgs() << "LDist: Merged partitions:\n" << Partitions);
if (Partitions.getSize() < 2)
return fail("CantIsolateUnsafeDeps",
"cannot isolate unsafe dependencies");
// Now, populate the partitions with non-memory operations.
Partitions.populateUsedSet();
- LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
+ LLVM_DEBUG(dbgs() << "LDist: Populated partitions:\n" << Partitions);
// In order to preserve original lexical order for loads, keep them in the
// partition that we set up in the MemoryInstructionDependences loop.
if (Partitions.mergeToAvoidDuplicatedLoads()) {
- LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
+ LLVM_DEBUG(dbgs() << "LDist: Partitions merged to ensure unique loads:\n"
<< Partitions);
if (Partitions.getSize() < 2)
return fail("CantIsolateUnsafeDeps",
@@ -779,7 +781,8 @@ class LoopDistributeForLoop {
if (!IsForced.value_or(false) && hasDisableAllTransformsHint(L))
return fail("HeuristicDisabled", "distribution heuristic disabled");
- LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
+ LLVM_DEBUG(dbgs() << "LDist: Distributing loop: "
+ << L->getHeader()->getName() << "\n");
// We're done forming the partitions set up the reverse mapping from
// instructions to partitions.
Partitions.setupPartitionIdOnInstructions();
@@ -807,7 +810,7 @@ class LoopDistributeForLoop {
MDNode *OrigLoopID = L->getLoopID();
- LLVM_DEBUG(dbgs() << "\nPointers:\n");
+ LLVM_DEBUG(dbgs() << "LDist: Pointers:\n");
LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
LVer.versionLoop(DefsUsedOutside);
@@ -830,8 +833,8 @@ class LoopDistributeForLoop {
// Now, we remove the instruction from each loop that don't belong to that
// partition.
Partitions.removeUnusedInsts();
- LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
- LLVM_DEBUG(Partitions.printBlocks());
+ LLVM_DEBUG(dbgs() << "LDist: After removing unused Instrs:\n");
+ LLVM_DEBUG(Partitions.printBlocks(dbgs()));
if (LDistVerify) {
LI->verify(*DT);
@@ -853,7 +856,7 @@ class LoopDistributeForLoop {
LLVMContext &Ctx = F->getContext();
bool Forced = isForced().value_or(false);
- LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
+ LLVM_DEBUG(dbgs() << "LDist: Skipping; " << Message << "\n");
// With Rpass-missed report that distribution failed.
ORE->emit([&]() {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6d64aaa75922b..2726df59f464d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1027,23 +1027,6 @@ static void reportVectorization(OptimizationRemarkEmitter *ORE, Loop *TheLoop,
} // end namespace llvm
-#ifndef NDEBUG
-/// \return string containing a file name and a line # for the given loop.
-static std::string getDebugLocString(const Loop *L) {
- std::string Result;
- if (L) {
- raw_string_ostream OS(Result);
- if (const DebugLoc LoopDbgLoc = L->getStartLoc())
- LoopDbgLoc.print(OS);
- else
- // Just print the module name.
- OS << L->getHeader()->getParent()->getParent()->getModuleIdentifier();
- OS.flush();
- }
- return Result;
-}
-#endif
-
namespace llvm {
// Loop vectorization cost-model hints how the scalar epilogue loop should be
@@ -9836,13 +9819,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
assert((EnableVPlanNativePath || L->isInnermost()) &&
"VPlan-native path is not enabled. Only process inner loops.");
-#ifndef NDEBUG
- const std::string DebugLocStr = getDebugLocString(L);
-#endif /* NDEBUG */
-
LLVM_DEBUG(dbgs() << "\nLV: Checking a loop in '"
<< L->getHeader()->getParent()->getName() << "' from "
- << DebugLocStr << "\n");
+ << L->getLocStr() << "\n");
LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE, TTI);
@@ -10112,7 +10091,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
});
} else if (VectorizeLoop && !InterleaveLoop) {
LLVM_DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width
- << ") in " << DebugLocStr << '\n');
+ << ") in " << L->getLocStr() << '\n');
ORE->emit([&]() {
return OptimizationRemarkAnalysis(LV_NAME, IntDiagMsg.first,
L->getStartLoc(), L->getHeader())
@@ -10120,7 +10099,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
});
} else if (VectorizeLoop && InterleaveLoop) {
LLVM_DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width
- << ") in " << DebugLocStr << '\n');
+ << ") in " << L->getLocStr() << '\n');
LLVM_DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll b/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
index 65f94a7d8fdb4..1e53eda68b1a2 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
@@ -6,7 +6,8 @@
; A[i+1] = A[i] + 1;
; }
-; CHECK: LAA: Found a loop in negative_step: loop
+; CHECK-LABEL: 'negative_step'
+; CHECK: LAA: Found a loop: loop
; CHECK: LAA: Checking memory dependencies
; CHECK-NEXT: LAA: Src Scev: {(4092 + %A),+,-4}<nw><%loop>Sink Scev: {(4088 + %A)<nuw>,+,-4}<nw><%loop>(Induction step: -1)
; CHECK-NEXT: LAA: Distance for store i32 %add, ptr %gep.A.plus.1, align 4 to %l = load i32, ptr %gep.A, align 4: -4
@@ -37,7 +38,8 @@ exit:
; A[i-1] = A[i] + 1;
; }
-; CHECK: LAA: Found a loop in positive_step: loop
+; CHECK-LABEL: 'positive_step'
+; CHECK: LAA: Found a loop: loop
; CHECK: LAA: Checking memory dependencies
; CHECK-NEXT: LAA: Src Scev: {(4 + %A)<nuw>,+,4}<nuw><%loop>Sink Scev: {%A,+,4}<nw><%loop>(Induction step: 1)
; CHECK-NEXT: LAA: Distance for %l = load i32, ptr %gep.A, align 4 to store i32 %add, ptr %gep.A.minus.1, align 4: -4
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll b/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
index 82a884a637259..8019bc76d2f0f 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
@@ -24,7 +24,7 @@ loop.end:
ret void
}
-; CHECK-LABEL: LAA: Found a loop in regression_test_loop_access_scalable_typesize
+; CHECK-LABEL: 'regression_test_loop_access_scalable_typesize'
; CHECK: LAA: Bad stride - Scalable object:
define void @regression_test_loop_access_scalable_typesize(ptr %input_ptr) {
entry:
@@ -42,7 +42,7 @@ end:
ret void
}
-; CHECK-LABEL: LAA: Found a loop in regression_test_loop_access_scalable_typesize_nonscalable_object
+; CHECK-LABEL: 'regression_test_loop_access_scalable_typesize_nonscalable_object'
; CHECK: LAA: Bad stride - Scalable object:
define void @regression_test_loop_access_scalable_typesize_nonscalable_object(ptr %input_ptr) {
entry:
diff --git a/llvm/test/Transforms/LoopDistribute/debug-print.ll b/llvm/test/Transforms/LoopDistribute/debug-print.ll
new file mode 100644
index 0000000000000..3727c78b17c92
--- /dev/null
+++ b/llvm/test/Transforms/LoopDistribute/debug-print.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-distribute -enable-loop-distribute \
+; RUN: -debug-only=loop-distribute -disable-output 2>&1 %s | FileCheck %s
+
+define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, i64 %stride) {
+; CHECK-LABEL: 'f'
+; CHECK-NEXT: LDist: Found a loop: for.body
+; CHECK-NEXT: LDist: Backward dependences:
+; CHECK-NEXT: Backward:
+; CHECK-NEXT: %load.a = load i32, ptr %gep.a, align 4 ->
+; CHECK-NEXT: store i32 %mul.a, ptr %gep.a.plus4, align 4
+; CHECK-NEXT: LDist: Seeded partitions:
+; CHECK-NEXT: LDist: Partition 0: (cycle)
+; CHECK-NEXT: for.body: %load.a = load i32, ptr %gep.a, align 4
+; CHECK-NEXT: for.body: %load.b = load i32, ptr %gep.b, align 4
+; CHECK-NEXT: for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
+; CHECK-NEXT: LDist: Partition 1:
+; CHECK-NEXT: for.body: %loadD = load i32, ptr %gep.d, align 4
+; CHECK-NEXT: LDist: Partition 2:
+; CHECK-NEXT: for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
+; CHECK-NEXT: LDist: Partition 3:
+; CHECK-NEXT: for.body: store i32 %mul.c, ptr %gep.c, align 4
+; CHECK-NEXT: LDist: Merged partitions:
+; CHECK-NEXT: LDist: Partition 0: (cycle)
+; CHECK-NEXT: for.body: %load.a = load i32, ptr %gep.a, align 4
+; CHECK-NEXT: for.body: %load.b = load i32, ptr %gep.b, align 4
+; CHECK-NEXT: for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
+; CHECK-NEXT: LDist: Partition 1:
+; CHECK-NEXT: for.body: %loadD = load i32, ptr %gep.d, align 4
+; CHECK-NEXT: for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
+; CHECK-NEXT: for.body: store i32 %mul.c, ptr %gep.c, align 4
+; CHECK-NEXT: LDist: Populated partitions:
+; CHECK-NEXT: LDist: Partition 0: (cycle)
+; CHECK-NEXT: for.body: %load.a = load i32, ptr %gep.a, align 4
+; CHECK-NEXT: for.body: %load.b = load i32, ptr %gep.b, align 4
+; CHECK-NEXT: for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
+; CHECK-NEXT: for.body: br i1 %exitcond, label %exit, label %for.body
+; CHECK-NEXT: for.body: %exitcond = icmp eq i64 %add, 20
+; CHECK-NEXT: for.body: %add = add nuw nsw i64 %ind, 1
+; CHECK-NEXT: for.body: %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+; CHECK-NEXT: for.body: %mul.a = mul i32 %load.b, %load.a
+; CHECK-NEXT: for.body: %gep.a.plus4 = getelementptr inbounds i32, ptr %a, i64 %add
+; CHECK-NEXT: for.body: %gep.b = getelementptr inbounds i32, ptr %b, i64 %ind
+; CHECK-NEXT: for.body: %gep.a = getelementptr inbounds i32, ptr %a, i64 %ind
+; CHECK-NEXT: LDist: Partition 1:
+; CHECK-NEXT: for.body: %loadD = load i32, ptr %gep.d, align 4
+; CHECK-NEXT: for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
+; CHECK-NEXT: for.body: store i32 %mul.c, ptr %gep.c, align 4
+; CHECK-NEXT: for.body: br i1 %exitcond, label %exit, label %for.body
+; CHECK-NEXT: for.body: %exitcond = icmp eq i64 %add, 20
+; CHECK-NEXT: for.body: %add = add nuw nsw i64 %ind, 1
+; CHECK-NEXT: for.body: %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+; CHECK-NEXT: for.body: %mul.c = mul i32 %loadD, %load.strided.a
+; CHECK-NEXT: for.body: %gep.c = getelementptr inbounds i32, ptr %c, i64 %ind
+; CHECK-NEXT: for.body: %gep.strided.a = getelementptr inbounds i32, ptr %a, i64 %mul
+; CHECK-NEXT: for.body: %mul = mul i64 %ind, %stride
+; CHECK-NEXT: for.body: %gep.d = getelementptr inbounds i32, ptr %d, i64 %ind
+; CHECK-NEXT: LDist: Distributing loop: for.body
+; CHECK-NEXT: LDist: Pointers:
+; CHECK-NEXT: LDist: After removing unused Instrs:
+; CHECK-NEXT: LDist: Partition 0:
+; CHECK-NEXT: for.body.ldist1: ; preds = %for.body.ldist1, %for.body.ph.ldist1
+; CHECK-NEXT: %ind.ldist1 = phi i64 [ 0, %for.body.ph.ldist1 ], [ %add.ldist1, %for.body.ldist1 ]
+; CHECK-NEXT: %gep.a.ldist1 = getelementptr inbounds i32, ptr %a, i64 %ind.ldist1
+; CHECK-NEXT: %load.a.ldist1 = load i32, ptr %gep.a.ldist1, align 4
+; CHECK-NEXT: %gep.b.ldist1 = getelementptr inbounds i32, ptr %b, i64 %ind.ldist1
+; CHECK-NEXT: %load.b.ldist1 = load i32, ptr %gep.b.ldist1, align 4
+; CHECK-NEXT: %mul.a.ldist1 = mul i32 %load.b.ldist1, %load.a.ldist1
+; CHECK-NEXT: %add.ldist1 = add nuw nsw i64 %ind.ldist1, 1
+; CHECK-NEXT: %gep.a.plus4.ldist1 = getelementptr inbounds i32, ptr...
[truncated]
|
Gentle ping. I'm planning to make improvements to LoopDistribute, and this patch is a prerequisite. |
Tweak the LoopDistribute debug output to be stable, and extend update_analyze_test_checks.py trivially to support this output.
Rebase and ping. |
Gentle ping. This patch should be straightforward. |
llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/loop-distribute.test
Show resolved
Hide resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be good to also mention in the patch description that this updates LDist, or to split the patch into two: one updating the debug output and one for UTC.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the clarification regarding print() already taking OS.
LGTM
Tweak the LoopDistribute debug output to be prefixed with "LDist: ", get it to be stable, and extend update_analyze_test_checks.py trivially to support this output.
Tweak the LoopDistribute debug output to be prefixed with "LDist: ", get it to be stable, and extend update_analyze_test_checks.py trivially to support this output.
Tweak the LoopDistribute debug output to be prefixed with "LDist: ", get it to be stable, and extend update_analyze_test_checks.py trivially to support this output.