Skip to content

[AMDGPU][StructurizeCFG] Maintain branch MD_prof metadata #109813

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 65 additions & 18 deletions llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
Expand Down Expand Up @@ -85,7 +86,43 @@ using PhiMap = MapVector<PHINode *, BBValueVector>;
using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;

using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
using BBPredicates = DenseMap<BasicBlock *, Value *>;

using MaybeCondBranchWeights = std::optional<class CondBranchWeights>;

/// The pair of weights (true successor, false successor) carried by a
/// conditional branch.
///
/// The constructor is private: values are only produced by tryParse() from a
/// branch for which weights could be extracted, or by invert() from an
/// existing value.
class CondBranchWeights {
  uint32_t TrueWeight;
  uint32_t FalseWeight;

  CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {}

public:
  /// Try to read the branch weights attached to the conditional branch \p Br.
  ///
  /// \returns the parsed weights, or std::nullopt when extractBranchWeights()
  /// reports that none are available.
  static MaybeCondBranchWeights tryParse(const BranchInst &Br) {
    assert(Br.isConditional());

    uint64_t T, F;
    if (!extractBranchWeights(Br, T, F))
      return std::nullopt;

    // The extraction overload used here yields 64-bit values while this class
    // stores (and setBranchWeights() consumes) 32-bit ones. Spell the
    // narrowing out instead of relying on an implicit conversion.
    // NOTE(review): presumably the extracted weights always fit in 32 bits -
    // confirm against the branch_weights metadata format.
    return CondBranchWeights(static_cast<uint32_t>(T),
                             static_cast<uint32_t>(F));
  }

  /// Attach \p Weights to the conditional branch \p Br.
  ///
  /// Does nothing when \p Weights is empty, so any metadata already present
  /// on \p Br is left untouched in that case.
  static void setMetadata(BranchInst &Br,
                          const MaybeCondBranchWeights &Weights) {
    assert(Br.isConditional());
    if (!Weights)
      return;
    uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight};
    setBranchWeights(Br, Arr, /*IsExpected=*/false);
  }

  /// \returns a copy with the true and false weights swapped, matching a
  /// branch whose condition has been inverted.
  CondBranchWeights invert() const {
    return CondBranchWeights{FalseWeight, TrueWeight};
  }
};

using ValueWeightPair = std::pair<Value *, MaybeCondBranchWeights>;

using BBPredicates = DenseMap<BasicBlock *, ValueWeightPair>;
using PredMap = DenseMap<BasicBlock *, BBPredicates>;
using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;

Expand Down Expand Up @@ -271,7 +308,7 @@ class StructurizeCFG {

void analyzeLoops(RegionNode *N);

Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
ValueWeightPair buildCondition(BranchInst *Term, unsigned Idx, bool Invert);

void gatherPredicates(RegionNode *N);

Expand Down Expand Up @@ -449,16 +486,22 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
}

/// Build the condition for one edge
///
/// \param Term   terminator of the block the edge leaves from.
/// \param Idx    index compared against \p Invert to decide whether the
///               condition has to be flipped.
/// \param Invert selects the constant used for an unconditional terminator
///               (BoolFalse when set, BoolTrue otherwise).
/// \returns the condition value together with the branch weights parsed from
/// \p Term; the weights are empty for an unconditional terminator or when
/// CondBranchWeights::tryParse() finds none.
Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
bool Invert) {
ValueWeightPair StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
bool Invert) {
// Default for an unconditional terminator: a constant and no weights.
Value *Cond = Invert ? BoolFalse : BoolTrue;
MaybeCondBranchWeights Weights;

if (Term->isConditional()) {
Cond = Term->getCondition();
Weights = CondBranchWeights::tryParse(*Term);

// When the condition is inverted, the weights are swapped along with it so
// they keep describing the same outcomes.
if (Idx != (unsigned)Invert)
if (Idx != (unsigned)Invert) {
Cond = invertCondition(Cond);
if (Weights)
Weights = Weights->invert();
}
}
return Cond;
return {Cond, Weights};
}

/// Analyze the predecessors of each block and build up predicates
Expand Down Expand Up @@ -490,8 +533,8 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
if (Visited.count(Other) && !Loops.count(Other) &&
!Pred.count(Other) && !Pred.count(P)) {

Pred[Other] = BoolFalse;
Pred[P] = BoolTrue;
Pred[Other] = {BoolFalse, std::nullopt};
Pred[P] = {BoolTrue, std::nullopt};
continue;
}
}
Expand All @@ -512,9 +555,9 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {

BasicBlock *Entry = R->getEntry();
if (Visited.count(Entry))
Pred[Entry] = BoolTrue;
Pred[Entry] = {BoolTrue, std::nullopt};
else
LPred[Entry] = BoolFalse;
LPred[Entry] = {BoolFalse, std::nullopt};
}
}
}
Expand Down Expand Up @@ -578,12 +621,14 @@ void StructurizeCFG::insertConditions(bool Loops) {
Dominator.addBlock(Parent);

Value *ParentValue = nullptr;
for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
MaybeCondBranchWeights ParentWeights = std::nullopt;
for (std::pair<BasicBlock *, ValueWeightPair> BBAndPred : Preds) {
BasicBlock *BB = BBAndPred.first;
Value *Pred = BBAndPred.second;
auto [Pred, Weight] = BBAndPred.second;

if (BB == Parent) {
ParentValue = Pred;
ParentWeights = Weight;
break;
}
PhiInserter.AddAvailableValue(BB, Pred);
Expand All @@ -592,6 +637,7 @@ void StructurizeCFG::insertConditions(bool Loops) {

if (ParentValue) {
Term->setCondition(ParentValue);
CondBranchWeights::setMetadata(*Term, ParentWeights);
} else {
if (!Dominator.resultIsRememberedBlock())
PhiInserter.AddAvailableValue(Dominator.result(), Default);
Expand All @@ -607,7 +653,7 @@ void StructurizeCFG::simplifyConditions() {
for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
auto &Preds = I.second;
for (auto &J : Preds) {
auto &Cond = J.second;
Value *Cond = J.second.first;
Instruction *Inverted;
if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
!Cond->use_empty()) {
Expand Down Expand Up @@ -904,9 +950,10 @@ void StructurizeCFG::setPrevNode(BasicBlock *BB) {
/// Does BB dominate all the predicates of Node?
///
/// Looks up the predicate map stored for Node's entry block and asks the
/// dominator tree whether \p BB dominates every block used as a key in it.
/// Only the key blocks are inspected; the condition/weight values mapped to
/// them are ignored.
bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
BBPredicates &Preds = Predicates[Node->getEntry()];
return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) {
return DT->dominates(BB, Pred.first);
});
return llvm::all_of(Preds,
[&](std::pair<BasicBlock *, ValueWeightPair> Pred) {
return DT->dominates(BB, Pred.first);
});
}

/// Can we predict that this node will always be called?
Expand All @@ -918,9 +965,9 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
if (!PrevNode)
return true;

for (std::pair<BasicBlock*, Value*> Pred : Preds) {
for (std::pair<BasicBlock *, ValueWeightPair> Pred : Preds) {
BasicBlock *BB = Pred.first;
Value *V = Pred.second;
Value *V = Pred.second.first;

if (V != BoolTrue)
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ define amdgpu_ps i32 @if_else(i32 %0) {
; OPT-LABEL: define amdgpu_ps i32 @if_else(
; OPT-SAME: i32 [[TMP0:%.*]]) {
; OPT-NEXT: [[C:%.*]] = icmp ne i32 [[TMP0]], 0
; OPT-NEXT: br i1 [[C]], label %[[FALSE:.*]], label %[[FLOW:.*]]
; OPT-NEXT: br i1 [[C]], label %[[FALSE:.*]], label %[[FLOW:.*]], !prof [[PROF0:![0-9]+]]
; OPT: [[FLOW]]:
; OPT-NEXT: [[TMP2:%.*]] = phi i32 [ 33, %[[FALSE]] ], [ undef, [[TMP1:%.*]] ]
; OPT-NEXT: [[TMP3:%.*]] = phi i1 [ false, %[[FALSE]] ], [ true, [[TMP1]] ]
Expand Down Expand Up @@ -40,7 +40,7 @@ define amdgpu_ps void @loop_if_break(i32 %n) {
; OPT: [[LOOP]]:
; OPT-NEXT: [[I:%.*]] = phi i32 [ [[N]], %[[ENTRY]] ], [ [[TMP0:%.*]], %[[FLOW:.*]] ]
; OPT-NEXT: [[C:%.*]] = icmp ugt i32 [[I]], 0
; OPT-NEXT: br i1 [[C]], label %[[LOOP_BODY:.*]], label %[[FLOW]]
; OPT-NEXT: br i1 [[C]], label %[[LOOP_BODY:.*]], label %[[FLOW]], !prof [[PROF1:![0-9]+]]
; OPT: [[LOOP_BODY]]:
; OPT-NEXT: [[I_NEXT:%.*]] = sub i32 [[I]], 1
; OPT-NEXT: br label %[[FLOW]]
Expand Down Expand Up @@ -70,3 +70,7 @@ exit: ; preds = %loop
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

!0 = !{!"branch_weights", i32 1000, i32 1}
;.
; OPT: [[PROF0]] = !{!"branch_weights", i32 1, i32 1000}
; OPT: [[PROF1]] = !{!"branch_weights", i32 1000, i32 1}
;.
Loading