Skip to content

[AMDGPU][StructurizeCFG] Maintain branch MD_prof metadata #109813

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 25, 2024

Conversation

jmmartinez
Copy link
Contributor

Currently StructurizeCFG drops branch_weight metadata .
This metadata can be generated from user annotations in the source code like:

if (...) [[likely]] {
}

@jmmartinez
Copy link
Contributor Author

Pre-commited test in #109812

@llvmbot
Copy link
Member

llvmbot commented Sep 24, 2024

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: Juan Manuel Martinez Caamaño (jmmartinez)

Changes

Currently StructurizeCFG drops branch_weight metadata .
This metadata can be generated from user annotations in the source code like:

if (...) [[likely]] {
}

Full diff: https://github.com/llvm/llvm-project/pull/109813.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Scalar/StructurizeCFG.cpp (+69-18)
  • (added) llvm/test/CodeGen/AMDGPU/structurizer-keep-perf-md.ll (+76)
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index aca8225cebb3fd..563ce402fd44fe 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/Value.h"
@@ -47,6 +48,7 @@
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include <algorithm>
 #include <cassert>
+#include <optional>
 #include <utility>
 
 using namespace llvm;
@@ -85,7 +87,46 @@ using PhiMap = MapVector<PHINode *, BBValueVector>;
 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
 
 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
-using BBPredicates = DenseMap<BasicBlock *, Value *>;
+
+using MaybeCondBranchWeights = std::optional<class CondBranchWeights>;
+
+class CondBranchWeights {
+  uint32_t TrueWeight;
+  uint32_t FalseWeight;
+
+public:
+  CondBranchWeights(unsigned T, unsigned F) : TrueWeight(T), FalseWeight(F) {}
+
+  static MaybeCondBranchWeights tryParse(const BranchInst &Br) {
+    assert(Br.isConditional());
+
+    SmallVector<uint32_t, 2> Weights;
+    if (!extractBranchWeights(Br, Weights))
+      return std::nullopt;
+
+    if (Weights.size() != 2)
+      return std::nullopt;
+
+    return CondBranchWeights{Weights[0], Weights[1]};
+  }
+
+  static void setMetadata(BranchInst &Br,
+                          MaybeCondBranchWeights const &Weights) {
+    assert(Br.isConditional());
+    if (!Weights)
+      return;
+    uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight};
+    setBranchWeights(Br, Arr, false);
+  }
+
+  CondBranchWeights invert() const {
+    return CondBranchWeights{FalseWeight, TrueWeight};
+  }
+};
+
+using ValueWeightPair = std::pair<Value *, MaybeCondBranchWeights>;
+
+using BBPredicates = DenseMap<BasicBlock *, ValueWeightPair>;
 using PredMap = DenseMap<BasicBlock *, BBPredicates>;
 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
 
@@ -271,7 +312,7 @@ class StructurizeCFG {
 
   void analyzeLoops(RegionNode *N);
 
-  Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
+  ValueWeightPair buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
 
   void gatherPredicates(RegionNode *N);
 
@@ -449,16 +490,22 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
 }
 
 /// Build the condition for one edge
-Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
-                                      bool Invert) {
+ValueWeightPair StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
+                                               bool Invert) {
   Value *Cond = Invert ? BoolFalse : BoolTrue;
+  MaybeCondBranchWeights Weights = std::nullopt;
+
   if (Term->isConditional()) {
     Cond = Term->getCondition();
+    Weights = CondBranchWeights::tryParse(*Term);
 
-    if (Idx != (unsigned)Invert)
+    if (Idx != (unsigned)Invert) {
       Cond = invertCondition(Cond);
+      if (Weights)
+        Weights = Weights->invert();
+    }
   }
-  return Cond;
+  return {Cond, Weights};
 }
 
 /// Analyze the predecessors of each block and build up predicates
@@ -490,8 +537,8 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
             if (Visited.count(Other) && !Loops.count(Other) &&
                 !Pred.count(Other) && !Pred.count(P)) {
 
-              Pred[Other] = BoolFalse;
-              Pred[P] = BoolTrue;
+              Pred[Other] = {BoolFalse, std::nullopt};
+              Pred[P] = {BoolTrue, std::nullopt};
               continue;
             }
           }
@@ -512,9 +559,9 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
 
       BasicBlock *Entry = R->getEntry();
       if (Visited.count(Entry))
-        Pred[Entry] = BoolTrue;
+        Pred[Entry] = {BoolTrue, std::nullopt};
       else
-        LPred[Entry] = BoolFalse;
+        LPred[Entry] = {BoolFalse, std::nullopt};
     }
   }
 }
@@ -578,12 +625,14 @@ void StructurizeCFG::insertConditions(bool Loops) {
     Dominator.addBlock(Parent);
 
     Value *ParentValue = nullptr;
-    for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
+    MaybeCondBranchWeights ParentWeights = std::nullopt;
+    for (std::pair<BasicBlock *, ValueWeightPair> BBAndPred : Preds) {
       BasicBlock *BB = BBAndPred.first;
-      Value *Pred = BBAndPred.second;
+      Value *Pred = BBAndPred.second.first;
 
       if (BB == Parent) {
         ParentValue = Pred;
+        ParentWeights = BBAndPred.second.second;
         break;
       }
       PhiInserter.AddAvailableValue(BB, Pred);
@@ -592,6 +641,7 @@ void StructurizeCFG::insertConditions(bool Loops) {
 
     if (ParentValue) {
       Term->setCondition(ParentValue);
+      CondBranchWeights::setMetadata(*Term, ParentWeights);
     } else {
       if (!Dominator.resultIsRememberedBlock())
         PhiInserter.AddAvailableValue(Dominator.result(), Default);
@@ -607,7 +657,7 @@ void StructurizeCFG::simplifyConditions() {
   for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
     auto &Preds = I.second;
     for (auto &J : Preds) {
-      auto &Cond = J.second;
+      auto &Cond = J.second.first;
       Instruction *Inverted;
       if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
           !Cond->use_empty()) {
@@ -904,9 +954,10 @@ void StructurizeCFG::setPrevNode(BasicBlock *BB) {
 /// Does BB dominate all the predicates of Node?
 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
   BBPredicates &Preds = Predicates[Node->getEntry()];
-  return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) {
-    return DT->dominates(BB, Pred.first);
-  });
+  return llvm::all_of(Preds,
+                      [&](std::pair<BasicBlock *, ValueWeightPair> Pred) {
+                        return DT->dominates(BB, Pred.first);
+                      });
 }
 
 /// Can we predict that this node will always be called?
@@ -918,9 +969,9 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
   if (!PrevNode)
     return true;
 
-  for (std::pair<BasicBlock*, Value*> Pred : Preds) {
+  for (std::pair<BasicBlock *, ValueWeightPair> Pred : Preds) {
     BasicBlock *BB = Pred.first;
-    Value *V = Pred.second;
+    Value *V = Pred.second.first;
 
     if (V != BoolTrue)
       return false;
diff --git a/llvm/test/CodeGen/AMDGPU/structurizer-keep-perf-md.ll b/llvm/test/CodeGen/AMDGPU/structurizer-keep-perf-md.ll
new file mode 100644
index 00000000000000..d036d6cbca7b9d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/structurizer-keep-perf-md.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn-- -S -structurizecfg %s | FileCheck -check-prefix=OPT %s
+
+define amdgpu_ps i32 @if_else(i32 %0) {
+; OPT-LABEL: define amdgpu_ps i32 @if_else(
+; OPT-SAME: i32 [[TMP0:%.*]]) {
+; OPT-NEXT:    [[C:%.*]] = icmp ne i32 [[TMP0]], 0
+; OPT-NEXT:    br i1 [[C]], label %[[FALSE:.*]], label %[[FLOW:.*]], !prof [[PROF0:![0-9]+]]
+; OPT:       [[FLOW]]:
+; OPT-NEXT:    [[TMP2:%.*]] = phi i32 [ 33, %[[FALSE]] ], [ undef, [[TMP1:%.*]] ]
+; OPT-NEXT:    [[TMP3:%.*]] = phi i1 [ false, %[[FALSE]] ], [ true, [[TMP1]] ]
+; OPT-NEXT:    br i1 [[TMP3]], label %[[TRUE:.*]], label %[[EXIT:.*]]
+; OPT:       [[TRUE]]:
+; OPT-NEXT:    br label %[[EXIT]]
+; OPT:       [[FALSE]]:
+; OPT-NEXT:    br label %[[FLOW]]
+; OPT:       [[EXIT]]:
+; OPT-NEXT:    [[RET:%.*]] = phi i32 [ [[TMP2]], %[[FLOW]] ], [ 42, %[[TRUE]] ]
+; OPT-NEXT:    ret i32 [[RET]]
+;
+  %c = icmp eq i32 %0, 0
+  br i1 %c, label %true, label %false, !prof !0
+
+true:                                             ; preds = %1
+  br label %exit
+
+false:                                            ; preds = %1
+  br label %exit
+
+exit:                                             ; preds = %false, %true
+  %ret = phi i32 [ 42, %true ], [ 33, %false ]
+  ret i32 %ret
+}
+
+define amdgpu_ps void @loop_if_break(i32 %n) {
+; OPT-LABEL: define amdgpu_ps void @loop_if_break(
+; OPT-SAME: i32 [[N:%.*]]) {
+; OPT-NEXT:  [[ENTRY:.*]]:
+; OPT-NEXT:    br label %[[LOOP:.*]]
+; OPT:       [[LOOP]]:
+; OPT-NEXT:    [[I:%.*]] = phi i32 [ [[N]], %[[ENTRY]] ], [ [[TMP0:%.*]], %[[FLOW:.*]] ]
+; OPT-NEXT:    [[C:%.*]] = icmp ugt i32 [[I]], 0
+; OPT-NEXT:    br i1 [[C]], label %[[LOOP_BODY:.*]], label %[[FLOW]], !prof [[PROF1:![0-9]+]]
+; OPT:       [[LOOP_BODY]]:
+; OPT-NEXT:    [[I_NEXT:%.*]] = sub i32 [[I]], 1
+; OPT-NEXT:    br label %[[FLOW]]
+; OPT:       [[FLOW]]:
+; OPT-NEXT:    [[TMP0]] = phi i32 [ [[I_NEXT]], %[[LOOP_BODY]] ], [ undef, %[[LOOP]] ]
+; OPT-NEXT:    [[TMP1:%.*]] = phi i1 [ false, %[[LOOP_BODY]] ], [ true, %[[LOOP]] ]
+; OPT-NEXT:    br i1 [[TMP1]], label %[[EXIT:.*]], label %[[LOOP]]
+; OPT:       [[EXIT]]:
+; OPT-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:                                             ; preds = %loop_body, %entry
+  %i = phi i32 [ %n, %entry ], [ %i.next, %loop_body ]
+  %c = icmp ugt i32 %i, 0
+  br i1 %c, label %loop_body, label %exit, !prof !0
+
+loop_body:                                        ; preds = %loop
+  %i.next = sub i32 %i, 1
+  br label %loop
+
+exit:                                             ; preds = %loop
+  ret void
+}
+
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!0 = !{!"branch_weights", i32 1000, i32 1}
+;.
+; OPT: [[PROF0]] = !{!"branch_weights", i32 1, i32 1000}
+; OPT: [[PROF1]] = !{!"branch_weights", i32 1000, i32 1}
+;.

@jmmartinez jmmartinez force-pushed the amd/if-cvt-v2-structurizer branch from e903c5a to d43033f Compare September 25, 2024 07:26
@jmmartinez jmmartinez force-pushed the amd/if-cvt-v2-structurizer branch from d43033f to 680d4af Compare September 25, 2024 10:17
@jmmartinez jmmartinez merged commit b40ff5a into llvm:main Sep 25, 2024
8 checks passed
jmmartinez added a commit to jmmartinez/llvm-project that referenced this pull request Sep 25, 2024
Currently `StructurizeCFG` drops branch_weight metadata .
This metadata can be generated from user annotations in the source code
like:

```cpp
if (...) [[likely]] {
}
```
searlmc1 pushed a commit to ROCm/llvm-project that referenced this pull request Apr 11, 2025
Currently `StructurizeCFG` drops branch_weight metadata .
This metadata can be generated from user annotations in the source code
like:

```cpp
if (...) [[likely]] {
}
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants