-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LowerSwitch] Don't let pass manager handle the dependency #68662
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-amdgpu Author: Ruiling, Song (ruiling) ChangesSome passes has limitation that only support simple terminators: branch/unreachable/return. Right now, they ask the pass manager to add LowerSwitch pass to eliminate Full diff: https://github.com/llvm/llvm-project/pull/68662.diff 13 Files Affected:
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 1ac0f053f67b509..c9feed08b90827e 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -702,6 +702,8 @@ BasicBlock *CreateControlFlowHub(
// successors
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder);
+// Check whether the block only has simple terminator: br/brcond/unreachable/ret
+bool hasOnlySimpleTerminator(const BasicBlock *BB);
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_BASICBLOCKUTILS_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 9ad841c3c8a54a5..3cdaac69ac9e1d4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -46,6 +46,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -114,8 +115,6 @@ void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const {
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
- // This is a cluster of orthogonal Transforms
- AU.addPreservedID(LowerSwitchID);
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<TargetTransformInfoWrapperPass>();
@@ -192,6 +191,9 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
const PostDominatorTree &PDT,
const UniformityInfo &UA) {
+ for (auto &BB : F)
+ assert(hasOnlySimpleTerminator(&BB));
+
if (PDT.root_size() == 0 ||
(PDT.root_size() == 1 &&
!isa<BranchInst>(PDT.getRoot()->getTerminator())))
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index fac5695c7beaf17..b5f003911b4e6f9 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -42,6 +42,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
@@ -353,7 +354,6 @@ class StructurizeCFGLegacyPass : public RegionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
if (SkipUniformRegions)
AU.addRequired<UniformityInfoWrapperPass>();
- AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -368,7 +368,6 @@ char StructurizeCFGLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
"Structurize the CFG", false, false)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
@@ -1173,6 +1172,9 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
this->DT = DT;
Func = R->getEntry()->getParent();
+ for (auto &BB : *Func)
+ assert(hasOnlySimpleTerminator(&BB));
+
ParentRegion = R;
orderNodes();
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 389aab39dec3409..3297b446b1b7703 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -2076,3 +2076,11 @@ void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) {
PBI->setCondition(NewCond);
PBI->swapSuccessors();
}
+
+bool llvm::hasOnlySimpleTerminator(const BasicBlock *BB) {
+ auto *Term = BB->getTerminator();
+ if (isa<ReturnInst>(Term) || isa<UnreachableInst>(Term) ||
+ isa<BranchInst>(Term))
+ return true;
+ return false;
+}
diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index dda236167363db8..8192a63aacee977 100644
--- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -87,10 +87,8 @@ struct FixIrreducible : public FunctionPass {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreservedID(LowerSwitchID);
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
}
@@ -106,7 +104,6 @@ FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); }
INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible",
"Convert irreducible control-flow into natural loops",
false /* Only looks at CFG */, false /* Analysis Pass */)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible",
@@ -317,6 +314,9 @@ static bool FixIrreducibleImpl(Function &F, LoopInfo &LI, DominatorTree &DT) {
LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
<< F.getName() << "\n");
+ for (auto &BB : F)
+ assert(hasOnlySimpleTerminator(&BB));
+
bool Changed = false;
SmallVector<Loop *, 8> WorkList;
diff --git a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 2b706858cbedd25..e5adbe5133cf457 100644
--- a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -39,8 +39,6 @@ void UnifyFunctionExitNodesLegacyPass::getAnalysisUsage(
AnalysisUsage &AU) const {
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
- // This is a cluster of orthogonal Transforms
- AU.addPreservedID(LowerSwitchID);
}
namespace {
diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index 8c781f59ff5a4b0..8c0db3bc771b1ea 100644
--- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -44,10 +44,8 @@ struct UnifyLoopExitsLegacyPass : public FunctionPass {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(LowerSwitchID);
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreservedID(LowerSwitchID);
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
}
@@ -65,7 +63,6 @@ FunctionPass *llvm::createUnifyLoopExitsPass() {
INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits",
"Fixup each natural loop to have a single exit block",
false /* Only looks at CFG */, false /* Analysis Pass */)
-INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits",
@@ -234,6 +231,9 @@ bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) {
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ for (auto &BB : F)
+ assert(hasOnlySimpleTerminator(&BB));
+
return runImpl(LI, DT);
}
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index b939c8d2e339de4..bf2f6e983b529f8 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -60,8 +60,6 @@
; GCN-O0-NEXT: Cycle Info Analysis
; GCN-O0-NEXT: Uniformity Analysis
; GCN-O0-NEXT: Unify divergent function exit nodes
-; GCN-O0-NEXT: Lazy Value Information Analysis
-; GCN-O0-NEXT: Lower SwitchInst's to branches
; GCN-O0-NEXT: Dominator Tree Construction
; GCN-O0-NEXT: Natural Loop Information
; GCN-O0-NEXT: Convert irreducible control-flow into natural loops
@@ -251,8 +249,6 @@
; GCN-O1-NEXT: Code sinking
; GCN-O1-NEXT: Post-Dominator Tree Construction
; GCN-O1-NEXT: Unify divergent function exit nodes
-; GCN-O1-NEXT: Lazy Value Information Analysis
-; GCN-O1-NEXT: Lower SwitchInst's to branches
; GCN-O1-NEXT: Dominator Tree Construction
; GCN-O1-NEXT: Natural Loop Information
; GCN-O1-NEXT: Convert irreducible control-flow into natural loops
@@ -537,8 +533,6 @@
; GCN-O1-OPTS-NEXT: Code sinking
; GCN-O1-OPTS-NEXT: Post-Dominator Tree Construction
; GCN-O1-OPTS-NEXT: Unify divergent function exit nodes
-; GCN-O1-OPTS-NEXT: Lazy Value Information Analysis
-; GCN-O1-OPTS-NEXT: Lower SwitchInst's to branches
; GCN-O1-OPTS-NEXT: Dominator Tree Construction
; GCN-O1-OPTS-NEXT: Natural Loop Information
; GCN-O1-OPTS-NEXT: Convert irreducible control-flow into natural loops
@@ -841,8 +835,6 @@
; GCN-O2-NEXT: Code sinking
; GCN-O2-NEXT: Post-Dominator Tree Construction
; GCN-O2-NEXT: Unify divergent function exit nodes
-; GCN-O2-NEXT: Lazy Value Information Analysis
-; GCN-O2-NEXT: Lower SwitchInst's to branches
; GCN-O2-NEXT: Dominator Tree Construction
; GCN-O2-NEXT: Natural Loop Information
; GCN-O2-NEXT: Convert irreducible control-flow into natural loops
@@ -1159,8 +1151,6 @@
; GCN-O3-NEXT: Code sinking
; GCN-O3-NEXT: Post-Dominator Tree Construction
; GCN-O3-NEXT: Unify divergent function exit nodes
-; GCN-O3-NEXT: Lazy Value Information Analysis
-; GCN-O3-NEXT: Lower SwitchInst's to branches
; GCN-O3-NEXT: Dominator Tree Construction
; GCN-O3-NEXT: Natural Loop Information
; GCN-O3-NEXT: Convert irreducible control-flow into natural loops
diff --git a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
index f58bed3b98e97c3..0a68820f902a1e8 100644
--- a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
+++ b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
@@ -1,5 +1,5 @@
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx600 -S -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -S -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx600 -S -lowerswitch -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -S -lowerswitch -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s
; RUN: llc -march=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s
; Add an extra verifier runs. There were some cases where invalid IR
diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
index f7479c847559254..8d2a312b3463272 100644
--- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -mtriple=amdgcn-- -structurizecfg -si-annotate-control-flow < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -S -mtriple=amdgcn-- -lowerswitch -structurizecfg -si-annotate-control-flow < %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; Ensure two if.break calls, for both the inner and outer loops
diff --git a/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll b/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll
index 56d7fc335911ec6..1eef7b967f6d998 100644
--- a/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -verify -S %s -o - | FileCheck -check-prefix=IR %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -lowerswitch -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -verify -S %s -o - | FileCheck -check-prefix=IR %s
; A test with a divergent unreachable block and uniform return block. The
; compiler needs to create a regions that includes them so that
diff --git a/llvm/test/Transforms/FixIrreducible/switch.ll b/llvm/test/Transforms/FixIrreducible/switch.ll
index efa431091252287..57b92c08ed3115b 100644
--- a/llvm/test/Transforms/FixIrreducible/switch.ll
+++ b/llvm/test/Transforms/FixIrreducible/switch.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -fix-irreducible -S | FileCheck %s
+; RUN: opt < %s -passes='lowerswitch,fix-irreducible' -S | FileCheck %s
define void @loop_1(i32 %Value, i1 %PredEntry, i1 %PredD) {
; CHECK-LABEL: @loop_1(
diff --git a/llvm/test/Transforms/StructurizeCFG/switch.ll b/llvm/test/Transforms/StructurizeCFG/switch.ll
index 445c0ab0c2d8552..153b6d1914e67a2 100644
--- a/llvm/test/Transforms/StructurizeCFG/switch.ll
+++ b/llvm/test/Transforms/StructurizeCFG/switch.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
+; RUN: opt -S -passes='lowerswitch,structurizecfg' %s -o - | FileCheck %s
; The structurizecfg pass cannot handle switch instructions, so we need to
; make sure the lower switch pass is always run before structurizecfg.
|
@@ -60,8 +60,6 @@ | |||
; GCN-O0-NEXT: Cycle Info Analysis | |||
; GCN-O0-NEXT: Uniformity Analysis | |||
; GCN-O0-NEXT: Unify divergent function exit nodes | |||
; GCN-O0-NEXT: Lazy Value Information Analysis | |||
; GCN-O0-NEXT: Lower SwitchInst's to branches |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removing this is nice. I prototyped an alternative way to do it: jayfoad@c03cbd4
See also #59830
if (isa<ReturnInst>(Term) || isa<UnreachableInst>(Term) || | ||
isa<BranchInst>(Term)) | ||
return true; | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (isa<ReturnInst>(Term) || isa<UnreachableInst>(Term) || | |
isa<BranchInst>(Term)) | |
return true; | |
return false; | |
return isa<ReturnInst>(Term) || isa<UnreachableInst>(Term) || | |
isa<BranchInst>(Term); |
I have no objections, but why do you think it is better to do it this way? |
In https://reviews.llvm.org/D89026, @aeubanks said "The NPM doesn't support adding a non-analysis pass as a dependency of another, so I had to add -lowerswitch to some tests or pin them to the legacy PM." I guess it is a burden to support such kind of non-analysis pass dependency in NPM and it is not quite useful in general. |
I'm not really seeing the value in doing this unless we go toward completely removing the whole "required pass" concept of the legacy PM. But the legacy PM treats dependent analyses and passes the same way, so you'd have to rewrite the legacy PM and you might as well port everything to the new PM instead. |
Some passes has limitation that only support simple terminators: branch/unreachable/return. Right now, they ask the pass manager to add LowerSwitch pass to eliminate `switch`. Let's manage such kind of pass dependency by ourselves. Also add the assertion in the related passes.
f3254d1
to
5cbd101
Compare
By saying "doing this", do you mean the change here? The change here will help remove one extra run of this pass for AMDGPU and also make the tests for these passes can run under NPM. I think we want to switch to NPM anyway, right? |
That's obviously not gonna happen. But just because the legacy PM supports using a pass like an analysis, that does not mean we have use it like that in this case.
That is a clear win. The question is: do we want to do it like in this patch? Or by declaring that more passes preserve LowerSwitch as in jayfoad@c03cbd4. |
If I understand the situation correctly, your change only work for legacy pass manager? As our goal is to switch to New PM, I think it is better to just remove the |
Yes. The new pass manager does not support codegen passes. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't understand that this saves a pass run, this lgtm then
(also I thought I already sent out these comments, sorry for the delay)
@@ -702,6 +702,8 @@ BasicBlock *CreateControlFlowHub( | |||
// successors | |||
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder); | |||
|
|||
// Check whether the block only has simple terminator: br/brcond/unreachable/ret | |||
bool hasOnlySimpleTerminator(const BasicBlock *BB); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if all users are checking all blocks, we should just make this API do that, instead of check a single block
@@ -192,6 +191,9 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet( | |||
bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT, | |||
const PostDominatorTree &PDT, | |||
const UniformityInfo &UA) { | |||
for (auto &BB : F) | |||
assert(hasOnlySimpleTerminator(&BB)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should be something like assert(hasOnlySimpleTerminator(&BB) && "Need to run lower-switch");
so people know what to do if the assert fires
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the suggestion, but I think there might be also other unsupported terminator goes here, so I just add a message "unsupported block terminator".
Some passes has limitation that only support simple terminators: branch/unreachable/return. Right now, they ask the pass manager to add LowerSwitch pass to eliminate
switch
. Let's manage such kind of pass dependency by ourselves. Also add the assertion in the related passes.