-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Support select optimization #80124
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/wangpc-pp/spr/main.riscv-support-select-optimization
Are you sure you want to change the base?
[RISCV] Support select optimization #80124
Conversation
Created using spr 1.3.4
@llvm/pr-subscribers-backend-risc-v Author: Wang Pengcheng (wangpc-pp) Changes: AArch64 has enabled this in https://reviews.llvm.org/D138990, and the measurement data still stands for RISCV. A similar optimization to #77284 is added too. Patch is 59.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80124.diff 7 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 58bf5e8fdefbd..a1600a48900cd 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1046,6 +1046,14 @@ def FeatureFastUnalignedAccess
def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
"UsePostRAScheduler", "true", "Schedule again after register allocation">;
+def FeaturePredictableSelectIsExpensive
+ : SubtargetFeature<"predictable-select-expensive", "PredictableSelectIsExpensive",
+ "true", "Prefer likely predicted branches over selects">;
+
+def FeatureEnableSelectOptimize
+ : SubtargetFeature<"enable-select-opt", "EnableSelectOptimize", "true",
+ "Enable the select optimize pass for select loop heuristics">;
+
def TuneNoOptimizedZeroStrideLoad
: SubtargetFeature<"no-optimized-zero-stride-load", "HasOptimizedZeroStrideLoad",
"false", "Hasn't optimized (perform fewer memory operations)"
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 82836346d8832..02fa067c59094 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1374,6 +1374,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
+ PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
+
setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL,
ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 2285c99d79010..fdf1c023fff87 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -101,6 +101,11 @@ static cl::opt<bool> EnableMISchedLoadClustering(
cl::desc("Enable load clustering in the machine scheduler"),
cl::init(false));
+static cl::opt<bool>
+ EnableSelectOpt("riscv-select-opt", cl::Hidden,
+ cl::desc("Enable select to branch optimizations"),
+ cl::init(true));
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -445,6 +450,9 @@ void RISCVPassConfig::addIRPasses() {
if (EnableLoopDataPrefetch)
addPass(createLoopDataPrefetchPass());
+ if (EnableSelectOpt && getOptLevel() == CodeGenOptLevel::Aggressive)
+ addPass(createSelectOptimizePass());
+
addPass(createRISCVGatherScatterLoweringPass());
addPass(createInterleavedAccessPass());
addPass(createRISCVCodeGenPreparePass());
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index fe1cdb2dfa423..aad2786623dcb 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -34,6 +34,9 @@ static cl::opt<unsigned> SLPMaxVF(
"exclusively by SLP vectorizer."),
cl::Hidden);
+static cl::opt<bool> EnableOrLikeSelectOpt("enable-riscv-or-like-select",
+ cl::init(true), cl::Hidden);
+
InstructionCost
RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
TTI::TargetCostKind CostKind) {
@@ -1594,3 +1597,15 @@ bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
C2.NumIVMuls, C2.NumBaseAdds,
C2.ScaleCost, C2.ImmCost, C2.SetupCost);
}
+
+bool RISCVTTIImpl::shouldTreatInstructionLikeSelect(const Instruction *I) {
+ // For the binary operators (e.g. or) we need to be more careful than
+ // selects, here we only transform them if they are already at a natural
+ // break point in the code - the end of a block with an unconditional
+ // terminator.
+ if (EnableOrLikeSelectOpt && I->getOpcode() == Instruction::Or &&
+ isa<BranchInst>(I->getNextNode()) &&
+ cast<BranchInst>(I->getNextNode())->isUnconditional())
+ return true;
+ return BaseT::shouldTreatInstructionLikeSelect(I);
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 0747a778fe9a2..7925e7bad275f 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -364,6 +364,9 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
bool shouldFoldTerminatingConditionAfterLSR() const {
return true;
}
+
+ bool enableSelectOptimize() { return ST->enableSelectOptimize(); }
+ bool shouldTreatInstructionLikeSelect(const Instruction *I);
};
} // end namespace llvm
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index e7db8ef9d5aff..62c1af52e6c20 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -34,6 +34,15 @@
; CHECK-NEXT: Optimization Remark Emitter
; CHECK-NEXT: Scalar Evolution Analysis
; CHECK-NEXT: Loop Data Prefetch
+; CHECK-NEXT: Post-Dominator Tree Construction
+; CHECK-NEXT: Branch Probability Analysis
+; CHECK-NEXT: Block Frequency Analysis
+; CHECK-NEXT: Lazy Branch Probability Analysis
+; CHECK-NEXT: Lazy Block Frequency Analysis
+; CHECK-NEXT: Optimization Remark Emitter
+; CHECK-NEXT: Optimize selects
+; CHECK-NEXT: Dominator Tree Construction
+; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: RISC-V gather/scatter lowering
; CHECK-NEXT: Interleaved Access Pass
; CHECK-NEXT: RISC-V CodeGenPrepare
diff --git a/llvm/test/CodeGen/RISCV/selectopt.ll b/llvm/test/CodeGen/RISCV/selectopt.ll
new file mode 100644
index 0000000000000..2bc3cee4e30f6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/selectopt.ll
@@ -0,0 +1,873 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -select-optimize -mtriple=riscv64 -S < %s \
+; RUN: | FileCheck %s --check-prefix=CHECK-SELECT
+; RUN: opt -select-optimize -mtriple=riscv64 -mattr=+enable-select-opt -S < %s \
+; RUN: | FileCheck %s --check-prefix=CHECK-BRANCH
+; RUN: opt -select-optimize -mtriple=riscv64 -mattr=+enable-select-opt,+predictable-select-expensive -S < %s \
+; RUN: | FileCheck %s --check-prefix=CHECK-BRANCH
+
+%struct.st = type { i32, i64, ptr, ptr, i16, ptr, ptr, i64, i64 }
+
+; This test has a select at the end of if.then, which is better transformed to a branch on OoO cores.
+
+define void @replace(ptr nocapture noundef %newst, ptr noundef %t, ptr noundef %h, i64 noundef %c, i64 noundef %rc, i64 noundef %ma, i64 noundef %n) {
+; CHECK-SELECT-LABEL: @replace(
+; CHECK-SELECT-NEXT: entry:
+; CHECK-SELECT-NEXT: [[T1:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[NEWST:%.*]], i64 0, i32 2
+; CHECK-SELECT-NEXT: store ptr [[T:%.*]], ptr [[T1]], align 8
+; CHECK-SELECT-NEXT: [[H3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 0, i32 3
+; CHECK-SELECT-NEXT: store ptr [[H:%.*]], ptr [[H3]], align 8
+; CHECK-SELECT-NEXT: [[ORG_C:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 0, i32 8
+; CHECK-SELECT-NEXT: store i64 [[C:%.*]], ptr [[ORG_C]], align 8
+; CHECK-SELECT-NEXT: [[C6:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 0, i32 1
+; CHECK-SELECT-NEXT: store i64 [[C]], ptr [[C6]], align 8
+; CHECK-SELECT-NEXT: [[FLOW:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 0, i32 7
+; CHECK-SELECT-NEXT: store i64 [[RC:%.*]], ptr [[FLOW]], align 8
+; CHECK-SELECT-NEXT: [[CONV:%.*]] = trunc i64 [[N:%.*]] to i32
+; CHECK-SELECT-NEXT: store i32 [[CONV]], ptr [[NEWST]], align 8
+; CHECK-SELECT-NEXT: [[FLOW10:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 1, i32 7
+; CHECK-SELECT-NEXT: [[TMP0:%.*]] = load i64, ptr [[FLOW10]], align 8
+; CHECK-SELECT-NEXT: [[FLOW12:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 2, i32 7
+; CHECK-SELECT-NEXT: [[TMP1:%.*]] = load i64, ptr [[FLOW12]], align 8
+; CHECK-SELECT-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]]
+; CHECK-SELECT-NEXT: [[CONV15:%.*]] = select i1 [[CMP13]], i64 2, i64 3
+; CHECK-SELECT-NEXT: [[CMP16_NOT149:%.*]] = icmp sgt i64 [[CONV15]], [[MA:%.*]]
+; CHECK-SELECT-NEXT: br i1 [[CMP16_NOT149]], label [[WHILE_END:%.*]], label [[LAND_RHS:%.*]]
+; CHECK-SELECT: land.rhs:
+; CHECK-SELECT-NEXT: [[CMP_0151:%.*]] = phi i64 [ [[CMP_1:%.*]], [[IF_END87:%.*]] ], [ [[CONV15]], [[ENTRY:%.*]] ]
+; CHECK-SELECT-NEXT: [[POS_0150:%.*]] = phi i64 [ [[CMP_0151]], [[IF_END87]] ], [ 1, [[ENTRY]] ]
+; CHECK-SELECT-NEXT: [[SUB:%.*]] = add nsw i64 [[CMP_0151]], -1
+; CHECK-SELECT-NEXT: [[FLOW19:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 7
+; CHECK-SELECT-NEXT: [[TMP2:%.*]] = load i64, ptr [[FLOW19]], align 8
+; CHECK-SELECT-NEXT: [[CMP20:%.*]] = icmp sgt i64 [[TMP2]], [[RC]]
+; CHECK-SELECT-NEXT: br i1 [[CMP20]], label [[WHILE_BODY:%.*]], label [[WHILE_END]]
+; CHECK-SELECT: while.body:
+; CHECK-SELECT-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]]
+; CHECK-SELECT-NEXT: [[T24:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 2
+; CHECK-SELECT-NEXT: [[TMP3:%.*]] = load ptr, ptr [[T24]], align 8
+; CHECK-SELECT-NEXT: [[SUB25:%.*]] = add nsw i64 [[POS_0150]], -1
+; CHECK-SELECT-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]]
+; CHECK-SELECT-NEXT: [[T27:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 2
+; CHECK-SELECT-NEXT: store ptr [[TMP3]], ptr [[T27]], align 8
+; CHECK-SELECT-NEXT: [[H30:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 3
+; CHECK-SELECT-NEXT: [[TMP4:%.*]] = load ptr, ptr [[H30]], align 8
+; CHECK-SELECT-NEXT: [[H33:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 3
+; CHECK-SELECT-NEXT: store ptr [[TMP4]], ptr [[H33]], align 8
+; CHECK-SELECT-NEXT: [[C36:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 1
+; CHECK-SELECT-NEXT: [[TMP5:%.*]] = load i64, ptr [[C36]], align 8
+; CHECK-SELECT-NEXT: [[C39:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 1
+; CHECK-SELECT-NEXT: store i64 [[TMP5]], ptr [[C39]], align 8
+; CHECK-SELECT-NEXT: [[TMP6:%.*]] = load i64, ptr [[C36]], align 8
+; CHECK-SELECT-NEXT: [[ORG_C45:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 8
+; CHECK-SELECT-NEXT: store i64 [[TMP6]], ptr [[ORG_C45]], align 8
+; CHECK-SELECT-NEXT: [[FLOW51:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 7
+; CHECK-SELECT-NEXT: store i64 [[TMP2]], ptr [[FLOW51]], align 8
+; CHECK-SELECT-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX18]], align 8
+; CHECK-SELECT-NEXT: store i32 [[TMP7]], ptr [[ARRAYIDX26]], align 8
+; CHECK-SELECT-NEXT: store ptr [[T]], ptr [[T24]], align 8
+; CHECK-SELECT-NEXT: store ptr [[H]], ptr [[H30]], align 8
+; CHECK-SELECT-NEXT: store i64 [[C]], ptr [[C36]], align 8
+; CHECK-SELECT-NEXT: [[ORG_C69:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 8
+; CHECK-SELECT-NEXT: store i64 [[C]], ptr [[ORG_C69]], align 8
+; CHECK-SELECT-NEXT: store i64 [[RC]], ptr [[FLOW19]], align 8
+; CHECK-SELECT-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX18]], align 8
+; CHECK-SELECT-NEXT: [[MUL:%.*]] = shl nsw i64 [[CMP_0151]], 1
+; CHECK-SELECT-NEXT: [[ADD:%.*]] = or i64 [[MUL]], 1
+; CHECK-SELECT-NEXT: [[CMP77_NOT:%.*]] = icmp sgt i64 [[ADD]], [[MA]]
+; CHECK-SELECT-NEXT: br i1 [[CMP77_NOT]], label [[IF_END87]], label [[IF_THEN:%.*]]
+; CHECK-SELECT: if.then:
+; CHECK-SELECT-NEXT: [[SUB79:%.*]] = add nsw i64 [[MUL]], -1
+; CHECK-SELECT-NEXT: [[FLOW81:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB79]], i32 7
+; CHECK-SELECT-NEXT: [[TMP8:%.*]] = load i64, ptr [[FLOW81]], align 8
+; CHECK-SELECT-NEXT: [[FLOW83:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[MUL]], i32 7
+; CHECK-SELECT-NEXT: [[TMP9:%.*]] = load i64, ptr [[FLOW83]], align 8
+; CHECK-SELECT-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP8]], [[TMP9]]
+; CHECK-SELECT-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP84]], i64 [[ADD]], i64 [[MUL]]
+; CHECK-SELECT-NEXT: br label [[IF_END87]]
+; CHECK-SELECT: if.end87:
+; CHECK-SELECT-NEXT: [[CMP_1]] = phi i64 [ [[MUL]], [[WHILE_BODY]] ], [ [[SPEC_SELECT]], [[IF_THEN]] ]
+; CHECK-SELECT-NEXT: [[CMP16_NOT:%.*]] = icmp sgt i64 [[CMP_1]], [[MA]]
+; CHECK-SELECT-NEXT: br i1 [[CMP16_NOT]], label [[WHILE_END]], label [[LAND_RHS]]
+; CHECK-SELECT: while.end:
+; CHECK-SELECT-NEXT: ret void
+;
+; CHECK-BRANCH-LABEL: @replace(
+; CHECK-BRANCH-NEXT: entry:
+; CHECK-BRANCH-NEXT: [[T1:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[NEWST:%.*]], i64 0, i32 2
+; CHECK-BRANCH-NEXT: store ptr [[T:%.*]], ptr [[T1]], align 8
+; CHECK-BRANCH-NEXT: [[H3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 0, i32 3
+; CHECK-BRANCH-NEXT: store ptr [[H:%.*]], ptr [[H3]], align 8
+; CHECK-BRANCH-NEXT: [[ORG_C:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 0, i32 8
+; CHECK-BRANCH-NEXT: store i64 [[C:%.*]], ptr [[ORG_C]], align 8
+; CHECK-BRANCH-NEXT: [[C6:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 0, i32 1
+; CHECK-BRANCH-NEXT: store i64 [[C]], ptr [[C6]], align 8
+; CHECK-BRANCH-NEXT: [[FLOW:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 0, i32 7
+; CHECK-BRANCH-NEXT: store i64 [[RC:%.*]], ptr [[FLOW]], align 8
+; CHECK-BRANCH-NEXT: [[CONV:%.*]] = trunc i64 [[N:%.*]] to i32
+; CHECK-BRANCH-NEXT: store i32 [[CONV]], ptr [[NEWST]], align 8
+; CHECK-BRANCH-NEXT: [[FLOW10:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 1, i32 7
+; CHECK-BRANCH-NEXT: [[TMP0:%.*]] = load i64, ptr [[FLOW10]], align 8
+; CHECK-BRANCH-NEXT: [[FLOW12:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 2, i32 7
+; CHECK-BRANCH-NEXT: [[TMP1:%.*]] = load i64, ptr [[FLOW12]], align 8
+; CHECK-BRANCH-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]]
+; CHECK-BRANCH-NEXT: [[CONV15:%.*]] = select i1 [[CMP13]], i64 2, i64 3
+; CHECK-BRANCH-NEXT: [[CMP16_NOT149:%.*]] = icmp sgt i64 [[CONV15]], [[MA:%.*]]
+; CHECK-BRANCH-NEXT: br i1 [[CMP16_NOT149]], label [[WHILE_END:%.*]], label [[LAND_RHS:%.*]]
+; CHECK-BRANCH: land.rhs:
+; CHECK-BRANCH-NEXT: [[CMP_0151:%.*]] = phi i64 [ [[CMP_1:%.*]], [[IF_END87:%.*]] ], [ [[CONV15]], [[ENTRY:%.*]] ]
+; CHECK-BRANCH-NEXT: [[POS_0150:%.*]] = phi i64 [ [[CMP_0151]], [[IF_END87]] ], [ 1, [[ENTRY]] ]
+; CHECK-BRANCH-NEXT: [[SUB:%.*]] = add nsw i64 [[CMP_0151]], -1
+; CHECK-BRANCH-NEXT: [[FLOW19:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 7
+; CHECK-BRANCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[FLOW19]], align 8
+; CHECK-BRANCH-NEXT: [[CMP20:%.*]] = icmp sgt i64 [[TMP2]], [[RC]]
+; CHECK-BRANCH-NEXT: br i1 [[CMP20]], label [[WHILE_BODY:%.*]], label [[WHILE_END]]
+; CHECK-BRANCH: while.body:
+; CHECK-BRANCH-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]]
+; CHECK-BRANCH-NEXT: [[T24:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 2
+; CHECK-BRANCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[T24]], align 8
+; CHECK-BRANCH-NEXT: [[SUB25:%.*]] = add nsw i64 [[POS_0150]], -1
+; CHECK-BRANCH-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]]
+; CHECK-BRANCH-NEXT: [[T27:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 2
+; CHECK-BRANCH-NEXT: store ptr [[TMP3]], ptr [[T27]], align 8
+; CHECK-BRANCH-NEXT: [[H30:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 3
+; CHECK-BRANCH-NEXT: [[TMP4:%.*]] = load ptr, ptr [[H30]], align 8
+; CHECK-BRANCH-NEXT: [[H33:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 3
+; CHECK-BRANCH-NEXT: store ptr [[TMP4]], ptr [[H33]], align 8
+; CHECK-BRANCH-NEXT: [[C36:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 1
+; CHECK-BRANCH-NEXT: [[TMP5:%.*]] = load i64, ptr [[C36]], align 8
+; CHECK-BRANCH-NEXT: [[C39:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 1
+; CHECK-BRANCH-NEXT: store i64 [[TMP5]], ptr [[C39]], align 8
+; CHECK-BRANCH-NEXT: [[TMP6:%.*]] = load i64, ptr [[C36]], align 8
+; CHECK-BRANCH-NEXT: [[ORG_C45:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 8
+; CHECK-BRANCH-NEXT: store i64 [[TMP6]], ptr [[ORG_C45]], align 8
+; CHECK-BRANCH-NEXT: [[FLOW51:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB25]], i32 7
+; CHECK-BRANCH-NEXT: store i64 [[TMP2]], ptr [[FLOW51]], align 8
+; CHECK-BRANCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX18]], align 8
+; CHECK-BRANCH-NEXT: store i32 [[TMP7]], ptr [[ARRAYIDX26]], align 8
+; CHECK-BRANCH-NEXT: store ptr [[T]], ptr [[T24]], align 8
+; CHECK-BRANCH-NEXT: store ptr [[H]], ptr [[H30]], align 8
+; CHECK-BRANCH-NEXT: store i64 [[C]], ptr [[C36]], align 8
+; CHECK-BRANCH-NEXT: [[ORG_C69:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB]], i32 8
+; CHECK-BRANCH-NEXT: store i64 [[C]], ptr [[ORG_C69]], align 8
+; CHECK-BRANCH-NEXT: store i64 [[RC]], ptr [[FLOW19]], align 8
+; CHECK-BRANCH-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX18]], align 8
+; CHECK-BRANCH-NEXT: [[MUL:%.*]] = shl nsw i64 [[CMP_0151]], 1
+; CHECK-BRANCH-NEXT: [[ADD:%.*]] = or i64 [[MUL]], 1
+; CHECK-BRANCH-NEXT: [[CMP77_NOT:%.*]] = icmp sgt i64 [[ADD]], [[MA]]
+; CHECK-BRANCH-NEXT: br i1 [[CMP77_NOT]], label [[IF_END87]], label [[IF_THEN:%.*]]
+; CHECK-BRANCH: if.then:
+; CHECK-BRANCH-NEXT: [[SUB79:%.*]] = add nsw i64 [[MUL]], -1
+; CHECK-BRANCH-NEXT: [[FLOW81:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[SUB79]], i32 7
+; CHECK-BRANCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[FLOW81]], align 8
+; CHECK-BRANCH-NEXT: [[FLOW83:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[NEWST]], i64 [[MUL]], i32 7
+; CHECK-BRANCH-NEXT: [[TMP9:%.*]] = load i64, ptr [[FLOW83]], align 8
+; CHECK-BRANCH-NEXT: [[CMP84:%.*]] = icmp slt i64 [[TMP8]], [[TMP9]]
+; CHECK-BRANCH-NEXT: [[CMP84_FROZEN:%.*]] = freeze i1 [[CMP84]]
+; CHECK-BRANCH-NEXT: br i1 [[CMP84_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]]
+; CHECK-BRANCH: select.false:
+; CHECK-BRANCH-NEXT: br label [[SELECT_END]]
+; CHECK-BRANCH: select.end:
+; CHECK-BRANCH-NEXT: [[SPEC_SELECT:%.*]] = phi i64 [ [[ADD]], [[IF_THEN]] ], [ [[MUL]], [[SELECT_FALSE]] ]
+; CHECK-BRANCH-NEXT: br label [[IF_END87]]
+; CHECK-BRANCH: if.end87:
+; CHECK-BRANCH-NEXT: [[CMP_1]] = phi i64 [ [[MUL]], [[WHILE_BODY]] ], [ [[SPEC_SELECT]], [[SELECT_END]] ]
+; CHECK-BRANCH-NEXT: [[CMP16_NOT:%.*]] = icmp sgt i64 [[CMP_1]], [[MA]]
+; CHECK-BRANCH-NEXT: br i1 [[CMP16_NOT]], label [[WHILE_END]], label [[LAND_RHS]]
+; CHECK-BRANCH: while.end:
+; CHECK-BRANCH-NEXT: ret void
+;
+entry:
+ %t1 = getelementptr inbounds %struct.st, ptr %newst, i64 0, i32 2
+ store ptr %t, ptr %t1, align 8
+ %h3 = getelementptr inbounds %struct.st, ptr %newst, i64 0, i32 3
+ store ptr %h, ptr %h3, align 8
+ %org_c = getelementptr inbounds %struct.st, ptr %newst, i64 0, i32 8
+ store i64 %c, ptr %org_c, align 8
+ %c6 = getelementptr inbounds %struct.st, ptr %newst, i64 0, i32 1
+ store i64 %c, ptr %c6, align 8
+ %flow = getelementptr inbounds %struct.st, ptr %newst, i64 0, i32 7
+ stor...
[truncated]
|
Please give the measurement data in this review or a direct link to it. I tried searching for it, and did not immediately find it. |
It's in the Phabricator link (https://reviews.llvm.org/D138990):
The performance gain is related to core implementation. For RISCV, the subtarget feature |
Created using spr 1.3.4
JFYI, I don't find the AArch64 data particularly convincing for RISCV. The magnitude of the change even on AArch64 is small, and could easily be swung one direction or the other by differences in implementation between the backends. |
Yeah! The result will differ for different targets/CPUs. Here is one RISCV data point for SPEC 2006 (which I don't think is universal) on an OoO RISCV CPU, with options:
The geomean is: 0.295%. |
Created using spr 1.3.4
Gentle ping. |
Ping. |
@@ -101,6 +101,11 @@ static cl::opt<bool> EnableMISchedLoadClustering( | |||
cl::desc("Enable load clustering in the machine scheduler"), | |||
cl::init(false)); | |||
|
|||
static cl::opt<bool> | |||
EnableSelectOpt("riscv-select-opt", cl::Hidden, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If no in tree targets use this, should we default to false?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We have already disabled it via enableSelectOptimize()?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But we added a bunch of passes to the pipeline. Does that have compile time impact?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, this point makes sense to me. This pass adds several analysis passes (most of them can be cached), so it may impact compile time.
I think the impact won't be large, since the pass exits early, before these analyses actually run, when enableSelectOptimize returns false.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@topperc WDYT?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
> I think the impact won't be large, since the pass exits early, before these analyses actually run, when enableSelectOptimize returns false.
The pass manager will run the analysis passes before the runOnFunction in the select optimize pass gets called. Unless those analysis passes do lazy updates and only compute something when they are queried, they will run before the early out.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Most of the added passes have been run before, so they may be cached?
Ping. Any more concerns? |
Do we have any data without Zicond? The worst case Zicond sequence is czero.eqz+czero.nez+or which is kind of expensive. Curious if this is pointing to Zicond being used too aggressively. |
Sorry, I didn't run it with this configuration. |
Ping.
func0: # @func0
li a2, 5
mul a1, a0, a0
bge a2, a0, .LBB0_2
addw a0, a1, a2
ret
.LBB0_2: # %select.false
li a2, 13
addw a0, a1, a2
ret |
#97708 is split out for adding |
AArch64 has enabled this in https://reviews.llvm.org/D138990, and
the measurement data still stands for RISCV.
And, similar optimization like #77284 is added too.
After this patch, the highly predictable branch will be converted
back to branches instead of using selects.