Skip to content

[InlineCost] Correct the default branch cost for the switch statement #85160

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions llvm/lib/Analysis/InlineCost.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -701,21 +701,26 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {

void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,
bool DefaultDestUndefined) override {
if (!DefaultDestUndefined)
addCost(2 * InstrCost);
// If suitable for a jump table, consider the cost for the table size and
// branch to destination.
// Maximum valid cost increased in this function.
if (JumpTableSize) {
// Suppose a default branch includes one compare and one conditional
// branch if it's reachable.
if (!DefaultDestUndefined)
addCost(2 * InstrCost);
// Suppose a jump table requires one load and one jump instruction.
int64_t JTCost =
static_cast<int64_t>(JumpTableSize) * InstrCost + 4 * InstrCost;
static_cast<int64_t>(JumpTableSize) * InstrCost + 2 * InstrCost;
addCost(JTCost);
return;
}

if (NumCaseCluster <= 3) {
// Suppose a comparison includes one compare and one conditional branch.
addCost(NumCaseCluster * 2 * InstrCost);
// We can reduce a set of instructions if the default branch is
// undefined.
addCost((NumCaseCluster - DefaultDestUndefined) * 2 * InstrCost);
return;
}

Expand Down Expand Up @@ -1152,7 +1157,7 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
// FIXME: These constants are taken from the heuristic-based cost visitor.
// These should be removed entirely in a later revision to avoid reliance on
// heuristics in the ML inliner.
static constexpr int JTCostMultiplier = 4;
static constexpr int JTCostMultiplier = 2;
static constexpr int CaseClusterCostMultiplier = 2;
static constexpr int SwitchDefaultDestCostMultiplier = 2;
static constexpr int SwitchCostMultiplier = 2;
Expand Down Expand Up @@ -1235,11 +1240,10 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {

void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,
bool DefaultDestUndefined) override {
if (!DefaultDestUndefined)
increment(InlineCostFeatureIndex::switch_default_dest_penalty,
SwitchDefaultDestCostMultiplier * InstrCost);

if (JumpTableSize) {
if (!DefaultDestUndefined)
increment(InlineCostFeatureIndex::switch_default_dest_penalty,
SwitchDefaultDestCostMultiplier * InstrCost);
int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost +
JTCostMultiplier * InstrCost;
increment(InlineCostFeatureIndex::jump_table_penalty, JTCost);
Expand All @@ -1248,7 +1252,8 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {

if (NumCaseCluster <= 3) {
increment(InlineCostFeatureIndex::case_cluster_penalty,
NumCaseCluster * CaseClusterCostMultiplier * InstrCost);
(NumCaseCluster - DefaultDestUndefined) *
CaseClusterCostMultiplier * InstrCost);
return;
}

Expand Down
130 changes: 130 additions & 0 deletions llvm/test/Transforms/Inline/inline-cost-switch-default.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
; RUN: opt -S -passes=inline %s -debug-only=inline-cost -min-jump-table-entries=4 --disable-output 2>&1 | FileCheck %s -check-prefix=LOOKUPTABLE -match-full-lines
; RUN: opt -S -passes=inline %s -debug-only=inline-cost -min-jump-table-entries=5 --disable-output 2>&1 | FileCheck %s -check-prefix=SWITCH -match-full-lines
; REQUIRES: asserts

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define i64 @main(i64 %a) {
%b = call i64 @small_switch_default(i64 %a)
%c = call i64 @small_switch_no_default(i64 %a)
%d = call i64 @lookup_table_default(i64 %a)
%e = call i64 @lookup_table_no_default(i64 %a)
ret i64 %b
}

; SWITCH-LABEL: Analyzing call of small_switch_default{{.*}}
; SWITCH: Cost: 0
define i64 @small_switch_default(i64 %a) {
switch i64 %a, label %default_branch [
i64 -1, label %branch_0
i64 8, label %branch_1
i64 52, label %branch_2
]

branch_0:
br label %exit

branch_1:
br label %exit

branch_2:
br label %exit

default_branch:
br label %exit

exit:
%b = phi i64 [ 5, %branch_0 ], [ 9, %branch_1 ], [ 2, %branch_2 ], [ 3, %default_branch ]
ret i64 %b
}

; SWITCH-LABEL: Analyzing call of small_switch_no_default{{.*}}
; SWITCH: Cost: -10
define i64 @small_switch_no_default(i64 %a) {
switch i64 %a, label %unreachabledefault [
i64 -1, label %branch_0
i64 8, label %branch_1
i64 52, label %branch_2
]

branch_0:
br label %exit

branch_1:
br label %exit

branch_2:
br label %exit

unreachabledefault:
unreachable

exit:
%b = phi i64 [ 5, %branch_0 ], [ 9, %branch_1 ], [ 2, %branch_2 ]
ret i64 %b
}

; LOOKUPTABLE-LABEL: Analyzing call of lookup_table_default{{.*}}
; LOOKUPTABLE: Cost: 10
; SWITCH-LABEL: Analyzing call of lookup_table_default{{.*}}
; SWITCH: Cost: 20
define i64 @lookup_table_default(i64 %a) {
switch i64 %a, label %default_branch [
i64 0, label %branch_0
i64 1, label %branch_1
i64 2, label %branch_2
i64 3, label %branch_3
]

branch_0:
br label %exit

branch_1:
br label %exit

branch_2:
br label %exit

branch_3:
br label %exit

default_branch:
br label %exit

exit:
%b = phi i64 [ 5, %branch_0 ], [ 9, %branch_1 ], [ 2, %branch_2 ], [ 7, %branch_3 ], [ 3, %default_branch ]
ret i64 %b
}

; LOOKUPTABLE-LABEL: Analyzing call of lookup_table_no_default{{.*}}
; LOOKUPTABLE: Cost: 0
; SWITCH-LABEL: Analyzing call of lookup_table_no_default{{.*}}
; SWITCH: Cost: 20
define i64 @lookup_table_no_default(i64 %a) {
switch i64 %a, label %unreachabledefault [
i64 0, label %branch_0
i64 1, label %branch_1
i64 2, label %branch_2
i64 3, label %branch_3
]

branch_0:
br label %exit

branch_1:
br label %exit

branch_2:
br label %exit

branch_3:
br label %exit

unreachabledefault:
unreachable

exit:
%b = phi i64 [ 5, %branch_0 ], [ 9, %branch_1 ], [ 2, %branch_2 ], [ 7, %branch_3 ]
ret i64 %b
}
21 changes: 2 additions & 19 deletions llvm/test/Transforms/Inline/inline-switch-default-2.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt %s -S -passes=inline -inline-threshold=21 | FileCheck %s
; RUN: opt %s -S -passes=inline -inline-threshold=11 | FileCheck %s
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please fix the FileCheck lines in this file; there's a bunch of checks for stuff like "LOOKUPTABLE" which don't correspond to a RUN line. (Maybe push this separately.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated: 971ec1f


; Check for scenarios without TTI.

Expand All @@ -16,24 +16,7 @@ define i64 @foo1(i64 %a) {
define i64 @foo2(i64 %a) {
; CHECK-LABEL: define i64 @foo2(
; CHECK-SAME: i64 [[A:%.*]]) {
; CHECK-NEXT: switch i64 [[A]], label [[UNREACHABLEDEFAULT_I:%.*]] [
; CHECK-NEXT: i64 0, label [[BRANCH_0_I:%.*]]
; CHECK-NEXT: i64 2, label [[BRANCH_2_I:%.*]]
; CHECK-NEXT: i64 4, label [[BRANCH_4_I:%.*]]
; CHECK-NEXT: i64 6, label [[BRANCH_6_I:%.*]]
; CHECK-NEXT: ]
; CHECK: branch_0.i:
; CHECK-NEXT: br label [[BAR2_EXIT:%.*]]
; CHECK: branch_2.i:
; CHECK-NEXT: br label [[BAR2_EXIT]]
; CHECK: branch_4.i:
; CHECK-NEXT: br label [[BAR2_EXIT]]
; CHECK: branch_6.i:
; CHECK-NEXT: br label [[BAR2_EXIT]]
; CHECK: unreachabledefault.i:
; CHECK-NEXT: unreachable
; CHECK: bar2.exit:
; CHECK-NEXT: [[B_I:%.*]] = phi i64 [ 5, [[BRANCH_0_I]] ], [ 9, [[BRANCH_2_I]] ], [ 2, [[BRANCH_4_I]] ], [ 7, [[BRANCH_6_I]] ]
; CHECK-NEXT: [[B_I:%.*]] = call i64 @bar2(i64 [[A]])
; CHECK-NEXT: ret i64 [[B_I]]
;
%b = call i64 @bar2(i64 %a)
Expand Down
25 changes: 5 additions & 20 deletions llvm/test/Transforms/Inline/inline-switch-default.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt %s -S -passes=inline -inline-threshold=26 -min-jump-table-entries=4 | FileCheck %s -check-prefix=LOOKUPTABLE
; RUN: opt %s -S -passes=inline -inline-threshold=21 -min-jump-table-entries=5 | FileCheck %s -check-prefix=SWITCH
; RUN: opt %s -S -passes=inline -inline-threshold=16 -min-jump-table-entries=4 | FileCheck %s -check-prefix=LOOKUPTABLE
; RUN: opt %s -S -passes=inline -inline-threshold=11 -min-jump-table-entries=5 | FileCheck %s -check-prefix=SWITCH

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
Expand All @@ -22,6 +22,8 @@ define i64 @foo1(i64 %a) {
ret i64 %b
}

; Since the default branch is undefined behavior,
; we can inline `bar2`: https://github.com/llvm/llvm-project/issues/90929
define i64 @foo2(i64 %a) {
; LOOKUPTABLE-LABEL: define i64 @foo2(
; LOOKUPTABLE-SAME: i64 [[A:%.*]]) {
Expand All @@ -47,24 +49,7 @@ define i64 @foo2(i64 %a) {
;
; SWITCH-LABEL: define i64 @foo2(
; SWITCH-SAME: i64 [[A:%.*]]) {
; SWITCH-NEXT: switch i64 [[A]], label [[UNREACHABLEDEFAULT_I:%.*]] [
; SWITCH-NEXT: i64 0, label [[BRANCH_0_I:%.*]]
; SWITCH-NEXT: i64 2, label [[BRANCH_2_I:%.*]]
; SWITCH-NEXT: i64 4, label [[BRANCH_4_I:%.*]]
; SWITCH-NEXT: i64 6, label [[BRANCH_6_I:%.*]]
; SWITCH-NEXT: ]
; SWITCH: branch_0.i:
; SWITCH-NEXT: br label [[BAR2_EXIT:%.*]]
; SWITCH: branch_2.i:
; SWITCH-NEXT: br label [[BAR2_EXIT]]
; SWITCH: branch_4.i:
; SWITCH-NEXT: br label [[BAR2_EXIT]]
; SWITCH: branch_6.i:
; SWITCH-NEXT: br label [[BAR2_EXIT]]
; SWITCH: unreachabledefault.i:
; SWITCH-NEXT: unreachable
; SWITCH: bar2.exit:
; SWITCH-NEXT: [[B_I:%.*]] = phi i64 [ 5, [[BRANCH_0_I]] ], [ 9, [[BRANCH_2_I]] ], [ 2, [[BRANCH_4_I]] ], [ 7, [[BRANCH_6_I]] ]
; SWITCH-NEXT: [[B_I:%.*]] = call i64 @bar2(i64 [[A]])
; SWITCH-NEXT: ret i64 [[B_I]]
;
%b = call i64 @bar2(i64 %a)
Expand Down