Skip to content

Commit e3cf80c

Browse files
authored
BlockFrequencyInfoImpl: Avoid big numbers, increase precision for small spreads
BlockFrequencyInfo calculates block frequencies as Scaled64 numbers but, as a last step, converts them to unsigned 64-bit integers (`BlockFrequency`). This improves the factors picked for this conversion so that we:
* Avoid big numbers close to UINT64_MAX, so that users do not overflow/saturate when adding multiple frequencies together or when multiplying them with integers. This leaves the topmost 10 bits unused to allow for some room.
* Spread the difference between the hottest and coldest block as much as possible to increase precision.
* If the hot/cold spread cannot be represented, lose precision at the lower end, but keep the frequencies at the upper end differentiable for hot blocks.
1 parent 69ade08 commit e3cf80c

File tree

90 files changed

+1822
-1616
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

90 files changed

+1822
-1616
lines changed

compiler-rt/test/profile/Inputs/instrprof-gcov-multiple-bbs-single-line.c.gcov

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,25 +10,25 @@
1010
// CHECK-NEXT: -: 4:
1111
// CHECK-NEXT: 1: 5: int a = 1;
1212
// CHECK-NEXT: 1: 6: if (a) {
13-
// CHECK-NEXT:branch 0 taken 1
14-
// CHECK-NEXT:branch 1 taken 0
13+
// CHECK-NEXT:branch 0 taken 0
14+
// CHECK-NEXT:branch 1 taken 1
1515
// CHECK-NEXT: 1: 7: var++;
1616
// CHECK-NEXT: 1: 8: }
1717
// CHECK-NEXT: -: 9:
1818
// CHECK-NEXT: 1: 10: if (a) {}
19-
// CHECK-NEXT:branch 0 taken 1
20-
// CHECK-NEXT:branch 1 taken 0
19+
// CHECK-NEXT:branch 0 taken 0
20+
// CHECK-NEXT:branch 1 taken 1
2121
// CHECK-NEXT: -: 11:
2222
// CHECK-NEXT: 1: 12: int b = 0;
2323
// CHECK-NEXT: 1: 13: if (b) {
24-
// CHECK-NEXT:branch 0 taken 0
25-
// CHECK-NEXT:branch 1 taken 1
24+
// CHECK-NEXT:branch 0 taken 1
25+
// CHECK-NEXT:branch 1 taken 0
2626
// CHECK-NEXT: #####: 14: var++;
2727
// CHECK-NEXT: #####: 15: }
2828
// CHECK-NEXT: -: 16:
2929
// CHECK-NEXT: 1: 17: if (b) {}
30-
// CHECK-NEXT:branch 0 taken 0
31-
// CHECK-NEXT:branch 1 taken 1
30+
// CHECK-NEXT:branch 0 taken 1
31+
// CHECK-NEXT:branch 1 taken 0
3232
// CHECK-NEXT: -: 18:
3333
// CHECK-NEXT: 1: 19: return 0;
3434
// CHECK-NEXT: -: 20:}

llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -481,30 +481,24 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
481481

482482
static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
483483
const Scaled64 &Min, const Scaled64 &Max) {
484-
// Scale the Factor to a size that creates integers. Ideally, integers would
485-
// be scaled so that Max == UINT64_MAX so that they can be best
486-
// differentiated. However, in the presence of large frequency values, small
487-
// frequencies are scaled down to 1, making it impossible to differentiate
488-
// small, unequal numbers. When the spread between Min and Max frequencies
489-
// fits well within MaxBits, we make the scale be at least 8.
490-
const unsigned MaxBits = 64;
491-
const unsigned SpreadBits = (Max / Min).lg();
492-
Scaled64 ScalingFactor;
493-
if (SpreadBits <= MaxBits - 3) {
494-
// If the values are small enough, make the scaling factor at least 8 to
495-
// allow distinguishing small values.
496-
ScalingFactor = Min.inverse();
497-
ScalingFactor <<= 3;
498-
} else {
499-
// If the values need more than MaxBits to be represented, saturate small
500-
// frequency values down to 1 by using a scaling factor that benefits large
501-
// frequency values.
502-
ScalingFactor = Scaled64(1, MaxBits) / Max;
503-
}
484+
// Scale the Factor to a size that creates integers. If possible scale
485+
// integers so that Max == UINT64_MAX so that they can be best differentiated.
486+
// It is possible that the range between min and max cannot be accurately
487+
// represented in a 64bit integer without either losing precision for small
488+
// values (so small unequal numbers all map to 1) or saturating big numbers
489+
// losing precision for big numbers (so unequal big numbers may map to
490+
// UINT64_MAX). We choose to lose precision for small numbers.
491+
const unsigned MaxBits = sizeof(Scaled64::DigitsType) * CHAR_BIT;
492+
// Users often add up multiple BlockFrequency values or multiply them with
493+
// things like instruction costs. Leave some room to avoid saturating
494+
// operations reaching UINT64_MAX too early.
495+
const unsigned Slack = 10;
496+
Scaled64 ScalingFactor = Scaled64(1, MaxBits - Slack) / Max;
504497

505498
// Translate the floats to integers.
506499
LLVM_DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
507500
<< ", factor = " << ScalingFactor << "\n");
501+
(void)Min;
508502
for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
509503
Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
510504
BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());

llvm/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ declare i32 @printf(i8*, ...)
5959

6060
; CHECK: Printing analysis {{.*}} for function 'main':
6161
; CHECK-NEXT: block-frequency-info: main
62-
define i32 @main() {
62+
define i32 @main() !prof !6 {
6363
entry:
6464
%retval = alloca i32, align 4
6565
%i = alloca i32, align 4
@@ -93,7 +93,7 @@ for.cond4: ; preds = %for.inc, %for.body3
9393
%cmp5 = icmp slt i32 %2, 100
9494
br i1 %cmp5, label %for.body6, label %for.end, !prof !3
9595

96-
; CHECK: - for.body6: float = 500000.5, int = 4000004
96+
; CHECK: - for.body6: float = 1000000.0,{{.*}}count = 1000000
9797
for.body6: ; preds = %for.cond4
9898
call void @bar()
9999
br label %for.inc
@@ -143,7 +143,7 @@ for.cond16: ; preds = %for.inc19, %for.bod
143143
%cmp17 = icmp slt i32 %8, 10000
144144
br i1 %cmp17, label %for.body18, label %for.end21, !prof !4
145145

146-
; CHECK: - for.body18: float = 499999.9, int = 3999998
146+
; CHECK: - for.body18: float = 999999.5,{{.*}}count = 1000000
147147
for.body18: ; preds = %for.cond16
148148
call void @bar()
149149
br label %for.inc19
@@ -175,7 +175,7 @@ for.cond26: ; preds = %for.inc29, %for.end
175175
%cmp27 = icmp slt i32 %12, 1000000
176176
br i1 %cmp27, label %for.body28, label %for.end31, !prof !5
177177

178-
; CHECK: - for.body28: float = 499995.2, int = 3999961
178+
; CHECK: - for.body28: float = 1000224.3,{{.*}}count = 1000224
179179
for.body28: ; preds = %for.cond26
180180
call void @bar()
181181
br label %for.inc29
@@ -197,8 +197,9 @@ for.end31: ; preds = %for.cond26
197197
!llvm.ident = !{!0}
198198

199199
!0 = !{!"clang version 3.7.0 (trunk 232635) (llvm/trunk 232636)"}
200-
!1 = !{!"branch_weights", i32 101, i32 2}
201-
!2 = !{!"branch_weights", i32 10001, i32 101}
202-
!3 = !{!"branch_weights", i32 1000001, i32 10001}
203-
!4 = !{!"branch_weights", i32 1000001, i32 101}
204-
!5 = !{!"branch_weights", i32 1000001, i32 2}
200+
!1 = !{!"branch_weights", i32 100, i32 1}
201+
!2 = !{!"branch_weights", i32 10000, i32 100}
202+
!3 = !{!"branch_weights", i32 1000000, i32 10000}
203+
!4 = !{!"branch_weights", i32 1000000, i32 100}
204+
!5 = !{!"branch_weights", i32 1000000, i32 1}
205+
!6 = !{!"function_entry_count", i32 1}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
; RUN: opt < %s -disable-output -passes="print<block-freq>" 2>&1 | FileCheck %s
2+
; Sanity check precision for small-ish min/max spread.
3+
4+
@g = global i32 0
5+
6+
; CHECK-LABEL: block-frequency-info: func0
7+
; CHECK: - entry: float = 1.0, {{.*}}, count = 1000
8+
; CHECK: - cmp0_true: float = 0.4, {{.*}}, count = 400
9+
; CHECK: - cmp0_false: float = 0.6, {{.*}}, count = 600
10+
; CHECK: - cmp1_true: float = 0.1, {{.*}}, count = 100
11+
; CHECK: - cmp1_false: float = 0.3, {{.*}}, count = 300
12+
; CHECK: - join: float = 1.0, {{.*}}, count = 1000
13+
14+
define void @func0(i32 %a0, i32 %a1) !prof !0 {
15+
entry:
16+
%cmp0 = icmp ne i32 %a0, 0
17+
br i1 %cmp0, label %cmp0_true, label %cmp0_false, !prof !1
18+
19+
cmp0_true:
20+
store volatile i32 1, ptr @g
21+
%cmp1 = icmp ne i32 %a1, 0
22+
br i1 %cmp1, label %cmp1_true, label %cmp1_false, !prof !2
23+
24+
cmp0_false:
25+
store volatile i32 2, ptr @g
26+
br label %join
27+
28+
cmp1_true:
29+
store volatile i32 3, ptr @g
30+
br label %join
31+
32+
cmp1_false:
33+
store volatile i32 4, ptr @g
34+
br label %join
35+
36+
join:
37+
store volatile i32 5, ptr @g
38+
ret void
39+
}
40+
41+
!0 = !{!"function_entry_count", i64 1000}
42+
!1 = !{!"branch_weights", i32 400, i32 600}
43+
!2 = !{!"branch_weights", i32 1, i32 3}

llvm/test/CodeGen/AArch64/arm64-spill-remarks-treshold-hotness.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold=1 \
66
; RUN: 2>&1 | FileCheck -check-prefix=THRESHOLD %s
77

8-
; CHECK: remark: /tmp/kk.c:3:20: 1 spills 3.187500e+01 total spills cost 1 reloads 3.187500e+01 total reloads cost generated in loop{{$}}
8+
; CHECK: remark: /tmp/kk.c:3:20: 1 spills 3.200000e+01 total spills cost 1 reloads 3.200000e+01 total reloads cost generated in loop{{$}}
99
; THRESHOLD-NOT: remark
1010

1111
define void @fpr128(ptr %p) nounwind ssp {

llvm/test/CodeGen/AArch64/cfi-fixup.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ define i32 @f0(i32 %x) #0 {
88
; CHECK-NEXT: .cfi_def_cfa_offset 16
99
; CHECK-NEXT: .cfi_offset w30, -16
1010
; CHECK-NEXT: .cfi_remember_state
11-
; CHECK-NEXT: cbz w0, .LBB0_4
11+
; CHECK-NEXT: cbz w0, .LBB0_5
1212
; CHECK-NEXT: // %bb.1: // %entry
1313
; CHECK-NEXT: cmp w0, #2
14-
; CHECK-NEXT: b.eq .LBB0_5
14+
; CHECK-NEXT: b.eq .LBB0_4
1515
; CHECK-NEXT: // %bb.2: // %entry
1616
; CHECK-NEXT: cmp w0, #1
1717
; CHECK-NEXT: b.ne .LBB0_6
@@ -22,20 +22,20 @@ define i32 @f0(i32 %x) #0 {
2222
; CHECK-NEXT: .cfi_def_cfa_offset 0
2323
; CHECK-NEXT: .cfi_restore w30
2424
; CHECK-NEXT: ret
25-
; CHECK-NEXT: .LBB0_4:
25+
; CHECK-NEXT: .LBB0_4: // %if.then5
2626
; CHECK-NEXT: .cfi_restore_state
2727
; CHECK-NEXT: .cfi_remember_state
28-
; CHECK-NEXT: mov w0, #1
28+
; CHECK-NEXT: bl g0
29+
; CHECK-NEXT: mov w8, #1 // =0x1
30+
; CHECK-NEXT: sub w0, w8, w0
2931
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
3032
; CHECK-NEXT: .cfi_def_cfa_offset 0
3133
; CHECK-NEXT: .cfi_restore w30
3234
; CHECK-NEXT: ret
33-
; CHECK-NEXT: .LBB0_5: // %if.then5
35+
; CHECK-NEXT: .LBB0_5:
3436
; CHECK-NEXT: .cfi_restore_state
3537
; CHECK-NEXT: .cfi_remember_state
36-
; CHECK-NEXT: bl g0
37-
; CHECK-NEXT: mov w8, #1
38-
; CHECK-NEXT: sub w0, w8, w0
38+
; CHECK-NEXT: mov w0, #1 // =0x1
3939
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
4040
; CHECK-NEXT: .cfi_def_cfa_offset 0
4141
; CHECK-NEXT: .cfi_restore w30
@@ -115,7 +115,7 @@ define i32 @f2(i32 %x) #0 {
115115
; CHECK-NEXT: cbz w0, .LBB2_2
116116
; CHECK-NEXT: // %bb.1: // %if.end
117117
; CHECK-NEXT: bl g1
118-
; CHECK-NEXT: mov w8, #1
118+
; CHECK-NEXT: mov w8, #1 // =0x1
119119
; CHECK-NEXT: sub w0, w8, w0
120120
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
121121
; CHECK-NEXT: .cfi_def_cfa_offset 0

llvm/test/CodeGen/AArch64/redundant-mov-from-zero-extend.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ define i32 @test(i32 %input, i32 %n, i32 %a) {
1010
; CHECK-NEXT: ret
1111
; CHECK-NEXT: .LBB0_2: // %bb.0
1212
; CHECK-NEXT: add w8, w0, w1
13-
; CHECK-NEXT: mov w0, #100
13+
; CHECK-NEXT: mov w0, #100 // =0x64
1414
; CHECK-NEXT: cmp w8, #4
1515
; CHECK-NEXT: b.hi .LBB0_5
1616
; CHECK-NEXT: // %bb.3: // %bb.0
@@ -25,19 +25,19 @@ define i32 @test(i32 %input, i32 %n, i32 %a) {
2525
; CHECK-NEXT: ret
2626
; CHECK-NEXT: .LBB0_5: // %bb.0
2727
; CHECK-NEXT: cmp w8, #200
28-
; CHECK-NEXT: b.ne .LBB0_10
28+
; CHECK-NEXT: b.ne .LBB0_9
2929
; CHECK-NEXT: // %bb.6: // %sw.bb7
3030
; CHECK-NEXT: add w0, w2, #7
3131
; CHECK-NEXT: ret
32-
; CHECK-NEXT: .LBB0_7: // %sw.bb1
33-
; CHECK-NEXT: add w0, w2, #3
34-
; CHECK-NEXT: ret
35-
; CHECK-NEXT: .LBB0_8: // %sw.bb3
32+
; CHECK-NEXT: .LBB0_7: // %sw.bb3
3633
; CHECK-NEXT: add w0, w2, #4
3734
; CHECK-NEXT: ret
38-
; CHECK-NEXT: .LBB0_9: // %sw.bb5
35+
; CHECK-NEXT: .LBB0_8: // %sw.bb5
3936
; CHECK-NEXT: add w0, w2, #5
40-
; CHECK-NEXT: .LBB0_10: // %return
37+
; CHECK-NEXT: .LBB0_9: // %return
38+
; CHECK-NEXT: ret
39+
; CHECK-NEXT: .LBB0_10: // %sw.bb1
40+
; CHECK-NEXT: add w0, w2, #3
4141
; CHECK-NEXT: ret
4242
entry:
4343
%b = add nsw i32 %input, %n

llvm/test/CodeGen/AArch64/win64-jumptable.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ declare void @g(i32, i32)
4242
; CHECK-NEXT: .p2align 2
4343
; CHECK-NEXT: .LJTI0_0:
4444
; CHECK: .word .LBB0_2-.Ltmp0
45+
; CHECK: .word .LBB0_5-.Ltmp0
4546
; CHECK: .word .LBB0_3-.Ltmp0
4647
; CHECK: .word .LBB0_4-.Ltmp0
47-
; CHECK: .word .LBB0_5-.Ltmp0
4848
; CHECK: .text
4949
; CHECK: .seh_endproc
5050

llvm/test/CodeGen/AArch64/wineh-bti.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,11 @@ lbl4:
4747

4848
; CHECK: .LBB0_3:
4949
; CHECK-NEXT: hint #36
50-
; CHECK-NEXT: mov w0, #2
50+
; CHECK-NEXT: mov w0, #4
5151

5252
; CHECK: .LBB0_4:
5353
; CHECK-NEXT: hint #36
54-
; CHECK-NEXT: mov w0, #4
54+
; CHECK-NEXT: mov w0, #2
5555

5656
; CHECK: .LBB0_5:
5757
; CHECK-NEXT: hint #36

llvm/test/CodeGen/AMDGPU/greedy-broken-ssa-verifier-error.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ machineFunctionInfo:
1515
body: |
1616
; GCN-LABEL: name: ra_introduces_vreg_def
1717
; GCN: [[COPY_V0:%[0-9]+]]:vgpr_32 = COPY $vgpr0
18-
; GCN: [[COPY_V0]]:vgpr_32 =
18+
; GCN: [[COPY_V1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1919
bb.0:
2020
liveins: $vgpr0, $vgpr1
2121
%0:vgpr_32 = COPY $vgpr0

0 commit comments

Comments
 (0)