|
| 1 | +// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --skip-symbolization --perf-event=br_inst_retired.near_taken:upp --sample-period=1000003 |
| 2 | +// RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE |
| 3 | +// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --perf-event=br_inst_retired.near_taken:upp --sample-period=1000003 |
| 4 | +// RUN: FileCheck %s --input-file %t --check-prefix=CHECK |
| 5 | + |
| 6 | +// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --skip-symbolization --perf-event=br_misp_retired.all_branches:upp --leading-ip-only --sample-period=1000003 |
| 7 | +// RUN: FileCheck %s --input-file %t --check-prefix=UNPRED-RAW-PROFILE |
| 8 | +// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --perf-event=br_misp_retired.all_branches:upp --leading-ip-only --sample-period=1000003 |
| 9 | +// RUN: FileCheck %s --input-file %t --check-prefix=UNPRED |
| 10 | + |
| 11 | +// Check that we can use perf event filtering to generate multiple types of |
| 12 | +// source-level profiles from a single perf profile. In this case, we generate |
| 13 | +// a typical execution frequency profile using br_inst_retired.near_taken LBRs, |
| 14 | +// and a branch mispredict profile using br_misp_retired.all_branches sample |
| 15 | +// IPs. |
| 16 | + |
| 17 | +// Check that we can use --sample-period to compute LBR-based and IP-based |
| 18 | +// profiles whose counts are absolute and therefore directly comparable. For |
| 19 | +// example, in this case the branch of interest (at source line offset 4) is |
| 20 | +// in a loop body which is executed ~20M times in total, and it is mispredicted |
| 21 | +// about 9M times, yielding a mispredict rate of roughly 0.45. |
| 22 | + |
| 23 | +// The source example below is based on perfKernelCpp/cmov_3, except a |
| 24 | +// misleading builtin is used to persuade the compiler not to use cmov, which |
| 25 | +// induces branch mispredicts. |
| 26 | + |
| 27 | +// CHECK: sel_arr:652547082:0 |
| 28 | +// CHECK: 3.1: 20225766 |
| 29 | +// CHECK: 3.2: 20225766 |
| 30 | +// CHECK: 4: 19838670 |
| 31 | +// CHECK: 5: 20225766 |
| 32 | + |
| 33 | +// UNPRED: sel_arr:18000054:0 |
| 34 | +// UNPRED: 3.1: 0 |
| 35 | +// UNPRED: 3.2: 0 |
| 36 | +// UNPRED: 4: 9000027 |
| 37 | +// UNPRED: 5: 0 |
| 38 | + |
| 39 | +// CHECK-RAW-PROFILE: 3 |
| 40 | +// CHECK-RAW-PROFILE-NEXT: 2f0-2fa:9774174 |
| 41 | +// CHECK-RAW-PROFILE-NEXT: 2f0-310:10064496 |
| 42 | +// CHECK-RAW-PROFILE-NEXT: 2ff-310:10161270 |
| 43 | + |
| 44 | +// UNPRED-RAW-PROFILE: 1 |
| 45 | +// UNPRED-RAW-PROFILE-NEXT: 2fa-2fa:9000027 |
| 46 | + |
| 47 | +// original code: |
| 48 | +// icx -fprofile-sample-generate lit.c |
| 49 | +#include <stdlib.h> |
| 50 | + |
| 51 | +#define N 20000 |
| 52 | +#define ITERS 10000 |
| 53 | + |
| 54 | +static int *m_s1, *m_s2, *m_s3, *m_dst; |
| 55 | + |
| 56 | +void init(void) { |
| 57 | + m_s1 = malloc(sizeof(int)*N); |
| 58 | + m_s2 = malloc(sizeof(int)*N); |
| 59 | + m_s3 = malloc(sizeof(int)*N); |
| 60 | + m_dst = malloc(sizeof(int)*N); |
| 61 | + srand(42); |
| 62 | + |
| 63 | + for (int i = 0; i < N; i++) { |
| 64 | + m_s1[i] = rand() % N; |
| 65 | + m_s2[i] = 0; |
| 66 | + m_s3[i] = 1; |
| 67 | + } |
| 68 | +} |
| 69 | + |
| 70 | +void __attribute__((noinline)) sel_arr(int *dst, int *s1, int *s2, int *s3) { |
| 71 | +#pragma nounroll |
| 72 | +#pragma clang loop vectorize(disable) interleave(disable) |
| 73 | + for (int i = 0; i < N; i++) { |
| 74 | + int *p = __builtin_expect((s1[i] < 10035), 0) ? &s2[i] : &s3[i]; |
| 75 | + dst[i] = *p; |
| 76 | + } |
| 77 | +} |
| 78 | + |
| 79 | +int main(void) { |
| 80 | + init(); |
| 81 | + for(int i=0; i<ITERS; ++i) |
| 82 | + sel_arr(m_dst, m_s1, m_s2, m_s3); |
| 83 | + return 0; |
| 84 | +} |
0 commit comments