Skip to content

Commit c7b683c

Browse files
committed
[PGO][CUDA][HIP] Skip generating profile on the device stub and wrong-side functions.
- Skip generating profile data on `__global__` function in the host compilation. It's a host-side stub function only and don't have profile instrumentation generated on the real function body. The extra profile data results in the malformed instrumentation profile data. - Skip generating region mapping on functions in the wrong-side, i.e., + For the device compilation, skip host-only functions; and, + For the host compilation, skip device-only functions (including `__global__` functions.) - As the device-side profiling is not ready yet, only host-side profile code generation is checked. Differential Revision: https://reviews.llvm.org/D85276
1 parent 7866442 commit c7b683c

File tree

2 files changed

+37
-0
lines changed

2 files changed

+37
-0
lines changed

clang/lib/CodeGen/CodeGenPGO.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -773,6 +773,11 @@ void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
773773
if (!D->hasBody())
774774
return;
775775

776+
// Skip CUDA/HIP kernel launch stub functions.
777+
if (CGM.getLangOpts().CUDA && !CGM.getLangOpts().CUDAIsDevice &&
778+
D->hasAttr<CUDAGlobalAttr>())
779+
return;
780+
776781
bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
777782
llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
778783
if (!InstrumentRegions && !PGOReader)
@@ -831,6 +836,18 @@ bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
831836
if (!D->getBody())
832837
return true;
833838

839+
// Skip host-only functions in the CUDA device compilation and device-only
840+
// functions in the host compilation. Just roughly filter them out based on
841+
// the function attributes. If there are effectively host-only or device-only
842+
// ones, their coverage mapping may still be generated.
843+
if (CGM.getLangOpts().CUDA &&
844+
((CGM.getLangOpts().CUDAIsDevice && !D->hasAttr<CUDADeviceAttr>() &&
845+
!D->hasAttr<CUDAGlobalAttr>()) ||
846+
(!CGM.getLangOpts().CUDAIsDevice &&
847+
(D->hasAttr<CUDAGlobalAttr>() ||
848+
(!D->hasAttr<CUDAHostAttr>() && D->hasAttr<CUDADeviceAttr>())))))
849+
return true;
850+
834851
// Don't map the functions in system headers.
835852
const auto &SM = CGM.getContext().getSourceManager();
836853
auto Loc = D->getBody()->getBeginLoc();
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// RUN: echo "GPU binary would be here" > %t
2+
// RUN: %clang_cc1 -fprofile-instrument=clang -triple x86_64-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=PGOGEN %s
3+
// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -triple x86_64-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=COVMAP %s
4+
// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -triple x86_64-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm-only -o - %s | FileCheck --check-prefix=MAPPING %s
5+
6+
#include "Inputs/cuda.h"
7+
8+
// PGOGEN-NOT: @__profn_{{.*kernel.*}} =
9+
// COVMAP-COUNT-2: section "__llvm_covfun", comdat
10+
// COVMAP-NOT: section "__llvm_covfun", comdat
11+
// MAPPING-NOT: {{.*dfn.*}}:
12+
// MAPPING-NOT: {{.*kernel.*}}:
13+
14+
__device__ void dfn(int i) {}
15+
16+
__global__ void kernel(int i) { dfn(i); }
17+
18+
void host(void) {
19+
kernel<<<1, 1>>>(1);
20+
}

0 commit comments

Comments
 (0)