Skip to content

Commit 67ff66e

Browse files
[PGO][Offload] Fix offload coverage mapping (#143490)
This pull request fixes coverage mapping on GPU targets.

- It adds an address space cast to the coverage mapping generation pass.
- It reads the profiled function names directly from the ELF. Reading them from public globals was causing issues when multiple device-code object files were linked together.
1 parent 3cef099 commit 67ff66e

File tree

5 files changed

+22
-31
lines changed

5 files changed

+22
-31
lines changed

clang/lib/CodeGen/CoverageMappingGen.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2622,8 +2622,9 @@ void CoverageMappingModuleGen::emit() {
26222622
CGM.addUsedGlobal(CovData);
26232623
// Create the deferred function records array
26242624
if (!FunctionNames.empty()) {
2625-
auto NamesArrTy = llvm::ArrayType::get(llvm::PointerType::getUnqual(Ctx),
2626-
FunctionNames.size());
2625+
auto AddrSpace = FunctionNames.front()->getType()->getPointerAddressSpace();
2626+
auto NamesArrTy = llvm::ArrayType::get(
2627+
llvm::PointerType::get(Ctx, AddrSpace), FunctionNames.size());
26272628
auto NamesArrVal = llvm::ConstantArray::get(NamesArrTy, FunctionNames);
26282629
// This variable will *NOT* be emitted to the object file. It is used
26292630
// to pass the list of names referenced to codegen.

llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1955,12 +1955,6 @@ void InstrLowerer::emitNameData() {
19551955
GlobalValue::PrivateLinkage, NamesVal,
19561956
getInstrProfNamesVarName());
19571957

1958-
// Make names variable public if current target is a GPU
1959-
if (isGPUProfTarget(M)) {
1960-
NamesVar->setLinkage(GlobalValue::ExternalLinkage);
1961-
NamesVar->setVisibility(GlobalValue::VisibilityTypes::ProtectedVisibility);
1962-
}
1963-
19641958
NamesSize = CompressedNameStr.size();
19651959
setGlobalVariableLargeSection(TT, *NamesVar);
19661960
NamesVar->setSection(

offload/plugins-nextgen/common/include/GlobalHandler.h

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,7 @@ struct GPUProfGlobals {
8080

8181
void dump() const;
8282
Error write() const;
83+
bool empty() const;
8384
};
8485

8586
/// Subclass of GlobalTy that holds the memory for a global of \p Ty.
@@ -192,9 +193,6 @@ class GenericGlobalHandlerTy {
192193
/*D2H=*/false);
193194
}
194195

195-
/// Checks whether a given image contains profiling globals.
196-
bool hasProfilingGlobals(GenericDeviceTy &Device, DeviceImageTy &Image);
197-
198196
/// Reads profiling data from a GPU image to supplied profdata struct.
199197
/// Iterates through the image symbol table and stores global values
200198
/// with profiling prefixes.

offload/plugins-nextgen/common/src/GlobalHandler.cpp

Lines changed: 15 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -173,16 +173,6 @@ Error GenericGlobalHandlerTy::readGlobalFromImage(GenericDeviceTy &Device,
173173
return Plugin::success();
174174
}
175175

176-
bool GenericGlobalHandlerTy::hasProfilingGlobals(GenericDeviceTy &Device,
177-
DeviceImageTy &Image) {
178-
GlobalTy global(getInstrProfNamesVarName().str(), 0);
179-
if (auto Err = getGlobalMetadataFromImage(Device, Image, global)) {
180-
consumeError(std::move(Err));
181-
return false;
182-
}
183-
return true;
184-
}
185-
186176
Expected<GPUProfGlobals>
187177
GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
188178
DeviceImageTy &Image) {
@@ -204,12 +194,17 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
204194
// Check if given current global is a profiling global based
205195
// on name
206196
if (*NameOrErr == getInstrProfNamesVarName()) {
207-
// Read in profiled function names
208-
DeviceProfileData.NamesData = SmallVector<uint8_t>(Sym.getSize(), 0);
209-
GlobalTy NamesGlobal(NameOrErr->str(), Sym.getSize(),
210-
DeviceProfileData.NamesData.data());
211-
if (auto Err = readGlobalFromDevice(Device, Image, NamesGlobal))
212-
return Err;
197+
// Read in profiled function names from ELF
198+
auto SectionOrErr = Sym.getSection();
199+
if (!SectionOrErr)
200+
return SectionOrErr.takeError();
201+
202+
auto ContentsOrErr = (*SectionOrErr)->getContents();
203+
if (!ContentsOrErr)
204+
return ContentsOrErr.takeError();
205+
206+
SmallVector<uint8_t> NameBytes(ContentsOrErr->bytes());
207+
DeviceProfileData.NamesData = NameBytes;
213208
} else if (NameOrErr->starts_with(getInstrProfCountersVarPrefix())) {
214209
// Read global variable profiling counts
215210
SmallVector<int64_t> Counts(Sym.getSize() / sizeof(int64_t), 0);
@@ -322,3 +317,7 @@ Error GPUProfGlobals::write() const {
322317

323318
return Plugin::success();
324319
}
320+
321+
bool GPUProfGlobals::empty() const {
322+
return Counts.empty() && Data.empty() && NamesData.empty();
323+
}

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -858,14 +858,13 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
858858

859859
for (auto *Image : LoadedImages) {
860860
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
861-
if (!Handler.hasProfilingGlobals(*this, *Image))
862-
continue;
863-
864-
GPUProfGlobals profdata;
865861
auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
866862
if (!ProfOrErr)
867863
return ProfOrErr.takeError();
868864

865+
if (ProfOrErr->empty())
866+
continue;
867+
869868
// Dump out profdata
870869
if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) ==
871870
uint32_t(DeviceDebugKind::PGODump))

0 commit comments

Comments
 (0)