Skip to content

Commit 2c8b912

Browse files
Revert "[PGO][OpenMP] Instrumentation for GPU devices (#76587)"
This reverts commit 5fd2af3. It caused build issues and broke the buildbot.
1 parent 808e0f1 commit 2c8b912

File tree

16 files changed

+27
-358
lines changed

16 files changed

+27
-358
lines changed

clang/lib/CodeGen/CodeGenPGO.cpp

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1193,15 +1193,10 @@ void CodeGenPGO::emitCounterSetOrIncrement(CGBuilderTy &Builder, const Stmt *S,
11931193

11941194
unsigned Counter = (*RegionCounterMap)[S];
11951195

1196-
// Make sure that pointer to global is passed in with zero addrspace
1197-
// This is relevant during GPU profiling
1198-
auto *NormalizedFuncNameVarPtr =
1199-
llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1200-
FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0));
1201-
1202-
llvm::Value *Args[] = {
1203-
NormalizedFuncNameVarPtr, Builder.getInt64(FunctionHash),
1204-
Builder.getInt32(NumRegionCounters), Builder.getInt32(Counter), StepV};
1196+
llvm::Value *Args[] = {FuncNameVar,
1197+
Builder.getInt64(FunctionHash),
1198+
Builder.getInt32(NumRegionCounters),
1199+
Builder.getInt32(Counter), StepV};
12051200

12061201
if (llvm::EnableSingleByteCoverage)
12071202
Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_cover),

llvm/include/llvm/Frontend/OpenMP/OMPKinds.def

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -502,9 +502,6 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32)
502502
__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,)
503503
__OMP_RTL(__kmpc_syncwarp, false, Void, Int64)
504504

505-
__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr)
506-
__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64)
507-
508505
__OMP_RTL(__last, false, Void, )
509506

510507
#undef __OMP_RTL

llvm/include/llvm/ProfileData/InstrProf.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -177,10 +177,6 @@ inline StringRef getInstrProfCounterBiasVarName() {
177177
/// Return the marker used to separate PGO names during serialization.
178178
inline StringRef getInstrProfNameSeparator() { return "\01"; }
179179

180-
/// Determines whether module targets a GPU eligible for PGO
181-
/// instrumentation
182-
bool isGPUProfTarget(const Module &M);
183-
184180
/// Please use getIRPGOFuncName for LLVM IR instrumentation. This function is
185181
/// for front-end (Clang, etc) instrumentation.
186182
/// Return the modified name for function \c F suitable to be

llvm/lib/ProfileData/InstrProf.cpp

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -432,31 +432,13 @@ std::string getPGOFuncNameVarName(StringRef FuncName,
432432
return VarName;
433433
}
434434

435-
bool isGPUProfTarget(const Module &M) {
436-
const auto &T = Triple(M.getTargetTriple());
437-
return T.isAMDGPU() || T.isNVPTX();
438-
}
439-
440-
void setPGOFuncVisibility(Module &M, GlobalVariable *FuncNameVar) {
441-
// If the target is a GPU, make the symbol protected so it can
442-
// be read from the host device
443-
if (isGPUProfTarget(M))
444-
FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility);
445-
// Hide the symbol so that we correctly get a copy for each executable.
446-
else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
447-
FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);
448-
}
449-
450435
GlobalVariable *createPGOFuncNameVar(Module &M,
451436
GlobalValue::LinkageTypes Linkage,
452437
StringRef PGOFuncName) {
453-
// Ensure profiling variables on GPU are visible to be read from host
454-
if (isGPUProfTarget(M))
455-
Linkage = GlobalValue::ExternalLinkage;
456438
// We generally want to match the function's linkage, but available_externally
457439
// and extern_weak both have the wrong semantics, and anything that doesn't
458440
// need to link across compilation units doesn't need to be visible at all.
459-
else if (Linkage == GlobalValue::ExternalWeakLinkage)
441+
if (Linkage == GlobalValue::ExternalWeakLinkage)
460442
Linkage = GlobalValue::LinkOnceAnyLinkage;
461443
else if (Linkage == GlobalValue::AvailableExternallyLinkage)
462444
Linkage = GlobalValue::LinkOnceODRLinkage;
@@ -470,7 +452,10 @@ GlobalVariable *createPGOFuncNameVar(Module &M,
470452
new GlobalVariable(M, Value->getType(), true, Linkage, Value,
471453
getPGOFuncNameVarName(PGOFuncName, Linkage));
472454

473-
setPGOFuncVisibility(M, FuncNameVar);
455+
// Hide the symbol so that we correctly get a copy for each executable.
456+
if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
457+
FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);
458+
474459
return FuncNameVar;
475460
}
476461

llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp

Lines changed: 10 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -879,8 +879,6 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
879879
llvm::InstrProfValueKind::IPVK_MemOPSize);
880880
CallInst *Call = nullptr;
881881
auto *TLI = &GetTLI(*Ind->getFunction());
882-
auto *NormalizedDataVarPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
883-
DataVar, PointerType::get(M.getContext(), 0));
884882

885883
// To support value profiling calls within Windows exception handlers, funclet
886884
// information contained within operand bundles needs to be copied over to
@@ -889,13 +887,11 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
889887
SmallVector<OperandBundleDef, 1> OpBundles;
890888
Ind->getOperandBundlesAsDefs(OpBundles);
891889
if (!IsMemOpSize) {
892-
Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
893-
Builder.getInt32(Index)};
890+
Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
894891
Call = Builder.CreateCall(getOrInsertValueProfilingCall(M, *TLI), Args,
895892
OpBundles);
896893
} else {
897-
Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
898-
Builder.getInt32(Index)};
894+
Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
899895
Call = Builder.CreateCall(
900896
getOrInsertValueProfilingCall(M, *TLI, ValueProfilingCallType::MemOp),
901897
Args, OpBundles);
@@ -1620,8 +1616,7 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
16201616
getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
16211617
ValuesVar->setAlignment(Align(8));
16221618
maybeSetComdat(ValuesVar, Fn, CntsVarName);
1623-
ValuesPtrExpr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1624-
ValuesVar, PointerType::get(Fn->getContext(), 0));
1619+
ValuesPtrExpr = ValuesVar;
16251620
}
16261621

16271622
uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
@@ -1645,10 +1640,6 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
16451640
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
16461641
Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
16471642

1648-
if (isGPUProfTarget(M)) {
1649-
Linkage = GlobalValue::ExternalLinkage;
1650-
Visibility = GlobalValue::ProtectedVisibility;
1651-
}
16521643
// If the data variable is not referenced by code (if we don't emit
16531644
// @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
16541645
// data variable live under linker GC, the data variable can be private. This
@@ -1660,9 +1651,9 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
16601651
// If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
16611652
// that other copies must have the same CFG and cannot have value profiling.
16621653
// If no hash suffix, other profd copies may be referenced by code.
1663-
else if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
1664-
(TT.isOSBinFormatELF() ||
1665-
(!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
1654+
if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
1655+
(TT.isOSBinFormatELF() ||
1656+
(!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
16661657
Linkage = GlobalValue::PrivateLinkage;
16671658
Visibility = GlobalValue::DefaultVisibility;
16681659
}
@@ -1785,13 +1776,6 @@ void InstrLowerer::emitNameData() {
17851776
NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
17861777
GlobalValue::PrivateLinkage, NamesVal,
17871778
getInstrProfNamesVarName());
1788-
1789-
// Make names variable public if current target is a GPU
1790-
if (isGPUProfTarget(M)) {
1791-
NamesVar->setLinkage(GlobalValue::ExternalLinkage);
1792-
NamesVar->setVisibility(GlobalValue::VisibilityTypes::ProtectedVisibility);
1793-
}
1794-
17951779
NamesSize = CompressedNameStr.size();
17961780
setGlobalVariableLargeSection(TT, *NamesVar);
17971781
NamesVar->setSection(
@@ -1858,13 +1842,10 @@ void InstrLowerer::emitRegistration() {
18581842
IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", RegisterF));
18591843
for (Value *Data : CompilerUsedVars)
18601844
if (!isa<Function>(Data))
1861-
// Check for addrspace cast when profiling GPU
1862-
IRB.CreateCall(RuntimeRegisterF,
1863-
IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));
1845+
IRB.CreateCall(RuntimeRegisterF, Data);
18641846
for (Value *Data : UsedVars)
18651847
if (Data != NamesVar && !isa<Function>(Data))
1866-
IRB.CreateCall(RuntimeRegisterF,
1867-
IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));
1848+
IRB.CreateCall(RuntimeRegisterF, Data);
18681849

18691850
if (NamesVar) {
18701851
Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
@@ -1873,9 +1854,7 @@ void InstrLowerer::emitRegistration() {
18731854
auto *NamesRegisterF =
18741855
Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
18751856
getInstrProfNamesRegFuncName(), M);
1876-
IRB.CreateCall(NamesRegisterF, {IRB.CreatePointerBitCastOrAddrSpaceCast(
1877-
NamesVar, VoidPtrTy),
1878-
IRB.getInt64(NamesSize)});
1857+
IRB.CreateCall(NamesRegisterF, {NamesVar, IRB.getInt64(NamesSize)});
18791858
}
18801859

18811860
IRB.CreateRetVoid();
@@ -1896,10 +1875,7 @@ bool InstrLowerer::emitRuntimeHook() {
18961875
auto *Var =
18971876
new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
18981877
nullptr, getInstrProfRuntimeHookVarName());
1899-
if (isGPUProfTarget(M))
1900-
Var->setVisibility(GlobalValue::ProtectedVisibility);
1901-
else
1902-
Var->setVisibility(GlobalValue::HiddenVisibility);
1878+
Var->setVisibility(GlobalValue::HiddenVisibility);
19031879

19041880
if (TT.isOSBinFormatELF() && !TT.isPS()) {
19051881
// Mark the user variable as used so that it isn't stripped out.

llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -883,18 +883,14 @@ static void instrumentOneFunc(
883883
auto Name = FuncInfo.FuncNameVar;
884884
auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
885885
FuncInfo.FunctionHash);
886-
// Make sure that pointer to global is passed in with zero addrspace
887-
// This is relevant during GPU profiling
888-
auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
889-
Name, PointerType::get(M->getContext(), 0));
890886
if (PGOFunctionEntryCoverage) {
891887
auto &EntryBB = F.getEntryBlock();
892888
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
893889
// llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
894890
// i32 <index>)
895891
Builder.CreateCall(
896892
Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
897-
{NormalizedNamePtr, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
893+
{Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
898894
return;
899895
}
900896

@@ -949,8 +945,7 @@ static void instrumentOneFunc(
949945
// i32 <index>)
950946
Builder.CreateCall(
951947
Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),
952-
{NormalizedNamePtr, CFGHash, Builder.getInt32(NumCounters),
953-
Builder.getInt32(I)});
948+
{Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
954949
I += PGOBlockCoverage ? 8 : 1;
955950
}
956951

@@ -964,8 +959,7 @@ static void instrumentOneFunc(
964959
Intrinsic::getDeclaration(M, PGOBlockCoverage
965960
? Intrinsic::instrprof_cover
966961
: Intrinsic::instrprof_increment),
967-
{NormalizedNamePtr, CFGHash, Builder.getInt32(NumCounters),
968-
Builder.getInt32(I++)});
962+
{Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
969963
}
970964

971965
// Now instrument select instructions:
@@ -1008,14 +1002,11 @@ static void instrumentOneFunc(
10081002
ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
10091003
assert(ToProfile && "value profiling Value is of unexpected type");
10101004

1011-
auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1012-
Name, PointerType::get(M->getContext(), 0));
1013-
10141005
SmallVector<OperandBundleDef, 1> OpBundles;
10151006
populateEHOperandBundle(Cand, BlockColors, OpBundles);
10161007
Builder.CreateCall(
10171008
Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
1018-
{NormalizedNamePtr, Builder.getInt64(FuncInfo.FunctionHash),
1009+
{FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),
10191010
ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
10201011
OpBundles);
10211012
}
@@ -1690,13 +1681,10 @@ void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
16901681
IRBuilder<> Builder(&SI);
16911682
Type *Int64Ty = Builder.getInt64Ty();
16921683
auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1693-
auto *NormalizedFuncNameVarPtr =
1694-
ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1695-
FuncNameVar, PointerType::get(M->getContext(), 0));
16961684
Builder.CreateCall(
16971685
Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1698-
{NormalizedFuncNameVarPtr, Builder.getInt64(FuncHash),
1699-
Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step});
1686+
{FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1687+
Builder.getInt32(*CurCtrIdx), Step});
17001688
++(*CurCtrIdx);
17011689
}
17021690

offload/DeviceRTL/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ set(include_files
7777
${include_directory}/Interface.h
7878
${include_directory}/LibC.h
7979
${include_directory}/Mapping.h
80-
${include_directory}/Profiling.h
8180
${include_directory}/State.h
8281
${include_directory}/Synchronization.h
8382
${include_directory}/Types.h
@@ -93,7 +92,6 @@ set(src_files
9392
${source_directory}/Mapping.cpp
9493
${source_directory}/Misc.cpp
9594
${source_directory}/Parallelism.cpp
96-
${source_directory}/Profiling.cpp
9795
${source_directory}/Reduction.cpp
9896
${source_directory}/State.cpp
9997
${source_directory}/Synchronization.cpp

offload/DeviceRTL/include/Profiling.h

Lines changed: 0 additions & 21 deletions
This file was deleted.

offload/DeviceRTL/src/Profiling.cpp

Lines changed: 0 additions & 22 deletions
This file was deleted.

offload/plugins-nextgen/common/include/GlobalHandler.h

Lines changed: 1 addition & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,10 @@
1313
#ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
1414
#define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
1515

16-
#include <type_traits>
16+
#include <string>
1717

1818
#include "llvm/ADT/DenseMap.h"
1919
#include "llvm/Object/ELFObjectFile.h"
20-
#include "llvm/ProfileData/InstrProf.h"
2120

2221
#include "Shared/Debug.h"
2322
#include "Shared/Utils.h"
@@ -56,23 +55,6 @@ class GlobalTy {
5655
void setPtr(void *P) { Ptr = P; }
5756
};
5857

59-
using IntPtrT = void *;
60-
struct __llvm_profile_data {
61-
#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
62-
std::remove_const<Type>::type Name;
63-
#include "llvm/ProfileData/InstrProfData.inc"
64-
};
65-
66-
/// PGO profiling data extracted from a GPU device
67-
struct GPUProfGlobals {
68-
SmallVector<uint8_t> NamesData;
69-
SmallVector<SmallVector<int64_t>> Counts;
70-
SmallVector<__llvm_profile_data> Data;
71-
Triple TargetTriple;
72-
73-
void dump() const;
74-
};
75-
7658
/// Subclass of GlobalTy that holds the memory for a global of \p Ty.
7759
template <typename Ty> class StaticGlobalTy : public GlobalTy {
7860
Ty Data;
@@ -182,15 +164,6 @@ class GenericGlobalHandlerTy {
182164
return moveGlobalBetweenDeviceAndHost(Device, Image, HostGlobal,
183165
/*D2H=*/false);
184166
}
185-
186-
/// Checks whether a given image contains profiling globals.
187-
bool hasProfilingGlobals(GenericDeviceTy &Device, DeviceImageTy &Image);
188-
189-
/// Reads profiling data from a GPU image to supplied profdata struct.
190-
/// Iterates through the image symbol table and stores global values
191-
/// with profiling prefixes.
192-
Expected<GPUProfGlobals> readProfilingGlobals(GenericDeviceTy &Device,
193-
DeviceImageTy &Image);
194167
};
195168

196169
} // namespace plugin

0 commit comments

Comments
 (0)