Skip to content

Commit 6bfd67b

Browse files
dmitryryintel
authored and igcbot committed
Localize only global variables in private addrspace.
Prior to this patch, global variable (GV) localization was treated as an optimization: a GV was localized based on heuristics, which is wrong. A GV can be localized only when it is thread local (one could also call it kernel local), and in that case it must be localized, since the current toolchain has no more general support for thread-local GVs. Non-thread-local GVs cannot be localized, since that would change their behavior. Thread locality is defined via the address space: GVs in the private addrspace are considered thread local, since private memory is not available to other threads; GVs in other addrspaces are ignored by the localization. The LLVM thread-local attribute is currently ignored.
1 parent f757202 commit 6bfd67b

File tree

6 files changed

+15
-271
lines changed

6 files changed

+15
-271
lines changed

IGC/Options/include/igc/Options/VCInternalOptions.td

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,5 @@ def : PlainJoined<"runtime=">, Alias<binary_format>,
4848
def ftime_report : PlainFlag<"ftime-report">,
4949
HelpText<"Print timing summary of each stage of compilation">;
5050

51-
def vc_globals_localization : PlainSeparate<"globals-localization">,
52-
HelpText<"globals localization optimization configuration; values:\n"
53-
"\t'all' - localize all globals,\n\t'no' - don't localize globals,\n"
54-
"\t'vector' - localize all vector globals the rest is partially "
55-
"localized,\n\t'partial' - compiler may localize some globals "
56-
"according to its heuristics">;
57-
def : PlainJoined<"globals-localization=">, Alias<vc_globals_localization>,
58-
HelpText<"Alias for -globals-localization <value>">;
59-
6051
}
6152
// }} VC internal options

IGC/VectorCompiler/include/vc/Driver/Driver.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ struct CompileOptions {
8686
bool DumpAsm = false;
8787
bool DumpDebugInfo = false;
8888
bool TimePasses = false;
89-
GlobalsLocalizationMode GlobalsLocalization = GlobalsLocalizationMode::Vector;
9089
std::string LLVMOptions;
9190
bool UseBindlessBuffers = false;
9291

IGC/VectorCompiler/include/vc/Support/BackendConfig.h

Lines changed: 0 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -46,55 +46,6 @@ namespace llvm {
4646

4747
void initializeGenXBackendConfigPass(PassRegistry &PR);
4848

49-
struct GlobalsLocalizationConfig {
50-
using LimitT = std::size_t;
51-
static constexpr auto NoLimit = std::numeric_limits<LimitT>::max();
52-
53-
private:
54-
// Whether every global variable must be localized.
55-
bool IsForced = true;
56-
// Whether every vector global variable must be localized.
57-
bool IsVectorForced = true;
58-
// How many GRF memory is allowed to be used for localization.
59-
LimitT Limit = NoLimit;
60-
61-
public:
62-
GlobalsLocalizationConfig(bool IsForcedIn, bool IsVectorForcedIn,
63-
LimitT LimitIn)
64-
: IsForced{IsForcedIn}, IsVectorForced{IsVectorForcedIn}, Limit{LimitIn} {
65-
if (IsForced || IsVectorForced)
66-
IGC_ASSERT_MESSAGE(
67-
Limit == NoLimit,
68-
"there can be no localization limit when localization is forced");
69-
if (IsForced)
70-
IGC_ASSERT_MESSAGE(IsVectorForced,
71-
"localizing every GV means localizing vectors too");
72-
}
73-
74-
GlobalsLocalizationConfig() {}
75-
76-
// Every global variable must be localized.
77-
static GlobalsLocalizationConfig CreateForcedLocalization() { return {}; }
78-
79-
// Every global variable must be localized.
80-
static GlobalsLocalizationConfig CreateForcedVectorLocalization() {
81-
return {/* IsForced */ false, /* IsVectorForced */ true, NoLimit};
82-
}
83-
84-
// GlobalsLocalization is allowed to localize globals but it can use only
85-
// GlobalsLocalizationLimit bytes of GRF.
86-
static GlobalsLocalizationConfig
87-
CreateLocalizationWithLimit(LimitT GlobalsLocalizationLimitIn = NoLimit) {
88-
return {/* IsForced */ false, /* IsVectorForced */ false,
89-
GlobalsLocalizationLimitIn};
90-
}
91-
92-
bool isForced() const { return IsForced; }
93-
bool isVectorForced() const { return IsVectorForced; }
94-
95-
LimitT getLimit() const { return Limit; }
96-
};
97-
9849
// Plain structure to be filled by users who want to create backend
9950
// configuration. Some values are default-initialized from cl options.
10051
struct GenXBackendOptions {
@@ -120,9 +71,6 @@ struct GenXBackendOptions {
12071
bool EnableDebugInfoDumps;
12172
std::string DebugInfoDumpsNameOverride;
12273

123-
// Configuration for GlobalsLocalization pass
124-
// (part of CMABI pass by historical reasons).
125-
GlobalsLocalizationConfig GlobalsLocalization;
12674
bool ForceArrayPromotion = false;
12775

12876
// Localize live ranges to reduce accumulator usage
@@ -235,15 +183,6 @@ class GenXBackendConfig : public ImmutablePass {
235183
return Options.DebugInfoDumpsNameOverride;
236184
}
237185

238-
bool isGlobalsLocalizationForced() const {
239-
return Options.GlobalsLocalization.isForced();
240-
}
241-
bool isVectorGlobalsLocalizationForced() const {
242-
return Options.GlobalsLocalization.isVectorForced();
243-
}
244-
GlobalsLocalizationConfig::LimitT getGlobalsLocalizationLimit() const {
245-
return Options.GlobalsLocalization.getLimit();
246-
}
247186
bool isArrayPromotionForced() const { return Options.ForceArrayPromotion; }
248187

249188
bool localizeLiveRangesForAccUsage() const {

IGC/VectorCompiler/lib/Driver/Driver.cpp

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -198,26 +198,6 @@ createTargetMachine(const vc::CompileOptions &Opts, Triple &TheTriple) {
198198
return {std::move(TM)};
199199
}
200200

201-
static GlobalsLocalizationConfig
202-
defineGlobalsLocalizationConfig(vc::GlobalsLocalizationMode GLMode,
203-
vc::BinaryKind Binary) {
204-
// Globals must be forced for CMRT binary.
205-
if (Binary == vc::BinaryKind::CM)
206-
return GlobalsLocalizationConfig::CreateForcedLocalization();
207-
switch (GLMode) {
208-
case vc::GlobalsLocalizationMode::All:
209-
return GlobalsLocalizationConfig::CreateForcedLocalization();
210-
case vc::GlobalsLocalizationMode::No:
211-
return GlobalsLocalizationConfig::CreateLocalizationWithLimit(0);
212-
case vc::GlobalsLocalizationMode::Vector:
213-
return GlobalsLocalizationConfig::CreateForcedVectorLocalization();
214-
default:
215-
IGC_ASSERT_MESSAGE(GLMode == vc::GlobalsLocalizationMode::Partial,
216-
"unexpected globals localization mode");
217-
return GlobalsLocalizationConfig::CreateLocalizationWithLimit();
218-
}
219-
}
220-
221201
// Create backend options for immutable config pass. Override default
222202
// values with provided ones.
223203
static GenXBackendOptions createBackendOptions(const vc::CompileOptions &Opts) {
@@ -233,8 +213,6 @@ static GenXBackendOptions createBackendOptions(const vc::CompileOptions &Opts) {
233213
BackendOpts.EnableDebugInfoDumps = Opts.DumpDebugInfo;
234214
BackendOpts.Dumper = Opts.Dumper.get();
235215
BackendOpts.ShaderOverrider = Opts.ShaderOverrider.get();
236-
BackendOpts.GlobalsLocalization =
237-
defineGlobalsLocalizationConfig(Opts.GlobalsLocalization, Opts.Binary);
238216
BackendOpts.ForceArrayPromotion = (Opts.Binary == vc::BinaryKind::CM);
239217
if (Opts.ForceLiveRangesLocalizationForAccUsage)
240218
BackendOpts.LocalizeLRsForAccUsage = true;
@@ -619,21 +597,6 @@ static Error fillInternalOptions(const opt::ArgList &InternalOptions,
619597
Opts.Binary = MaybeBinary.getValue();
620598
}
621599

622-
if (opt::Arg *A = InternalOptions.getLastArg(OPT_vc_globals_localization)) {
623-
StringRef Val = A->getValue();
624-
auto MaybeGLM = StringSwitch<Optional<vc::GlobalsLocalizationMode>>(Val)
625-
.Case("all", vc::GlobalsLocalizationMode::All)
626-
.Case("no", vc::GlobalsLocalizationMode::No)
627-
.Case("vector", vc::GlobalsLocalizationMode::Vector)
628-
.Case("partial", vc::GlobalsLocalizationMode::Partial)
629-
.Default(None);
630-
// FIXME: -globals-localization=no is ignored when cm binary is used, throw
631-
// a warning here.
632-
if (!MaybeGLM)
633-
return makeOptionError(*A, InternalOptions, /*IsInternal=*/true);
634-
Opts.GlobalsLocalization = MaybeGLM.getValue();
635-
}
636-
637600
Opts.FeaturesString =
638601
llvm::join(InternalOptions.getAllArgValues(OPT_target_features), ",");
639602

IGC/VectorCompiler/lib/GenXOpts/CMTrans/CMABI.cpp

Lines changed: 15 additions & 147 deletions
Original file line numberDiff line numberDiff line change
@@ -239,8 +239,6 @@ int DiagnosticInfoOverlappingArgs::KindID = 0;
239239
class CMABIAnalysis : public ModulePass {
240240
// This map captures all global variables to be localized.
241241
std::vector<LocalizationInfo *> LocalizationInfoObjs;
242-
GlobalsLocalizationConfig::LimitT GlobalsLocalizationLimit = 0;
243-
bool LocalizeVectorGlobals = false;
244242

245243
public:
246244
static char ID;
@@ -355,118 +353,14 @@ INITIALIZE_PASS_DEPENDENCY(GenXBackendConfig)
355353
INITIALIZE_PASS_END(CMABIAnalysis, "cmabi-analysis",
356354
"Fix ABI issues for the genx backend", false, true)
357355

358-
static std::size_t
359-
defineGlobalsLocalizationLimit(const GenXBackendConfig &Config) {
360-
if (Config.isGlobalsLocalizationForced())
361-
return GlobalsLocalizationConfig::NoLimit;
362-
363-
// Half of a size of standard GenX register file in bytes.
364-
// 128 * 32 / 2
365-
constexpr std::size_t HalfGRF = 2048;
366-
std::size_t Limit = Config.getGlobalsLocalizationLimit();
367-
return std::min(Limit, HalfGRF);
368-
}
369-
370356
bool CMABIAnalysis::runOnModule(Module &M) {
371357
auto &&BCfg = getAnalysis<GenXBackendConfig>();
372-
GlobalsLocalizationLimit = defineGlobalsLocalizationLimit(BCfg);
373-
LocalizeVectorGlobals = BCfg.isVectorGlobalsLocalizationForced();
374358
FCtrl = BCfg.getFCtrl();
375359

376360
runOnCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph());
377361
return false;
378362
}
379363

380-
// Currently weight of the global defines by its size
381-
static int calcGVWeight(const GlobalVariable &GV, const DataLayout &DL) {
382-
if (!GV.getValueType()->isVectorTy())
383-
return DL.getTypeAllocSize(GV.getValueType());
384-
// Alignment rules are too restrictive for vectors and cannot be reduced
385-
// (even though LangRef says that they can).
386-
// GRF width (32) alignment was taken as it is considered that localized
387-
// global vectors should be eventually promoted to GRF.
388-
return alignTo(DL.getTypeStoreSize(GV.getValueType()), 32);
389-
}
390-
391-
// selectGlobalsToLocalize - chooses which globals to localize.
392-
// Returns std::vector of std::reference_wrapper to such globals.
393-
//
394-
// Algorithm: exclude globals that definitely should not be localized, include
395-
// those that definitely should. If the total weight of the already chosen
396-
// globals doesn't exceed \p Bound, sort the remaining globals by weight,
397-
// choose first lightest ones, so the total weight is under \p Bound.
398-
//
399-
// \p Globals - range of globals to choose from
400-
// \p Bound - bound not to overcome
401-
// \p ExcludePred - functor : GVRef -> bool, true if global should not be
402-
// localized
403-
// \p InlcudePred - functor : GVRef -> bool, true if the provided global must
404-
// be localized
405-
// \p WeightCalculator - functor : GVRef -> decltype(Bound), returns
406-
// weight of global
407-
template <typename ForwardRange, typename ExcludePredT, typename IncludePredT,
408-
typename T, typename WeightCalculatorT>
409-
auto selectGlobalsToLocalize(ForwardRange Globals, T Bound,
410-
ExcludePredT ExcludePred, IncludePredT IncludePred,
411-
WeightCalculatorT WeightCalculator) {
412-
IGC_ASSERT_MESSAGE(Bound >= 0, "bound must be nonnegative");
413-
using GVRef = vc::ranges::range_reference_t<ForwardRange>;
414-
using GVT = std::remove_reference_t<GVRef>;
415-
using GVRefWrapper = std::reference_wrapper<GVT>;
416-
417-
IGC_ASSERT_MESSAGE(std::none_of(Globals.begin(), Globals.end(),
418-
[ExcludePred, IncludePred](GVRef GV) {
419-
return ExcludePred(GV) && IncludePred(GV);
420-
}),
421-
"'must include' and 'must exclude' sets must be disjoint");
422-
423-
if (Bound == GlobalsLocalizationConfig::NoLimit) {
424-
std::vector<GVRefWrapper> ToLocalize;
425-
// filter out those, that we must exclude
426-
std::copy_if(Globals.begin(), Globals.end(), std::back_inserter(ToLocalize),
427-
[ExcludePred](GVRef GV) { return !ExcludePred(GV); });
428-
return ToLocalize;
429-
}
430-
431-
std::vector<GVRefWrapper> ToLocalize;
432-
// Adding those that we must include.
433-
std::copy_if(Globals.begin(), Globals.end(), std::back_inserter(ToLocalize),
434-
IncludePred);
435-
if (Bound == 0)
436-
return ToLocalize;
437-
438-
T IncludeWeight =
439-
std::accumulate(ToLocalize.begin(), ToLocalize.end(), static_cast<T>(0),
440-
[WeightCalculator](T Prev, GVRef GV) {
441-
return Prev + WeightCalculator(GV);
442-
});
443-
if (IncludeWeight >= Bound)
444-
return ToLocalize;
445-
446-
std::vector<GVRefWrapper> Remainder;
447-
std::copy_if(Globals.begin(), Globals.end(), std::back_inserter(Remainder),
448-
[IncludePred, ExcludePred](GVRef GV) {
449-
return !IncludePred(GV) && !ExcludePred(GV);
450-
});
451-
// Sorting remaining globals by weight.
452-
std::sort(Remainder.begin(), Remainder.end(),
453-
[WeightCalculator](GVRef LHS, GVRef RHS) {
454-
return WeightCalculator(LHS) < WeightCalculator(RHS);
455-
});
456-
457-
T RemainderBound = Bound - IncludeWeight;
458-
// filter max number of lightest ones, which weight sum is under the bound
459-
auto FirstNotToLocalize = vc::upper_partial_sum_bound(
460-
Remainder.begin(), Remainder.end(), RemainderBound,
461-
[WeightCalculator](T Base, GVRef Inc) {
462-
return Base + WeightCalculator(Inc);
463-
});
464-
465-
std::copy(Remainder.begin(), FirstNotToLocalize,
466-
std::back_inserter(ToLocalize));
467-
return ToLocalize;
468-
}
469-
470364
bool CMABIAnalysis::runOnCallGraph(CallGraph &CG) {
471365
// Analyze global variable usages and for each function attaches global
472366
// variables to be copy-in and copy-out.
@@ -541,45 +435,25 @@ bool CMABI::runOnSCC(CallGraphSCC &SCC) {
541435
return Changed;
542436
}
543437

544-
// Whether \p Inst is an instruction on which IR rebuild caused by addrspace
545-
// change will stop.
546-
static bool isRebuildTerminal(const Instruction &Inst) {
547-
// Result of a load inst is no longer a pointer so here propogation will stop.
548-
if (isa<LoadInst>(Inst) || isa<AddrSpaceCastInst>(Inst) ||
549-
isa<StoreInst>(Inst))
550-
return true;
551-
if (!isa<IntrinsicInst>(Inst))
552-
return false;
553-
auto IID = cast<IntrinsicInst>(Inst).getIntrinsicID();
554-
return IID == Intrinsic::masked_gather || IID == Intrinsic::masked_scatter;
555-
}
556-
557438
// Replaces uses of global variables with the corresponding allocas inside a
558439
// specified function. More insts can be rebuild if global variable addrspace
559440
// wasn't private.
560441
static void replaceUsesWithinFunction(
561442
const SmallDenseMap<Value *, Value *> &GlobalsToReplace, Function *F) {
562-
auto ToRebuild = vc::MakeRebuildInfoBuilder(
563-
[](const Instruction &Inst) { return isRebuildTerminal(Inst); });
564-
ReversePostOrderTraversal<Function *> RPOT(F);
565-
for (auto *BB : RPOT) {
566-
for (auto &Inst : *BB) {
443+
for (auto &BB : *F) {
444+
for (auto &Inst : BB) {
567445
for (unsigned i = 0, e = Inst.getNumOperands(); i < e; ++i) {
568446
Value *Op = Inst.getOperand(i);
569447
auto Iter = GlobalsToReplace.find(Op);
570448
if (Iter != GlobalsToReplace.end()) {
571-
if (Op->getType() == Iter->second->getType())
572-
Inst.setOperand(i, Iter->second);
573-
else {
574-
ToRebuild.addEntry(Inst, i, *Iter->second);
575-
}
576-
} else {
577-
ToRebuild.addNodeIfRequired(Inst, i);
449+
IGC_ASSERT_MESSAGE(Op->getType() == Iter->second->getType(),
450+
"only global variables in private addrspace are "
451+
"localized, so types must match");
452+
Inst.setOperand(i, Iter->second);
578453
}
579454
}
580455
}
581456
}
582-
vc::MakeInstructionRebuilder(std::move(ToRebuild).emit()).rebuild();
583457
}
584458

585459
// \brief Create allocas for globals directly used in this kernel and
@@ -1534,21 +1408,15 @@ void CMABIAnalysis::analyzeGlobals(CallGraph &CG) {
15341408
if (M.global_empty())
15351409
return;
15361410

1537-
const auto &DL = M.getDataLayout();
1538-
auto ToLocalize = selectGlobalsToLocalize(
1539-
M.globals(), GlobalsLocalizationLimit,
1540-
[](const GlobalVariable &GV) {
1541-
// Don't localize global constant format string, as it must be
1542-
// relocated in case of zebin printf.
1543-
// FIXME: what if we force localization.
1544-
return GV.hasAttribute(genx::FunctionMD::GenXVolatile) ||
1545-
vc::isConstantString(GV);
1546-
},
1547-
[IncludeVectors = LocalizeVectorGlobals](const GlobalVariable &GV) {
1548-
return IncludeVectors && GV.getValueType()->isVectorTy() &&
1549-
!GV.hasAttribute(genx::FunctionMD::GenXVolatile);
1550-
},
1551-
[&DL](const GlobalVariable &GV) { return calcGVWeight(GV, DL); });
1411+
// FIXME: String constants must be localized too. Excluding them here
1412+
// to WA legacy printf implementation in CM FE (printf strings are
1413+
// not in constant addrspace in legacy printf).
1414+
auto ToLocalize =
1415+
make_filter_range(M.globals(), [](const GlobalVariable &GV) {
1416+
return GV.getAddressSpace() == PrivateAddrSpace &&
1417+
!GV.hasAttribute(genx::FunctionMD::GenXVolatile) &&
1418+
!vc::isConstantString(GV);
1419+
});
15521420

15531421
// Collect direct and indirect (GV is used in a called function)
15541422
// uses of globals.

0 commit comments

Comments
 (0)