Skip to content

Commit 24e24b1

Browse files
lwesierswrodziew
authored and committed
Restore constancy of GenXSubtarget
Change-Id: Ia7343036186e15d7beb09d85424fd2653ddde475
1 parent 91ff3c6 commit 24e24b1

File tree

6 files changed

+44
-49
lines changed

6 files changed

+44
-49
lines changed

IGC/VectorCompiler/include/vc/GenXOpts/Utils/KernelInfo.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -148,6 +148,7 @@ class KernelMetadata {
148148
// Accessors
149149
bool isKernel() const { return IsKernel; }
150150
StringRef getName() const { return Name; }
151+
const Function *getFunction() const { return F; }
151152
unsigned getSLMSize() const { return SLMSize; }
152153
ArrayRef<unsigned> getArgKinds() const { return ArgKinds; }
153154
unsigned getNumArgs() const { return ArgKinds.size(); }

IGC/VectorCompiler/lib/GenXCodeGen/GenXCisaBuilder.cpp

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -500,6 +500,7 @@ class GenXKernelBuilder {
500500
bool HasCallable = false;
501501
bool HasStackcalls = false;
502502
bool HasAlloca = false;
503+
bool UseGlobalMem = false;
503504
// GRF width in unit of byte
504505
unsigned GrfByteSize = 32;
505506

@@ -2905,6 +2906,8 @@ void GenXKernelBuilder::AddGenVar(Register &Reg) {
29052906
* barrier.
29062907
*/
29072908
void GenXKernelBuilder::collectKernelInfo() {
2909+
UseGlobalMem |=
2910+
(FG->getModule()->getModuleFlag("genx.useGlobalMem") != nullptr);
29082911
for (auto It = FG->begin(), E = FG->end(); It != E; ++It) {
29092912
auto Func = *It;
29102913
HasStackcalls |=
@@ -4933,7 +4936,7 @@ void GenXKernelBuilder::pushStackArg(VISA_StateOpndHandle *Dst, Value *Src,
49334936
DoCopy);
49344937
VISA_VectorOpnd *Imm = nullptr;
49354938
unsigned OffVal = Sz;
4936-
if (Subtarget->useGlobalMem())
4939+
if (UseGlobalMem)
49374940
OffVal *= BYTES_PER_OWORD;
49384941
CISA_CALL(Kernel->CreateVISAImmediate(Imm, &OffVal, ISA_TYPE_UD));
49394942
VISA_RawOpnd *RawSrc = nullptr;
@@ -4945,7 +4948,7 @@ void GenXKernelBuilder::pushStackArg(VISA_StateOpndHandle *Dst, Value *Src,
49454948
CISA_CALL(Kernel->AppendVISADataMovementInst(ISA_MOV, nullptr, false,
49464949
vISA_EMASK_M1, EXEC_SIZE_1,
49474950
TmpOffDst, SpOpSrc1));
4948-
if (Subtarget->useGlobalMem()) {
4951+
if (UseGlobalMem) {
49494952
CISA_CALL(Kernel->AppendVISASvmBlockStoreInst(
49504953
getCisaOwordNumFromNumber(Sz), true, TmpOffSrc, RawSrc));
49514954
} else {
@@ -4997,7 +5000,7 @@ void GenXKernelBuilder::popStackArg(llvm::Value *Dst, VISA_StateOpndHandle *Src,
49975000

49985001
VISA_VectorOpnd *Imm = nullptr;
49995002
int OffVal = PrevStackOff;
5000-
if (Subtarget->useGlobalMem())
5003+
if (UseGlobalMem)
50015004
OffVal *= BYTES_PER_OWORD;
50025005
CISA_CALL(Kernel->CreateVISAImmediate(Imm, &OffVal, ISA_TYPE_UD));
50035006
PrevStackOff += Sz;
@@ -5007,7 +5010,7 @@ void GenXKernelBuilder::popStackArg(llvm::Value *Dst, VISA_StateOpndHandle *Src,
50075010
CISA_CALL(Kernel->AppendVISAArithmeticInst(ISA_ADD, nullptr, false,
50085011
vISA_EMASK_M1, EXEC_SIZE_1,
50095012
TmpOffDst, SpOpSrc, Imm));
5010-
if (Subtarget->useGlobalMem()) {
5013+
if (UseGlobalMem) {
50115014
CISA_CALL(Kernel->AppendVISASvmBlockLoadInst(
50125015
getCisaOwordNumFromNumber(Sz), false, TmpOffSrc, RawSrc));
50135016
} else {
@@ -5097,7 +5100,7 @@ void GenXKernelBuilder::beginFunction(Function *Func) {
50975100
EXEC_SIZE_1, OffOpDst, HwtidOp, Imm));
50985101

50995102
VISA_VectorOpnd *OpSrc = nullptr;
5100-
if (Subtarget->useGlobalMem()) {
5103+
if (UseGlobalMem) {
51015104
assert(Func->arg_size() > 0);
51025105
Value &PrivBase = *(Func->arg_end() - 1);
51035106
genx::KernelArgInfo AI(TheKernelMetadata.getArgKind(Func->arg_size() - 1));
@@ -5475,7 +5478,7 @@ void GenXKernelBuilder::buildStackCall(IGCLLVM::CallInst *CI,
54755478
CISA_CALL(Kernel->CreateVISASrcOperand(SpOpSrc, Sp, MODIFIER_NONE, 0, 1, 0,
54765479
0, 0));
54775480

5478-
if (Subtarget->useGlobalMem())
5481+
if (UseGlobalMem)
54795482
StackOff *= BYTES_PER_OWORD;
54805483
CISA_CALL(Kernel->CreateVISAImmediate(Imm, &StackOff, ISA_TYPE_UQ));
54815484
CISA_CALL(Kernel->AppendVISAArithmeticInst(

IGC/VectorCompiler/lib/GenXCodeGen/GenXOCLRuntimeInfo.cpp

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -84,10 +84,8 @@ void GenXOCLRuntimeInfo::KernelInfo::setMetadataProperties(
8484
genx::KernelMetadata &KM, const GenXSubtarget &ST) {
8585
Name = KM.getName();
8686
SLMSize = KM.getSLMSize();
87-
// will be replaced to metadata usage once
88-
// useGlobalMem option is removed from GenXSubtarget
8987
// FIXME: replace with 8k * simdSize * numDispatchedThreads
90-
if (ST.useGlobalMem())
88+
if (KM.getFunction()->getParent()->getModuleFlag("genx.useGlobalMem"))
9189
StatelessPrivateMemSize = 16 * 8192;
9290

9391
}

IGC/VectorCompiler/lib/GenXCodeGen/GenXSubtarget.cpp

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,6 @@ void GenXSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
7070
else
7171
StackSurf = PreDefined_Surface::PREDEFINED_SURFACE_STACK;
7272
StackSurfMaxSize = StackMemSize;
73-
UseGlobalMem = false;
7473

7574
GenXVariant = llvm::StringSwitch<GenXTag>(CPU)
7675
.Case("HSW", GENX_HSW)
@@ -124,7 +123,7 @@ StringRef GenXSubtarget::getEmulateFunction(const Instruction *Inst) const {
124123
}
125124

126125
GenXSubtargetPass::GenXSubtargetPass() : ImmutablePass(ID), ST(nullptr) {}
127-
GenXSubtargetPass::GenXSubtargetPass(GenXSubtarget &ST)
126+
GenXSubtargetPass::GenXSubtargetPass(const GenXSubtarget &ST)
128127
: ImmutablePass(ID), ST(&ST) {}
129128
GenXSubtargetPass::~GenXSubtargetPass() {}
130129

@@ -134,7 +133,7 @@ namespace llvm {
134133

135134
void initializeGenXSubtargetPassPass(PassRegistry &);
136135

137-
ImmutablePass *createGenXSubtargetPass(GenXSubtarget &ST) {
136+
ImmutablePass *createGenXSubtargetPass(const GenXSubtarget &ST) {
138137
initializeGenXSubtargetPassPass(*PassRegistry::getPassRegistry());
139138
return new GenXSubtargetPass(ST);
140139
}

IGC/VectorCompiler/lib/GenXCodeGen/GenXSubtarget.h

Lines changed: 4 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -111,8 +111,6 @@ class GenXSubtarget final : public GenXGenSubtargetInfo {
111111
// Limit in bytes for stack purposes
112112
unsigned StackSurfMaxSize;
113113

114-
bool UseGlobalMem;
115-
116114
public:
117115
// This constructor initializes the data members to match that
118116
// of the specified triple.
@@ -202,14 +200,6 @@ class GenXSubtarget final : public GenXGenSubtargetInfo {
202200
/// crossing one GRF boundary
203201
bool hasIndirectGRFCrossing() const { return isSKLplus(); }
204202

205-
bool useGlobalMem() const { return UseGlobalMem; }
206-
207-
void setUseGlobalMem() {
208-
assert(hasLongLong() && isOCLRuntime() &&
209-
"Global mem stack can't be used on 32-bit targets or on CMRT");
210-
UseGlobalMem = true;
211-
}
212-
213203
/// * getEmulateFunction - return the corresponding emulation function name,
214204
/// empty string if no emulation is needed.
215205
StringRef getEmulateFunction(const Instruction *Inst) const;
@@ -277,16 +267,16 @@ class GenXSubtarget final : public GenXGenSubtargetInfo {
277267
};
278268

279269
class GenXSubtargetPass : public ImmutablePass {
280-
GenXSubtarget *ST;
270+
const GenXSubtarget *ST;
281271
public:
282272
GenXSubtargetPass();
283-
GenXSubtargetPass(GenXSubtarget &ST);
273+
GenXSubtargetPass(const GenXSubtarget &ST);
284274
~GenXSubtargetPass();
285-
GenXSubtarget *getSubtarget() { return ST; }
275+
const GenXSubtarget *getSubtarget() const { return ST; }
286276
static char ID;
287277
};
288278

289-
ImmutablePass *createGenXSubtargetPass(GenXSubtarget &ST);
279+
ImmutablePass *createGenXSubtargetPass(const GenXSubtarget &ST);
290280

291281
} // End llvm namespace
292282

IGC/VectorCompiler/lib/GenXCodeGen/GenXThreadPrivateMemory.cpp

Lines changed: 27 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,7 @@ class GenXThreadPrivateMemory : public ModulePass,
9696

9797
private:
9898
LLVMContext *m_ctx;
99-
GenXSubtarget *m_ST;
99+
const GenXSubtarget *m_ST;
100100
const DataLayout *m_DL;
101101
std::vector<AllocaInst *> m_alloca;
102102
std::vector<CallInst *> m_gather;
@@ -105,6 +105,7 @@ class GenXThreadPrivateMemory : public ModulePass,
105105
std::queue<Instruction *> m_AIUsers;
106106
std::set<Instruction *> m_AlreadyAdded;
107107
PreDefined_Surface m_stack;
108+
bool m_useGlobalMem = false;
108109
};
109110
} // namespace
110111

@@ -355,19 +356,19 @@ bool GenXThreadPrivateMemory::replaceLoad(LoadInst *LdI) {
355356
Value *PointerOp = LdI->getPointerOperand();
356357
Value *Offset = lookForPtrReplacement(PointerOp);
357358
Offset =
358-
ZExtOrTruncIfNeeded(Offset, m_ST->useGlobalMem() ? I64Ty : I32Ty, LdI);
359-
auto IID = m_ST->useGlobalMem()
359+
ZExtOrTruncIfNeeded(Offset, m_useGlobalMem ? I64Ty : I32Ty, LdI);
360+
auto IID = m_useGlobalMem
360361
? llvm::GenXIntrinsic::genx_svm_gather
361362
: llvm::GenXIntrinsic::genx_gather_scaled;
362363

363364
Value *EltsOffset = FormEltsOffsetVector(NumEltsToLoad, RealTyToLoadSz, LdI);
364365

365366
unsigned SrcSize = genx::log2(RealTyToLoadSz);
366-
Value *logNumBlocks = ConstantInt::get(I32Ty, m_ST->useGlobalMem() ? 0 : SrcSize);
367+
Value *logNumBlocks = ConstantInt::get(I32Ty, m_useGlobalMem ? 0 : SrcSize);
367368
Value *Scale = ConstantInt::get(Type::getInt16Ty(*m_ctx), 0);
368369
Value *Surface = ConstantInt::get(I32Ty,
369370
visa::getReservedSurfaceIndex(m_stack));
370-
if (m_ST->useGlobalMem() && NumEltsToLoad > 1) {
371+
if (m_useGlobalMem && NumEltsToLoad > 1) {
371372
assert(Offset->getType()->getScalarType()->isIntegerTy(64));
372373
auto *BaseOff = FormEltsOffsetVectorForSVM(NumEltsToLoad, LdI, Offset);
373374
auto *ZextOff = CastInst::CreateZExtOrBitCast(
@@ -380,9 +381,9 @@ bool GenXThreadPrivateMemory::replaceLoad(LoadInst *LdI) {
380381
LdI->getModule(), IID,
381382
{OldValOfTheDataRead->getType(),
382383
Pred->getType(),
383-
(m_ST->useGlobalMem() ? Offset : EltsOffset)->getType()});
384+
(m_useGlobalMem ? Offset : EltsOffset)->getType()});
384385
CallInst *Gather =
385-
m_ST->useGlobalMem()
386+
m_useGlobalMem
386387
? IntrinsicInst::Create(
387388
F, {Pred, logNumBlocks, Offset, OldValOfTheDataRead})
388389
: IntrinsicInst::Create(F, {Pred, logNumBlocks, Scale, Surface,
@@ -423,17 +424,17 @@ bool GenXThreadPrivateMemory::replaceStore(StoreInst *StI) {
423424
Type *I32Ty = Type::getInt32Ty(*m_ctx);
424425
Type *I64Ty = Type::getInt64Ty(*m_ctx);
425426
Offset =
426-
ZExtOrTruncIfNeeded(Offset, m_ST->useGlobalMem() ? I64Ty : I32Ty, StI);
427+
ZExtOrTruncIfNeeded(Offset, m_useGlobalMem ? I64Ty : I32Ty, StI);
427428

428-
auto IID = m_ST->useGlobalMem()
429+
auto IID = m_useGlobalMem
429430
? llvm::GenXIntrinsic::genx_svm_scatter
430431
: llvm::GenXIntrinsic::genx_scatter_scaled;
431432

432433
Value *PredVal = ConstantInt::get(Type::getInt1Ty(*m_ctx), 1);
433434
Value *Pred = Builder.CreateVectorSplat(ValueNumElts, PredVal);
434435
Value *EltsOffset = FormEltsOffsetVector(ValueNumElts, ValueEltSz, StI);
435436

436-
if (m_ST->useGlobalMem() && ValueNumElts > 1) {
437+
if (m_useGlobalMem && ValueNumElts > 1) {
437438
assert(Offset->getType()->getScalarType()->isIntegerTy(64));
438439
auto *BaseOff = FormEltsOffsetVectorForSVM(ValueNumElts, StI, Offset);
439440
auto *ZextOff = CastInst::CreateZExtOrBitCast(
@@ -446,14 +447,14 @@ bool GenXThreadPrivateMemory::replaceStore(StoreInst *StI) {
446447
Function *F = GenXIntrinsic::getGenXDeclaration(
447448
StI->getModule(), IID,
448449
{Pred->getType(),
449-
(m_ST->useGlobalMem() ? Offset : EltsOffset)->getType(),
450+
(m_useGlobalMem ? Offset : EltsOffset)->getType(),
450451
ValueOp->getType()});
451-
Value *logNumBlocks = ConstantInt::get(I32Ty, m_ST->useGlobalMem() ? 0 : genx::log2(ValueEltSz));
452+
Value *logNumBlocks = ConstantInt::get(I32Ty, m_useGlobalMem ? 0 : genx::log2(ValueEltSz));
452453
Value *Scale = ConstantInt::get(Type::getInt16Ty(*m_ctx), 0);
453454
Value *Surface = ConstantInt::get(I32Ty,
454455
visa::getReservedSurfaceIndex(m_stack));
455456
auto *Scatter =
456-
m_ST->useGlobalMem()
457+
m_useGlobalMem
457458
? IntrinsicInst::Create(F, {Pred, logNumBlocks, Offset, ValueOp})
458459
: IntrinsicInst::Create(F, {Pred, logNumBlocks, Scale, Surface,
459460
Offset, EltsOffset, ValueOp});
@@ -477,7 +478,7 @@ bool GenXThreadPrivateMemory::replacePTI(PtrToIntInst *PTI) {
477478
}
478479

479480
bool GenXThreadPrivateMemory::replaceGatherPrivate(CallInst *CI) {
480-
auto IID = m_ST->useGlobalMem()
481+
auto IID = m_useGlobalMem
481482
? llvm::GenXIntrinsic::genx_svm_gather
482483
: llvm::GenXIntrinsic::genx_gather_scaled;
483484

@@ -512,15 +513,15 @@ bool GenXThreadPrivateMemory::replaceGatherPrivate(CallInst *CI) {
512513
Function *F = GenXIntrinsic::getGenXDeclaration(
513514
CI->getModule(), IID,
514515
{NewDstTy, Pred->getType(),
515-
(m_ST->useGlobalMem() ? Offset : EltsOffset)->getType()});
516+
(m_useGlobalMem ? Offset : EltsOffset)->getType()});
516517

517518
Value *logNumBlocks = ConstantInt::get(I32Ty, genx::log2(ValueEltSz));
518519
Value *Scale = ConstantInt::get(Type::getInt16Ty(*m_ctx), 0);
519520
Value *Surface = ConstantInt::get(I32Ty,
520521
visa::getReservedSurfaceIndex(m_stack));
521522

522523
CallInst *Gather =
523-
m_ST->useGlobalMem()
524+
m_useGlobalMem
524525
? IntrinsicInst::Create(F, {Pred, logNumBlocks, Offset, OldValue})
525526
: IntrinsicInst::Create(F, {Pred, logNumBlocks, Scale, Surface,
526527
Offset, EltsOffset, OldValue});
@@ -536,7 +537,7 @@ bool GenXThreadPrivateMemory::replaceGatherPrivate(CallInst *CI) {
536537
}
537538

538539
bool GenXThreadPrivateMemory::replaceScatterPrivate(CallInst *CI) {
539-
auto IID = m_ST->useGlobalMem()
540+
auto IID = m_useGlobalMem
540541
? llvm::GenXIntrinsic::genx_svm_scatter
541542
: llvm::GenXIntrinsic::genx_scatter_scaled;
542543
Value *ValueOp = CI->getArgOperand(3);
@@ -560,15 +561,15 @@ bool GenXThreadPrivateMemory::replaceScatterPrivate(CallInst *CI) {
560561

561562
Function *F = GenXIntrinsic::getGenXDeclaration(
562563
CI->getModule(), IID,
563-
{Pred->getType(), (m_ST->useGlobalMem() ? Offset : EltsOffset)->getType(),
564+
{Pred->getType(), (m_useGlobalMem ? Offset : EltsOffset)->getType(),
564565
ValueOp->getType()});
565566

566567
unsigned logNumBlocks = genx::log2(EltSz);
567568
unsigned Scale = 0; // scale is always 0
568569
Value *Surface = ConstantInt::get(I32Ty,
569570
visa::getReservedSurfaceIndex(m_stack));
570571
CallInst *ScatterStScaled =
571-
m_ST->useGlobalMem()
572+
m_useGlobalMem
572573
? IntrinsicInst::Create(
573574
F,
574575
{Pred, ConstantInt::get(I32Ty, logNumBlocks), Offset, ValueOp})
@@ -838,7 +839,7 @@ void GenXThreadPrivateMemory::addUsersIfNeeded(Instruction *I) {
838839
break;
839840
}
840841
}
841-
if (m_ST->useGlobalMem() ||
842+
if (m_useGlobalMem ||
842843
(!isa<LoadInst>(I) && !isa<StoreInst>(I) && !isGatherScatterPrivate))
843844
addUsers(I);
844845
}
@@ -889,8 +890,11 @@ bool GenXThreadPrivateMemory::runOnModule(Module &M) {
889890
for (auto &F : M)
890891
visit(F);
891892
if (std::find_if(m_alloca.begin(), m_alloca.end(), checkSVMNecessaryPred) !=
892-
m_alloca.end())
893-
m_ST->setUseGlobalMem();
893+
m_alloca.end()) {
894+
//TODO: maybe move the name string to vc-intrinsics *MD::useGlobalMem
895+
M.addModuleFlag(Module::ModFlagBehavior::Error, "genx.useGlobalMem", 1);
896+
m_useGlobalMem = true;
897+
}
894898
bool Result = false;
895899
for (auto &F : M)
896900
Result |= runOnFunction(F);
@@ -1006,7 +1010,7 @@ bool GenXThreadPrivateMemory::runOnFunction(Function &F) {
10061010

10071011
for (auto CI : m_scatter) {
10081012
Type *DataTy =
1009-
CI->getArgOperand(m_ST->useGlobalMem() ? 3 : 5)->getType();
1013+
CI->getArgOperand(m_useGlobalMem ? 3 : 5)->getType();
10101014
unsigned NumElts = DataTy->getVectorNumElements();
10111015
unsigned EltSz = DataTy->getVectorElementType()->getPrimitiveSizeInBits();
10121016
unsigned ExecSz = NumElts * EltSz;

0 commit comments

Comments
 (0)