@@ -96,7 +96,7 @@ class GenXThreadPrivateMemory : public ModulePass,
96
96
97
97
private:
98
98
LLVMContext *m_ctx;
99
- GenXSubtarget *m_ST;
99
+ const GenXSubtarget *m_ST;
100
100
const DataLayout *m_DL;
101
101
std::vector<AllocaInst *> m_alloca;
102
102
std::vector<CallInst *> m_gather;
@@ -105,6 +105,7 @@ class GenXThreadPrivateMemory : public ModulePass,
105
105
std::queue<Instruction *> m_AIUsers;
106
106
std::set<Instruction *> m_AlreadyAdded;
107
107
PreDefined_Surface m_stack;
108
+ bool m_useGlobalMem = false ;
108
109
};
109
110
} // namespace
110
111
@@ -355,19 +356,19 @@ bool GenXThreadPrivateMemory::replaceLoad(LoadInst *LdI) {
355
356
Value *PointerOp = LdI->getPointerOperand ();
356
357
Value *Offset = lookForPtrReplacement (PointerOp);
357
358
Offset =
358
- ZExtOrTruncIfNeeded (Offset, m_ST-> useGlobalMem () ? I64Ty : I32Ty, LdI);
359
- auto IID = m_ST-> useGlobalMem ()
359
+ ZExtOrTruncIfNeeded (Offset, m_useGlobalMem ? I64Ty : I32Ty, LdI);
360
+ auto IID = m_useGlobalMem
360
361
? llvm::GenXIntrinsic::genx_svm_gather
361
362
: llvm::GenXIntrinsic::genx_gather_scaled;
362
363
363
364
Value *EltsOffset = FormEltsOffsetVector (NumEltsToLoad, RealTyToLoadSz, LdI);
364
365
365
366
unsigned SrcSize = genx::log2 (RealTyToLoadSz);
366
- Value *logNumBlocks = ConstantInt::get (I32Ty, m_ST-> useGlobalMem () ? 0 : SrcSize);
367
+ Value *logNumBlocks = ConstantInt::get (I32Ty, m_useGlobalMem ? 0 : SrcSize);
367
368
Value *Scale = ConstantInt::get (Type::getInt16Ty (*m_ctx), 0 );
368
369
Value *Surface = ConstantInt::get (I32Ty,
369
370
visa::getReservedSurfaceIndex (m_stack));
370
- if (m_ST-> useGlobalMem () && NumEltsToLoad > 1 ) {
371
+ if (m_useGlobalMem && NumEltsToLoad > 1 ) {
371
372
assert (Offset->getType ()->getScalarType ()->isIntegerTy (64 ));
372
373
auto *BaseOff = FormEltsOffsetVectorForSVM (NumEltsToLoad, LdI, Offset);
373
374
auto *ZextOff = CastInst::CreateZExtOrBitCast (
@@ -380,9 +381,9 @@ bool GenXThreadPrivateMemory::replaceLoad(LoadInst *LdI) {
380
381
LdI->getModule (), IID,
381
382
{OldValOfTheDataRead->getType (),
382
383
Pred->getType (),
383
- (m_ST-> useGlobalMem () ? Offset : EltsOffset)->getType ()});
384
+ (m_useGlobalMem ? Offset : EltsOffset)->getType ()});
384
385
CallInst *Gather =
385
- m_ST-> useGlobalMem ()
386
+ m_useGlobalMem
386
387
? IntrinsicInst::Create (
387
388
F, {Pred, logNumBlocks, Offset, OldValOfTheDataRead})
388
389
: IntrinsicInst::Create (F, {Pred, logNumBlocks, Scale, Surface,
@@ -423,17 +424,17 @@ bool GenXThreadPrivateMemory::replaceStore(StoreInst *StI) {
423
424
Type *I32Ty = Type::getInt32Ty (*m_ctx);
424
425
Type *I64Ty = Type::getInt64Ty (*m_ctx);
425
426
Offset =
426
- ZExtOrTruncIfNeeded (Offset, m_ST-> useGlobalMem () ? I64Ty : I32Ty, StI);
427
+ ZExtOrTruncIfNeeded (Offset, m_useGlobalMem ? I64Ty : I32Ty, StI);
427
428
428
- auto IID = m_ST-> useGlobalMem ()
429
+ auto IID = m_useGlobalMem
429
430
? llvm::GenXIntrinsic::genx_svm_scatter
430
431
: llvm::GenXIntrinsic::genx_scatter_scaled;
431
432
432
433
Value *PredVal = ConstantInt::get (Type::getInt1Ty (*m_ctx), 1 );
433
434
Value *Pred = Builder.CreateVectorSplat (ValueNumElts, PredVal);
434
435
Value *EltsOffset = FormEltsOffsetVector (ValueNumElts, ValueEltSz, StI);
435
436
436
- if (m_ST-> useGlobalMem () && ValueNumElts > 1 ) {
437
+ if (m_useGlobalMem && ValueNumElts > 1 ) {
437
438
assert (Offset->getType ()->getScalarType ()->isIntegerTy (64 ));
438
439
auto *BaseOff = FormEltsOffsetVectorForSVM (ValueNumElts, StI, Offset);
439
440
auto *ZextOff = CastInst::CreateZExtOrBitCast (
@@ -446,14 +447,14 @@ bool GenXThreadPrivateMemory::replaceStore(StoreInst *StI) {
446
447
Function *F = GenXIntrinsic::getGenXDeclaration (
447
448
StI->getModule (), IID,
448
449
{Pred->getType (),
449
- (m_ST-> useGlobalMem () ? Offset : EltsOffset)->getType (),
450
+ (m_useGlobalMem ? Offset : EltsOffset)->getType (),
450
451
ValueOp->getType ()});
451
- Value *logNumBlocks = ConstantInt::get (I32Ty, m_ST-> useGlobalMem () ? 0 : genx::log2 (ValueEltSz));
452
+ Value *logNumBlocks = ConstantInt::get (I32Ty, m_useGlobalMem ? 0 : genx::log2 (ValueEltSz));
452
453
Value *Scale = ConstantInt::get (Type::getInt16Ty (*m_ctx), 0 );
453
454
Value *Surface = ConstantInt::get (I32Ty,
454
455
visa::getReservedSurfaceIndex (m_stack));
455
456
auto *Scatter =
456
- m_ST-> useGlobalMem ()
457
+ m_useGlobalMem
457
458
? IntrinsicInst::Create (F, {Pred, logNumBlocks, Offset, ValueOp})
458
459
: IntrinsicInst::Create (F, {Pred, logNumBlocks, Scale, Surface,
459
460
Offset, EltsOffset, ValueOp});
@@ -477,7 +478,7 @@ bool GenXThreadPrivateMemory::replacePTI(PtrToIntInst *PTI) {
477
478
}
478
479
479
480
bool GenXThreadPrivateMemory::replaceGatherPrivate (CallInst *CI) {
480
- auto IID = m_ST-> useGlobalMem ()
481
+ auto IID = m_useGlobalMem
481
482
? llvm::GenXIntrinsic::genx_svm_gather
482
483
: llvm::GenXIntrinsic::genx_gather_scaled;
483
484
@@ -512,15 +513,15 @@ bool GenXThreadPrivateMemory::replaceGatherPrivate(CallInst *CI) {
512
513
Function *F = GenXIntrinsic::getGenXDeclaration (
513
514
CI->getModule (), IID,
514
515
{NewDstTy, Pred->getType (),
515
- (m_ST-> useGlobalMem () ? Offset : EltsOffset)->getType ()});
516
+ (m_useGlobalMem ? Offset : EltsOffset)->getType ()});
516
517
517
518
Value *logNumBlocks = ConstantInt::get (I32Ty, genx::log2 (ValueEltSz));
518
519
Value *Scale = ConstantInt::get (Type::getInt16Ty (*m_ctx), 0 );
519
520
Value *Surface = ConstantInt::get (I32Ty,
520
521
visa::getReservedSurfaceIndex (m_stack));
521
522
522
523
CallInst *Gather =
523
- m_ST-> useGlobalMem ()
524
+ m_useGlobalMem
524
525
? IntrinsicInst::Create (F, {Pred, logNumBlocks, Offset, OldValue})
525
526
: IntrinsicInst::Create (F, {Pred, logNumBlocks, Scale, Surface,
526
527
Offset, EltsOffset, OldValue});
@@ -536,7 +537,7 @@ bool GenXThreadPrivateMemory::replaceGatherPrivate(CallInst *CI) {
536
537
}
537
538
538
539
bool GenXThreadPrivateMemory::replaceScatterPrivate (CallInst *CI) {
539
- auto IID = m_ST-> useGlobalMem ()
540
+ auto IID = m_useGlobalMem
540
541
? llvm::GenXIntrinsic::genx_svm_scatter
541
542
: llvm::GenXIntrinsic::genx_scatter_scaled;
542
543
Value *ValueOp = CI->getArgOperand (3 );
@@ -560,15 +561,15 @@ bool GenXThreadPrivateMemory::replaceScatterPrivate(CallInst *CI) {
560
561
561
562
Function *F = GenXIntrinsic::getGenXDeclaration (
562
563
CI->getModule (), IID,
563
- {Pred->getType (), (m_ST-> useGlobalMem () ? Offset : EltsOffset)->getType (),
564
+ {Pred->getType (), (m_useGlobalMem ? Offset : EltsOffset)->getType (),
564
565
ValueOp->getType ()});
565
566
566
567
unsigned logNumBlocks = genx::log2 (EltSz);
567
568
unsigned Scale = 0 ; // scale is always 0
568
569
Value *Surface = ConstantInt::get (I32Ty,
569
570
visa::getReservedSurfaceIndex (m_stack));
570
571
CallInst *ScatterStScaled =
571
- m_ST-> useGlobalMem ()
572
+ m_useGlobalMem
572
573
? IntrinsicInst::Create (
573
574
F,
574
575
{Pred, ConstantInt::get (I32Ty, logNumBlocks), Offset, ValueOp})
@@ -838,7 +839,7 @@ void GenXThreadPrivateMemory::addUsersIfNeeded(Instruction *I) {
838
839
break ;
839
840
}
840
841
}
841
- if (m_ST-> useGlobalMem () ||
842
+ if (m_useGlobalMem ||
842
843
(!isa<LoadInst>(I) && !isa<StoreInst>(I) && !isGatherScatterPrivate))
843
844
addUsers (I);
844
845
}
@@ -889,8 +890,11 @@ bool GenXThreadPrivateMemory::runOnModule(Module &M) {
889
890
for (auto &F : M)
890
891
visit (F);
891
892
if (std::find_if (m_alloca.begin (), m_alloca.end (), checkSVMNecessaryPred) !=
892
- m_alloca.end ())
893
- m_ST->setUseGlobalMem ();
893
+ m_alloca.end ()) {
894
+ // TODO: maybe move the name string to vc-intrinsics *MD::useGlobalMem
895
+ M.addModuleFlag (Module::ModFlagBehavior::Error, " genx.useGlobalMem" , 1 );
896
+ m_useGlobalMem = true ;
897
+ }
894
898
bool Result = false ;
895
899
for (auto &F : M)
896
900
Result |= runOnFunction (F);
@@ -1006,7 +1010,7 @@ bool GenXThreadPrivateMemory::runOnFunction(Function &F) {
1006
1010
1007
1011
for (auto CI : m_scatter) {
1008
1012
Type *DataTy =
1009
- CI->getArgOperand (m_ST-> useGlobalMem () ? 3 : 5 )->getType ();
1013
+ CI->getArgOperand (m_useGlobalMem ? 3 : 5 )->getType ();
1010
1014
unsigned NumElts = DataTy->getVectorNumElements ();
1011
1015
unsigned EltSz = DataTy->getVectorElementType ()->getPrimitiveSizeInBits ();
1012
1016
unsigned ExecSz = NumElts * EltSz;
0 commit comments