@@ -35,6 +35,7 @@ IN THE SOFTWARE.
35
35
#include " GenXTargetMachine.h"
36
36
#include " GenXUtil.h"
37
37
#include " GenXVisa.h"
38
+ #include " vc/GenXCodeGen/GenXInternalMetadata.h"
38
39
39
40
#include " Probe/Assertion.h"
40
41
#include " llvmWrapper/IR/DerivedTypes.h"
@@ -208,6 +209,7 @@ std::pair<Value *, unsigned>
208
209
GenXThreadPrivateMemory::NormalizeVector (Value *From, Type *To,
209
210
Instruction *Inst) {
210
211
Type *I32Ty = Type::getInt32Ty (Inst->getContext ());
212
+ Type *I64Ty = Type::getInt64Ty (Inst->getContext ());
211
213
Value *Res = From;
212
214
Type *FromTy = From->getType ();
213
215
IGC_ASSERT (isa<VectorType>(FromTy));
@@ -234,22 +236,22 @@ GenXThreadPrivateMemory::NormalizeVector(Value *From, Type *To,
234
236
To = IGCLLVM::FixedVectorType::get (I32Ty, NumElts);
235
237
EltSz = I32Ty->getPrimitiveSizeInBits () / genx::ByteBits;
236
238
Res = CastInst::Create (Instruction::BitCast, Res, To, " " , Inst);
237
- } else if (cast<VectorType>(To)->getElementType ()->getPrimitiveSizeInBits () <
238
- genx::DWordBits
239
- // this is required for correct generation of svm.gather/scatter
240
- // of data of type which size is < i32 because these intrinsics
241
- // infer their block size from the type of the data they handle
242
- && !m_useGlobalMem) {
239
+ } else if (m_DL->getTypeSizeInBits (cast<VectorType>(To)->getElementType ()) <
240
+ genx::DWordBits) {
243
241
To = IGCLLVM::FixedVectorType::get (I32Ty, NumElts);
244
-
245
- Res = CastInst::Create (Instruction::ZExt, From, To, " " , Inst);
246
- } else if (cast<VectorType>(To)->getElementType ()->getPrimitiveSizeInBits () ==
247
- genx::QWordBits) {
242
+ Res = CastInst::CreateZExtOrBitCast (From, To, " " , Inst);
243
+ } else if (!m_useGlobalMem &&
244
+ m_DL->getTypeSizeInBits (cast<VectorType>(To)->getElementType ()) ==
245
+ genx::QWordBits) {
246
+ if (From->getType ()->getScalarType ()->isPointerTy ()) {
247
+ auto *NewType = IGCLLVM::FixedVectorType::get (I64Ty, NumElts);
248
+ From = CastInst::Create (CastInst::PtrToInt, From, NewType, " " , Inst);
249
+ }
248
250
NumElts *= 2 ;
249
251
EltSz = I32Ty->getPrimitiveSizeInBits () / genx::ByteBits;
250
252
To = IGCLLVM::FixedVectorType::get (I32Ty, NumElts);
251
253
252
- Res = CastInst::Create (Instruction::BitCast, From, To, " " , Inst);
254
+ Res = CastInst::CreateBitOrPointerCast ( From, To, " " , Inst);
253
255
}
254
256
255
257
return std::make_pair (Res, EltSz);
@@ -258,6 +260,8 @@ GenXThreadPrivateMemory::NormalizeVector(Value *From, Type *To,
258
260
Instruction *
259
261
GenXThreadPrivateMemory::RestoreVectorAfterNormalization (Instruction *From,
260
262
Type *To) {
263
+ if (From->getType () == To)
264
+ return From;
261
265
Instruction *Restored = From;
262
266
unsigned EltSz = m_DL->getTypeSizeInBits (To->getScalarType ());
263
267
IGC_ASSERT (EltSz > 0 );
@@ -519,35 +523,19 @@ bool GenXThreadPrivateMemory::replaceLoad(LoadInst *LdI) {
519
523
LdTy = IGCLLVM::FixedVectorType::get (LdTy, 1 );
520
524
521
525
unsigned NumEltsToLoad = cast<VectorType>(LdTy)->getNumElements ();
522
- unsigned LdEltTySz = m_DL->getTypeSizeInBits (LdEltTy);
523
- if (!(m_useGlobalMem && LdEltTy->isIntegerTy (64 )) &&
524
- LdEltTySz == genx::QWordBits)
525
- NumEltsToLoad *= 2 ;
526
+ unsigned ValueEltSz = m_DL->getTypeSizeInBits (LdEltTy) / genx::ByteBits;
526
527
527
528
Value *PredVal = ConstantInt::get (Type::getInt1Ty (*m_ctx), 1 );
528
529
Value *Pred = Builder.CreateVectorSplat (NumEltsToLoad, PredVal);
529
530
530
531
Type *I32Ty = Type::getInt32Ty (*m_ctx);
531
532
Type *I64Ty = Type::getInt64Ty (*m_ctx);
532
- Type *TyToLoad = (m_useGlobalMem && LdEltTy->isIntegerTy (64 )) ? I64Ty : I32Ty;
533
- if (LdEltTy->isFloatTy ())
534
- TyToLoad = LdEltTy;
535
- Type *RealTyToLoad = LdEltTy;
536
- if (!(m_useGlobalMem && LdEltTy->isIntegerTy (64 )) &&
537
- m_DL->getTypeSizeInBits (RealTyToLoad) == genx::QWordBits)
538
- RealTyToLoad = I32Ty;
539
- unsigned RealTyToLoadSz =
540
- m_DL->getTypeSizeInBits (RealTyToLoad) / genx::ByteBits;
541
- // we don't want to use improper block sizes for loads of i8/i16
542
- // to make sure we comply with alignment rules for gathers
543
- bool NoExtToDword =
544
- m_useGlobalMem &&
545
- !(LdI->getType ()->isAggregateType () || LdI->getType ()->isVectorTy ()) &&
546
- m_DL->getTypeSizeInBits (LdI->getType ()) < genx::DWordBits;
547
- if (NoExtToDword)
548
- TyToLoad = LdI->getType ();
549
533
Value *OldValOfTheDataRead =
550
- Builder.CreateVectorSplat (NumEltsToLoad, UndefValue::get (TyToLoad));
534
+ Builder.CreateVectorSplat (NumEltsToLoad, UndefValue::get (LdEltTy));
535
+ std::tie (OldValOfTheDataRead, ValueEltSz) =
536
+ NormalizeVector (OldValOfTheDataRead, LdTy, LdI);
537
+ NumEltsToLoad =
538
+ cast<VectorType>(OldValOfTheDataRead->getType ())->getNumElements ();
551
539
552
540
Value *PointerOp = LdI->getPointerOperand ();
553
541
Value *Offset = lookForPtrReplacement (PointerOp);
@@ -557,10 +545,13 @@ bool GenXThreadPrivateMemory::replaceLoad(LoadInst *LdI) {
557
545
? llvm::GenXIntrinsic::genx_svm_gather
558
546
: llvm::GenXIntrinsic::genx_gather_scaled;
559
547
560
- Value *EltsOffset = FormEltsOffsetVector (NumEltsToLoad, RealTyToLoadSz , LdI);
548
+ Value *EltsOffset = FormEltsOffsetVector (NumEltsToLoad, ValueEltSz , LdI);
561
549
562
- unsigned SrcSize = genx::log2 (RealTyToLoadSz);
563
- Value *logNumBlocks = ConstantInt::get (I32Ty, m_useGlobalMem ? 0 : SrcSize);
550
+ unsigned NumBlocks = m_DL->getTypeSizeInBits (LdEltTy) / genx::ByteBits;
551
+ // This logic is aligned with the one in CisaBuilder and GenXLowering
552
+ // The reason for the == 2 check is that svm intrinsics don't support
553
+ // BlockSize of 2, so for ops with i16s we have to use BlockSize == 1 and NumBlocks == 2
554
+ Value *logNumBlocks = ConstantInt::get (I32Ty, genx::log2 (NumBlocks == 2 ? NumBlocks : 1 ));
564
555
Value *Scale = ConstantInt::get (Type::getInt16Ty (*m_ctx), 0 );
565
556
Value *Surface = ConstantInt::get (I32Ty,
566
557
visa::getReservedSurfaceIndex (m_stack));
@@ -601,6 +592,10 @@ bool GenXThreadPrivateMemory::replaceLoad(LoadInst *LdI) {
601
592
ProperGather = LdVal;
602
593
}
603
594
595
+ Gather->setMetadata (InstMD::SVMBlockType,
596
+ MDNode::get (*m_ctx, llvm::ValueAsMetadata::get (
597
+ UndefValue::get (LdEltTy))));
598
+
604
599
LLVM_DEBUG (dbgs () << *Gather << " \n " );
605
600
LdI->replaceAllUsesWith (ProperGather);
606
601
LdI->eraseFromParent ();
@@ -647,7 +642,9 @@ bool GenXThreadPrivateMemory::replaceStore(StoreInst *StI) {
647
642
{Pred->getType (),
648
643
(m_useGlobalMem ? Offset : EltsOffset)->getType (),
649
644
ValueOp->getType ()});
650
- Value *logNumBlocks = ConstantInt::get (I32Ty, m_useGlobalMem ? 0 : genx::log2 (ValueEltSz));
645
+ unsigned NumBlocks = m_DL->getTypeSizeInBits (ValueOpTy->getScalarType ()) / genx::ByteBits;
646
+ // see the comment in replaceLoad above
647
+ Value *logNumBlocks = ConstantInt::get (I32Ty, genx::log2 (NumBlocks == 2 ? NumBlocks : 1 ));
651
648
Value *Scale = ConstantInt::get (Type::getInt16Ty (*m_ctx), 0 );
652
649
Value *Surface = ConstantInt::get (I32Ty,
653
650
visa::getReservedSurfaceIndex (m_stack));
@@ -662,6 +659,11 @@ bool GenXThreadPrivateMemory::replaceStore(StoreInst *StI) {
662
659
Scatter->insertAfter (StI);
663
660
StI->eraseFromParent ();
664
661
662
+ Scatter->setMetadata (
663
+ InstMD::SVMBlockType,
664
+ MDNode::get (*m_ctx, llvm::ValueAsMetadata::get (
665
+ UndefValue::get (ValueOpTy->getScalarType ()))));
666
+
665
667
LLVM_DEBUG (dbgs () << *Scatter << " \n " );
666
668
m_scatter.push_back (Scatter);
667
669
@@ -1094,6 +1096,12 @@ void SplitScatter(CallInst *CI) {
1094
1096
}
1095
1097
IGC_ASSERT (FirstScatter && SecondScatter);
1096
1098
1099
+ auto *MD = CI->getMetadata (InstMD::SVMBlockType);
1100
+ if (MD) {
1101
+ FirstScatter->setMetadata (InstMD::SVMBlockType, MD);
1102
+ SecondScatter->setMetadata (InstMD::SVMBlockType, MD);
1103
+ }
1104
+
1097
1105
FirstScatter->insertAfter (CI);
1098
1106
SecondScatter->insertAfter (FirstScatter);
1099
1107
@@ -1163,6 +1171,12 @@ void SplitGather(CallInst *CI) {
1163
1171
}
1164
1172
IGC_ASSERT (FirstGather && SecondGather);
1165
1173
1174
+ auto *MD = CI->getMetadata (InstMD::SVMBlockType);
1175
+ if (MD) {
1176
+ FirstGather->setMetadata (InstMD::SVMBlockType, MD);
1177
+ SecondGather->setMetadata (InstMD::SVMBlockType, MD);
1178
+ }
1179
+
1166
1180
FirstGather->insertAfter (CI);
1167
1181
SecondGather->insertAfter (FirstGather);
1168
1182
@@ -1280,14 +1294,16 @@ bool GenXThreadPrivateMemory::runOnModule(Module &M) {
1280
1294
m_ST = &getAnalysis<TargetPassConfig>()
1281
1295
.getTM <GenXTargetMachine>()
1282
1296
.getGenXSubtarget ();
1297
+ if (!m_ST->isOCLRuntime ())
1298
+ m_useGlobalMem = false ;
1283
1299
for (auto &F : M)
1284
1300
visit (F);
1285
- if (! m_useGlobalMem &&
1286
- std::find_if (m_alloca.begin (), m_alloca.end (), SVMChecker ()) !=
1287
- m_alloca.end ()) {
1301
+ if (m_useGlobalMem ||
1302
+ (m_ST-> isOCLRuntime () && std::find_if (m_alloca.begin (), m_alloca.end (),
1303
+ SVMChecker ()) != m_alloca.end () )) {
1288
1304
LLVM_DEBUG (dbgs () << " Switching TPM to SVM\n " );
1289
1305
// TODO: move the name string to vc-intrinsics *MD::useGlobalMem
1290
- M.addModuleFlag (Module::ModFlagBehavior::Error, " genx.useGlobalMem " , 1 );
1306
+ M.addModuleFlag (Module::ModFlagBehavior::Error, ModuleMD::UseSVMStack , 1 );
1291
1307
m_useGlobalMem = true ;
1292
1308
}
1293
1309
bool Result = false ;
0 commit comments