@@ -36,7 +36,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
36
36
#include " GenXTargetMachine.h"
37
37
#include " GenXUtil.h"
38
38
#include " GenXVisa.h"
39
- #include " vc/GenXOpts/Utils/KernelInfo.h"
40
39
41
40
#include " Probe/Assertion.h"
42
41
#include " llvmWrapper/IR/DerivedTypes.h"
@@ -45,6 +44,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
45
44
46
45
#include " llvm/ADT/SmallVector.h"
47
46
#include " llvm/CodeGen/TargetPassConfig.h"
47
+ #include " llvm/GenXIntrinsics/GenXMetadata.h"
48
48
#include " llvm/IR/IRBuilder.h"
49
49
#include " llvm/IR/InstVisitor.h"
50
50
#include " llvm/IR/Intrinsics.h"
@@ -85,7 +85,6 @@ class GenXThreadPrivateMemory : public ModulePass,
85
85
bool runOnFunction (Function &F);
86
86
87
87
void visitAllocaInst (AllocaInst &I);
88
- void visitFunction (Function &F);
89
88
90
89
private:
91
90
bool replacePhi (PHINode *Phi);
@@ -105,13 +104,11 @@ class GenXThreadPrivateMemory : public ModulePass,
105
104
std::pair<Value *, unsigned > NormalizeVector (Value *From, Type *To,
106
105
Instruction *InsertBefore);
107
106
Instruction *RestoreVectorAfterNormalization (Instruction *From, Type *To);
108
- void switchStack ();
109
107
110
108
public:
111
109
static char ID;
112
110
113
111
private:
114
- Module *m_M = nullptr ;
115
112
LLVMContext *m_ctx;
116
113
const GenXSubtarget *m_ST;
117
114
const DataLayout *m_DL;
@@ -166,19 +163,6 @@ static Value *ZExtOrTruncIfNeeded(Value *From, Type *To,
166
163
return Res;
167
164
}
168
165
169
- // 32u is the max exec_size allowed (see GenXCisaBuilder.cpp:buildIntrinsic
170
- // GetExecSize lambda). For svm.gather/scatter:
171
- // BlockSize is inferred from the vector element type
172
- // BlockNum should be TotalMemSize / (ExecSize * BlockSize)
173
- // where TotalMemSize is the total amount of memory read/written by the
174
- // gather/scatter
175
- // TODO: revise this for non-svm case
176
- static int getNumBlocksForType (Type *Ty, const DataLayout &DL) { // Ty must be a vector type: it is passed to cast<VectorType> below
177
- return DL.getTypeSizeInBits (Ty) / // TotalMemSize, in bits
178
- (std::min<unsigned >(32u , cast<VectorType>(Ty)->getNumElements ()) * // ExecSize: element count capped at 32
179
- DL.getTypeSizeInBits (Ty->getScalarType ())); // BlockSize: bits per element
180
- }
181
-
182
166
// Wipe all internal ConstantExprs out of V if it's a ConstantVector of function pointers
183
167
Value *GenXThreadPrivateMemory::NormalizeFuncPtrVec (Value *V, Instruction *InsPoint) {
184
168
V = breakConstantVector (cast<ConstantVector>(V), InsPoint, InsPoint);
@@ -671,9 +655,23 @@ bool GenXThreadPrivateMemory::replaceGatherPrivate(CallInst *CI) {
671
655
{NewDstTy, Pred->getType (),
672
656
(m_useGlobalMem ? Offset : EltsOffset)->getType ()});
673
657
658
+ // 32u is max exec_size allowed (see GenXCisaBuilder.cpp:buildIntrinsic
659
+ // GetExecSize lambda). For svm.gather/scatter:
660
+ // BlockSize is inferred from vec elem type
661
+ // BlockNum should be TotalMemSize / (ExecSize * BlockSize)
662
+ // where TotalMemSize is a total amount of mem read/written for
663
+ // gather/scatter
664
+ // TODO: revise NumBlocks for non-svm case
674
665
unsigned NumBlocks =
675
- (m_useGlobalMem) ? getNumBlocksForType (NewDstTy, *m_DL) : ValueEltSz;
676
- Value *logNumBlocks = ConstantInt::get (I32Ty, genx::log2 (NumBlocks));
666
+ (m_useGlobalMem)
667
+ ? genx::log2 (m_DL->getTypeSizeInBits (NewDstTy) /
668
+ (genx::ByteBits *
669
+ std::min<unsigned >(
670
+ 32u , cast<VectorType>(NewDstTy)->getNumElements ()) *
671
+ (m_DL->getTypeSizeInBits (NewDstTy->getScalarType ()) /
672
+ genx::ByteBits)))
673
+ : genx::log2 (ValueEltSz);
674
+ Value *logNumBlocks = ConstantInt::get (I32Ty, NumBlocks);
677
675
Value *Scale = ConstantInt::get (Type::getInt16Ty (*m_ctx), 0 );
678
676
Value *Surface =
679
677
ConstantInt::get (I32Ty, visa::getReservedSurfaceIndex (m_stack));
@@ -708,7 +706,6 @@ bool GenXThreadPrivateMemory::replaceScatterPrivate(CallInst *CI) {
708
706
IGC_ASSERT (isa<VectorType>(OrigValueTy));
709
707
unsigned EltSz = 0 ;
710
708
std::tie (ValueOp, EltSz) = NormalizeVector (ValueOp, ValueOp->getType (), CI);
711
- auto *NewDstTy = ValueOp->getType ();
712
709
713
710
Value *Pred = CI->getArgOperand (0 );
714
711
Value *EltsOffset = CI->getArgOperand (2 );
@@ -728,33 +725,26 @@ bool GenXThreadPrivateMemory::replaceScatterPrivate(CallInst *CI) {
728
725
Value *Offset = lookForPtrReplacement (ScatterPtr);
729
726
Offset = ZExtOrTruncIfNeeded (Offset, m_useGlobalMem ? I64Ty : I32Ty, CI);
730
727
731
- if (m_useGlobalMem) {
732
- Offset =
733
- FormEltsOffsetVectorForSVM (lookForTruncOffset (Offset), EltsOffset, CI);
734
- if (!Offset->getType ()->getScalarType ()->isIntegerTy (64 ))
735
- Offset = CastInst::CreateZExtOrBitCast (
736
- Offset,
737
- IGCLLVM::FixedVectorType::get (
738
- I64Ty, cast<VectorType>(EltsOffset->getType ())->getNumElements ()),
739
- " " , CI);
740
- }
728
+ if (m_useGlobalMem)
729
+ EltsOffset = FormEltsOffsetVectorForSVM (Offset, EltsOffset, CI);
741
730
742
731
Function *F = GenXIntrinsic::getGenXDeclaration (
743
732
CI->getModule (), IID,
744
- {Pred->getType (), Offset ->getType (),
733
+ {Pred->getType (), EltsOffset ->getType (),
745
734
ValueOp->getType ()});
746
735
747
- unsigned NumBlocks =
748
- (m_useGlobalMem) ? getNumBlocksForType (NewDstTy, *m_DL) : EltSz;
749
- Value *logNumBlocks = ConstantInt::get (I32Ty, NumBlocks);
750
- Value *Surface =
751
- ConstantInt::get (I32Ty, visa::getReservedSurfaceIndex (m_stack));
736
+ unsigned logNumBlocks = genx::log2 (EltSz);
737
+ unsigned Scale = 0 ; // scale is always 0
738
+ Value *Surface = ConstantInt::get (I32Ty,
739
+ visa::getReservedSurfaceIndex (m_stack));
752
740
CallInst *ScatterStScaled =
753
741
m_useGlobalMem
754
- ? IntrinsicInst::Create (F, {Pred, logNumBlocks, Offset, ValueOp})
742
+ ? IntrinsicInst::Create (
743
+ F,
744
+ {Pred, ConstantInt::get (I32Ty, logNumBlocks), EltsOffset, ValueOp})
755
745
: IntrinsicInst::Create (
756
- F, {Pred, logNumBlocks,
757
- ConstantInt::get (Type::getInt16Ty (*m_ctx), 0 ), Surface,
746
+ F, {Pred, ConstantInt::get (I32Ty, logNumBlocks) ,
747
+ ConstantInt::get (Type::getInt16Ty (*m_ctx), Scale ), Surface,
758
748
Offset, EltsOffset, ValueOp});
759
749
ScatterStScaled->insertAfter (CI);
760
750
m_scatter.push_back (ScatterStScaled);
@@ -1105,7 +1095,7 @@ class SVMChecker {
1105
1095
return LoadsThreshold + 1 ;
1106
1096
}
1107
1097
} else if (isa<PHINode>(V) || isa<ICmpInst>(V)) {
1108
- // do not go thru phi as cycles may appear and
1098
+ // do not go through phi as loops may appear and
1109
1099
// it doesn't seem necessary for the analysis now
1110
1100
return 0 ;
1111
1101
}
@@ -1119,17 +1109,6 @@ class SVMChecker {
1119
1109
bool operator ()(Value *V) { return checkSVMNecessary (V) > LoadsThreshold; }
1120
1110
};
1121
1111
1122
- void GenXThreadPrivateMemory::switchStack () { // Switches the pass to the SVM path: sets the module flag and m_useGlobalMem
1123
- LLVM_DEBUG (dbgs () << " Switching TPM to SVM\n " );
1124
- // TODO: relax hasLongLong condition once stack is redesigned to support
1125
- // emulation
1126
- IGC_ASSERT (m_M); // m_M is dereferenced below, so it must have been set beforehand
1127
- IGC_ASSERT (m_ST->hasLongLong () && m_ST->isOCLRuntime ()); // the switch is only asserted valid for subtargets with long long support under the OCL runtime
1128
- // TODO: move the name string to vc-intrinsics *MD::useGlobalMem
1129
- m_M->addModuleFlag (Module::ModFlagBehavior::Error, " genx.useGlobalMem" , 1 ); // record the decision on the module as a flag with value 1
1130
- m_useGlobalMem = true ; // the replace* routines in this pass branch on this member
1131
- }
1132
-
1133
1112
void GenXThreadPrivateMemory::addUsers (Value *V) {
1134
1113
IGC_ASSERT (isa<Instruction>(V) || isa<Argument>(V));
1135
1114
for (const auto &Usr : V->users ()) {
@@ -1182,15 +1161,19 @@ void GenXThreadPrivateMemory::addUsersIfNeeded(Value *V) {
1182
1161
}
1183
1162
1184
1163
bool GenXThreadPrivateMemory::runOnModule (Module &M) {
1185
- m_M = &M;
1186
1164
m_ST = &getAnalysis<TargetPassConfig>()
1187
1165
.getTM <GenXTargetMachine>()
1188
1166
.getGenXSubtarget ();
1189
1167
for (auto &F : M)
1190
1168
visit (F);
1191
1169
if (!m_useGlobalMem &&
1192
- std::any_of (m_alloca.begin (), m_alloca.end (), SVMChecker ()))
1193
- switchStack ();
1170
+ std::find_if (m_alloca.begin (), m_alloca.end (), SVMChecker ()) !=
1171
+ m_alloca.end ()) {
1172
+ LLVM_DEBUG (dbgs () << " Switching TPM to SVM\n " );
1173
+ // TODO: move the name string to vc-intrinsics *MD::useGlobalMem
1174
+ M.addModuleFlag (Module::ModFlagBehavior::Error, " genx.useGlobalMem" , 1 );
1175
+ m_useGlobalMem = true ;
1176
+ }
1194
1177
bool Result = false ;
1195
1178
for (auto &F : M)
1196
1179
Result |= runOnFunction (F);
@@ -1259,52 +1242,42 @@ bool GenXThreadPrivateMemory::runOnFunction(Function &F) {
1259
1242
m_AIUsers.pop ();
1260
1243
1261
1244
addUsersIfNeeded (I);
1262
- bool ChangeRequired = false ;
1263
- if (auto *LdI = dyn_cast<LoadInst>(I)) {
1245
+
1246
+ if (auto *LdI = dyn_cast<LoadInst>(I))
1264
1247
Changed |= replaceLoad (LdI);
1265
- ChangeRequired = true ;
1266
- } else if (auto *StI = dyn_cast<StoreInst>(I)) {
1248
+ else if (auto *StI = dyn_cast<StoreInst>(I))
1267
1249
Changed |= replaceStore (StI);
1268
- ChangeRequired = true ;
1269
- } else if (auto *PTI = dyn_cast<PtrToIntInst>(I)) {
1250
+ else if (auto *PTI = dyn_cast<PtrToIntInst>(I))
1270
1251
Changed |= replacePTI (PTI);
1271
- ChangeRequired = true ;
1272
- } else if (auto *AddrCast = dyn_cast<AddrSpaceCastInst>(I)) {
1252
+ else if (auto * AddrCast = dyn_cast<AddrSpaceCastInst>(I))
1273
1253
Changed |= replaceAddrSpaceCast (AddrCast);
1274
- ChangeRequired = true ;
1275
- } else if (isa<IntToPtrInst>(I) || isa<BitCastInst>(I)) {
1254
+ else if (isa<IntToPtrInst>(I) || isa<BitCastInst>(I)) {
1276
1255
// resolve all IntToPtr users and remove it.
1277
1256
if (I->use_empty ()) {
1278
1257
I->eraseFromParent ();
1279
1258
Changed = true ;
1280
1259
}
1281
1260
} else if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I)) {
1282
1261
unsigned ID = GenXIntrinsic::getAnyIntrinsicID (CI);
1283
- if (ID == GenXIntrinsic::genx_gather_private) {
1262
+ if (ID == GenXIntrinsic::genx_gather_private)
1284
1263
Changed |= replaceGatherPrivate (CI);
1285
- ChangeRequired = true ;
1286
- } else if (ID == GenXIntrinsic::genx_scatter_private) {
1264
+ else if (ID == GenXIntrinsic::genx_scatter_private)
1287
1265
Changed |= replaceScatterPrivate (CI);
1288
- ChangeRequired = true ;
1289
- } else if (ID == Intrinsic::lifetime_start ||
1290
- ID == Intrinsic::lifetime_end) {
1266
+ else if (ID == Intrinsic::lifetime_start ||
1267
+ ID == Intrinsic::lifetime_end) {
1291
1268
CI->eraseFromParent ();
1292
1269
Changed = true ;
1293
1270
}
1294
1271
} else if (PHINode *Phi = dyn_cast<PHINode>(I)) {
1295
- if (isa<PointerType>(Phi->getType ())) {
1272
+ if (isa<PointerType>(Phi->getType ()))
1296
1273
Changed |= replacePhi (Phi);
1297
- ChangeRequired = true ;
1298
- }
1299
1274
} else if (SelectInst *Sel = dyn_cast<SelectInst>(I)) {
1300
- if (isa<PointerType>(Sel->getType ())) {
1275
+ if (isa<PointerType>(Sel->getType ()))
1301
1276
Changed |= replaceSelect (Sel);
1302
- ChangeRequired = true ;
1303
- }
1304
1277
}
1305
1278
1306
1279
if (m_AIUsers.empty ()) {
1307
- if (!Changed && ChangeRequired )
1280
+ if (!Changed)
1308
1281
report_fatal_error (" Thread private memory: cannot resolve all alloca uses" );
1309
1282
Changed = false ;
1310
1283
collectEachPossibleTPMUsers ();
@@ -1349,37 +1322,3 @@ bool GenXThreadPrivateMemory::runOnFunction(Function &F) {
1349
1322
// Records the given alloca instruction in m_alloca.
// NOTE(review): presumably invoked through the visit (F) calls in runOnModule,
// which afterwards scans m_alloca with SVMChecker -- confirm against the
// visitor base class, which is not visible here.
void GenXThreadPrivateMemory::visitAllocaInst (AllocaInst &I) {
1350
1323
m_alloca.push_back (&I);
1351
1324
}
1352
-
1353
- void GenXThreadPrivateMemory::visitFunction (Function &F) { // Scans F's kernel metadata for arguments described as "svmptr_t"; each hit calls switchStack and is saved in m_args
1354
- if (GenXIntrinsic::getAnyIntrinsicID (&F) != GenXIntrinsic::not_any_intrinsic) // F is itself an intrinsic: nothing to inspect
1355
- return ;
1356
- // here we don't use genx::KernelMetadata as it's only able to
1357
- // deal with kernels while we want to look at functions as well
1358
- NamedMDNode *Named =
1359
- F.getParent ()->getNamedMetadata (genx::FunctionMD::GenXKernels);
1360
- if (!Named) // the module carries no such named metadata
1361
- return ;
1362
-
1363
- auto NodeIt = // first metadata node that references F and has an ArgTypeDescs operand
1364
- std::find_if (Named->op_begin (), Named->op_end (), [&F](MDNode *N) {
1365
- return N->getNumOperands () > KernelMDOp::ArgTypeDescs &&
1366
- getValueAsMetadata (N->getOperand (KernelMDOp::FunctionRef)) == &F;
1367
- });
1368
- if (NodeIt == Named->op_end ()) // no metadata node describes F
1369
- return ;
1370
- auto *Node = *NodeIt;
1371
- if (Node->getNumOperands () <= KernelMDOp::ArgTypeDescs) // NOTE(review): the find_if predicate above already requires the opposite, so this looks unreachable
1372
- return ;
1373
-
1374
- MDNode *ArgDescNode =
1375
- cast<MDNode>(Node->getOperand (KernelMDOp::ArgTypeDescs));
1376
-
1377
- for (auto &Arg : F.args ())
1378
- if (ArgDescNode->getNumOperands () > Arg.getArgNo () && // skip arguments that have no descriptor operand
1379
- cast<MDString>(ArgDescNode->getOperand (Arg.getArgNo ()))
1380
- ->getString ()
1381
- .find_lower (" svmptr_t" ) != StringRef::npos) { // case-insensitive substring match on the descriptor string
1382
- switchStack (); // called once per matching argument
1383
- m_args.push_back (&Arg);
1384
- }
1385
- }
0 commit comments