Skip to content

Commit dc9e385

Browse files
zuban32 sys_zuul
authored and committed
Sync change
Change-Id: I041490b99eba60cf492f534e79a7a830a4b84602
1 parent 56dd34b commit dc9e385

File tree

1 file changed

+50
-111
lines changed

1 file changed

+50
-111
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXThreadPrivateMemory.cpp

Lines changed: 50 additions & 111 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
3636
#include "GenXTargetMachine.h"
3737
#include "GenXUtil.h"
3838
#include "GenXVisa.h"
39-
#include "vc/GenXOpts/Utils/KernelInfo.h"
4039

4140
#include "Probe/Assertion.h"
4241
#include "llvmWrapper/IR/DerivedTypes.h"
@@ -45,6 +44,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
4544

4645
#include "llvm/ADT/SmallVector.h"
4746
#include "llvm/CodeGen/TargetPassConfig.h"
47+
#include "llvm/GenXIntrinsics/GenXMetadata.h"
4848
#include "llvm/IR/IRBuilder.h"
4949
#include "llvm/IR/InstVisitor.h"
5050
#include "llvm/IR/Intrinsics.h"
@@ -85,7 +85,6 @@ class GenXThreadPrivateMemory : public ModulePass,
8585
bool runOnFunction(Function &F);
8686

8787
void visitAllocaInst(AllocaInst &I);
88-
void visitFunction(Function &F);
8988

9089
private:
9190
bool replacePhi(PHINode *Phi);
@@ -105,13 +104,11 @@ class GenXThreadPrivateMemory : public ModulePass,
105104
std::pair<Value *, unsigned> NormalizeVector(Value *From, Type *To,
106105
Instruction *InsertBefore);
107106
Instruction *RestoreVectorAfterNormalization(Instruction *From, Type *To);
108-
void switchStack();
109107

110108
public:
111109
static char ID;
112110

113111
private:
114-
Module *m_M = nullptr;
115112
LLVMContext *m_ctx;
116113
const GenXSubtarget *m_ST;
117114
const DataLayout *m_DL;
@@ -166,19 +163,6 @@ static Value *ZExtOrTruncIfNeeded(Value *From, Type *To,
166163
return Res;
167164
}
168165

169-
// 32u is max exec_size allowed (see GenXCisaBuilder.cpp:buildIntrinsic
170-
// GetExecSize lambda) For svm.gather/scatter:
171-
// BlockSize is inferred from vec elem type
172-
// BlockNum should be TotalMemSize / (ExecSize * BlockSize)
173-
// where TotalMemSize is a total amount of mem read/written for
174-
// gather/scatter
175-
// TODO: revise this for non-svm case
176-
static int getNumBlocksForType(Type *Ty, const DataLayout &DL) {
177-
return DL.getTypeSizeInBits(Ty) /
178-
(std::min<unsigned>(32u, cast<VectorType>(Ty)->getNumElements()) *
179-
DL.getTypeSizeInBits(Ty->getScalarType()));
180-
}
181-
182166
// Wipe all internal ConstantExprs out of V if it's a ConstantVector of function pointers
183167
Value *GenXThreadPrivateMemory::NormalizeFuncPtrVec(Value *V, Instruction *InsPoint) {
184168
V = breakConstantVector(cast<ConstantVector>(V), InsPoint, InsPoint);
@@ -671,9 +655,23 @@ bool GenXThreadPrivateMemory::replaceGatherPrivate(CallInst *CI) {
671655
{NewDstTy, Pred->getType(),
672656
(m_useGlobalMem ? Offset : EltsOffset)->getType()});
673657

658+
// 32u is max exec_size allowed (see GenXCisaBuilder.cpp:buildIntrinsic
659+
// GetExecSize lambda). For svm.gather/scatter:
660+
// BlockSize is inferred from vec elem type
661+
// BlockNum should be TotalMemSize / (ExecSize * BlockSize)
662+
// where TotalMemSize is a total amount of mem read/written for
663+
// gather/scatter
664+
// TODO: revise NumBlocks for non-svm case
674665
unsigned NumBlocks =
675-
(m_useGlobalMem) ? getNumBlocksForType(NewDstTy, *m_DL) : ValueEltSz;
676-
Value *logNumBlocks = ConstantInt::get(I32Ty, genx::log2(NumBlocks));
666+
(m_useGlobalMem)
667+
? genx::log2(m_DL->getTypeSizeInBits(NewDstTy) /
668+
(genx::ByteBits *
669+
std::min<unsigned>(
670+
32u, cast<VectorType>(NewDstTy)->getNumElements()) *
671+
(m_DL->getTypeSizeInBits(NewDstTy->getScalarType()) /
672+
genx::ByteBits)))
673+
: genx::log2(ValueEltSz);
674+
Value *logNumBlocks = ConstantInt::get(I32Ty, NumBlocks);
677675
Value *Scale = ConstantInt::get(Type::getInt16Ty(*m_ctx), 0);
678676
Value *Surface =
679677
ConstantInt::get(I32Ty, visa::getReservedSurfaceIndex(m_stack));
@@ -708,7 +706,6 @@ bool GenXThreadPrivateMemory::replaceScatterPrivate(CallInst *CI) {
708706
IGC_ASSERT(isa<VectorType>(OrigValueTy));
709707
unsigned EltSz = 0;
710708
std::tie(ValueOp, EltSz) = NormalizeVector(ValueOp, ValueOp->getType(), CI);
711-
auto *NewDstTy = ValueOp->getType();
712709

713710
Value *Pred = CI->getArgOperand(0);
714711
Value *EltsOffset = CI->getArgOperand(2);
@@ -728,33 +725,26 @@ bool GenXThreadPrivateMemory::replaceScatterPrivate(CallInst *CI) {
728725
Value *Offset = lookForPtrReplacement(ScatterPtr);
729726
Offset = ZExtOrTruncIfNeeded(Offset, m_useGlobalMem ? I64Ty : I32Ty, CI);
730727

731-
if (m_useGlobalMem) {
732-
Offset =
733-
FormEltsOffsetVectorForSVM(lookForTruncOffset(Offset), EltsOffset, CI);
734-
if (!Offset->getType()->getScalarType()->isIntegerTy(64))
735-
Offset = CastInst::CreateZExtOrBitCast(
736-
Offset,
737-
IGCLLVM::FixedVectorType::get(
738-
I64Ty, cast<VectorType>(EltsOffset->getType())->getNumElements()),
739-
"", CI);
740-
}
728+
if (m_useGlobalMem)
729+
EltsOffset = FormEltsOffsetVectorForSVM(Offset, EltsOffset, CI);
741730

742731
Function *F = GenXIntrinsic::getGenXDeclaration(
743732
CI->getModule(), IID,
744-
{Pred->getType(), Offset->getType(),
733+
{Pred->getType(), EltsOffset->getType(),
745734
ValueOp->getType()});
746735

747-
unsigned NumBlocks =
748-
(m_useGlobalMem) ? getNumBlocksForType(NewDstTy, *m_DL) : EltSz;
749-
Value *logNumBlocks = ConstantInt::get(I32Ty, NumBlocks);
750-
Value *Surface =
751-
ConstantInt::get(I32Ty, visa::getReservedSurfaceIndex(m_stack));
736+
unsigned logNumBlocks = genx::log2(EltSz);
737+
unsigned Scale = 0; // scale is always 0
738+
Value *Surface = ConstantInt::get(I32Ty,
739+
visa::getReservedSurfaceIndex(m_stack));
752740
CallInst *ScatterStScaled =
753741
m_useGlobalMem
754-
? IntrinsicInst::Create(F, {Pred, logNumBlocks, Offset, ValueOp})
742+
? IntrinsicInst::Create(
743+
F,
744+
{Pred, ConstantInt::get(I32Ty, logNumBlocks), EltsOffset, ValueOp})
755745
: IntrinsicInst::Create(
756-
F, {Pred, logNumBlocks,
757-
ConstantInt::get(Type::getInt16Ty(*m_ctx), 0), Surface,
746+
F, {Pred, ConstantInt::get(I32Ty, logNumBlocks),
747+
ConstantInt::get(Type::getInt16Ty(*m_ctx), Scale), Surface,
758748
Offset, EltsOffset, ValueOp});
759749
ScatterStScaled->insertAfter(CI);
760750
m_scatter.push_back(ScatterStScaled);
@@ -1105,7 +1095,7 @@ class SVMChecker {
11051095
return LoadsThreshold + 1;
11061096
}
11071097
} else if (isa<PHINode>(V) || isa<ICmpInst>(V)) {
1108-
// do not go thru phi as cycles may appear and
1098+
// do not go thru phi as loops may appear and
11091099
// it doesn't seem necessary for the analysis now
11101100
return 0;
11111101
}
@@ -1119,17 +1109,6 @@ class SVMChecker {
11191109
bool operator()(Value *V) { return checkSVMNecessary(V) > LoadsThreshold; }
11201110
};
11211111

1122-
void GenXThreadPrivateMemory::switchStack() {
1123-
LLVM_DEBUG(dbgs() << "Switching TPM to SVM\n");
1124-
// TODO: relax hasLongLong condition once stack is redesigned to support
1125-
// emulation
1126-
IGC_ASSERT(m_M);
1127-
IGC_ASSERT(m_ST->hasLongLong() && m_ST->isOCLRuntime());
1128-
// TODO: move the name string to vc-intrinsics *MD::useGlobalMem
1129-
m_M->addModuleFlag(Module::ModFlagBehavior::Error, "genx.useGlobalMem", 1);
1130-
m_useGlobalMem = true;
1131-
}
1132-
11331112
void GenXThreadPrivateMemory::addUsers(Value *V) {
11341113
IGC_ASSERT(isa<Instruction>(V) || isa<Argument>(V));
11351114
for (const auto &Usr : V->users()) {
@@ -1182,15 +1161,19 @@ void GenXThreadPrivateMemory::addUsersIfNeeded(Value *V) {
11821161
}
11831162

11841163
bool GenXThreadPrivateMemory::runOnModule(Module &M) {
1185-
m_M = &M;
11861164
m_ST = &getAnalysis<TargetPassConfig>()
11871165
.getTM<GenXTargetMachine>()
11881166
.getGenXSubtarget();
11891167
for (auto &F : M)
11901168
visit(F);
11911169
if (!m_useGlobalMem &&
1192-
std::any_of(m_alloca.begin(), m_alloca.end(), SVMChecker()))
1193-
switchStack();
1170+
std::find_if(m_alloca.begin(), m_alloca.end(), SVMChecker()) !=
1171+
m_alloca.end()) {
1172+
LLVM_DEBUG(dbgs() << "Switching TPM to SVM\n");
1173+
// TODO: move the name string to vc-intrinsics *MD::useGlobalMem
1174+
M.addModuleFlag(Module::ModFlagBehavior::Error, "genx.useGlobalMem", 1);
1175+
m_useGlobalMem = true;
1176+
}
11941177
bool Result = false;
11951178
for (auto &F : M)
11961179
Result |= runOnFunction(F);
@@ -1259,52 +1242,42 @@ bool GenXThreadPrivateMemory::runOnFunction(Function &F) {
12591242
m_AIUsers.pop();
12601243

12611244
addUsersIfNeeded(I);
1262-
bool ChangeRequired = false;
1263-
if (auto *LdI = dyn_cast<LoadInst>(I)) {
1245+
1246+
if (auto *LdI = dyn_cast<LoadInst>(I))
12641247
Changed |= replaceLoad(LdI);
1265-
ChangeRequired = true;
1266-
} else if (auto *StI = dyn_cast<StoreInst>(I)) {
1248+
else if (auto *StI = dyn_cast<StoreInst>(I))
12671249
Changed |= replaceStore(StI);
1268-
ChangeRequired = true;
1269-
} else if (auto *PTI = dyn_cast<PtrToIntInst>(I)) {
1250+
else if (auto *PTI = dyn_cast<PtrToIntInst>(I))
12701251
Changed |= replacePTI(PTI);
1271-
ChangeRequired = true;
1272-
} else if (auto *AddrCast = dyn_cast<AddrSpaceCastInst>(I)) {
1252+
else if (auto* AddrCast = dyn_cast<AddrSpaceCastInst>(I))
12731253
Changed |= replaceAddrSpaceCast(AddrCast);
1274-
ChangeRequired = true;
1275-
} else if (isa<IntToPtrInst>(I) || isa<BitCastInst>(I)) {
1254+
else if (isa<IntToPtrInst>(I) || isa<BitCastInst>(I)) {
12761255
// resolve all IntToPtr users and remove it.
12771256
if (I->use_empty()) {
12781257
I->eraseFromParent();
12791258
Changed = true;
12801259
}
12811260
} else if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I)) {
12821261
unsigned ID = GenXIntrinsic::getAnyIntrinsicID(CI);
1283-
if (ID == GenXIntrinsic::genx_gather_private) {
1262+
if (ID == GenXIntrinsic::genx_gather_private)
12841263
Changed |= replaceGatherPrivate(CI);
1285-
ChangeRequired = true;
1286-
} else if (ID == GenXIntrinsic::genx_scatter_private) {
1264+
else if (ID == GenXIntrinsic::genx_scatter_private)
12871265
Changed |= replaceScatterPrivate(CI);
1288-
ChangeRequired = true;
1289-
} else if (ID == Intrinsic::lifetime_start ||
1290-
ID == Intrinsic::lifetime_end) {
1266+
else if (ID == Intrinsic::lifetime_start ||
1267+
ID == Intrinsic::lifetime_end) {
12911268
CI->eraseFromParent();
12921269
Changed = true;
12931270
}
12941271
} else if (PHINode *Phi = dyn_cast<PHINode>(I)) {
1295-
if (isa<PointerType>(Phi->getType())) {
1272+
if (isa<PointerType>(Phi->getType()))
12961273
Changed |= replacePhi(Phi);
1297-
ChangeRequired = true;
1298-
}
12991274
} else if (SelectInst *Sel = dyn_cast<SelectInst>(I)) {
1300-
if (isa<PointerType>(Sel->getType())) {
1275+
if (isa<PointerType>(Sel->getType()))
13011276
Changed |= replaceSelect(Sel);
1302-
ChangeRequired = true;
1303-
}
13041277
}
13051278

13061279
if (m_AIUsers.empty()) {
1307-
if (!Changed && ChangeRequired)
1280+
if (!Changed)
13081281
report_fatal_error("Thread private memory: cannot resolve all alloca uses");
13091282
Changed = false;
13101283
collectEachPossibleTPMUsers();
@@ -1349,37 +1322,3 @@ bool GenXThreadPrivateMemory::runOnFunction(Function &F) {
13491322
void GenXThreadPrivateMemory::visitAllocaInst(AllocaInst &I) {
13501323
m_alloca.push_back(&I);
13511324
}
1352-
1353-
void GenXThreadPrivateMemory::visitFunction(Function &F) {
1354-
if (GenXIntrinsic::getAnyIntrinsicID(&F) != GenXIntrinsic::not_any_intrinsic)
1355-
return;
1356-
// here we don't use genx::KernelMetadata as it's only able to
1357-
// deal with kernels while we want to look at functions as well
1358-
NamedMDNode *Named =
1359-
F.getParent()->getNamedMetadata(genx::FunctionMD::GenXKernels);
1360-
if (!Named)
1361-
return;
1362-
1363-
auto NodeIt =
1364-
std::find_if(Named->op_begin(), Named->op_end(), [&F](MDNode *N) {
1365-
return N->getNumOperands() > KernelMDOp::ArgTypeDescs &&
1366-
getValueAsMetadata(N->getOperand(KernelMDOp::FunctionRef)) == &F;
1367-
});
1368-
if (NodeIt == Named->op_end())
1369-
return;
1370-
auto *Node = *NodeIt;
1371-
if (Node->getNumOperands() <= KernelMDOp::ArgTypeDescs)
1372-
return;
1373-
1374-
MDNode *ArgDescNode =
1375-
cast<MDNode>(Node->getOperand(KernelMDOp::ArgTypeDescs));
1376-
1377-
for (auto &Arg : F.args())
1378-
if (ArgDescNode->getNumOperands() > Arg.getArgNo() &&
1379-
cast<MDString>(ArgDescNode->getOperand(Arg.getArgNo()))
1380-
->getString()
1381-
.find_lower("svmptr_t") != StringRef::npos) {
1382-
switchStack();
1383-
m_args.push_back(&Arg);
1384-
}
1385-
}

0 commit comments

Comments
 (0)