Skip to content

Commit 2219f46

Browse files
jfuentes authored and sys_zuul committed
Implement tagging mechanism for generic pointers. Address space tag goes on bits[61:63] of 64-bit generic pointers.
Tags used are 001 for private, 010 for local, and 000/111 for the global address space. Change-Id: I3af1b947c926aac23344b50ca2d35bdf5919b7a1
1 parent f6e6a70 commit 2219f46

File tree

2 files changed

+170
-135
lines changed

2 files changed

+170
-135
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8148,8 +8148,46 @@ void EmitPass::emitPtrToInt(llvm::PtrToIntInst* P2I)
81488148

81498149
void EmitPass::emitAddrSpaceCast(llvm::AddrSpaceCastInst* addrSpaceCast)
81508150
{
8151+
// Tags are used to determine the address space of generic pointers
8152+
// cast from private, local or global pointers.
8153+
// Bits[60:63] are used for this purpose; bit[60] is reserved for future use.
8154+
// Address space tag on bit[61:63] can be:
8155+
// 001: private
8156+
// 010: local
8157+
// 000/111: global
8158+
81518159
CVariable* srcV = GetSymbol(addrSpaceCast->getOperand(0));
8152-
m_encoder->Cast(m_destination, srcV);
8160+
if (addrSpaceCast->getDestAddressSpace() == ADDRESS_SPACE_GENERIC)
8161+
{
8162+
if (addrSpaceCast->getSrcAddressSpace() == ADDRESS_SPACE_PRIVATE)
8163+
{
8164+
CVariable* pTempVar = m_currShader->GetNewVariable(
8165+
numLanes(m_currShader->m_SIMDSize),
8166+
ISA_TYPE_UQ,
8167+
m_currShader->getGRFAlignment(), m_destination->IsUniform());
8168+
8169+
m_encoder->Copy(pTempVar, m_currShader->ImmToVariable(1ULL << 61, ISA_TYPE_UQ));
8170+
8171+
m_encoder->Or(m_destination, srcV, pTempVar);
8172+
}
8173+
else if (addrSpaceCast->getSrcAddressSpace() == ADDRESS_SPACE_LOCAL)
8174+
{
8175+
CVariable* pTempVar = m_currShader->GetNewVariable(
8176+
numLanes(m_currShader->m_SIMDSize),
8177+
ISA_TYPE_UQ,
8178+
EALIGN_GRF, true);
8179+
8180+
m_encoder->Copy(pTempVar, m_currShader->ImmToVariable(1ULL << 62, ISA_TYPE_UQ));
8181+
8182+
m_encoder->Or(m_destination, srcV, pTempVar);
8183+
}
8184+
// else ADDRESS_SPACE_GLOBAL
8185+
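// nop: the global tag is 000, so no tag bits are OR'ed in.
// NOTE(review): no instruction writing m_destination is emitted on this path - confirm this is intended.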
8186+
}
8187+
else
8188+
{
8189+
m_encoder->Cast(m_destination, srcV);
8190+
}
81538191
m_encoder->Push();
81548192
}
81558193

IGC/Compiler/Optimizer/OpenCLPasses/GenericAddressResolution/GenericAddressDynamicResolution.cpp

Lines changed: 131 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -92,12 +92,6 @@ bool GenericAddressAnalysis::runOnFunction(Function& F)
9292
Ty = GEP->getPointerOperandType();
9393
auto PT = dyn_cast<PointerType>(Ty);
9494
if (PT && PT->getAddressSpace() == ADDRESS_SPACE_GENERIC) {
95-
auto implicitArgs = ImplicitArgs(F, getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils());
96-
SmallVector<ImplicitArg::ArgType, 3> args;
97-
args.push_back(ImplicitArg::LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS);
98-
args.push_back(ImplicitArg::LOCAL_MEMORY_STATELESS_WINDOW_SIZE);
99-
args.push_back(ImplicitArg::PRIVATE_MEMORY_STATELESS_SIZE);
100-
ImplicitArgs::addImplicitArgs(F, args, getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils());
10195
return true;
10296
}
10397
}
@@ -136,9 +130,7 @@ namespace {
136130
Module* getModule() { return m_module; }
137131

138132
private:
139-
Value* addIsAddrSpaceComparison(Value* pointer, Instruction* insertPoint, unsigned targetAS);
140133
Type* getPointerAsIntType(LLVMContext& Ctx, unsigned AS);
141-
Value* getAddrSpaceWindowEndAddress(Instruction& insertPoint, unsigned targetAS);
142134
void resolveGAS(Instruction& I, Value* pointerOperand, unsigned targetAS);
143135
};
144136

@@ -212,37 +204,6 @@ Type* GenericAddressDynamicResolution::getPointerAsIntType(LLVMContext& ctx, con
212204
return IntegerType::get(ctx, ptrBits);
213205
}
214206

215-
Value* GenericAddressDynamicResolution::addIsAddrSpaceComparison(Value* pointer, Instruction* insertPoint, const unsigned targetAS)
216-
{
217-
Function* func = insertPoint->getParent()->getParent();
218-
219-
ImplicitArgs implicitArgs = ImplicitArgs(*func, getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils());
220-
Value* windowStartPtr = targetAS == ADDRESS_SPACE_LOCAL
221-
? implicitArgs.getImplicitArg(*func, ImplicitArg::LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS)
222-
: implicitArgs.getImplicitArg(*func, ImplicitArg::PRIVATE_BASE);
223-
224-
Type* intPtrTy = getPointerAsIntType(pointer->getContext(), ADDRESS_SPACE_GENERIC);
225-
226-
// (ptr >= window_start) & [ptr < (window_start + window_size)]
227-
Value* ptrAsInt = PtrToIntInst::Create(Instruction::PtrToInt, pointer, intPtrTy, "", insertPoint);
228-
Value* windowStartAsInt = nullptr;
229-
230-
if (windowStartPtr) {
231-
windowStartAsInt = PtrToIntInst::Create(Instruction::PtrToInt, windowStartPtr, intPtrTy, "", insertPoint);
232-
}
233-
else {
234-
// Kernel might not have implicit argument for ImplicitArg::PRIVATE_BASE
235-
windowStartAsInt = ConstantInt::get(intPtrTy, 0);
236-
}
237-
238-
Value* windowEnd = getAddrSpaceWindowEndAddress(*insertPoint, targetAS);
239-
Value* cmpLowerBound = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGE, ptrAsInt, windowStartAsInt, "CmpWindowLowerBound", insertPoint);
240-
Value* cmpUpperBound = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, ptrAsInt, windowEnd, "CmpWindowUpperBound", insertPoint);
241-
Value* isInWindow = BinaryOperator::CreateAnd(cmpLowerBound, cmpUpperBound, "isPtrInWindow", insertPoint);
242-
243-
return isInWindow;
244-
}
245-
246207
bool GenericAddressDynamicResolution::visitLoadStoreInst(Instruction& I)
247208
{
248209
bool changed = false;
@@ -271,101 +232,99 @@ bool GenericAddressDynamicResolution::visitLoadStoreInst(Instruction& I)
271232
return changed;
272233
}
273234

274-
Value* GenericAddressDynamicResolution::getAddrSpaceWindowEndAddress(Instruction& insertPoint, const unsigned targetAS)
275-
{
276-
Function* pCurrentFunc = insertPoint.getParent()->getParent();
277-
ImplicitArgs implicitArgs = ImplicitArgs(*pCurrentFunc, getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils());
278-
Argument* windowStart = nullptr, * windowSize = nullptr;
279-
280-
if (targetAS == ADDRESS_SPACE_LOCAL) {
281-
windowStart = implicitArgs.getImplicitArg(*pCurrentFunc, ImplicitArg::LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS);
282-
windowSize = implicitArgs.getImplicitArg(*pCurrentFunc, ImplicitArg::LOCAL_MEMORY_STATELESS_WINDOW_SIZE);
283-
}
284-
else if (targetAS == ADDRESS_SPACE_PRIVATE) {
285-
windowStart = implicitArgs.getImplicitArg(*pCurrentFunc, ImplicitArg::PRIVATE_BASE);
286-
windowSize = implicitArgs.getImplicitArg(*pCurrentFunc, ImplicitArg::PRIVATE_MEMORY_STATELESS_SIZE);
287-
}
288-
else
289-
assert(false && "Unknown AddrSpace");
290-
291-
if ((windowStart != nullptr) && (windowSize != nullptr)) {
292-
Type* intPtrTy = getPointerAsIntType(windowStart->getContext(), ADDRESS_SPACE_GENERIC);
293-
Value* windowStartAsInt = PtrToIntInst::Create(Instruction::PtrToInt, windowStart, intPtrTy, "", &insertPoint);
294-
Value* windowSizeAsInt = CastInst::CreateZExtOrBitCast(windowSize, intPtrTy, "", &insertPoint);
295-
Value* windowEnd = BinaryOperator::CreateAdd(windowStartAsInt, windowSizeAsInt, "localWindowEnd", &insertPoint);
296-
return windowEnd;
297-
}
298-
else if ((windowStart == nullptr) && (windowSize != nullptr)) {
299-
// Assume start from 0 if windowStart is not defined.
300-
Type* intPtrTy = getPointerAsIntType(pCurrentFunc->getContext(), ADDRESS_SPACE_GENERIC);
301-
return CastInst::CreateZExtOrBitCast(windowSize, intPtrTy, "", &insertPoint);
302-
}
303-
else
304-
assert(false && "AddrSpace without limit");
305-
306-
return ConstantInt::get(Type::getInt32Ty(insertPoint.getContext()), 0);
307-
}
308-
309235
void GenericAddressDynamicResolution::resolveGAS(Instruction& I, Value* pointerOperand, const unsigned targetAS)
310236
{
311-
Value* isPtrInLocalWindow = addIsAddrSpaceComparison(pointerOperand, &I, targetAS);
312-
PointerType* pointerType = dyn_cast<PointerType>(pointerOperand->getType());
237+
// Every time there is a load/store from/to a GAS pointer, we have to resolve
238+
// its corresponding address space by looking at its tag on bits[61:63].
239+
// First, the GAS' tag is obtained to then perform the load/store
240+
// with the corresponding address space.
241+
313242
IRBuilder<> builder(&I);
243+
PointerType* pointerType = dyn_cast<PointerType>(pointerOperand->getType());
244+
ConstantInt* privateTag = builder.getInt64(1); // tag 001
245+
ConstantInt* localTag = builder.getInt64(2); // tag 010
314246

247+
Type* intPtrTy = getPointerAsIntType(pointerOperand->getContext(), ADDRESS_SPACE_GENERIC);
248+
Value* ptrAsInt = PtrToIntInst::Create(Instruction::PtrToInt, pointerOperand, intPtrTy, "", &I);
249+
// Get actual tag
250+
Value* tag = builder.CreateLShr(ptrAsInt, ConstantInt::get(ptrAsInt->getType(), 61));
251+
252+
// Three cases for private, local and global pointers
315253
BasicBlock* currentBlock = I.getParent();
316254
BasicBlock* convergeBlock = currentBlock->splitBasicBlock(&I);
317-
BasicBlock* localLoadBlock = BasicBlock::Create(I.getContext(), "LocalLoadBlock", convergeBlock->getParent(), convergeBlock);
318-
BasicBlock* nonLocalLoadBlock = BasicBlock::Create(I.getContext(), "GlobalPrivateLoadBlock", convergeBlock->getParent(), convergeBlock);
255+
BasicBlock* privateBlock = BasicBlock::Create(I.getContext(), "PrivateBlock", convergeBlock->getParent(), convergeBlock);
256+
BasicBlock* localBlock = BasicBlock::Create(I.getContext(), "LocalBlock", convergeBlock->getParent(), convergeBlock);
257+
BasicBlock* globalBlock = BasicBlock::Create(I.getContext(), "GlobalBlock", convergeBlock->getParent(), convergeBlock);
319258

320259
Value* localLoad = nullptr;
321-
Value* nonLocalLoad = nullptr;
260+
Value* privateLoad = nullptr;
261+
Value* globalLoad = nullptr;
262+
263+
// Private
264+
{
265+
IRBuilder<> privateBuilder(privateBlock);
266+
PointerType* ptrType = pointerType->getElementType()->getPointerTo(ADDRESS_SPACE_PRIVATE);
267+
Value* privatePtr = privateBuilder.CreateAddrSpaceCast(pointerOperand, ptrType);
322268

323-
// if is_local(ptr)
269+
if (LoadInst* LI = dyn_cast<LoadInst>(&I))
270+
{
271+
privateLoad = privateBuilder.CreateAlignedLoad(privatePtr, LI->getAlignment(), LI->isVolatile(), "privateLoad");
272+
}
273+
else if (StoreInst* SI = dyn_cast<StoreInst>(&I))
274+
{
275+
privateBuilder.CreateAlignedStore(I.getOperand(0), privatePtr, SI->getAlignment(), SI->isVolatile());
276+
}
277+
privateBuilder.CreateBr(convergeBlock);
278+
}
279+
280+
// Local
324281
{
325-
IRBuilder<> localBuilder(localLoadBlock);
326-
PointerType* localPtrType = pointerType->getElementType()->getPointerTo(targetAS);
282+
IRBuilder<> localBuilder(localBlock);
283+
PointerType* localPtrType = pointerType->getElementType()->getPointerTo(ADDRESS_SPACE_LOCAL);
327284
Value* localPtr = localBuilder.CreateAddrSpaceCast(pointerOperand, localPtrType);
328-
if (LoadInst * LI = dyn_cast<LoadInst>(&I)) {
285+
if (LoadInst* LI = dyn_cast<LoadInst>(&I))
286+
{
329287
localLoad = localBuilder.CreateAlignedLoad(localPtr, LI->getAlignment(), LI->isVolatile(), "localLoad");
330288
}
331-
else if (StoreInst * SI = dyn_cast<StoreInst>(&I)) {
289+
else if (StoreInst* SI = dyn_cast<StoreInst>(&I))
290+
{
332291
localBuilder.CreateAlignedStore(I.getOperand(0), localPtr, SI->getAlignment(), SI->isVolatile());
333292
}
334-
else {
335-
// Inst I is a to_local(pointerOperand) call, and we can use localPtr as I's result.
336-
localLoad = localPtr;
337-
}
338293
localBuilder.CreateBr(convergeBlock);
339294
}
340295

341-
// else (is either global or private)
296+
// Global
342297
{
343-
IRBuilder<> nonLocalBuilder(nonLocalLoadBlock);
344-
PointerType* ptrType = pointerType->getElementType()->getPointerTo(ADDRESS_SPACE_GLOBAL_OR_PRIVATE);
345-
Value* nonLocalPtr = nonLocalBuilder.CreateAddrSpaceCast(pointerOperand, ptrType);
298+
IRBuilder<> globalBuilder(globalBlock);
299+
PointerType* ptrType = pointerType->getElementType()->getPointerTo(ADDRESS_SPACE_GLOBAL);
300+
Value* globalPtr = globalBuilder.CreateAddrSpaceCast(pointerOperand, ptrType);
346301

347-
if (LoadInst * LI = dyn_cast<LoadInst>(&I)) {
348-
nonLocalLoad = nonLocalBuilder.CreateAlignedLoad(nonLocalPtr, LI->getAlignment(), LI->isVolatile(), "globalOrPrivateLoad");
349-
}
350-
else if (StoreInst * SI = dyn_cast<StoreInst>(&I)) {
351-
nonLocalBuilder.CreateAlignedStore(I.getOperand(0), nonLocalPtr, SI->getAlignment(), SI->isVolatile());
302+
if (LoadInst* LI = dyn_cast<LoadInst>(&I))
303+
{
304+
globalLoad = globalBuilder.CreateAlignedLoad(globalPtr, LI->getAlignment(), LI->isVolatile(), "globalLoad");
352305
}
353-
else {
354-
// Inst I is a to_local(pointerOperand) call, and we can use null as I's result.
355-
nonLocalLoad = Constant::getNullValue(pointerType->getElementType()->getPointerTo(targetAS));
306+
else if (StoreInst* SI = dyn_cast<StoreInst>(&I))
307+
{
308+
globalBuilder.CreateAlignedStore(I.getOperand(0), globalPtr, SI->getAlignment(), SI->isVolatile());
356309
}
357-
nonLocalBuilder.CreateBr(convergeBlock);
310+
globalBuilder.CreateBr(convergeBlock);
358311
}
359312

360313
currentBlock->getTerminator()->eraseFromParent();
361314
builder.SetInsertPoint(currentBlock);
362-
builder.CreateCondBr(isPtrInLocalWindow, localLoadBlock, nonLocalLoadBlock);
363315

364-
if ((localLoad != nullptr) && (nonLocalLoad != nullptr)) {
316+
SwitchInst* switchTag = builder.CreateSwitch(tag, globalBlock, 2);
317+
// Two explicit cases based on the tag, 001: private and 010: local; any other tag (000/111: global) takes the default branch
318+
switchTag->addCase(privateTag, privateBlock);
319+
switchTag->addCase(localTag, localBlock);
320+
321+
if ((privateLoad != nullptr) && (localLoad != nullptr) && (globalLoad != nullptr))
322+
{
365323
IRBuilder<> phiBuilder(&(*convergeBlock->begin()));
366-
PHINode* phi = phiBuilder.CreatePHI(I.getType(), 2, I.getName());
367-
phi->addIncoming(localLoad, localLoadBlock);
368-
phi->addIncoming(nonLocalLoad, nonLocalLoadBlock);
324+
PHINode* phi = phiBuilder.CreatePHI(I.getType(), 3, I.getName());
325+
phi->addIncoming(privateLoad, privateBlock);
326+
phi->addIncoming(localLoad, localBlock);
327+
phi->addIncoming(globalLoad, globalBlock);
369328
I.replaceAllUsesWith(phi);
370329
}
371330

@@ -375,48 +334,86 @@ void GenericAddressDynamicResolution::resolveGAS(Instruction& I, Value* pointerO
375334
bool GenericAddressDynamicResolution::visitIntrinsicCall(CallInst& I)
376335
{
377336
bool changed = false;
378-
Function* pCurrentFunc = I.getParent()->getParent();
379337
Function* pCalledFunc = I.getCalledFunction();
380-
if (pCalledFunc == nullptr) {
338+
if (pCalledFunc == nullptr)
339+
{
381340
// Indirect call
382341
return false;
383342
}
384343

385-
ImplicitArgs implicitArgs = ImplicitArgs(*pCurrentFunc, getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils());
386344
StringRef funcName = pCalledFunc->getName();
387345

388-
if ((funcName == "__builtin_IB_to_local") || (funcName == "__builtin_IB_to_private")) {
346+
if ((funcName == "__builtin_IB_to_private") || (funcName == "__builtin_IB_to_local")
347+
|| (funcName == "__builtin_IB_to_global"))
348+
{
389349
assert(I.getNumArgOperands() == 1);
390350
Value* arg = I.getArgOperand(0);
351+
PointerType* dstType = dyn_cast<PointerType>(I.getType());
391352
const unsigned targetAS = cast<PointerType>(I.getType())->getAddressSpace();
392353

393-
//
394-
// First to check whether we can simplify trivial cases like addrspacecast from
395-
// global/private to local, or from local to local.
396-
//
397-
if (AddrSpaceCastInst * AI = dyn_cast<AddrSpaceCastInst>(arg)) {
398-
// to_local(__global*) -> null
399-
// to_local(__private*) -> null
400-
PointerType* ptrType = cast<PointerType>(AI->getSrcTy());
401-
if ((ptrType->getAddressSpace() != targetAS) && (ptrType->getAddressSpace() != ADDRESS_SPACE_GENERIC)) {
402-
Constant* np = Constant::getNullValue(I.getType());
403-
I.replaceAllUsesWith(np);
404-
I.eraseFromParent();
405-
changed = true;
406-
}
407-
else if (ptrType->getAddressSpace() == targetAS) {
408-
// to_local(__local*) -> __local*
409-
I.replaceAllUsesWith(AI->getOperand(0));
410-
I.eraseFromParent();
411-
changed = true;
412-
}
354+
IRBuilder<> builder(&I);
355+
PointerType* pointerType = dyn_cast<PointerType>(arg->getType());
356+
ConstantInt* privateTag = builder.getInt64(1); // tag 001
357+
ConstantInt* localTag = builder.getInt64(2); // tag 010
358+
ConstantInt* globalTag = builder.getInt64(0); // tag 000
359+
360+
Type* intPtrTy = getPointerAsIntType(arg->getContext(), ADDRESS_SPACE_GENERIC);
361+
Value* ptrAsInt = PtrToIntInst::Create(Instruction::PtrToInt, arg, intPtrTy, "", &I);
362+
// Get actual tag
363+
Value* tag = builder.CreateLShr(ptrAsInt, ConstantInt::get(ptrAsInt->getType(), 61));
364+
365+
Value* newPtr = nullptr;
366+
Value* newPtrNull = nullptr;
367+
Value* cmpTag = nullptr;
368+
369+
// Tag was already obtained from GAS pointer, now we check its address space (AS)
370+
// and the target AS for this intrinsic call
371+
if (targetAS == ADDRESS_SPACE_PRIVATE)
372+
cmpTag = builder.CreateICmpEQ(tag, privateTag, "cmpTag");
373+
else if (targetAS == ADDRESS_SPACE_LOCAL)
374+
cmpTag = builder.CreateICmpEQ(tag, localTag, "cmpTag");
375+
else if (targetAS == ADDRESS_SPACE_GLOBAL)
376+
cmpTag = builder.CreateICmpEQ(tag, globalTag, "cmpTag");
377+
378+
// Two cases:
379+
// 1: Generic pointer's AS matches the intrinsic's target AS
380+
// So we create the address space cast
381+
// 2: Generic pointer's AS does not match the intrinsic's target AS
382+
// So the intrinsic call returns NULL
383+
BasicBlock* currentBlock = I.getParent();
384+
BasicBlock* convergeBlock = currentBlock->splitBasicBlock(&I);
385+
BasicBlock* ifBlock = BasicBlock::Create(I.getContext(), "IfBlock",
386+
convergeBlock->getParent(), convergeBlock);
387+
BasicBlock* elseBlock = BasicBlock::Create(I.getContext(), "ElseBlock",
388+
convergeBlock->getParent(), convergeBlock);
389+
390+
// If Block
391+
{
392+
IRBuilder<> ifBuilder(ifBlock);
393+
PointerType* ptrType = pointerType->getElementType()->getPointerTo(targetAS);
394+
newPtr = ifBuilder.CreateAddrSpaceCast(arg, ptrType);
395+
ifBuilder.CreateBr(convergeBlock);
413396
}
414397

415-
// Add runtime check to resolve GAS for non-trivial cases.
416-
if (!changed) {
417-
resolveGAS(I, arg, targetAS);
418-
changed = true;
398+
// Else Block
399+
{
400+
IRBuilder<> elseBuilder(elseBlock);
401+
Value* ptrNull = Constant::getNullValue(I.getType());
402+
newPtrNull = elseBuilder.CreatePointerCast(ptrNull, dstType, "");
403+
elseBuilder.CreateBr(convergeBlock);
419404
}
405+
406+
currentBlock->getTerminator()->eraseFromParent();
407+
builder.SetInsertPoint(currentBlock);
408+
builder.CreateCondBr(cmpTag, ifBlock, elseBlock);
409+
410+
IRBuilder<> phiBuilder(&(*convergeBlock->begin()));
411+
PHINode* phi = phiBuilder.CreatePHI(I.getType(), 2, I.getName());
412+
phi->addIncoming(newPtr, ifBlock);
413+
phi->addIncoming(newPtrNull, elseBlock);
414+
I.replaceAllUsesWith(phi);
415+
I.eraseFromParent();
416+
changed = true;
420417
}
421418

422419
return changed;

0 commit comments

Comments
 (0)