Skip to content

Commit 64da687

Browse files
aratajew, igcbot
authored and committed
Move bindless promotion to StatelessToStateful pass
The logic implemented in the `PromoteStatelessToBindless` pass is deficient and cannot handle all cases properly. For example, if a pointer is shared between a raw load and an atomic, the pointer gets changed to bindless, thereby breaking the atomic, which needs an A64 pointer. The logic implemented in the `StatelessToStateful` pass can handle all these cases properly, so it seems reasonable to reuse it for bindless promotion.
1 parent 4b63a66 commit 64da687

16 files changed

+1016
-66
lines changed

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -735,19 +735,22 @@ void AddLegalizationPasses(CodeGenContext& ctx, IGCPassManager& mpm, PSSignature
735735
}
736736

737737
if (ctx.type == ShaderType::OPENCL_SHADER &&
738-
static_cast<OpenCLProgramContext&>(ctx).
739-
m_InternalOptions.PromoteStatelessToBindless &&
740-
(static_cast<OpenCLProgramContext&>(ctx).
741-
m_InternalOptions.UseBindlessLegacyMode ||
742-
!ctx.getModuleMetaData()->compOpt.GreaterThan4GBBufferRequired)
743-
)
738+
static_cast<OpenCLProgramContext&>(ctx).m_InternalOptions.PromoteStatelessToBindless)
744739
{
745-
mpm.add(new PromoteStatelessToBindless());
740+
if (static_cast<OpenCLProgramContext&>(ctx).m_InternalOptions.UseBindlessLegacyMode)
741+
{
742+
mpm.add(new PromoteStatelessToBindless());
743+
}
744+
else if (!ctx.getModuleMetaData()->compOpt.GreaterThan4GBBufferRequired)
745+
{
746+
// Advanced bindless mode used by the regular OpenCL compilation path
747+
mpm.add(new StatelessToStateful(TargetAddressing::BINDLESS));
748+
}
746749
}
747750

748751
if (!isOptDisabled && useStatelessToStateful(ctx))
749752
{
750-
mpm.add(new StatelessToStateful());
753+
mpm.add(new StatelessToStateful(TargetAddressing::BINDFUL));
751754
}
752755

753756
// Light cleanup for subroutines after cloning. Note that the constant

IGC/Compiler/Optimizer/OpenCLPasses/StatelessToStateful/StatelessToStateful.cpp

Lines changed: 135 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,14 @@ IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)
3838
IGC_INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
3939
IGC_INITIALIZE_PASS_END(StatelessToStateful, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
4040

41+
static cl::opt<TargetAddressing> targetAddressingMode(
42+
"target-addressing-mode", cl::init(TargetAddressing::BINDFUL), cl::Hidden,
43+
cl::values(
44+
clEnumValN(TargetAddressing::BINDFUL, "bindful", "Set bindful as target addressing mode"),
45+
clEnumValN(TargetAddressing::BINDLESS, "bindless", "Set bindless as target addressing mode")
46+
),
47+
cl::desc("Set target addressing for stateful promotion"));
48+
4149
// This pass turns a global/constant address space (stateless) load/store into a stateful load/store.
4250
//
4351
// The conservative approach is to search for any directly positively-indexed kernel argument, such as:
@@ -129,15 +137,11 @@ IGC_INITIALIZE_PASS_END(StatelessToStateful, PASS_FLAG, PASS_DESCRIPTION, PASS_C
129137

130138
char StatelessToStateful::ID = 0;
131139

132-
StatelessToStateful::StatelessToStateful()
140+
StatelessToStateful::StatelessToStateful() : FunctionPass(ID), m_targetAddressing(targetAddressingMode) {}
141+
142+
StatelessToStateful::StatelessToStateful(TargetAddressing addressing)
133143
: FunctionPass(ID),
134-
m_hasBufferOffsetArg(false),
135-
m_hasOptionalBufferOffsetArg(false),
136-
m_hasPositivePointerOffset(false),
137-
m_ACT(nullptr),
138-
m_pImplicitArgs(nullptr),
139-
m_pKernelArgs(nullptr),
140-
m_changed(false)
144+
m_targetAddressing(addressing)
141145
{
142146
initializeStatelessToStatefulPass(*PassRegistry::getPassRegistry());
143147
}
@@ -538,7 +542,7 @@ bool StatelessToStateful::isUntypedAtomic(const GenISAIntrinsic::ID intrinID)
538542
intrinID == GenISAIntrinsic::GenISA_fcmpxchgatomicrawA64);
539543
}
540544

541-
unsigned StatelessToStateful::encodeStatefulAddrspace(unsigned uavIndex)
545+
unsigned StatelessToStateful::encodeBindfulAddrspace(unsigned uavIndex)
542546
{
543547
auto int32Ty = Type::getInt32Ty(m_Module->getContext());
544548
auto resourceNumber = ConstantInt::get(int32Ty, uavIndex);
@@ -555,8 +559,37 @@ void StatelessToStateful::promoteIntrinsic(InstructionInfo& II)
555559
Module* M = m_F->getParent();
556560
const DebugLoc& DL = I->getDebugLoc();
557561
GenISAIntrinsic::ID const intrinID = I->getIntrinsicID();
558-
559562
PointerType* pTy = PointerType::get(IGCLLVM::getNonOpaquePtrEltTy(II.ptr->getType()), II.getStatefulAddrSpace());
563+
564+
if (m_targetAddressing == TargetAddressing::BINDLESS)
565+
{
566+
Argument* srcOffset = m_pImplicitArgs->getNumberedImplicitArg(*m_F, ImplicitArg::BINDLESS_OFFSET, II.getBaseArgIndex());
567+
auto newBasePtr = IntToPtrInst::Create(Instruction::IntToPtr, srcOffset, pTy, "", I);
568+
if (intrinID == GenISAIntrinsic::GenISA_simdBlockRead)
569+
{
570+
Function* newBlockReadFunc = GenISAIntrinsic::getDeclaration(M,
571+
GenISAIntrinsic::GenISA_simdBlockReadBindless,
572+
{ I->getType(), newBasePtr->getType(), Type::getInt32Ty(M->getContext())});
573+
Instruction* newBlockRead = CallInst::Create(newBlockReadFunc, { newBasePtr, II.offset }, "", I);
574+
newBlockRead->setDebugLoc(I->getDebugLoc());
575+
I->replaceAllUsesWith(newBlockRead);
576+
I->eraseFromParent();
577+
}
578+
else if (intrinID == GenISAIntrinsic::GenISA_simdBlockWrite)
579+
{
580+
Function* newBlockWriteFunc = GenISAIntrinsic::getDeclaration(M,
581+
GenISAIntrinsic::GenISA_simdBlockWriteBindless,
582+
{ newBasePtr->getType(), I->getOperand(1)->getType(), Type::getInt32Ty(M->getContext())});
583+
Instruction* newBlockWrite = CallInst::Create(newBlockWriteFunc, { newBasePtr, I->getOperand(1), II.offset }, "", I);
584+
newBlockWrite->setDebugLoc(I->getDebugLoc());
585+
I->replaceAllUsesWith(newBlockWrite);
586+
I->eraseFromParent();
587+
}
588+
return;
589+
}
590+
591+
IGC_ASSERT(m_targetAddressing == TargetAddressing::BINDFUL);
592+
560593
Instruction* statefulPtr = IntToPtrInst::Create(Instruction::IntToPtr, II.offset, pTy, "", I);
561594
Instruction* statefulInst = nullptr;
562595

@@ -614,31 +647,51 @@ void StatelessToStateful::promoteLoad(InstructionInfo& II)
614647
LoadInst* I = cast<LoadInst>(II.statelessInst);
615648
PointerType* pTy = PointerType::get(I->getType(), II.getStatefulAddrSpace());
616649

617-
Instruction* statefulPtr = IntToPtrInst::Create(Instruction::IntToPtr, II.offset, pTy, "", I);
618-
Instruction* pLoad = new LoadInst(
619-
IGCLLVM::getNonOpaquePtrEltTy(statefulPtr->getType()),
620-
statefulPtr,
621-
"",
622-
I->isVolatile(),
623-
IGCLLVM::getAlign(*I), I->getOrdering(), I->getSyncScopeID(),
624-
I);
625-
626650
const DebugLoc& DL = I->getDebugLoc();
627-
statefulPtr->setDebugLoc(DL);
628-
pLoad->setDebugLoc(DL);
629651

630-
Value* ptr = I->getPointerOperand();
631-
PointerType* ptrType = dyn_cast<PointerType>(ptr->getType());
632-
if (ptrType && ptrType->getAddressSpace() == ADDRESS_SPACE_CONSTANT)
652+
if (m_targetAddressing == TargetAddressing::BINDLESS)
633653
{
634-
LLVMContext& context = I->getContext();
635-
MDString* const metadataName = MDString::get(context, "invariant.load");
636-
MDNode* node = MDNode::get(context, metadataName);
637-
pLoad->setMetadata(LLVMContext::MD_invariant_load, node);
654+
Argument* srcOffset = m_pImplicitArgs->getNumberedImplicitArg(*m_F, ImplicitArg::BINDLESS_OFFSET, II.getBaseArgIndex());
655+
auto newBasePtr = IntToPtrInst::Create(Instruction::IntToPtr, srcOffset, pTy, "", I);
656+
auto bindlessLoad = IGC::CreateLoadRawIntrinsic(I, cast<Instruction>(newBasePtr), II.offset);
657+
658+
newBasePtr->setDebugLoc(DL);
659+
bindlessLoad->setDebugLoc(DL);
660+
661+
I->replaceAllUsesWith(bindlessLoad);
662+
I->eraseFromParent();
638663
}
664+
else if (m_targetAddressing == TargetAddressing::BINDFUL)
665+
{
666+
auto newBasePtr = IntToPtrInst::Create(Instruction::IntToPtr, II.offset, pTy, "", I);
667+
auto bindfulLoad = new LoadInst(
668+
IGCLLVM::getNonOpaquePtrEltTy(newBasePtr->getType()),
669+
newBasePtr,
670+
"",
671+
I->isVolatile(),
672+
IGCLLVM::getAlign(*I), I->getOrdering(), I->getSyncScopeID(),
673+
I);
674+
675+
newBasePtr->setDebugLoc(DL);
676+
bindfulLoad->setDebugLoc(DL);
677+
678+
Value* ptr = I->getPointerOperand();
679+
PointerType* ptrType = dyn_cast<PointerType>(ptr->getType());
680+
if (ptrType && ptrType->getAddressSpace() == ADDRESS_SPACE_CONSTANT)
681+
{
682+
LLVMContext& context = I->getContext();
683+
MDString* const metadataName = MDString::get(context, "invariant.load");
684+
MDNode* node = MDNode::get(context, metadataName);
685+
bindfulLoad->setMetadata(LLVMContext::MD_invariant_load, node);
686+
}
639687

640-
I->replaceAllUsesWith(pLoad);
641-
I->eraseFromParent();
688+
I->replaceAllUsesWith(bindfulLoad);
689+
I->eraseFromParent();
690+
}
691+
else
692+
{
693+
IGC_ASSERT_MESSAGE(false, "Unsupported addressing!");
694+
}
642695
}
643696

644697
void StatelessToStateful::promoteStore(InstructionInfo& II)
@@ -648,19 +701,38 @@ void StatelessToStateful::promoteStore(InstructionInfo& II)
648701
Value* dataVal = I->getValueOperand();
649702
PointerType* pTy = PointerType::get(dataVal->getType(), II.getStatefulAddrSpace());
650703

651-
Instruction* statefulPtr = IntToPtrInst::Create(Instruction::IntToPtr, II.offset, pTy, "", I);
652-
Instruction* pStore = new StoreInst(
653-
dataVal,
654-
statefulPtr,
655-
I->isVolatile(),
656-
IGCLLVM::getAlign(*I), I->getOrdering(), I->getSyncScopeID(),
657-
I);
658-
659704
const DebugLoc& DL = I->getDebugLoc();
660-
statefulPtr->setDebugLoc(DL);
661-
pStore->setDebugLoc(DL);
662705

663-
I->eraseFromParent();
706+
if (m_targetAddressing == TargetAddressing::BINDLESS)
707+
{
708+
Argument* srcOffset = m_pImplicitArgs->getNumberedImplicitArg(*m_F, ImplicitArg::BINDLESS_OFFSET, II.getBaseArgIndex());
709+
auto newBasePtr = IntToPtrInst::Create(Instruction::IntToPtr, srcOffset, pTy, "", I);
710+
auto bindlessStore = IGC::CreateStoreRawIntrinsic(I, cast<Instruction>(newBasePtr), II.offset);
711+
712+
newBasePtr->setDebugLoc(DL);
713+
bindlessStore->setDebugLoc(DL);
714+
715+
I->eraseFromParent();
716+
}
717+
else if (m_targetAddressing == TargetAddressing::BINDFUL)
718+
{
719+
auto newBasePtr = IntToPtrInst::Create(Instruction::IntToPtr, II.offset, pTy, "", I);
720+
auto bindfulStore = new StoreInst(
721+
dataVal,
722+
newBasePtr,
723+
I->isVolatile(),
724+
IGCLLVM::getAlign(*I), I->getOrdering(), I->getSyncScopeID(),
725+
I);
726+
727+
newBasePtr->setDebugLoc(DL);
728+
bindfulStore->setDebugLoc(DL);
729+
730+
I->eraseFromParent();
731+
}
732+
else
733+
{
734+
IGC_ASSERT_MESSAGE(false, "Unsupported addressing!");
735+
}
664736
}
665737

666738
void StatelessToStateful::promoteInstruction(StatelessToStateful::InstructionInfo& InstInfo)
@@ -696,21 +768,34 @@ void StatelessToStateful::promote()
696768
{
697769
IGC_ASSERT(bufferPos < maxPromotionCount);
698770

699-
ArgAllocMD* argAlloc = &resAllocMD->argAllocMDList[baseArgIndex];
700-
701-
// If the support for dynamic BTIs allocation is disabled, then BTIs are pre-assigned
702-
// in ResourceAllocator pass for all resources independently whether they are
703-
// accessed through stateful addressing model or not.
704-
if (ctx->platform.supportDynamicBTIsAllocation())
771+
unsigned statefullAddrspace = 0;
772+
if (m_targetAddressing == TargetAddressing::BINDLESS)
705773
{
706-
argAlloc->type = ResourceTypeEnum::UAVResourceType;
707-
argAlloc->indexType = resAllocMD->uavsNumType + bufferPos;
774+
statefullAddrspace =
775+
IGC::EncodeAS4GFXResource(
776+
*UndefValue::get(Type::getInt32Ty(m_Module->getContext())),
777+
IGC::BINDLESS);
778+
}
779+
else
780+
{
781+
ArgAllocMD* argAlloc = &resAllocMD->argAllocMDList[baseArgIndex];
782+
783+
// If the support for dynamic BTIs allocation is disabled, then BTIs are pre-assigned
784+
// in ResourceAllocator pass for all resources independently whether they are
785+
// accessed through stateful addressing model or not.
786+
if (ctx->platform.supportDynamicBTIsAllocation())
787+
{
788+
argAlloc->type = ResourceTypeEnum::UAVResourceType;
789+
argAlloc->indexType = resAllocMD->uavsNumType + bufferPos;
790+
}
791+
792+
statefullAddrspace = encodeBindfulAddrspace(argAlloc->indexType);
708793
}
709794

710-
unsigned statefullAddrspace = encodeStatefulAddrspace(argAlloc->indexType);
711795
for (auto &instInfo : instsToPromote)
712796
{
713797
instInfo.setStatefulAddrspace(statefullAddrspace);
798+
instInfo.setBaseArgIndex(baseArgIndex);
714799
promoteInstruction(instInfo);
715800
}
716801
bufferPos++;

IGC/Compiler/Optimizer/OpenCLPasses/StatelessToStateful/StatelessToStateful.hpp

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@ namespace IGC
2727
// performance. Simply disable stateful promotion after 32 args.
2828
constexpr uint maxPromotionCount = 32;
2929

30+
enum class TargetAddressing {
31+
BINDFUL,
32+
BINDLESS
33+
};
34+
3035
class StatelessToStateful : public llvm::FunctionPass, public llvm::InstVisitor<StatelessToStateful>
3136
{
3237
public:
@@ -35,6 +40,7 @@ namespace IGC
3540
static char ID;
3641

3742
StatelessToStateful();
43+
StatelessToStateful(TargetAddressing addressing);
3844

3945
~StatelessToStateful() {}
4046

@@ -68,10 +74,13 @@ namespace IGC
6874
IGC_ASSERT(statefulAddrSpace);
6975
return *statefulAddrSpace;
7076
}
77+
void setBaseArgIndex(unsigned index) { baseArgIndex = index; }
78+
unsigned getBaseArgIndex() { return baseArgIndex; }
7179
llvm::Instruction* const statelessInst;
7280
llvm::Value* const ptr;
7381
llvm::Value* const offset;
7482
private:
83+
unsigned baseArgIndex;
7584
std::optional<unsigned> statefulAddrSpace;
7685
};
7786

@@ -116,7 +125,7 @@ namespace IGC
116125
// separate indices space. So if there is a read_only image and global buffer in the kernel,
117126
// they will both have `0` encoded in addrspace. The actual BTI will be computed based
118127
// on BTLayout in EmitVISAPass.
119-
unsigned encodeStatefulAddrspace(unsigned uavIndex);
128+
unsigned encodeBindfulAddrspace(unsigned uavIndex);
120129

121130
void updateArgInfo(const KernelArg* KA, bool IsPositive);
122131
void finalizeArgInitialValue(llvm::Function* F);
@@ -146,32 +155,33 @@ namespace IGC
146155
}
147156

148157
// When true, runtime can generate surface with buffer's original base (creation base)
149-
bool m_hasBufferOffsetArg;
158+
bool m_hasBufferOffsetArg = false;
150159

151160
// When m_hasBufferOffsetArg is true, optional buffer offset
152161
// can be on or off, which is indicated by this boolean flag.
153-
bool m_hasOptionalBufferOffsetArg;
162+
bool m_hasOptionalBufferOffsetArg = false;
154163

155164
// When true, every message of the form ptrArg + offset will have offset >= 0.
156-
bool m_hasPositivePointerOffset;
165+
bool m_hasPositivePointerOffset = false;
157166

158167
// Handle non-gep pointer
159168
// For historic reason (probably non-DW aligned arg), non-gep ptr isn't handled.
160169
// If this field is true, non-gep ptr shall be handled.
161170
const bool m_supportNonGEPPtr = false;
162171

163-
llvm::AssumptionCacheTracker* m_ACT;
172+
llvm::AssumptionCacheTracker* m_ACT = nullptr;
164173
llvm::AssumptionCache* getAC(llvm::Function* F)
165174
{
166175
return (m_ACT != nullptr ? &m_ACT->getAssumptionCache(*F)
167176
: nullptr);
168177
}
169178

179+
TargetAddressing m_targetAddressing;
170180
OpenCLProgramContext* m_ctx;
171-
ImplicitArgs* m_pImplicitArgs;
172-
KernelArgs* m_pKernelArgs;
181+
ImplicitArgs* m_pImplicitArgs = nullptr;
182+
KernelArgs* m_pKernelArgs = nullptr;
173183
ArgInfoMap m_argsInfo;
174-
bool m_changed;
184+
bool m_changed = false;
175185
llvm::Function* m_F;
176186
llvm::Module* m_Module;
177187

0 commit comments

Comments
 (0)