Skip to content

Commit cc2ea2a

Browse files
dguzhaev authored and igcbot committed
Add a flag to specify path and prefix for DebugInfo dumps
* Added a flag to specify the path and prefix for DebugInfo dumps in a single option, ignoring the PID, binary and hash in the produced filename (for LIT testing): {specified path and prefix}[usual kernel name w/o hash]
1 parent 90e1a55 commit cc2ea2a

File tree

7 files changed

+137
-85
lines changed

7 files changed

+137
-85
lines changed

IGC/Compiler/CISACodeGen/DebugInfo.cpp

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -298,7 +298,14 @@ static void debugDump(const CShader* Shader, llvm::StringRef Ext,
298298
return;
299299

300300
auto ExtStr = Ext.str();
301-
std::string DumpName = IGC::Debug::GetDumpName(Shader, ExtStr.c_str());
301+
IGC::Debug::DumpName DumpNameObj = IGC::Debug::GetDumpNameObj(Shader, ExtStr.c_str());
302+
std::string DumpName = DumpNameObj.str();
303+
if (IGC_IS_FLAG_ENABLED(DebugDumpNamePrefix))
304+
{
305+
auto hash = ShaderHash();
306+
DumpNameObj = DumpNameObj.Hash(hash);
307+
DumpName = DumpNameObj.AbsolutePath(IGC_GET_REGKEYSTRING(DebugDumpNamePrefix));
308+
}
302309
FILE* const DumpFile = fopen(DumpName.c_str(), "wb+");
303310
if (nullptr == DumpFile)
304311
return;

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2761,7 +2761,9 @@ namespace IGC
27612761
prevOffset = offset;
27622762

27632763
// skip unused arguments
2764-
bool IsUnusedArg = (arg.getArgType() == KernelArg::ArgType::IMPLICIT_BUFFER_OFFSET) &&
2764+
bool IsUnusedArg =
2765+
(arg.getArgType() == KernelArg::ArgType::IMPLICIT_BUFFER_OFFSET ||
2766+
arg.getArgType() == KernelArg::ArgType::IMPLICIT_BINDLESS_OFFSET) &&
27652767
arg.getArg()->use_empty();
27662768

27672769
// Runtime Values should not be processed any further. No annotations shall be created for them.

IGC/Compiler/PromoteStatelessToBindless.cpp

Lines changed: 108 additions & 80 deletions
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,8 @@ bool PromoteStatelessToBindless::runOnFunction(Function& F)
5151
if (HasStackCall)
5252
return false;
5353

54-
m_AccessToSrcPtrMap.clear();
55-
m_AddressUsedSrcPtrMap.clear();
54+
m_SrcPtrNeedStatelessAccess.clear();
55+
m_SrcPtrToAccessMap.clear();
5656
if (!ClContext->m_InternalOptions.UseBindlessPrintf)
5757
{
5858
CheckPrintfBuffer(F);
@@ -83,6 +83,8 @@ void PromoteStatelessToBindless::CheckPrintfBuffer(Function& F)
8383

8484
void PromoteStatelessToBindless::GetAccessInstToSrcPointerMap(Instruction* inst, Value* resourcePtr)
8585
{
86+
bool canPromoteAccess = true;
87+
auto modMD = getAnalysis<MetaDataUtilsWrapper>().getModuleMetaData();
8688
unsigned addrSpace = resourcePtr->getType()->getPointerAddressSpace();
8789

8890
if (addrSpace != ADDRESS_SPACE_GLOBAL && addrSpace != ADDRESS_SPACE_CONSTANT)
@@ -103,14 +105,16 @@ void PromoteStatelessToBindless::GetAccessInstToSrcPointerMap(Instruction* inst,
103105
break;
104106
case GenISAIntrinsic::GenISA_intatomicrawA64:
105107
// Ignore a buffer in this intrinsic, keep it stateless.
106-
return;
108+
canPromoteAccess = false;
109+
break;
107110
default:
108111
IGC_ASSERT_MESSAGE(0, "Unsupported Instruction");
109-
return;
112+
canPromoteAccess = false;
113+
break;
110114
}
111115
}
112116
else
113-
return;
117+
canPromoteAccess = false;
114118
}
115119

116120
std::vector<Value*> tempList;
@@ -130,22 +134,36 @@ void PromoteStatelessToBindless::GetAccessInstToSrcPointerMap(Instruction* inst,
130134
// printf buffer address (through atomic add), see printf implementation in
131135
// OpenCLPrintfResolution.cpp. Currently keep printf implementation as stateless and
132136
// thus skip printf buffer for now.
133-
return;
137+
canPromoteAccess = false;
134138
}
135139

136-
m_promotedArgs.insert(cast<Argument>(srcPtr)->getArgNo());
137-
138-
// Save the instruction, which makes access (load/store/intrinsic) to the buffer
139-
m_AccessToSrcPtrMap[inst] = srcPtr;
140-
// Save the instruction, which generate an address of the buffer. This is the
141-
// instruction right before the last one. The last one has to be the buffer itself.
142-
if (tempList.size() > 1)
140+
if (modMD->compOpt.UseLegacyBindlessMode)
143141
{
144-
m_AddressUsedSrcPtrMap[tempList[tempList.size()-2]] = srcPtr;
142+
if (!canPromoteAccess)
143+
{
144+
// In this case, the srcPtr is traced to a kernel arg, but the access instruction does not support
145+
// bindless access, so we have to make all access stateless.
146+
// Remove all access instructions of this srcPtr that may have been added in previous passes, to
147+
// prevent promoting it to bindless.
148+
m_SrcPtrNeedStatelessAccess.insert(srcPtr);
149+
m_SrcPtrToAccessMap.erase(srcPtr);
150+
return;
151+
}
152+
else if (m_SrcPtrNeedStatelessAccess.count(srcPtr) != 0)
153+
{
154+
return;
155+
}
145156
}
146-
else
157+
158+
if (canPromoteAccess)
147159
{
148-
m_AddressUsedSrcPtrMap[inst] = srcPtr;
160+
// Save the instruction, which makes access (load/store/intrinsic) to the buffer
161+
Value* accessInst = inst;
162+
// Save the instruction, which generate an address of the buffer. This is the
163+
// instruction right before the last one. The last one has to be the buffer itself.
164+
Value* addrUsedInst = (tempList.size() > 1) ? tempList[tempList.size() - 2] : inst;
165+
166+
m_SrcPtrToAccessMap[srcPtr].push_back(std::make_pair(accessInst, addrUsedInst));
149167
}
150168
}
151169

@@ -164,85 +182,95 @@ void PromoteStatelessToBindless::PromoteStatelessToBindlessBuffers(Function& F)
164182

165183
bool supportDynamicBTIsAllocation = ctx->platform.supportDynamicBTIsAllocation();
166184

167-
// Modify the reference to the buffer not through all users but only in instructions
168-
// which are used in accesing (load/store) the buffer.
169-
for (auto inst : m_AddressUsedSrcPtrMap)
185+
for (auto iter : m_SrcPtrToAccessMap)
170186
{
171-
Instruction* accessInst = cast<Instruction>(inst.first);
172-
Argument* srcPtr = cast<Argument>(inst.second);
173-
174-
Value* nullSrcPtr = ConstantPointerNull::get(cast<PointerType>(srcPtr->getType()));
175-
accessInst->replaceUsesOfWith(srcPtr, nullSrcPtr);
187+
Argument* srcPtr = cast<Argument>(iter.first);
176188

177189
ArgAllocMD* argInfo = &resourceAlloc->argAllocMDList[srcPtr->getArgNo()];
178190
IGC_ASSERT_MESSAGE((size_t)srcPtr->getArgNo() < resourceAlloc->argAllocMDList.size(), "ArgAllocMD List Out of Bounds");
179-
// Update metadata to show bindless resource type
180-
argInfo->type = ResourceTypeEnum::BindlessUAVResourceType;
191+
192+
if (modMD->compOpt.UseLegacyBindlessMode)
193+
{
194+
// Update metadata to show bindless resource type.
195+
// Do this only for legacy mode, since the resource type of the original
196+
// kernel arg needs to be bindless for it to be reinterpreted as a bindless offset.
197+
// In advanced mode, always keep the original kernel arg as stateless, and use the
198+
// IMPLICIT_BUFFER_OFFSET arg for bindless access.
199+
argInfo->type = ResourceTypeEnum::BindlessUAVResourceType;
200+
}
201+
181202
if (supportDynamicBTIsAllocation)
182203
{
183204
argInfo->indexType =
184205
resourceAlloc->uavsNumType +
185-
(unsigned)std::distance(m_promotedArgs.begin(), m_promotedArgs.find(srcPtr->getArgNo()));
206+
(unsigned)std::distance(m_SrcPtrToAccessMap.begin(), m_SrcPtrToAccessMap.find(srcPtr));
186207
}
187-
}
188-
189-
if(supportDynamicBTIsAllocation)
190-
resourceAlloc->uavsNumType += m_promotedArgs.size();
191208

192-
for (auto inst : m_AccessToSrcPtrMap)
193-
{
194-
Instruction* accessInst = cast<Instruction>(inst.first);
195-
Argument* srcPtr = cast<Argument>(inst.second);
196-
197-
// Get the base bindless pointer
198-
IGCIRBuilder<> builder(accessInst);
199-
Value* resourcePtr = IGC::GetBufferOperand(accessInst);
200-
IGC_ASSERT(resourcePtr);
201-
unsigned bindlessAS = IGC::EncodeAS4GFXResource(*UndefValue::get(builder.getInt32Ty()), IGC::BINDLESS);
202-
PointerType* basePointerType = PointerType::get(IGCLLVM::getNonOpaquePtrEltTy(resourcePtr->getType()), bindlessAS);
203-
Value* bufferOffset = builder.CreatePtrToInt(resourcePtr, builder.getInt32Ty());
204-
205-
Value* basePointer = nullptr;
206-
if (!modMD->compOpt.UseLegacyBindlessMode) {
207-
Argument * srcOffset = implicitArgs.getNumberedImplicitArg(F, ImplicitArg::BINDLESS_OFFSET, srcPtr->getArgNo());
208-
basePointer = builder.CreateIntToPtr(srcOffset, basePointerType);
209-
} else {
210-
basePointer = builder.CreatePointerCast(srcPtr, basePointerType);
211-
}
212-
213-
if (LoadInst * load = dyn_cast<LoadInst>(accessInst))
214-
{
215-
Value* ldraw = IGC::CreateLoadRawIntrinsic(load, cast<Instruction>(basePointer), bufferOffset);
216-
load->replaceAllUsesWith(ldraw);
217-
load->eraseFromParent();
218-
}
219-
else if (StoreInst * store = dyn_cast<StoreInst>(accessInst))
209+
// Loop through all access instructions for srcPtr
210+
for (auto insts : iter.second)
220211
{
221-
IGC::CreateStoreRawIntrinsic(store, cast<Instruction>(basePointer), bufferOffset);
222-
store->eraseFromParent();
223-
}
224-
else if (GenIntrinsicInst * pIntr = dyn_cast<GenIntrinsicInst>(accessInst))
225-
{
226-
if (pIntr->getIntrinsicID() == GenISAIntrinsic::GenISA_simdBlockRead)
212+
Instruction* accessInst = cast<Instruction>(insts.first);
213+
Instruction* addrUsedInst = cast<Instruction>(insts.second);
214+
215+
// Modify the reference to the buffer not through all users but only in instructions
216+
// which are used in accesing (load/store) the buffer.
217+
Value* nullSrcPtr = ConstantPointerNull::get(cast<PointerType>(srcPtr->getType()));
218+
addrUsedInst->replaceUsesOfWith(srcPtr, nullSrcPtr);
219+
220+
// Get the base bindless pointer
221+
IGCIRBuilder<> builder(accessInst);
222+
Value* resourcePtr = IGC::GetBufferOperand(accessInst);
223+
IGC_ASSERT(resourcePtr);
224+
unsigned bindlessAS = IGC::EncodeAS4GFXResource(*UndefValue::get(builder.getInt32Ty()), IGC::BINDLESS);
225+
PointerType* basePointerType = PointerType::get(IGCLLVM::getNonOpaquePtrEltTy(resourcePtr->getType()), bindlessAS);
226+
Value* bufferOffset = builder.CreatePtrToInt(resourcePtr, builder.getInt32Ty());
227+
228+
Value* basePointer = nullptr;
229+
if (!modMD->compOpt.UseLegacyBindlessMode) {
230+
Argument* srcOffset = implicitArgs.getNumberedImplicitArg(F, ImplicitArg::BINDLESS_OFFSET, srcPtr->getArgNo());
231+
basePointer = builder.CreateIntToPtr(srcOffset, basePointerType);
232+
}
233+
else {
234+
basePointer = builder.CreatePointerCast(srcPtr, basePointerType);
235+
}
236+
237+
if (LoadInst* load = dyn_cast<LoadInst>(accessInst))
227238
{
228-
Function* newBlockReadFunc = GenISAIntrinsic::getDeclaration(F.getParent(),
229-
GenISAIntrinsic::GenISA_simdBlockReadBindless,
230-
{ accessInst->getType(), basePointer->getType(),Type::getInt32Ty(accessInst->getContext()) });
231-
Instruction* newBlockRead = CallInst::Create(newBlockReadFunc, { basePointer, bufferOffset }, "", accessInst);
232-
newBlockRead->setDebugLoc(pIntr->getDebugLoc());
233-
accessInst->replaceAllUsesWith(newBlockRead);
234-
accessInst->eraseFromParent();
239+
Value* ldraw = IGC::CreateLoadRawIntrinsic(load, cast<Instruction>(basePointer), bufferOffset);
240+
load->replaceAllUsesWith(ldraw);
241+
load->eraseFromParent();
235242
}
236-
else if (pIntr->getIntrinsicID() == GenISAIntrinsic::GenISA_simdBlockWrite)
243+
else if (StoreInst* store = dyn_cast<StoreInst>(accessInst))
237244
{
238-
Function* newBlockWriteFunc = GenISAIntrinsic::getDeclaration(F.getParent(),
239-
GenISAIntrinsic::GenISA_simdBlockWriteBindless,
240-
{ basePointer->getType(), pIntr->getOperand(1)->getType(), Type::getInt32Ty(accessInst->getContext()) });
241-
Instruction* newBlockWrite = CallInst::Create(newBlockWriteFunc, { basePointer, pIntr->getOperand(1), bufferOffset }, "", accessInst);
242-
newBlockWrite->setDebugLoc(pIntr->getDebugLoc());
243-
accessInst->replaceAllUsesWith(newBlockWrite);
244-
accessInst->eraseFromParent();
245+
IGC::CreateStoreRawIntrinsic(store, cast<Instruction>(basePointer), bufferOffset);
246+
store->eraseFromParent();
247+
}
248+
else if (GenIntrinsicInst* pIntr = dyn_cast<GenIntrinsicInst>(accessInst))
249+
{
250+
if (pIntr->getIntrinsicID() == GenISAIntrinsic::GenISA_simdBlockRead)
251+
{
252+
Function* newBlockReadFunc = GenISAIntrinsic::getDeclaration(F.getParent(),
253+
GenISAIntrinsic::GenISA_simdBlockReadBindless,
254+
{ accessInst->getType(), basePointer->getType(),Type::getInt32Ty(accessInst->getContext()) });
255+
Instruction* newBlockRead = CallInst::Create(newBlockReadFunc, { basePointer, bufferOffset }, "", accessInst);
256+
newBlockRead->setDebugLoc(pIntr->getDebugLoc());
257+
accessInst->replaceAllUsesWith(newBlockRead);
258+
accessInst->eraseFromParent();
259+
}
260+
else if (pIntr->getIntrinsicID() == GenISAIntrinsic::GenISA_simdBlockWrite)
261+
{
262+
Function* newBlockWriteFunc = GenISAIntrinsic::getDeclaration(F.getParent(),
263+
GenISAIntrinsic::GenISA_simdBlockWriteBindless,
264+
{ basePointer->getType(), pIntr->getOperand(1)->getType(), Type::getInt32Ty(accessInst->getContext()) });
265+
Instruction* newBlockWrite = CallInst::Create(newBlockWriteFunc, { basePointer, pIntr->getOperand(1), bufferOffset }, "", accessInst);
266+
newBlockWrite->setDebugLoc(pIntr->getDebugLoc());
267+
accessInst->replaceAllUsesWith(newBlockWrite);
268+
accessInst->eraseFromParent();
269+
}
245270
}
246271
}
247272
}
273+
274+
if(supportDynamicBTIsAllocation)
275+
resourceAlloc->uavsNumType += m_SrcPtrToAccessMap.size();
248276
}

IGC/Compiler/PromoteStatelessToBindless.h

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -49,9 +49,16 @@ namespace IGC
4949
void PromoteStatelessToBindlessBuffers(llvm::Function& F) const;
5050
void CheckPrintfBuffer(llvm::Function& F);
5151

52-
std::set<unsigned> m_promotedArgs;
53-
std::unordered_map<llvm::Value*, llvm::Value*> m_AccessToSrcPtrMap;
54-
std::unordered_map<llvm::Value*, llvm::Value*> m_AddressUsedSrcPtrMap;
52+
// Pair of bindless resource access instructions.
53+
// The first is the actual instruction accessing the bindless buffer (load/store/etc)
54+
// The second is the instruction accessing the address of the bindless buffer, which may or may not
55+
// be identical to the first. Needed to convert to null address.
56+
typedef std::pair<llvm::Value*, llvm::Value*> BindlessAccessInsts;
57+
// Map of the srcPtr (kernel arg resource) to a vector of instructions accessing it
58+
llvm::DenseMap<llvm::Value*, llvm::SmallVector<BindlessAccessInsts, 8>> m_SrcPtrToAccessMap;
59+
// Tracks the set of resources that must have at least one stateless access
60+
std::set<llvm::Value*> m_SrcPtrNeedStatelessAccess;
61+
5562
llvm::Value* m_PrintfBuffer;
5663
};
5764

IGC/common/igc_flags.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -344,6 +344,7 @@ DECLARE_IGC_REGKEY(bool, ShaderDumpInstNamer, false, "dump all unnamed
344344
DECLARE_IGC_REGKEY(debugString, ShaderDumpFilter, 0, "Only dump files matching the given regex", true)
345345
DECLARE_IGC_REGKEY(bool, ElfDumpEnable, false, "dump ELF file", true)
346346
DECLARE_IGC_REGKEY(bool, ElfTempDumpEnable, false, "dump temporary ELF files", true)
347+
DECLARE_IGC_REGKEY(debugString, DebugDumpNamePrefix, 0, "Set a prefix to debug info dump filenames(with path) and drop hash info from them (for testing purposes)", true)
347348
DECLARE_IGC_REGKEY(bool, ShowFullVectorsInShaderDumps, false, "print all elements of vectors in ShaderDumps, can dramatically increase ShaderDumps size", true)
348349
DECLARE_IGC_REGKEY(bool, PrintHexFloatInShaderDumpAsm, true, "print floats in hex in asm dump", true)
349350
DECLARE_IGC_REGKEY(debugString, PrintAfter, 0, "Take either all or comma/semicolon-separated list of pass names. If set, enable print LLVM IR after the given pass is done (mimic llvm print-after)", true)

IGC/ocloc_tests/lit.cfg.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,13 +44,19 @@
4444
if llvm_config.add_tool_substitutions([ToolSubst('ocloc', unresolved='break')], tool_dirs) is False:
4545
lit_config.note('Did not find ocloc in %s, ocloc will be used from system paths' % tool_dirs)
4646

47+
llvm_config.add_tool_substitutions([ToolSubst('llvm-dwarfdump')], tool_dirs)
48+
llvm_config.add_tool_substitutions([ToolSubst('opt')], tool_dirs)
49+
4750
if not config.regkeys_disabled:
4851
config.available_features.add('regkeys')
4952

5053
if config.spirv_as_enabled:
5154
config.available_features.add('spirv-as')
5255
llvm_config.add_tool_substitutions([ToolSubst('spirv-as', unresolved='fatal')], tool_dirs)
5356

57+
if config.is32b == "1":
58+
config.available_features.add('sys32')
59+
5460
if config.use_khronos_spirv_translator_in_sc == "1":
5561
config.available_features.add('khronos-translator')
5662
config.available_features.add('khronos-translator-' + config.llvm_version_major)

IGC/ocloc_tests/lit.site.cfg.py.in

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ config.spirv_as_enabled = "@IGC_BUILD__PROJ__spirv_as@"
2626
config.spirv_as_dir = "@IGC_SPIRV_AS_DIR@"
2727
config.use_khronos_spirv_translator_in_sc = "$<BOOL:@IGC_OPTION__USE_KHRONOS_SPIRV_TRANSLATOR_IN_SC@>"
2828
config.llvm_version_major = "@LLVM_VERSION_MAJOR@"
29+
config.is32b = "$<BOOL:$<EQUAL:@CMAKE_SIZEOF_VOID_P@,4>>"
2930

3031
# Support substitution of the tools and libs dirs with user parameters. This is
3132
# used when we can't determine the tool dir at configuration time.

0 commit comments

Comments
 (0)