Skip to content

Commit 4f7756c

Browse files
fda0igcbot
authored andcommitted
Address patching for zeroed global variables and i64 offsets
Fix size overflows for .data and .bss zebin sections by using 64 bit integers to calculate them. Use 64 bits to represent offsets into inlineProgramScopeOffsets, globalBase and constBase to prevent overflows. Add address patching support for zeroinitialized global variables. Previously we cannot do the zero-initializer optimization if the variable is used by another global. This is because the offsets for the zero-init variables have not been calculated at the time of patching. With this change, we save the patching info, and patch later after those offsets have been calculated. This enables the optimization to defer copying zero values to the global/const buffers.
1 parent 85fa09e commit 4f7756c

File tree

7 files changed

+63
-57
lines changed

7 files changed

+63
-57
lines changed

IGC/AdaptorOCL/OCL/sp/zebin_builder.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,9 @@ void ZEBinaryBuilder::addGlobalConstants(const IGC::SOpenCLProgramInfo& annotati
193193
if (annotations.m_initConstantAnnotation && annotations.m_initConstantAnnotation->AllocSize) {
194194
auto& ca = annotations.m_initConstantAnnotation;
195195
// the normal .data.const size
196-
uint32_t dataSize = ca->InlineData.size();
196+
uint64_t dataSize = ca->InlineData.size();
197197
// the zero-initialize variables size, the .bss.const size
198-
uint32_t bssSize = ca->AllocSize - dataSize;
198+
uint64_t bssSize = ca->AllocSize - dataSize;
199199
uint32_t alignment = ca->Alignment;
200200

201201
if (IGC_IS_FLAG_ENABLED(AllocateZeroInitializedVarsInBss)) {
@@ -220,8 +220,9 @@ void ZEBinaryBuilder::addGlobalConstants(const IGC::SOpenCLProgramInfo& annotati
220220
} else {
221221
// before runtime can support bss section, we create all 0s in .const.data section by adding
222222
// bssSize of padding
223+
IGC_ASSERT_MESSAGE(bssSize == static_cast<uint32_t>(bssSize), ".const.data padding size overflows 32 bits");
223224
mGlobalConstSectID = mBuilder.addSectionData("const", (const uint8_t*)ca->InlineData.data(),
224-
dataSize, bssSize, alignment, /*rodata*/true);
225+
dataSize, static_cast<uint32_t>(bssSize), alignment, /*rodata*/true);
225226
}
226227
}
227228

@@ -248,8 +249,8 @@ void ZEBinaryBuilder::addGlobals(const IGC::SOpenCLProgramInfo& annotations)
248249
if (!ca->AllocSize)
249250
return;
250251

251-
uint32_t dataSize = ca->InlineData.size();
252-
uint32_t bssSize = ca->AllocSize - dataSize;
252+
uint64_t dataSize = ca->InlineData.size();
253+
uint64_t bssSize = ca->AllocSize - dataSize;
253254
uint32_t alignment = ca->Alignment;
254255

255256
if (IGC_IS_FLAG_ENABLED(AllocateZeroInitializedVarsInBss)) {
@@ -271,8 +272,9 @@ void ZEBinaryBuilder::addGlobals(const IGC::SOpenCLProgramInfo& annotations)
271272
} else {
272273
// before runtime can support bss section, we create all 0s in .global.data section by adding
273274
// bssSize of padding
275+
IGC_ASSERT_MESSAGE(bssSize == static_cast<uint32_t>(bssSize), ".global.data padding size overflows 32 bits");
274276
mGlobalSectID = mBuilder.addSectionData("global", (const uint8_t*)ca->InlineData.data(),
275-
dataSize, bssSize, alignment, /*rodata*/false);
277+
dataSize, static_cast<uint32_t>(bssSize), alignment, /*rodata*/false);
276278
}
277279
}
278280

IGC/Compiler/Optimizer/OpenCLPasses/ProgramScopeConstants/ProgramScopeConstantAnalysis.cpp

Lines changed: 38 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -187,22 +187,8 @@ bool ProgramScopeConstantAnalysis::runOnModule(Module& M)
187187

188188
if (initializer->isZeroValue())
189189
{
190-
// For zero initialized values, we dont need to copy the data, just tell driver how much to allocate
191-
// However, if it's used as a pointer value, we need to do patching and therefore cannot defer the offset calculation
192-
bool hasPointerUser = false;
193-
for (auto UI : globalVar->users())
194-
{
195-
if (isa<Constant>(UI) && UI->getType()->isPointerTy())
196-
{
197-
hasPointerUser = true;
198-
break;
199-
}
200-
}
201-
if (!hasPointerUser)
202-
{
203-
zeroInitializedGlobals.push_back(globalVar);
204-
continue;
205-
}
190+
zeroInitializedGlobals.push_back(globalVar);
191+
continue;
206192
}
207193

208194
// Align the buffer.
@@ -245,6 +231,22 @@ bool ProgramScopeConstantAnalysis::runOnModule(Module& M)
245231
offset += (unsigned)(m_DL->getTypeAllocSize(IGCLLVM::getNonOpaquePtrEltTy(globalVar->getType())));
246232
}
247233

234+
// Patch the offsets for usages of zero initialized globals after those offsets have been calculated in the previous step.
235+
// TODO: Remove this logic after enabling ZeBinary, since we will switch the patching to use relocation table instead.
236+
for (ZeroInitPatchInfo &patchData : m_PatchLaterDataVector)
237+
{
238+
char* whereToPatch = std::get<0>(patchData);
239+
unsigned patchSize = std::get<1>(patchData);
240+
GlobalVariable* globalVar = std::get<2>(patchData);
241+
uint64_t offset = std::get<3>(patchData);
242+
243+
auto iter = inlineProgramScopeOffsets.find(globalVar);
244+
IGC_ASSERT(iter != inlineProgramScopeOffsets.end());
245+
246+
const uint64_t patchOffset = iter->second + offset;
247+
memcpy_s(whereToPatch, patchSize, (char*)&patchOffset, patchSize);
248+
}
249+
248250
if (inlineProgramScopeOffsets.size())
249251
{
250252
// Add globals tracked in metadata to the "llvm.used" list so they won't be deleted by optimizations
@@ -346,7 +348,7 @@ bool ProgramScopeConstantAnalysis::runOnModule(Module& M)
346348
const bool changed = !inlineProgramScopeOffsets.empty();
347349
for (auto offset : inlineProgramScopeOffsets)
348350
{
349-
m_pModuleMd->inlineProgramScopeOffsets[offset.first] = offset.second;
351+
m_pModuleMd->inlineProgramScopeOffsets[offset.first] = static_cast<uint64_t>(offset.second);
350352
}
351353

352354
// Update LLVM metadata based on IGC MetadataUtils
@@ -476,20 +478,26 @@ void ProgramScopeConstantAnalysis::addData(Constant* initializer,
476478
}
477479
else
478480
{
479-
auto iter = inlineProgramScopeOffsets.find(ptrBase);
480-
IGC_ASSERT(iter != inlineProgramScopeOffsets.end());
481-
482-
const uint64_t pointeeOffset = iter->second + offset;
481+
// Add the patching info for runtime
482+
pointerOffsetInfoList.push_back(PointerOffsetInfo(addressSpace, inlineProgramScopeBuffer.size(), pointedToAddrSpace));
483483

484-
pointerOffsetInfoList.push_back(
485-
PointerOffsetInfo(
486-
addressSpace,
487-
inlineProgramScopeBuffer.size(),
488-
pointedToAddrSpace));
489-
490-
// For old patching logic, write the offset relative to the entire global/constant buffer where the base global resides.
491-
// The base address of the buffer will be added to it at runtime.
492-
inlineProgramScopeBuffer.insert(inlineProgramScopeBuffer.end(), (char*)&pointeeOffset, ((char*)&pointeeOffset) + pointerSize);
484+
auto iter = inlineProgramScopeOffsets.find(ptrBase);
485+
if (iter != inlineProgramScopeOffsets.end())
486+
{
487+
const uint64_t pointeeOffset = iter->second + offset;
488+
// For old patching logic, write the offset relative to the entire global/constant buffer where the base global resides.
489+
// The base address of the buffer will be added to it at runtime.
490+
inlineProgramScopeBuffer.insert(inlineProgramScopeBuffer.end(), (char*)&pointeeOffset, ((char*)&pointeeOffset) + pointerSize);
491+
}
492+
else
493+
{
494+
// If we can't find the base global variable, it must be a zero initialized value whose data is not directly copied.
495+
// Save the info for now, and patch it later once we have the offsets for this zeroinit global vars.
496+
unsigned patchIdx = inlineProgramScopeBuffer.size();
497+
inlineProgramScopeBuffer.insert(inlineProgramScopeBuffer.end(), pointerSize, 0);
498+
499+
m_PatchLaterDataVector.push_back(std::make_tuple((char*)(&inlineProgramScopeBuffer[patchIdx]), pointerSize, ptrBase, offset));
500+
}
493501
}
494502
}
495503
else

IGC/Compiler/Optimizer/OpenCLPasses/ProgramScopeConstants/ProgramScopeConstantAnalysis.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ namespace IGC
5454

5555
protected:
5656
typedef std::vector<unsigned char> DataVector;
57-
typedef llvm::MapVector<llvm::GlobalVariable*, int> BufferOffsetMap;
57+
typedef llvm::MapVector<llvm::GlobalVariable*, int64_t> BufferOffsetMap;
5858

5959
struct PointerOffsetInfo
6060
{
@@ -91,6 +91,10 @@ namespace IGC
9191

9292
const llvm::DataLayout* m_DL;
9393
ModuleMetaData* m_pModuleMd;
94+
95+
// Used to patch offsets for zero initialized globals
96+
typedef std::tuple<char*, unsigned, llvm::GlobalVariable*, uint64_t> ZeroInitPatchInfo;
97+
std::vector<ZeroInitPatchInfo> m_PatchLaterDataVector;
9498
};
9599

96100
} // namespace IGC

IGC/Compiler/Optimizer/OpenCLPasses/ProgramScopeConstants/ProgramScopeConstantResolution.cpp

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -126,22 +126,14 @@ bool ProgramScopeConstantResolution::runOnModule(Module& M)
126126
continue;
127127
}
128128

129-
// Get the offset of this constant from the base.
130-
int offset = -1;
131-
132129
auto bufferOffset = modMD->inlineProgramScopeOffsets.find(pGlobalVar);
133-
if (bufferOffset != modMD->inlineProgramScopeOffsets.end())
134-
{
135-
offset = bufferOffset->second;
136-
}
130+
if (bufferOffset == modMD->inlineProgramScopeOffsets.end())
131+
continue; // This constant is not used, so it didn't get an offset.
137132

138-
// This constant is not used, so it didn't get an offset.
139-
if (offset == -1)
140-
{
141-
continue;
142-
}
133+
// Get the offset of this constant from the base.
134+
int64_t offset = bufferOffset->second;
135+
ConstantInt* pOffset = ConstantInt::get(Type::getInt64Ty(C), offset);
143136

144-
ConstantInt* pOffset = ConstantInt::get(Type::getInt32Ty(C), offset);
145137
const ImplicitArg::ArgType argType =
146138
AS == ADDRESS_SPACE_GLOBAL ? ImplicitArg::GLOBAL_BASE : ImplicitArg::CONSTANT_BASE;
147139

IGC/Compiler/tests/ProgramScopeConstantAnalysis/basic.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ entry:
6666
; CHECK-DAG: [[SYMBOL]] = !{!"Symbol", !"a"}
6767
; CHECK-DAG: !{!"inlineProgramScopeOffsets", [[INLINE_PROGRAM_SCOPE_OFFSETS_MAP0:![0-9]*]], [[INLINE_PROGRAM_SCOPE_OFFSETS_VALUE0:![0-9]*]], [[INLINE_PROGRAM_SCOPE_OFFSETS_MAP1:![0-9]*]], [[INLINE_PROGRAM_SCOPE_OFFSETS_VALUE1:![0-9]*]], [[INLINE_PROGRAM_SCOPE_OFFSETS_MAP2:![0-9]*]], [[INLINE_PROGRAM_SCOPE_OFFSETS_VALUE2:![0-9]*]]}
6868
; CHECK-DAG: [[INLINE_PROGRAM_SCOPE_OFFSETS_MAP0]] = !{!"inlineProgramScopeOffsetsMap[0]", [2 x i32] addrspace(2)* @a}
69-
; CHECK-DAG: [[INLINE_PROGRAM_SCOPE_OFFSETS_VALUE0]] = !{!"inlineProgramScopeOffsetsValue[0]", i32 0}
69+
; CHECK-DAG: [[INLINE_PROGRAM_SCOPE_OFFSETS_VALUE0]] = !{!"inlineProgramScopeOffsetsValue[0]", i64 0}
7070
; CHECK-DAG: [[INLINE_PROGRAM_SCOPE_OFFSETS_MAP1]] = !{!"inlineProgramScopeOffsetsMap[1]", i32 addrspace(2)* addrspace(1)* @d}
71-
; CHECK-DAG: [[INLINE_PROGRAM_SCOPE_OFFSETS_VALUE1]] = !{!"inlineProgramScopeOffsetsValue[1]", i32 0}
71+
; CHECK-DAG: [[INLINE_PROGRAM_SCOPE_OFFSETS_VALUE1]] = !{!"inlineProgramScopeOffsetsValue[1]", i64 0}
7272
; CHECK-DAG: [[INLINE_PROGRAM_SCOPE_OFFSETS_MAP2]] = !{!"inlineProgramScopeOffsetsMap[2]", i32 addrspace(1)* @c}
73-
; CHECK-DAG: [[INLINE_PROGRAM_SCOPE_OFFSETS_VALUE2]] = !{!"inlineProgramScopeOffsetsValue[2]", i32 8}
73+
; CHECK-DAG: [[INLINE_PROGRAM_SCOPE_OFFSETS_VALUE2]] = !{!"inlineProgramScopeOffsetsValue[2]", i64 8}

IGC/Compiler/tests/ProgramScopeConstantResolution/basic.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,11 @@
2929
define spir_kernel void @test_program(i32 addrspace(1)* %dst, <8 x i32> %r0, <8 x i32> %payloadHeader, i8 addrspace(2)* %constBase, i8 addrspace(1)* %globalBase, i8* %privateBase, i32 %bufferOffset) #0 {
3030
; CHECK-LABEL: @test_program(
3131
; CHECK: entry:
32-
; CHECK: [[OFFC:%.*]] = getelementptr i8, i8 addrspace(1)* %globalBase, i32 8
32+
; CHECK: [[OFFC:%.*]] = getelementptr i8, i8 addrspace(1)* %globalBase, i64 8
3333
; CHECK: [[CASTC:%.*]] = bitcast i8 addrspace(1)* [[OFFC]] to i32 addrspace(1)*
34-
; CHECK: [[OFFD:%.*]] = getelementptr i8, i8 addrspace(1)* %globalBase, i32 0
34+
; CHECK: [[OFFD:%.*]] = getelementptr i8, i8 addrspace(1)* %globalBase, i64 0
3535
; CHECK: [[CASTD:%.*]] = bitcast i8 addrspace(1)* [[OFFD]] to i32 addrspace(2)* addrspace(1)*
36-
; CHECK: [[OFFA:%.*]] = getelementptr i8, i8 addrspace(2)* %constBase, i32 0
36+
; CHECK: [[OFFA:%.*]] = getelementptr i8, i8 addrspace(2)* %constBase, i64 0
3737
; CHECK: [[CASTA:%.*]] = bitcast i8 addrspace(2)* [[OFFA]] to [2 x i32] addrspace(2)*
3838
; CHECK: [[DST_ADDR:%.*]] = alloca i32 addrspace(1)*, align 8
3939
; CHECK: [[AA:%.*]] = alloca i32, align 4

IGC/common/MDFrameWork.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -692,7 +692,7 @@ namespace IGC
692692
std::vector<uint32_t> RasterizerOrderedByteAddressBuffer;
693693
std::set<uint32_t> RasterizerOrderedViews;
694694
unsigned int MinNOSPushConstantSize = 0;
695-
llvm::MapVector<llvm::GlobalVariable*, int> inlineProgramScopeOffsets;
695+
llvm::MapVector<llvm::GlobalVariable*, uint64_t> inlineProgramScopeOffsets;
696696
ShaderData shaderData;
697697
URBLayoutInfo URBInfo;
698698
bool UseBindlessImage = false;

0 commit comments

Comments
 (0)