Skip to content

Commit 49fed10

Browse files
Kotynia, Piotrigcbot
authored andcommitted
Disable EnableWriteOldFPToStack by default
Disable EnableWriteOldFPToStack by default, since it is no longer needed for stack walk debug. This change is based on dlei6g proposition. Debug patch applied. Before this patch frontend framepointer was saved at the beginning of the stack call. It is not needed anymore so it is redundant and affecting performance. Debug info generation change is also needed to ensure proper DWARF location expression emitting.
1 parent df78e04 commit 49fed10

File tree

9 files changed

+14
-106
lines changed

9 files changed

+14
-106
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -10385,14 +10385,6 @@ void EmitPass::emitStackAlloca(GenIntrinsicInst* GII)
1038510385
{
1038610386
// Static private mem access is done through the FP
1038710387
CVariable* pFP = m_currShader->GetFP();
10388-
if IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack)
10389-
{
10390-
// If we have written the previous FP to the current frame's start, the start of
10391-
// private memory will be offset by 16 bytes
10392-
CVariable* tempFP = m_currShader->GetNewVariable(pFP);
10393-
emitAddPointer(tempFP, pFP, m_currShader->ImmToVariable(getFPOffset(), ISA_TYPE_UD));
10394-
pFP = tempFP;
10395-
}
1039610388
CVariable* pOffset = m_currShader->GetSymbol(GII->getOperand(0));
1039710389
emitAddPointer(m_destination, pFP, pOffset);
1039810390
}
@@ -18695,55 +18687,13 @@ void EmitPass::emitPushFrameToStack(unsigned& pushSize)
1869518687
m_encoder->Copy(pFP, pSP);
1869618688
m_encoder->Push();
1869718689

18698-
// Allocate 1 extra oword to store previous frame's FP
18699-
pushSize += IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack) ? SIZE_OWORD : 0;
18700-
1870118690
// Since we use unaligned oword writes, pushSize should be OW aligned address
1870218691
pushSize = int_cast<unsigned>(llvm::alignTo(pushSize, SIZE_OWORD));
1870318692

1870418693
if (pushSize != 0)
1870518694
{
1870618695
// Update SP by pushSize
1870718696
emitAddPointer(pSP, pSP, m_currShader->ImmToVariable(pushSize, ISA_TYPE_UD));
18708-
18709-
if IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack)
18710-
{
18711-
// Store old FP value to current FP
18712-
CVariable* pOldFP = m_currShader->GetPrevFP();
18713-
// If previous FP is null (for kernel frame), we initialize it to 0
18714-
if (pOldFP == nullptr)
18715-
{
18716-
pOldFP = m_currShader->GetNewVariable(pFP);
18717-
m_encoder->Copy(pOldFP, m_currShader->ImmToVariable(0, pOldFP->GetType()));
18718-
m_encoder->Push();
18719-
}
18720-
18721-
pFP = ReAlignUniformVariable(pFP, EALIGN_GRF);
18722-
bool useA64 = (pFP->GetSize() == 8);
18723-
if (shouldGenerateLSC())
18724-
{
18725-
ResourceDescriptor resource;
18726-
resource.m_surfaceType = ESURFACE_STATELESS;
18727-
emitLSCStore(nullptr, pOldFP, pFP, 64, 1, 0, &resource, (useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b), LSC_DATA_ORDER_TRANSPOSE, 0, 1);
18728-
m_encoder->Push();
18729-
}
18730-
else
18731-
{
18732-
if (useA64)
18733-
m_encoder->OWStoreA64(pOldFP, pFP, SIZE_OWORD, 0);
18734-
else {
18735-
// FP is in units of BYTES, but OWStore requires units of OWORDS
18736-
CVariable* offsetShr = m_currShader->GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, "FPOffset_OW");
18737-
m_encoder->SetSimdSize(SIMDMode::SIMD1);
18738-
m_encoder->SetNoMask();
18739-
m_encoder->SetSrcRegion(0, 0, 1, 0);
18740-
m_encoder->Shr(offsetShr, pFP, m_currShader->ImmToVariable(4, ISA_TYPE_UD));
18741-
m_encoder->Push();
18742-
m_encoder->OWStore(pOldFP, ESURFACE_STATELESS, nullptr, offsetShr, SIZE_OWORD, 0);
18743-
}
18744-
m_encoder->Push();
18745-
}
18746-
}
1874718697
}
1874818698
}
1874918699

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,6 @@ class EmitPass : public llvm::FunctionPass
257257
void emitFlushSamplerCache();
258258
void emitSurfaceInfo(llvm::GenIntrinsicInst* intrinsic);
259259

260-
static uint64_t getFPOffset() { return SIZE_OWORD; }
261260
void emitStackAlloca(llvm::GenIntrinsicInst* intrinsic);
262261
void emitVLAStackAlloca(llvm::GenIntrinsicInst* intrinsic);
263262

IGC/Compiler/DebugInfo/ScalarVISAModule.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,9 +135,6 @@ llvm::StringRef ScalarVisaModule::GetVISAFuncName() const
135135
// matches that used by VISA.
136136
return getFunction()->getName();
137137
}
138-
uint64_t ScalarVisaModule::getFPOffset() const {
139-
return EmitPass::getFPOffset();
140-
}
141138

142139
bool ScalarVisaModule::usesSlot1ScratchSpill() const {
143140
return m_pShader->ProgramOutput()->getScratchSpaceUsageInSlot1();

IGC/Compiler/DebugInfo/ScalarVISAModule.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,6 @@ class ScalarVisaModule final : public IGC::VISAModule {
102102
bool hasPTO() const override {
103103
return getPerThreadOffset() != nullptr;
104104
}
105-
uint64_t getFPOffset() const override;
106105
int getPTOReg() const override;
107106
int getFPReg() const override;
108107

IGC/Compiler/Optimizer/OpenCLPasses/PrivateMemory/PrivateMemoryResolution.cpp

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -251,9 +251,6 @@ bool PrivateMemoryResolution::runOnModule(llvm::Module& M)
251251
return 0;
252252

253253
uint32_t currFuncPrivateMem = (uint32_t)(funcIt->second.privateMemoryPerWI);
254-
// Add 1 OWORD for FP stack write
255-
if IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack)
256-
currFuncPrivateMem += uint32_t(EmitPass::getFPOffset());
257254

258255
CallGraphNode* Node = CG[F];
259256

@@ -799,26 +796,6 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
799796

800797
if (privateOnStack)
801798
{
802-
// If the private memory is on the stack there may be a situation where
803-
// some extra data is placed at the beginning of stack frame (e.g. prev FP).
804-
// In that case, allocas' alignment may not be satisfied. To prevent this,
805-
// a padding is added between that extra data and the private memory.
806-
unsigned int allocasExtraOffset = 0;
807-
unsigned int padding = 0;
808-
if IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack)
809-
{
810-
allocasExtraOffset += uint32_t(EmitPass::getFPOffset());
811-
}
812-
813-
if (allocasExtraOffset > 0)
814-
{
815-
alignment_t privateMemoryAlignment = m_ModAllocaInfo->getPrivateMemAlignment(m_currFunction);
816-
padding = iSTD::Align(allocasExtraOffset, size_t(privateMemoryAlignment)) - allocasExtraOffset;
817-
}
818-
819-
modMD->FuncMD[m_currFunction].privateMemoryPerWI += padding;
820-
modMD->privateMemoryPerWI += padding;//redundant ?
821-
822799
// Creates intrinsics that will be lowered in the CodeGen and will handle the stack-pointer
823800
Instruction* simdLaneId16 = entryBuilder.CreateCall(simdLaneIdFunc, llvm::None, VALUE_NAME("simdLaneId16"));
824801
Value* simdLaneId = entryBuilder.CreateIntCast(simdLaneId16, typeInt32, false, VALUE_NAME("simdLaneId"));
@@ -857,10 +834,6 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
857834
Value* increment = isUniform ? builder.getInt32(0) : simdLaneId;
858835
Value* perLaneOffset = builder.CreateMul(increment, ConstantInt::get(typeInt32, bufferSize), VALUE_NAME("perLaneOffset"));
859836
Value* totalOffset = builder.CreateAdd(bufferOffset, perLaneOffset, VALUE_NAME(pAI->getName() + ".totalOffset"));
860-
if (padding > 0)
861-
{
862-
totalOffset = builder.CreateAdd(totalOffset, ConstantInt::get(typeInt32, padding), VALUE_NAME(pAI->getName() + ".totalOffsetWithPadding"));
863-
}
864837
Function* stackAllocaFunc = GenISAIntrinsic::getDeclaration(m_currFunction->getParent(), GenISAIntrinsic::GenISA_StackAlloca);
865838
Value* stackAlloca = builder.CreateCall(stackAllocaFunc, totalOffset, VALUE_NAME("stackAlloca"));
866839
privateBuffer = builder.CreatePointerCast(stackAlloca, pAI->getType(), VALUE_NAME(pAI->getName() + ".privateBuffer"));

IGC/DebugInfo/DwarfCompileUnit.cpp

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1276,12 +1276,13 @@ void CompileUnit::addSLMLocation(IGC::DIEBlock *Block, const DbgVariable &DV,
12761276
// There is a private value in the current stack frame.
12771277
// Location encoding is similar to a global variable except SIMD lane
12781278
// location encoding and storage size connected to this, because since it is
1279-
// uniform we assume this size to be 0. 1 DW_OP_regx <Frame Pointer reg
1280-
// encoded> 2 DW_OP_const1u <bit-offset to Frame Pointer reg> 3
1281-
// DW_OP_const1u 64 , i.e. size in bits 4 DW_OP_INTEL_push_bit_piece_stack
1282-
// 5 DW_OP_plus_uconst SIZE_OWORD // i.e. 0x10 taken from
1283-
// getFPOffset(); same as emitted in EmitPass::emitStackAlloca() 6
1284-
// DW_OP_plus_uconst storageOffset // MD: StorageOffset; the offset
1279+
// uniform we assume this size to be 0.
1280+
// 1 DW_OP_regx <Frame Pointer reg encoded>
1281+
// 2 DW_OP_const1u <bit-offset to Frame Pointer reg>
1282+
// 3 DW_OP_const1u 64, i.e. size in bits
1283+
// 4 DW_OP_INTEL_push_bit_piece_stack
1284+
// 6 DW_OP_plus_uconst storageOffset
1285+
// MD: StorageOffset -> the offset
12851286
// where each variable is stored in the current stack frame
12861287

12871288
const auto *VISAMod = Loc.GetVISAModule();
@@ -1322,11 +1323,6 @@ void CompileUnit::addSLMLocation(IGC::DIEBlock *Block, const DbgVariable &DV,
13221323
// Frame Pointer reg>
13231324
extractSubRegValue(Block, 64);
13241325

1325-
addUInt(Block, dwarf::DW_FORM_data1,
1326-
dwarf::DW_OP_plus_uconst); // 5 DW_OP_plus_uconst SIZE_OWORD (taken
1327-
// from getFPOffset())
1328-
addUInt(Block, dwarf::DW_FORM_udata, VISAMod->getFPOffset());
1329-
13301326
addUInt(Block, dwarf::DW_FORM_data1,
13311327
dwarf::DW_OP_plus_uconst); // 6 DW_OP_plus_uconst storageOffset
13321328
addUInt(Block, dwarf::DW_FORM_udata, storageOffset); // storageOffset
@@ -2829,11 +2825,13 @@ bool CompileUnit::buildFpBasedLoc(const DbgVariable &var, IGC::DIEBlock *Block,
28292825
// 2 DW_OP_const1u <bit-offset to Frame Pointer reg>
28302826
// 3 DW_OP_const1u 64 , i.e. size in bits
28312827
// 4 DW_OP_INTEL_push_bit_piece_stack
2832-
// 5 DW_OP_plus_uconst SIZE_OWORD // i.e. 0x10 taken from
2833-
// getFPOffset(); same as emitted in EmitPass::emitStackAlloca() 6
2834-
// DW_OP_push_simd_lane 7 DW_OP_const1u/2u/4u/8u storageSize // MD:
2835-
// StorageSize; the size of the variable 8 DW_OP_mul 9 DW_OP_plus 10
2836-
// DW_OP_plus_uconst storageOffset // MD: StorageOffset; the offset where
2828+
// 5 DW_OP_push_simd_lane
2829+
// 6 DW_OP_const1u/2u/4u/8u storageSize
2830+
// MD: StorageSize -> the size of the variable
2831+
// 7 DW_OP_mul
2832+
// 8 DW_OP_plus
2833+
// 9 DW_OP_plus_uconst storageOffset
2834+
// MD: StorageOffset -> the offset where
28372835
// each variable is stored in the current stack frame
28382836

28392837
auto regNumFP = VISAMod->getFPReg();
@@ -2861,11 +2859,6 @@ bool CompileUnit::buildFpBasedLoc(const DbgVariable &var, IGC::DIEBlock *Block,
28612859
bitOffsetToFPReg); // 2 DW_OP_const1u/2u <bit-offset to Frame Pointer reg>
28622860
extractSubRegValue(Block, 64);
28632861

2864-
addUInt(Block, dwarf::DW_FORM_data1,
2865-
dwarf::DW_OP_plus_uconst); // 5 DW_OP_plus_uconst SIZE_OWORD (taken
2866-
// from getFPOffset())
2867-
addUInt(Block, dwarf::DW_FORM_udata, VISAMod->getFPOffset());
2868-
28692862
addUInt(Block, dwarf::DW_FORM_data1,
28702863
DW_OP_INTEL_push_simd_lane); // 6 DW_OP_INTEL_push_simd_lane
28712864
addConstantUValue(Block, storageSize); // 7 DW_OP_const1u/2u/4u/8u storageSize

IGC/DebugInfo/VISAModule.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,6 @@ class VISAModule {
549549
virtual bool hasPTO() const = 0;
550550
virtual int getPTOReg() const = 0;
551551
virtual int getFPReg() const = 0;
552-
virtual uint64_t getFPOffset() const = 0;
553552
virtual bool usesSlot1ScratchSpill() const = 0;
554553

555554
virtual llvm::ArrayRef<char> getGenDebug() const = 0;

IGC/VectorCompiler/lib/GenXCodeGen/GenXDebugInfo.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1055,7 +1055,6 @@ class GenXFunction final : public IGC::VISAModule {
10551055
bool hasPTO() const override { return false; }
10561056
int getPTOReg() const override { return -1; }
10571057
int getFPReg() const override { return -1; }
1058-
uint64_t getFPOffset() const override { return 16; }
10591058

10601059
bool usesSlot1ScratchSpill() const override { return false; }
10611060

IGC/common/igc_flags.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,6 @@ DECLARE_IGC_REGKEY(bool, AvoidUsingR0R1, false, "Do not use r0 an
394394
DECLARE_IGC_REGKEY(bool, EnableGTLocationDebugging, true, "Setting this to 1 (true) enables GT location expression emission for GPU debugger", true)
395395
DECLARE_IGC_REGKEY(bool, UseOffsetInLocation, true, "Setting this to 1 (true) preserves private base and per thread offset and removes preservation of any other debug variables", true)
396396
DECLARE_IGC_REGKEY(bool, EnableRelocations, false, "Setting this to 1 (true) makes IGC emit relocatable ELF with debug info", true)
397-
DECLARE_IGC_REGKEY(bool, EnableWriteOldFPToStack, true, "Setting this to 1 (true) writes the caller frame's frame-pointer to the start of callee's frame on stack, to support stack walk", false)
398397
DECLARE_IGC_REGKEY(bool, ZeBinCompatibleDebugging, true, "Setting this to 1 (true) enables embed debug info in zeBinary", true)
399398
DECLARE_IGC_REGKEY(bool, DebugInfoEnforceAmd64EM, false, "Enforces elf file with the debug infomation to have eMachine set to AMD64", false)
400399
DECLARE_IGC_REGKEY(bool, DebugInfoValidation, false, "Enable optional (strict) checks to detect debug information inconsistencies", false)

0 commit comments

Comments
 (0)