Skip to content

Commit 7ea2122

Browse files
Kotynia, PiotrArtem Gindinson
authored andcommitted
Disable EnableWriteOldFPToStack by default
Disable EnableWriteOldFPToStack by default, since it is no longer needed for stack walk debug. This change is based on dlei6g proposition. Debug patch applied. Before this patch frontend framepointer was saved at the beginning of the stack call. It is not needed anymore so it is redundant and affecting performance. Debug info generation change is also needed to ensure proper DWARF location expression emitting. (cherry picked from commit 49fed10)
1 parent 1afbede commit 7ea2122

File tree

9 files changed

+14
-106
lines changed

9 files changed

+14
-106
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -10386,14 +10386,6 @@ void EmitPass::emitStackAlloca(GenIntrinsicInst* GII)
1038610386
{
1038710387
// Static private mem access is done through the FP
1038810388
CVariable* pFP = m_currShader->GetFP();
10389-
if IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack)
10390-
{
10391-
// If we have written the previous FP to the current frame's start, the start of
10392-
// private memory will be offset by 16 bytes
10393-
CVariable* tempFP = m_currShader->GetNewVariable(pFP);
10394-
emitAddPointer(tempFP, pFP, m_currShader->ImmToVariable(getFPOffset(), ISA_TYPE_UD));
10395-
pFP = tempFP;
10396-
}
1039710389
CVariable* pOffset = m_currShader->GetSymbol(GII->getOperand(0));
1039810390
emitAddPointer(m_destination, pFP, pOffset);
1039910391
}
@@ -18611,55 +18603,13 @@ void EmitPass::emitPushFrameToStack(unsigned& pushSize)
1861118603
m_encoder->Copy(pFP, pSP);
1861218604
m_encoder->Push();
1861318605

18614-
// Allocate 1 extra oword to store previous frame's FP
18615-
pushSize += IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack) ? SIZE_OWORD : 0;
18616-
1861718606
// Since we use unaligned oword writes, pushSize should be OW aligned address
1861818607
pushSize = int_cast<unsigned>(llvm::alignTo(pushSize, SIZE_OWORD));
1861918608

1862018609
if (pushSize != 0)
1862118610
{
1862218611
// Update SP by pushSize
1862318612
emitAddPointer(pSP, pSP, m_currShader->ImmToVariable(pushSize, ISA_TYPE_UD));
18624-
18625-
if IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack)
18626-
{
18627-
// Store old FP value to current FP
18628-
CVariable* pOldFP = m_currShader->GetPrevFP();
18629-
// If previous FP is null (for kernel frame), we initialize it to 0
18630-
if (pOldFP == nullptr)
18631-
{
18632-
pOldFP = m_currShader->GetNewVariable(pFP);
18633-
m_encoder->Copy(pOldFP, m_currShader->ImmToVariable(0, pOldFP->GetType()));
18634-
m_encoder->Push();
18635-
}
18636-
18637-
pFP = ReAlignUniformVariable(pFP, EALIGN_GRF);
18638-
bool useA64 = (pFP->GetSize() == 8);
18639-
if (shouldGenerateLSC())
18640-
{
18641-
ResourceDescriptor resource;
18642-
resource.m_surfaceType = ESURFACE_STATELESS;
18643-
emitLSCStore(nullptr, pOldFP, pFP, 64, 1, 0, &resource, (useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b), LSC_DATA_ORDER_TRANSPOSE, 0);
18644-
m_encoder->Push();
18645-
}
18646-
else
18647-
{
18648-
if (useA64)
18649-
m_encoder->OWStoreA64(pOldFP, pFP, SIZE_OWORD, 0);
18650-
else {
18651-
// FP is in units of BYTES, but OWStore requires units of OWORDS
18652-
CVariable* offsetShr = m_currShader->GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, "FPOffset_OW");
18653-
m_encoder->SetSimdSize(SIMDMode::SIMD1);
18654-
m_encoder->SetNoMask();
18655-
m_encoder->SetSrcRegion(0, 0, 1, 0);
18656-
m_encoder->Shr(offsetShr, pFP, m_currShader->ImmToVariable(4, ISA_TYPE_UD));
18657-
m_encoder->Push();
18658-
m_encoder->OWStore(pOldFP, ESURFACE_STATELESS, nullptr, offsetShr, SIZE_OWORD, 0);
18659-
}
18660-
m_encoder->Push();
18661-
}
18662-
}
1866318613
}
1866418614
}
1866518615

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,6 @@ class EmitPass : public llvm::FunctionPass
256256
void emitFlushSamplerCache();
257257
void emitSurfaceInfo(llvm::GenIntrinsicInst* intrinsic);
258258

259-
static uint64_t getFPOffset() { return SIZE_OWORD; }
260259
void emitStackAlloca(llvm::GenIntrinsicInst* intrinsic);
261260
void emitVLAStackAlloca(llvm::GenIntrinsicInst* intrinsic);
262261

IGC/Compiler/DebugInfo/ScalarVISAModule.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,9 +135,6 @@ llvm::StringRef ScalarVisaModule::GetVISAFuncName() const
135135
// matches that used by VISA.
136136
return getFunction()->getName();
137137
}
138-
uint64_t ScalarVisaModule::getFPOffset() const {
139-
return EmitPass::getFPOffset();
140-
}
141138

142139
bool ScalarVisaModule::usesSlot1ScratchSpill() const {
143140
return m_pShader->ProgramOutput()->getScratchSpaceUsageInSlot1();

IGC/Compiler/DebugInfo/ScalarVISAModule.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,6 @@ class ScalarVisaModule final : public IGC::VISAModule {
102102
bool hasPTO() const override {
103103
return getPerThreadOffset() != nullptr;
104104
}
105-
uint64_t getFPOffset() const override;
106105
int getPTOReg() const override;
107106
int getFPReg() const override;
108107

IGC/Compiler/Optimizer/OpenCLPasses/PrivateMemory/PrivateMemoryResolution.cpp

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -240,9 +240,6 @@ bool PrivateMemoryResolution::runOnModule(llvm::Module& M)
240240
return 0;
241241

242242
uint32_t currFuncPrivateMem = (uint32_t)(funcIt->second.privateMemoryPerWI);
243-
// Add 1 OWORD for FP stack write
244-
if IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack)
245-
currFuncPrivateMem += uint32_t(EmitPass::getFPOffset());
246243

247244
CallGraphNode* Node = CG[F];
248245

@@ -779,26 +776,6 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
779776

780777
if (privateOnStack)
781778
{
782-
// If the private memory is on the stack there may be a situation where
783-
// some extra data is placed at the beginning of stack frame (e.g. prev FP).
784-
// In that case, allocas' alignment may not be satisfied. To prevent this,
785-
// a padding is added between that extra data and the private memory.
786-
unsigned int allocasExtraOffset = 0;
787-
unsigned int padding = 0;
788-
if IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack)
789-
{
790-
allocasExtraOffset += uint32_t(EmitPass::getFPOffset());
791-
}
792-
793-
if (allocasExtraOffset > 0)
794-
{
795-
alignment_t privateMemoryAlignment = m_ModAllocaInfo->getPrivateMemAlignment(m_currFunction);
796-
padding = iSTD::Align(allocasExtraOffset, size_t(privateMemoryAlignment)) - allocasExtraOffset;
797-
}
798-
799-
modMD->FuncMD[m_currFunction].privateMemoryPerWI += padding;
800-
modMD->privateMemoryPerWI += padding;//redundant ?
801-
802779
// Creates intrinsics that will be lowered in the CodeGen and will handle the stack-pointer
803780
Instruction* simdLaneId16 = entryBuilder.CreateCall(simdLaneIdFunc, llvm::None, VALUE_NAME("simdLaneId16"));
804781
Value* simdLaneId = entryBuilder.CreateIntCast(simdLaneId16, typeInt32, false, VALUE_NAME("simdLaneId"));
@@ -837,10 +814,6 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
837814
Value* increment = isUniform ? builder.getInt32(0) : simdLaneId;
838815
Value* perLaneOffset = builder.CreateMul(increment, ConstantInt::get(typeInt32, bufferSize), VALUE_NAME("perLaneOffset"));
839816
Value* totalOffset = builder.CreateAdd(bufferOffset, perLaneOffset, VALUE_NAME(pAI->getName() + ".totalOffset"));
840-
if (padding > 0)
841-
{
842-
totalOffset = builder.CreateAdd(totalOffset, ConstantInt::get(typeInt32, padding), VALUE_NAME(pAI->getName() + ".totalOffsetWithPadding"));
843-
}
844817
Function* stackAllocaFunc = GenISAIntrinsic::getDeclaration(m_currFunction->getParent(), GenISAIntrinsic::GenISA_StackAlloca);
845818
Value* stackAlloca = builder.CreateCall(stackAllocaFunc, totalOffset, VALUE_NAME("stackAlloca"));
846819
privateBuffer = builder.CreatePointerCast(stackAlloca, pAI->getType(), VALUE_NAME(pAI->getName() + ".privateBuffer"));

IGC/DebugInfo/DwarfCompileUnit.cpp

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1275,12 +1275,13 @@ void CompileUnit::addSLMLocation(IGC::DIEBlock *Block, const DbgVariable &DV,
12751275
// There is a private value in the current stack frame.
12761276
// Location encoding is similar to a global variable except SIMD lane
12771277
// location encoding and storage size connected to this, because since it is
1278-
// uniform we assume this size to be 0. 1 DW_OP_regx <Frame Pointer reg
1279-
// encoded> 2 DW_OP_const1u <bit-offset to Frame Pointer reg> 3
1280-
// DW_OP_const1u 64 , i.e. size in bits 4 DW_OP_INTEL_push_bit_piece_stack
1281-
// 5 DW_OP_plus_uconst SIZE_OWORD // i.e. 0x10 taken from
1282-
// getFPOffset(); same as emitted in EmitPass::emitStackAlloca() 6
1283-
// DW_OP_plus_uconst storageOffset // MD: StorageOffset; the offset
1278+
// uniform we assume this size to be 0.
1279+
// 1 DW_OP_regx <Frame Pointer reg encoded>
1280+
// 2 DW_OP_const1u <bit-offset to Frame Pointer reg>
1281+
// 3 DW_OP_const1u 64, i.e. size in bits
1282+
// 4 DW_OP_INTEL_push_bit_piece_stack
1283+
// 6 DW_OP_plus_uconst storageOffset
1284+
// MD: StorageOffset -> the offset
12841285
// where each variable is stored in the current stack frame
12851286

12861287
const auto *VISAMod = Loc.GetVISAModule();
@@ -1321,11 +1322,6 @@ void CompileUnit::addSLMLocation(IGC::DIEBlock *Block, const DbgVariable &DV,
13211322
// Frame Pointer reg>
13221323
extractSubRegValue(Block, 64);
13231324

1324-
addUInt(Block, dwarf::DW_FORM_data1,
1325-
dwarf::DW_OP_plus_uconst); // 5 DW_OP_plus_uconst SIZE_OWORD (taken
1326-
// from getFPOffset())
1327-
addUInt(Block, dwarf::DW_FORM_udata, VISAMod->getFPOffset());
1328-
13291325
addUInt(Block, dwarf::DW_FORM_data1,
13301326
dwarf::DW_OP_plus_uconst); // 6 DW_OP_plus_uconst storageOffset
13311327
addUInt(Block, dwarf::DW_FORM_udata, storageOffset); // storageOffset
@@ -2809,11 +2805,13 @@ bool CompileUnit::buildFpBasedLoc(const DbgVariable &var, IGC::DIEBlock *Block,
28092805
// 2 DW_OP_const1u <bit-offset to Frame Pointer reg>
28102806
// 3 DW_OP_const1u 64 , i.e. size in bits
28112807
// 4 DW_OP_INTEL_push_bit_piece_stack
2812-
// 5 DW_OP_plus_uconst SIZE_OWORD // i.e. 0x10 taken from
2813-
// getFPOffset(); same as emitted in EmitPass::emitStackAlloca() 6
2814-
// DW_OP_push_simd_lane 7 DW_OP_const1u/2u/4u/8u storageSize // MD:
2815-
// StorageSize; the size of the variable 8 DW_OP_mul 9 DW_OP_plus 10
2816-
// DW_OP_plus_uconst storageOffset // MD: StorageOffset; the offset where
2808+
// 5 DW_OP_push_simd_lane
2809+
// 6 DW_OP_const1u/2u/4u/8u storageSize
2810+
// MD: StorageSize -> the size of the variable
2811+
// 7 DW_OP_mul
2812+
// 8 DW_OP_plus
2813+
// 9 DW_OP_plus_uconst storageOffset
2814+
// MD: StorageOffset -> the offset where
28172815
// each variable is stored in the current stack frame
28182816

28192817
auto regNumFP = VISAMod->getFPReg();
@@ -2841,11 +2839,6 @@ bool CompileUnit::buildFpBasedLoc(const DbgVariable &var, IGC::DIEBlock *Block,
28412839
bitOffsetToFPReg); // 2 DW_OP_const1u/2u <bit-offset to Frame Pointer reg>
28422840
extractSubRegValue(Block, 64);
28432841

2844-
addUInt(Block, dwarf::DW_FORM_data1,
2845-
dwarf::DW_OP_plus_uconst); // 5 DW_OP_plus_uconst SIZE_OWORD (taken
2846-
// from getFPOffset())
2847-
addUInt(Block, dwarf::DW_FORM_udata, VISAMod->getFPOffset());
2848-
28492842
addUInt(Block, dwarf::DW_FORM_data1,
28502843
DW_OP_INTEL_push_simd_lane); // 6 DW_OP_INTEL_push_simd_lane
28512844
addConstantUValue(Block, storageSize); // 7 DW_OP_const1u/2u/4u/8u storageSize

IGC/DebugInfo/VISAModule.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,6 @@ class VISAModule {
549549
virtual bool hasPTO() const = 0;
550550
virtual int getPTOReg() const = 0;
551551
virtual int getFPReg() const = 0;
552-
virtual uint64_t getFPOffset() const = 0;
553552
virtual bool usesSlot1ScratchSpill() const = 0;
554553

555554
virtual llvm::ArrayRef<char> getGenDebug() const = 0;

IGC/VectorCompiler/lib/GenXCodeGen/GenXDebugInfo.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1055,7 +1055,6 @@ class GenXFunction final : public IGC::VISAModule {
10551055
bool hasPTO() const override { return false; }
10561056
int getPTOReg() const override { return -1; }
10571057
int getFPReg() const override { return -1; }
1058-
uint64_t getFPOffset() const override { return 16; }
10591058

10601059
bool usesSlot1ScratchSpill() const override { return false; }
10611060

IGC/common/igc_flags.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,6 @@ DECLARE_IGC_REGKEY(bool, AvoidUsingR0R1, false, "Do not use r0 an
383383
DECLARE_IGC_REGKEY(bool, EnableGTLocationDebugging, true, "Setting this to 1 (true) enables GT location expression emission for GPU debugger", true)
384384
DECLARE_IGC_REGKEY(bool, UseOffsetInLocation, true, "Setting this to 1 (true) preserves private base and per thread offset and removes preservation of any other debug variables", true)
385385
DECLARE_IGC_REGKEY(bool, EnableRelocations, false, "Setting this to 1 (true) makes IGC emit relocatable ELF with debug info", true)
386-
DECLARE_IGC_REGKEY(bool, EnableWriteOldFPToStack, true, "Setting this to 1 (true) writes the caller frame's frame-pointer to the start of callee's frame on stack, to support stack walk", false)
387386
DECLARE_IGC_REGKEY(bool, ZeBinCompatibleDebugging, true, "Setting this to 1 (true) enables embed debug info in zeBinary", true)
388387
DECLARE_IGC_REGKEY(bool, DebugInfoEnforceAmd64EM, false, "Enforces elf file with the debug infomation to have eMachine set to AMD64", false)
389388
DECLARE_IGC_REGKEY(bool, DebugInfoValidation, false, "Enable optional (strict) checks to detect debug information inconsistencies", false)

0 commit comments

Comments
 (0)