Skip to content

Commit e3cf0a2

Browse files
jgu222sys_zuul
authored and
sys_zuul
committed
No need to set CR for denorm and rounding mode separately.
This change combines the denorm modes and the rounding mode and sets them in the control register just once. No functional change expected. Change-Id: I877187f9c004172c491b12c47a447d187d61dcfe
1 parent 64ebb06 commit e3cf0a2

File tree

2 files changed

+126
-125
lines changed

2 files changed

+126
-125
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 125 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -2738,37 +2738,46 @@ namespace IGC
27382738
V(vKernel->AppendVISACFRetInst(predOpnd, vISA_EMASK_M1, EXEC_SIZE_1));
27392739
}
27402740

2741-
void CEncoder::SetFloatDenormMode(VISAKernel* vKernel, Float_DenormMode mode16,
2742-
Float_DenormMode mode32, Float_DenormMode mode64)
2741+
// Init Control register for denorm modes, rounding modes, etc.
2742+
void CEncoder::initCR(VISAKernel* vKernel)
27432743
{
2744-
VISA_VectorOpnd* src0_Opnd = nullptr;
2745-
VISA_VectorOpnd* src1_Opnd = nullptr;
2746-
VISA_VectorOpnd* dst_Opnd = nullptr;
2747-
VISA_GenVar* cr0_var = nullptr;
2748-
uint imm_data = 0;
2749-
if (mode16 == FLOAT_DENORM_RETAIN)
2750-
imm_data |= 0x400;
2751-
if (mode32 == FLOAT_DENORM_RETAIN)
2752-
imm_data |= 0x80;
2753-
if (mode64 == FLOAT_DENORM_RETAIN)
2754-
imm_data |= 0x40;
2755-
// If we are in the default mode no need to set the CR
2744+
// Those bits must be zero'ed on entry to kernel/shader.
2745+
// (If not, this function needs to be changed accordingly.)
2746+
VISA_VectorOpnd* src0_Opnd = nullptr;
2747+
VISA_VectorOpnd* src1_Opnd = nullptr;
2748+
VISA_VectorOpnd* dst_Opnd = nullptr;
2749+
VISA_GenVar* cr0_var = nullptr;
2750+
uint imm_data = 0;
2751+
2752+
CodeGenContext* pCtx = m_program->GetContext();
2753+
if (pCtx->m_floatDenormMode16 == FLOAT_DENORM_RETAIN)
2754+
imm_data |= 0x400;
2755+
if (pCtx->m_floatDenormMode32 == FLOAT_DENORM_RETAIN)
2756+
imm_data |= 0x80;
2757+
if (pCtx->m_floatDenormMode64 == FLOAT_DENORM_RETAIN)
2758+
imm_data |= 0x40;
2759+
2760+
uint RM_bits = getEncoderRoundingMode(
2761+
static_cast<Float_RoundingMode>(pCtx->getModuleMetaData()->compOpt.FloatRoundingMode));
2762+
imm_data |= RM_bits;
2763+
2764+
// If we are in the default mode no need to set the CR
27562765
if (imm_data != 0)
2757-
{
2758-
V(vKernel->GetPredefinedVar(cr0_var, PREDEFINED_CR0));
2759-
V(vKernel->CreateVISASrcOperand(src0_Opnd, cr0_var, MODIFIER_NONE, 0, 1, 0, 0, 0));
2760-
V(vKernel->CreateVISAImmediate(src1_Opnd, &imm_data, ISA_TYPE_UD));
2761-
V(vKernel->CreateVISADstOperand(dst_Opnd, cr0_var, 1, 0, 0));
2762-
V(vKernel->AppendVISAArithmeticInst(
2763-
ISA_OR,
2764-
nullptr,
2765-
false,
2766-
vISA_EMASK_M1_NM,
2767-
EXEC_SIZE_1,
2768-
dst_Opnd,
2769-
src0_Opnd,
2770-
src1_Opnd));
2771-
}
2766+
{
2767+
V(vKernel->GetPredefinedVar(cr0_var, PREDEFINED_CR0));
2768+
V(vKernel->CreateVISASrcOperand(src0_Opnd, cr0_var, MODIFIER_NONE, 0, 1, 0, 0, 0));
2769+
V(vKernel->CreateVISAImmediate(src1_Opnd, &imm_data, ISA_TYPE_UD));
2770+
V(vKernel->CreateVISADstOperand(dst_Opnd, cr0_var, 1, 0, 0));
2771+
V(vKernel->AppendVISAArithmeticInst(
2772+
ISA_OR,
2773+
nullptr,
2774+
false,
2775+
vISA_EMASK_M1_NM,
2776+
EXEC_SIZE_1,
2777+
dst_Opnd,
2778+
src0_Opnd,
2779+
src1_Opnd));
2780+
}
27722781
}
27732782

27742783
void CEncoder::SetVectorMask(bool VMask)
@@ -4192,126 +4201,119 @@ namespace IGC
41924201

41934202
void CEncoder::InitEncoder(bool canAbortOnSpill, bool hasStackCall)
41944203
{
4195-
m_aliasesMap.clear();
4196-
m_encoderState.m_SubSpanDestination = false;
4197-
CodeGenContext* context = m_program->GetContext();
4198-
m_encoderState.m_secondHalf = false;
4199-
m_enableVISAdump = false;
4200-
labelMap.clear();
4201-
labelMap.resize(m_program->entry->size(), nullptr);
4202-
labelCounter = 0;
4203-
m_hasInlineAsm = context->m_DriverInfo.SupportInlineAssembly() && context->m_instrTypes.hasInlineAsm;
4204-
4205-
vbuilder = nullptr;
4206-
vAsmTextBuilder = nullptr;
4207-
TARGET_PLATFORM VISAPlatform = GetVISAPlatform(&(context->platform));
4204+
m_aliasesMap.clear();
4205+
m_encoderState.m_SubSpanDestination = false;
4206+
CodeGenContext* context = m_program->GetContext();
4207+
m_encoderState.m_secondHalf = false;
4208+
m_enableVISAdump = false;
4209+
labelMap.clear();
4210+
labelMap.resize(m_program->entry->size(), nullptr);
4211+
labelCounter = 0;
4212+
m_hasInlineAsm = context->m_DriverInfo.SupportInlineAssembly() && context->m_instrTypes.hasInlineAsm;
4213+
4214+
vbuilder = nullptr;
4215+
vAsmTextBuilder = nullptr;
4216+
TARGET_PLATFORM VISAPlatform = GetVISAPlatform(&(context->platform));
42084217

4209-
SetVISAWaTable(m_program->m_Platform->getWATable());
4218+
SetVISAWaTable(m_program->m_Platform->getWATable());
42104219

4211-
llvm::SmallVector<const char*, 10> params;
4212-
if (!m_hasInlineAsm)
4213-
{
4214-
// Asm text writer mode doesnt need dump params
4215-
InitBuildParams(params);
4216-
}
4220+
llvm::SmallVector<const char*, 10> params;
4221+
if (!m_hasInlineAsm)
4222+
{
4223+
// Asm text writer mode doesnt need dump params
4224+
InitBuildParams(params);
4225+
}
42174226

4218-
COMPILER_TIME_START(m_program->GetContext(), TIME_CG_vISACompile);
4219-
bool enableVISADump = IGC_IS_FLAG_ENABLED(EnableVISASlowpath) || IGC_IS_FLAG_ENABLED(ShaderDumpEnable);
4220-
auto builderMode = m_hasInlineAsm ? vISA_ASM_WRITER : vISA_3D;
4221-
auto builderOpt = (enableVISADump || m_hasInlineAsm) ? CM_CISA_BUILDER_BOTH : CM_CISA_BUILDER_GEN;
4222-
V(CreateVISABuilder(vbuilder, builderMode, builderOpt, VISAPlatform, params.size(), params.data(), &m_WaTable));
4227+
COMPILER_TIME_START(m_program->GetContext(), TIME_CG_vISACompile);
4228+
bool enableVISADump = IGC_IS_FLAG_ENABLED(EnableVISASlowpath) || IGC_IS_FLAG_ENABLED(ShaderDumpEnable);
4229+
auto builderMode = m_hasInlineAsm ? vISA_ASM_WRITER : vISA_3D;
4230+
auto builderOpt = (enableVISADump || m_hasInlineAsm) ? CM_CISA_BUILDER_BOTH : CM_CISA_BUILDER_GEN;
4231+
V(CreateVISABuilder(vbuilder, builderMode, builderOpt, VISAPlatform, params.size(), params.data(), &m_WaTable));
42234232

4224-
InitVISABuilderOptions(VISAPlatform, canAbortOnSpill, hasStackCall);
4233+
InitVISABuilderOptions(VISAPlatform, canAbortOnSpill, hasStackCall);
42254234

4226-
// Pass all build options to builder
4227-
SetBuilderOptions(vbuilder);
4235+
// Pass all build options to builder
4236+
SetBuilderOptions(vbuilder);
42284237

4229-
vKernel = nullptr;
4238+
vKernel = nullptr;
42304239

4231-
std::string kernelName = m_program->entry->getName();
4232-
if (context->m_instrTypes.hasDebugInfo)
4233-
{
4234-
// This metadata node is added by TransformBlocks pass for device side
4235-
// enqueue feature of OCL2.0+.
4236-
// The problem is that for device side enqueue, kernel name used in
4237-
// IGC differs the one used to create JIT kernel. This leads to different
4238-
// kernel names in .elf file and .dbg file. So dbgmerge tool cannot
4239-
// merge the two together. With this metadata node we create a mapping
4240-
// between the two names and when debug info is enabled, make JIT use
4241-
// same name as IGC.
4242-
// Names earlier -
4243-
// ParentKernel_dispatch_0 in dbg and
4244-
// __ParentKernel_block_invoke in elf
4245-
// when kernel name is ParentKernel
4246-
//
4247-
auto md = m_program->entry->getParent()->getNamedMetadata("igc.device.enqueue");
4248-
if (md)
4240+
std::string kernelName = m_program->entry->getName();
4241+
if (context->m_instrTypes.hasDebugInfo)
42494242
{
4250-
for (unsigned int i = 0; i < md->getNumOperands(); i++)
4243+
// This metadata node is added by TransformBlocks pass for device side
4244+
// enqueue feature of OCL2.0+.
4245+
// The problem is that for device side enqueue, kernel name used in
4246+
// IGC differs the one used to create JIT kernel. This leads to different
4247+
// kernel names in .elf file and .dbg file. So dbgmerge tool cannot
4248+
// merge the two together. With this metadata node we create a mapping
4249+
// between the two names and when debug info is enabled, make JIT use
4250+
// same name as IGC.
4251+
// Names earlier -
4252+
// ParentKernel_dispatch_0 in dbg and
4253+
// __ParentKernel_block_invoke in elf
4254+
// when kernel name is ParentKernel
4255+
//
4256+
auto md = m_program->entry->getParent()->getNamedMetadata("igc.device.enqueue");
4257+
if (md)
42514258
{
4252-
auto mdOpnd = md->getOperand(i);
4253-
auto first = dyn_cast_or_null<MDString>(mdOpnd->getOperand(1));
4254-
if (first &&
4255-
first->getString().equals(kernelName))
4259+
for (unsigned int i = 0; i < md->getNumOperands(); i++)
42564260
{
4257-
auto second = dyn_cast_or_null<MDString>(mdOpnd->getOperand(0));
4258-
if (second)
4261+
auto mdOpnd = md->getOperand(i);
4262+
auto first = dyn_cast_or_null<MDString>(mdOpnd->getOperand(1));
4263+
if (first &&
4264+
first->getString().equals(kernelName))
42594265
{
4260-
kernelName = second->getString();
4266+
auto second = dyn_cast_or_null<MDString>(mdOpnd->getOperand(0));
4267+
if (second)
4268+
{
4269+
kernelName = second->getString();
4270+
}
42614271
}
42624272
}
42634273
}
42644274
}
4265-
}
42664275

4267-
std::string asmName;
4268-
if (m_enableVISAdump || context->m_instrTypes.hasDebugInfo)
4269-
{
4270-
// vISA does not support string of length >= 255. Truncate if this exceeds
4271-
// the limit. Note that vISA may append an extension, so relax it to a
4272-
// random number 240 here.
4273-
const int MAX_VISA_STRING_LENGTH = 240;
4274-
if (kernelName.size() >= MAX_VISA_STRING_LENGTH)
4276+
std::string asmName;
4277+
if (m_enableVISAdump || context->m_instrTypes.hasDebugInfo)
42754278
{
4276-
kernelName.resize(MAX_VISA_STRING_LENGTH);
4279+
// vISA does not support string of length >= 255. Truncate if this exceeds
4280+
// the limit. Note that vISA may append an extension, so relax it to a
4281+
// random number 240 here.
4282+
const int MAX_VISA_STRING_LENGTH = 240;
4283+
if (kernelName.size() >= MAX_VISA_STRING_LENGTH)
4284+
{
4285+
kernelName.resize(MAX_VISA_STRING_LENGTH);
4286+
}
4287+
asmName = GetDumpFileName("asm");
4288+
}
4289+
else
4290+
{
4291+
kernelName = "kernel";
4292+
asmName = "kernel.asm";
42774293
}
4278-
asmName = GetDumpFileName("asm");
4279-
}
4280-
else
4281-
{
4282-
kernelName = "kernel";
4283-
asmName = "kernel.asm";
4284-
}
42854294

4286-
V(vbuilder->AddKernel(vKernel, kernelName.c_str()));
4287-
V(vKernel->AddKernelAttribute("OutputAsmPath", asmName.length(), asmName.c_str()));
4295+
V(vbuilder->AddKernel(vKernel, kernelName.c_str()));
4296+
V(vKernel->AddKernelAttribute("OutputAsmPath", asmName.length(), asmName.c_str()));
42884297

4289-
vMainKernel = vKernel;
4298+
vMainKernel = vKernel;
42904299

4291-
auto gtpin_init = context->gtpin_init;
4292-
if (gtpin_init)
4293-
{
4294-
vKernel->SetGTPinInit(gtpin_init);
4295-
}
4300+
auto gtpin_init = context->gtpin_init;
4301+
if (gtpin_init)
4302+
{
4303+
vKernel->SetGTPinInit(gtpin_init);
4304+
}
42964305

4297-
// Right now only 1 main function in the kernel
4306+
// Right now only 1 main function in the kernel
42984307
VISA_LabelOpnd* functionLabel = nullptr;
4299-
V(vKernel->CreateVISALabelVar(functionLabel, "main", LABEL_SUBROUTINE));
4300-
V(vKernel->AppendVISACFLabelInst(functionLabel));
4301-
4302-
V(vKernel->CreateVISASurfaceVar(dummySurface, "", 1));
4308+
V(vKernel->CreateVISALabelVar(functionLabel, "main", LABEL_SUBROUTINE));
4309+
V(vKernel->AppendVISACFLabelInst(functionLabel));
43034310

4304-
V(vKernel->CreateVISASamplerVar(samplervar, "", 1));
4311+
V(vKernel->CreateVISASurfaceVar(dummySurface, "", 1));
43054312

4306-
CEncoder::SetFloatDenormMode(vKernel, context->m_floatDenormMode16,
4307-
context->m_floatDenormMode32,
4308-
context->m_floatDenormMode64);
4313+
V(vKernel->CreateVISASamplerVar(samplervar, "", 1));
43094314

4310-
// The instruction is generated only if mode != FLOAT_ROUND_TO_NEAREST_EVEN
4311-
CEncoder::SetFloatRoundingMode(
4312-
getEncoderRoundingMode(FLOAT_ROUND_TO_NEAREST_EVEN),
4313-
getEncoderRoundingMode(static_cast<Float_RoundingMode>(
4314-
context->getModuleMetaData()->compOpt.FloatRoundingMode)));
4315+
// Set float denorm modes and rounding modes as default
4316+
initCR(vKernel);
43154317
}
43164318

43174319
void CEncoder::SetKernelStackPointer64()

IGC/Compiler/CISACodeGen/CISABuilder.hpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -353,8 +353,7 @@ namespace IGC
353353
void Init();
354354
void Push();
355355

356-
void SetFloatDenormMode(VISAKernel* vKernel, Float_DenormMode mode16,
357-
Float_DenormMode mode32, Float_DenormMode mode64);
356+
void initCR(VISAKernel* vKernel);
358357
void SetVectorMask(bool vMask);
359358
// RM bits in CR0.0.
360359
// float RM bits: [5:4];

0 commit comments

Comments
 (0)