@@ -2738,37 +2738,46 @@ namespace IGC
2738
2738
V (vKernel->AppendVISACFRetInst (predOpnd, vISA_EMASK_M1, EXEC_SIZE_1));
2739
2739
}
2740
2740
2741
- void CEncoder::SetFloatDenormMode (VISAKernel* vKernel, Float_DenormMode mode16,
2742
- Float_DenormMode mode32, Float_DenormMode mode64 )
2741
+ // Init Control register for denorm modes, rounding modes, etc.
2742
+ void CEncoder::initCR (VISAKernel* vKernel )
2743
2743
{
2744
- VISA_VectorOpnd* src0_Opnd = nullptr ;
2745
- VISA_VectorOpnd* src1_Opnd = nullptr ;
2746
- VISA_VectorOpnd* dst_Opnd = nullptr ;
2747
- VISA_GenVar* cr0_var = nullptr ;
2748
- uint imm_data = 0 ;
2749
- if (mode16 == FLOAT_DENORM_RETAIN)
2750
- imm_data |= 0x400 ;
2751
- if (mode32 == FLOAT_DENORM_RETAIN)
2752
- imm_data |= 0x80 ;
2753
- if (mode64 == FLOAT_DENORM_RETAIN)
2754
- imm_data |= 0x40 ;
2755
- // If we are in the default mode no need to set the CR
2744
+ // Those bits must be zero'ed on entry to kernel/shader.
2745
+ // (If not, this function needs to be changed accordingly.)
2746
+ VISA_VectorOpnd* src0_Opnd = nullptr ;
2747
+ VISA_VectorOpnd* src1_Opnd = nullptr ;
2748
+ VISA_VectorOpnd* dst_Opnd = nullptr ;
2749
+ VISA_GenVar* cr0_var = nullptr ;
2750
+ uint imm_data = 0 ;
2751
+
2752
+ CodeGenContext* pCtx = m_program->GetContext ();
2753
+ if (pCtx->m_floatDenormMode16 == FLOAT_DENORM_RETAIN)
2754
+ imm_data |= 0x400 ;
2755
+ if (pCtx->m_floatDenormMode32 == FLOAT_DENORM_RETAIN)
2756
+ imm_data |= 0x80 ;
2757
+ if (pCtx->m_floatDenormMode64 == FLOAT_DENORM_RETAIN)
2758
+ imm_data |= 0x40 ;
2759
+
2760
+ uint RM_bits = getEncoderRoundingMode (
2761
+ static_cast <Float_RoundingMode>(pCtx->getModuleMetaData ()->compOpt .FloatRoundingMode ));
2762
+ imm_data |= RM_bits;
2763
+
2764
+ // If we are in the default mode no need to set the CR
2756
2765
if (imm_data != 0 )
2757
- {
2758
- V (vKernel->GetPredefinedVar (cr0_var, PREDEFINED_CR0));
2759
- V (vKernel->CreateVISASrcOperand (src0_Opnd, cr0_var, MODIFIER_NONE, 0 , 1 , 0 , 0 , 0 ));
2760
- V (vKernel->CreateVISAImmediate (src1_Opnd, &imm_data, ISA_TYPE_UD));
2761
- V (vKernel->CreateVISADstOperand (dst_Opnd, cr0_var, 1 , 0 , 0 ));
2762
- V (vKernel->AppendVISAArithmeticInst (
2763
- ISA_OR,
2764
- nullptr ,
2765
- false ,
2766
- vISA_EMASK_M1_NM,
2767
- EXEC_SIZE_1,
2768
- dst_Opnd,
2769
- src0_Opnd,
2770
- src1_Opnd));
2771
- }
2766
+ {
2767
+ V (vKernel->GetPredefinedVar (cr0_var, PREDEFINED_CR0));
2768
+ V (vKernel->CreateVISASrcOperand (src0_Opnd, cr0_var, MODIFIER_NONE, 0 , 1 , 0 , 0 , 0 ));
2769
+ V (vKernel->CreateVISAImmediate (src1_Opnd, &imm_data, ISA_TYPE_UD));
2770
+ V (vKernel->CreateVISADstOperand (dst_Opnd, cr0_var, 1 , 0 , 0 ));
2771
+ V (vKernel->AppendVISAArithmeticInst (
2772
+ ISA_OR,
2773
+ nullptr ,
2774
+ false ,
2775
+ vISA_EMASK_M1_NM,
2776
+ EXEC_SIZE_1,
2777
+ dst_Opnd,
2778
+ src0_Opnd,
2779
+ src1_Opnd));
2780
+ }
2772
2781
}
2773
2782
2774
2783
void CEncoder::SetVectorMask (bool VMask)
@@ -4192,126 +4201,119 @@ namespace IGC
4192
4201
4193
4202
void CEncoder::InitEncoder (bool canAbortOnSpill, bool hasStackCall)
4194
4203
{
4195
- m_aliasesMap.clear ();
4196
- m_encoderState.m_SubSpanDestination = false ;
4197
- CodeGenContext* context = m_program->GetContext ();
4198
- m_encoderState.m_secondHalf = false ;
4199
- m_enableVISAdump = false ;
4200
- labelMap.clear ();
4201
- labelMap.resize (m_program->entry ->size (), nullptr );
4202
- labelCounter = 0 ;
4203
- m_hasInlineAsm = context->m_DriverInfo .SupportInlineAssembly () && context->m_instrTypes .hasInlineAsm ;
4204
-
4205
- vbuilder = nullptr ;
4206
- vAsmTextBuilder = nullptr ;
4207
- TARGET_PLATFORM VISAPlatform = GetVISAPlatform (&(context->platform ));
4204
+ m_aliasesMap.clear ();
4205
+ m_encoderState.m_SubSpanDestination = false ;
4206
+ CodeGenContext* context = m_program->GetContext ();
4207
+ m_encoderState.m_secondHalf = false ;
4208
+ m_enableVISAdump = false ;
4209
+ labelMap.clear ();
4210
+ labelMap.resize (m_program->entry ->size (), nullptr );
4211
+ labelCounter = 0 ;
4212
+ m_hasInlineAsm = context->m_DriverInfo .SupportInlineAssembly () && context->m_instrTypes .hasInlineAsm ;
4213
+
4214
+ vbuilder = nullptr ;
4215
+ vAsmTextBuilder = nullptr ;
4216
+ TARGET_PLATFORM VISAPlatform = GetVISAPlatform (&(context->platform ));
4208
4217
4209
- SetVISAWaTable (m_program->m_Platform ->getWATable ());
4218
+ SetVISAWaTable (m_program->m_Platform ->getWATable ());
4210
4219
4211
- llvm::SmallVector<const char *, 10 > params;
4212
- if (!m_hasInlineAsm)
4213
- {
4214
- // Asm text writer mode doesnt need dump params
4215
- InitBuildParams (params);
4216
- }
4220
+ llvm::SmallVector<const char *, 10 > params;
4221
+ if (!m_hasInlineAsm)
4222
+ {
4223
+ // Asm text writer mode doesnt need dump params
4224
+ InitBuildParams (params);
4225
+ }
4217
4226
4218
- COMPILER_TIME_START (m_program->GetContext (), TIME_CG_vISACompile);
4219
- bool enableVISADump = IGC_IS_FLAG_ENABLED (EnableVISASlowpath) || IGC_IS_FLAG_ENABLED (ShaderDumpEnable);
4220
- auto builderMode = m_hasInlineAsm ? vISA_ASM_WRITER : vISA_3D;
4221
- auto builderOpt = (enableVISADump || m_hasInlineAsm) ? CM_CISA_BUILDER_BOTH : CM_CISA_BUILDER_GEN;
4222
- V (CreateVISABuilder (vbuilder, builderMode, builderOpt, VISAPlatform, params.size (), params.data (), &m_WaTable));
4227
+ COMPILER_TIME_START (m_program->GetContext (), TIME_CG_vISACompile);
4228
+ bool enableVISADump = IGC_IS_FLAG_ENABLED (EnableVISASlowpath) || IGC_IS_FLAG_ENABLED (ShaderDumpEnable);
4229
+ auto builderMode = m_hasInlineAsm ? vISA_ASM_WRITER : vISA_3D;
4230
+ auto builderOpt = (enableVISADump || m_hasInlineAsm) ? CM_CISA_BUILDER_BOTH : CM_CISA_BUILDER_GEN;
4231
+ V (CreateVISABuilder (vbuilder, builderMode, builderOpt, VISAPlatform, params.size (), params.data (), &m_WaTable));
4223
4232
4224
- InitVISABuilderOptions (VISAPlatform, canAbortOnSpill, hasStackCall);
4233
+ InitVISABuilderOptions (VISAPlatform, canAbortOnSpill, hasStackCall);
4225
4234
4226
- // Pass all build options to builder
4227
- SetBuilderOptions (vbuilder);
4235
+ // Pass all build options to builder
4236
+ SetBuilderOptions (vbuilder);
4228
4237
4229
- vKernel = nullptr ;
4238
+ vKernel = nullptr ;
4230
4239
4231
- std::string kernelName = m_program->entry ->getName ();
4232
- if (context->m_instrTypes .hasDebugInfo )
4233
- {
4234
- // This metadata node is added by TransformBlocks pass for device side
4235
- // enqueue feature of OCL2.0+.
4236
- // The problem is that for device side enqueue, kernel name used in
4237
- // IGC differs the one used to create JIT kernel. This leads to different
4238
- // kernel names in .elf file and .dbg file. So dbgmerge tool cannot
4239
- // merge the two together. With this metadata node we create a mapping
4240
- // between the two names and when debug info is enabled, make JIT use
4241
- // same name as IGC.
4242
- // Names earlier -
4243
- // ParentKernel_dispatch_0 in dbg and
4244
- // __ParentKernel_block_invoke in elf
4245
- // when kernel name is ParentKernel
4246
- //
4247
- auto md = m_program->entry ->getParent ()->getNamedMetadata (" igc.device.enqueue" );
4248
- if (md)
4240
+ std::string kernelName = m_program->entry ->getName ();
4241
+ if (context->m_instrTypes .hasDebugInfo )
4249
4242
{
4250
- for (unsigned int i = 0 ; i < md->getNumOperands (); i++)
4243
+ // This metadata node is added by TransformBlocks pass for device side
4244
+ // enqueue feature of OCL2.0+.
4245
+ // The problem is that for device side enqueue, kernel name used in
4246
+ // IGC differs the one used to create JIT kernel. This leads to different
4247
+ // kernel names in .elf file and .dbg file. So dbgmerge tool cannot
4248
+ // merge the two together. With this metadata node we create a mapping
4249
+ // between the two names and when debug info is enabled, make JIT use
4250
+ // same name as IGC.
4251
+ // Names earlier -
4252
+ // ParentKernel_dispatch_0 in dbg and
4253
+ // __ParentKernel_block_invoke in elf
4254
+ // when kernel name is ParentKernel
4255
+ //
4256
+ auto md = m_program->entry ->getParent ()->getNamedMetadata (" igc.device.enqueue" );
4257
+ if (md)
4251
4258
{
4252
- auto mdOpnd = md->getOperand (i);
4253
- auto first = dyn_cast_or_null<MDString>(mdOpnd->getOperand (1 ));
4254
- if (first &&
4255
- first->getString ().equals (kernelName))
4259
+ for (unsigned int i = 0 ; i < md->getNumOperands (); i++)
4256
4260
{
4257
- auto second = dyn_cast_or_null<MDString>(mdOpnd->getOperand (0 ));
4258
- if (second)
4261
+ auto mdOpnd = md->getOperand (i);
4262
+ auto first = dyn_cast_or_null<MDString>(mdOpnd->getOperand (1 ));
4263
+ if (first &&
4264
+ first->getString ().equals (kernelName))
4259
4265
{
4260
- kernelName = second->getString ();
4266
+ auto second = dyn_cast_or_null<MDString>(mdOpnd->getOperand (0 ));
4267
+ if (second)
4268
+ {
4269
+ kernelName = second->getString ();
4270
+ }
4261
4271
}
4262
4272
}
4263
4273
}
4264
4274
}
4265
- }
4266
4275
4267
- std::string asmName;
4268
- if (m_enableVISAdump || context->m_instrTypes .hasDebugInfo )
4269
- {
4270
- // vISA does not support string of length >= 255. Truncate if this exceeds
4271
- // the limit. Note that vISA may append an extension, so relax it to a
4272
- // random number 240 here.
4273
- const int MAX_VISA_STRING_LENGTH = 240 ;
4274
- if (kernelName.size () >= MAX_VISA_STRING_LENGTH)
4276
+ std::string asmName;
4277
+ if (m_enableVISAdump || context->m_instrTypes .hasDebugInfo )
4275
4278
{
4276
- kernelName.resize (MAX_VISA_STRING_LENGTH);
4279
+ // vISA does not support string of length >= 255. Truncate if this exceeds
4280
+ // the limit. Note that vISA may append an extension, so relax it to a
4281
+ // random number 240 here.
4282
+ const int MAX_VISA_STRING_LENGTH = 240 ;
4283
+ if (kernelName.size () >= MAX_VISA_STRING_LENGTH)
4284
+ {
4285
+ kernelName.resize (MAX_VISA_STRING_LENGTH);
4286
+ }
4287
+ asmName = GetDumpFileName (" asm" );
4288
+ }
4289
+ else
4290
+ {
4291
+ kernelName = " kernel" ;
4292
+ asmName = " kernel.asm" ;
4277
4293
}
4278
- asmName = GetDumpFileName (" asm" );
4279
- }
4280
- else
4281
- {
4282
- kernelName = " kernel" ;
4283
- asmName = " kernel.asm" ;
4284
- }
4285
4294
4286
- V (vbuilder->AddKernel (vKernel, kernelName.c_str ()));
4287
- V (vKernel->AddKernelAttribute (" OutputAsmPath" , asmName.length (), asmName.c_str ()));
4295
+ V (vbuilder->AddKernel (vKernel, kernelName.c_str ()));
4296
+ V (vKernel->AddKernelAttribute (" OutputAsmPath" , asmName.length (), asmName.c_str ()));
4288
4297
4289
- vMainKernel = vKernel;
4298
+ vMainKernel = vKernel;
4290
4299
4291
- auto gtpin_init = context->gtpin_init ;
4292
- if (gtpin_init)
4293
- {
4294
- vKernel->SetGTPinInit (gtpin_init);
4295
- }
4300
+ auto gtpin_init = context->gtpin_init ;
4301
+ if (gtpin_init)
4302
+ {
4303
+ vKernel->SetGTPinInit (gtpin_init);
4304
+ }
4296
4305
4297
- // Right now only 1 main function in the kernel
4306
+ // Right now only 1 main function in the kernel
4298
4307
VISA_LabelOpnd* functionLabel = nullptr ;
4299
- V (vKernel->CreateVISALabelVar (functionLabel, " main" , LABEL_SUBROUTINE));
4300
- V (vKernel->AppendVISACFLabelInst (functionLabel));
4301
-
4302
- V (vKernel->CreateVISASurfaceVar (dummySurface, " " , 1 ));
4308
+ V (vKernel->CreateVISALabelVar (functionLabel, " main" , LABEL_SUBROUTINE));
4309
+ V (vKernel->AppendVISACFLabelInst (functionLabel));
4303
4310
4304
- V (vKernel->CreateVISASamplerVar (samplervar , " " , 1 ));
4311
+ V (vKernel->CreateVISASurfaceVar (dummySurface , " " , 1 ));
4305
4312
4306
- CEncoder::SetFloatDenormMode (vKernel, context->m_floatDenormMode16 ,
4307
- context->m_floatDenormMode32 ,
4308
- context->m_floatDenormMode64 );
4313
+ V (vKernel->CreateVISASamplerVar (samplervar, " " , 1 ));
4309
4314
4310
- // The instruction is generated only if mode != FLOAT_ROUND_TO_NEAREST_EVEN
4311
- CEncoder::SetFloatRoundingMode (
4312
- getEncoderRoundingMode (FLOAT_ROUND_TO_NEAREST_EVEN),
4313
- getEncoderRoundingMode (static_cast <Float_RoundingMode>(
4314
- context->getModuleMetaData ()->compOpt .FloatRoundingMode )));
4315
+ // Set float denorm modes and rounding modes as default
4316
+ initCR (vKernel);
4315
4317
}
4316
4318
4317
4319
void CEncoder::SetKernelStackPointer64 ()
0 commit comments