Skip to content

Commit dee29d7

Browse files
jgu222igcbot
authored and committed
Add igc internal flags
Add internal flags to control ldstcombine. The commit message describes two flags: -ldstcombine=[0|1] (or with prefix -cl/-ze/-cl-intel/-ze-opt), same as igc key EnableLdStCombine; and -ldstcombine-max-storebytes=[4|8|16|32] (or with prefix -cl/-ze/-cl-intel/-ze-opt), same as igc key MaxStoreVectorSizeInBytes. The diff also defines and parses a third flag, -ldstcombine-max-loadbytes=[4|8|16|32]. Note that the -ldstcombine and -ldstcombine-max-storebytes flags override the igc keys. No functional change.
1 parent b9b2c03 commit dee29d7

File tree

6 files changed

+129
-26
lines changed

6 files changed

+129
-26
lines changed

IGC/Compiler/CISACodeGen/MemOpt.cpp

Lines changed: 50 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ SPDX-License-Identifier: MIT
3333
#include <llvm/Transforms/Utils/Local.h>
3434
#include "common/LLVMWarningsPop.hpp"
3535
#include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
36+
#include "Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp"
3637
#include "Compiler/CISACodeGen/SLMConstProp.hpp"
3738
#include "Compiler/IGCPassSupport.h"
3839
#include "Compiler/MetaDataUtilsWrapper.h"
@@ -2288,36 +2289,20 @@ namespace {
22882289
// BundleConfig:
22892290
// To tell what vector size is legit. It may need GEN platform as input.
22902291
class BundleConfig {
2292+
public:
22912293
enum {
22922294
STORE_DEFAULT_BYTES_PER_LANE = 16, // 4 DW for non-uniform
22932295
LOAD_DEFAULT_BYTES_PER_LANE = 16 // 4 DW for non-uniform
22942296
};
22952297

2296-
public:
22972298
BundleConfig(LdStKind K, int ByteAlign, bool Uniform,
22982299
const AddressModel AddrModel, CodeGenContext* Ctx)
22992300
{
23002301
uint32_t maxBytes = 0;
2301-
if (K == LdStKind::IS_STORE) {
2302-
maxBytes = IGC_GET_FLAG_VALUE(MaxStoreVectorSizeInBytes);
2303-
if (maxBytes != 0) {
2304-
// legal values: [4, 32].
2305-
maxBytes = std::min(maxBytes, 32u);
2306-
maxBytes = std::max(maxBytes, 4u);
2307-
}
2308-
else
2309-
maxBytes = STORE_DEFAULT_BYTES_PER_LANE;
2310-
}
2311-
else {
2312-
maxBytes = IGC_GET_FLAG_VALUE(MaxLoadVectorSizeInBytes);
2313-
if (maxBytes != 0) {
2314-
// legal values: [4, 32]
2315-
maxBytes = std::min(maxBytes, 32u);
2316-
maxBytes = std::max(maxBytes, 4u);
2317-
}
2318-
else
2319-
maxBytes = LOAD_DEFAULT_BYTES_PER_LANE;
2320-
}
2302+
if (K == LdStKind::IS_STORE)
2303+
maxBytes = getMaxStoreBytes(Ctx);
2304+
else
2305+
maxBytes = getMaxLoadBytes(Ctx);
23212306

23222307
auto calculateSize = [=](bool Uniform) -> uint32_t
23232308
{
@@ -2504,6 +2489,12 @@ namespace {
25042489
bool m_hasLoadCombined;
25052490
bool m_hasStoreCombined;
25062491

2492+
//
2493+
// Caching
2494+
//
2495+
// If true, IGC needs to emulate I64.
2496+
bool m_hasI64Emu;
2497+
25072498
// All insts that have been combined and can be deleted.
25082499
SmallVector<Instruction*, 16> m_combinedInsts;
25092500

@@ -2519,9 +2510,6 @@ namespace {
25192510

25202511
DenseMap<const Instruction*, int> m_visited;
25212512

2522-
// If true, IGC needs to emulate I64.
2523-
bool m_hasI64Emu;
2524-
25252513
void init(BasicBlock* BB) {
25262514
m_visited.clear();
25272515
m_instOrder.clear();
@@ -2600,6 +2588,44 @@ const BundleSize_t BundleConfig::m_d64VecSizes_u = { 2,3,4,8,16,32,64 };
26002588
const BundleSize_t BundleConfig::m_d32VecSizes_u = { 2,3,4,8,16,32,64 };
26012589
const BundleSize_t BundleConfig::m_d8VecSizes_u = { 2,4,8,16,32 };
26022590

2591+
bool IGC::doLdStCombine(const CodeGenContext* CGC) {
2592+
if (CGC->type == ShaderType::OPENCL_SHADER) {
2593+
auto oclCtx = (const OpenCLProgramContext*)CGC;
2594+
// internal flag overrides IGC key
2595+
switch (oclCtx->m_InternalOptions.LdStCombine) {
2596+
default:
2597+
break;
2598+
case 0:
2599+
return false;
2600+
case 1:
2601+
return true;
2602+
}
2603+
}
2604+
return IGC_IS_FLAG_ENABLED(EnableLdStCombine);
2605+
}
2606+
2607+
uint32_t IGC::getMaxStoreBytes(const CodeGenContext* CGC) {
2608+
if (CGC->type == ShaderType::OPENCL_SHADER) {
2609+
auto oclCtx = (const OpenCLProgramContext*)CGC;
2610+
// internal flag overrides IGC key
2611+
if (oclCtx->m_InternalOptions.MaxStoreBytes != 0)
2612+
return oclCtx->m_InternalOptions.MaxStoreBytes;
2613+
}
2614+
uint32_t bytes = IGC_GET_FLAG_VALUE(MaxStoreVectorSizeInBytes);
2615+
// Use default if bytes from the key is not set or invalid
2616+
if (!(bytes >= 4 && bytes <= 32 && isPowerOf2_32(bytes)))
2617+
bytes = BundleConfig::STORE_DEFAULT_BYTES_PER_LANE;
2618+
return bytes;
2619+
}
2620+
2621+
uint32_t IGC::getMaxLoadBytes(const CodeGenContext* CGC) {
2622+
uint32_t bytes = IGC_GET_FLAG_VALUE(MaxLoadVectorSizeInBytes);
2623+
// Use default if bytes from the key is not set or invalid
2624+
if (!(bytes >=4 && bytes <= 32 && isPowerOf2_32(bytes)))
2625+
bytes = BundleConfig::LOAD_DEFAULT_BYTES_PER_LANE;
2626+
return bytes;
2627+
}
2628+
26032629
// Factory for the LdStCombine function pass: returns a newly allocated
// LdStCombine instance.
// NOTE(review): presumably the pass manager it is added to takes ownership
// of the returned pointer -- confirm.
FunctionPass* IGC::createLdStCombinePass() {
26042630
return new LdStCombine();
26052631
}

IGC/Compiler/CISACodeGen/MemOpt.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,19 @@ namespace llvm {
2222
}
2323

2424
namespace IGC {
25+
class CodeGenContext;
26+
2527
llvm::FunctionPass* createMemOptPass(bool AllowNegativeSymPtrsForLoad, bool AllowVector8LoadStore);
2628
llvm::FunctionPass* createLdStCombinePass();
2729

30+
// check both igc keys and internal flags
31+
bool doLdStCombine(const CodeGenContext* CGC);
32+
uint32_t getMaxStoreBytes(const CodeGenContext* CGC);
33+
uint32_t getMaxLoadBytes(const CodeGenContext* CGC);
34+
35+
//
2836
// Utility for struct manipulation
37+
//
2938

3039
inline const char* getStructNameForSOALayout() {
3140
return "__StructSOALayout_";

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,51 @@ namespace IGC
519519
}
520520
}
521521

522+
if (const opt::Arg* arg = internalOptions.getLastArg(OPT_ldstcombine_common))
523+
{
524+
// Valid value: 0|1
525+
llvm::StringRef valStr = arg->getValue();
526+
int val = 0;
527+
if (valStr.getAsInteger(10, val) || (val !=0 && val != 1))
528+
{
529+
IGC_ASSERT_MESSAGE(false, "-ldstcombine: invalid and ignored!");
530+
}
531+
else
532+
{
533+
LdStCombine = val;
534+
}
535+
}
536+
537+
if (const opt::Arg* arg = internalOptions.getLastArg(OPT_ldstcombine_max_storebytes_common))
538+
{
539+
// Valid value: 4|8|16|32
540+
llvm::StringRef valStr = arg->getValue();
541+
int val = 0;
542+
if (valStr.getAsInteger(10, val) || !(isPowerOf2_32(val) && val >= 4 && val <= 32))
543+
{
544+
IGC_ASSERT_MESSAGE(false, "-ldstcombine_max_storebytes: invalid and ignored!");
545+
}
546+
else
547+
{
548+
MaxStoreBytes = val;
549+
}
550+
}
551+
552+
if (const opt::Arg* arg = internalOptions.getLastArg(OPT_ldstcombine_max_loadbytes_common))
553+
{
554+
// Valid value: 4|8|16|32
555+
llvm::StringRef valStr = arg->getValue();
556+
int val = 0;
557+
if (valStr.getAsInteger(10, val) || !(isPowerOf2_32(val) && val >= 4 && val <= 32))
558+
{
559+
IGC_ASSERT_MESSAGE(false, "-ldstcombine_max_loadbytes: invalid and ignored!");
560+
}
561+
else
562+
{
563+
MaxLoadBytes = val;
564+
}
565+
}
566+
522567
if (internalOptions.hasArg(OPT_fp64_gen_emu_common))
523568
{
524569
// This option enables FP64 emulation for platforms that

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,19 @@ namespace IGC
108108

109109
bool AllowRelocAdd = true;
110110

111+
// LdStCombine
112+
// LdStCombine:
113+
// 0: disable LdStCombine
114+
// 1: enable LdStCombine
115+
// otherwise: ignored
116+
// MaxStoreBytes:
117+
// MaxLoadBytes:
118+
// 4, 8, 16, 32 : set max bytes for combining
119+
// otherwise: ignored.
120+
int LdStCombine = -1; // default
121+
uint32_t MaxStoreBytes = 0; // default
122+
uint32_t MaxLoadBytes = 0; // default
123+
111124
uint32_t IntelPrivateMemoryMinimalSizePerThread = 0;
112125
uint32_t IntelScratchSpacePrivateMemoryMinimalSizePerThread = 0;
113126

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -663,12 +663,12 @@ void AddLegalizationPasses(CodeGenContext& ctx, IGCPassManager& mpm, PSSignature
663663
mpm.add(createPrepareLoadsStoresPass());
664664
}
665665

666-
// run AdvMemOpt and MemOPt back-to-back so that we only
666+
// run AdvMemOpt and MemOPt back-to-back so that we only
667667
// need to run WIAnalysis once
668668
if (IGC_IS_FLAG_ENABLED(EnableAdvMemOpt))
669669
mpm.add(createAdvMemOptPass());
670670

671-
if (IGC_IS_FLAG_ENABLED(EnableLdStCombine) &&
671+
if (doLdStCombine(&ctx) &&
672672
ctx.type == ShaderType::OPENCL_SHADER)
673673
{
674674
// start with OCL, will apply to others.

IGC/Options/include/igc/Options/IGCInternalOptions.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,16 @@ defm enable_divergent_barrier_handling : CommonFlag<"enable-divergent-barrier-ha
154154
// -cl-intel-high-accuracy-nolut-math
155155
defm high_accuracy_nolut_math : CommonFlag<"high-accuracy-nolut-math">;
156156

157+
// -[cl-intel|ze-opt]-ldstcombine=[0|1]
158+
// -[cl-intel|ze-opt]-ldstcombine-max-storebytes=[4|8|16|32]
159+
// -[cl-intel|ze-opt]-ldstcombine-max-loadbytes=[4|8|16|32]
160+
// Each option is defined in a separate-argument form (CommonSeparate) plus a
// joined "name=value" form (CommonJoined) aliased to the separate one.
// NOTE(review): the separate-form spellings of the two max-bytes options use
// underscores ("ldstcombine_max_storebytes", "ldstcombine_max-loadbytes"),
// while the joined forms and the documented spellings use hyphens only --
// confirm which spelling is intended.
defm ldstcombine : CommonSeparate<"ldstcombine">;
161+
defm : CommonJoined<"ldstcombine=">, Alias<ldstcombine_common>;
162+
defm ldstcombine_max_storebytes : CommonSeparate<"ldstcombine_max_storebytes">;
163+
defm : CommonJoined<"ldstcombine-max-storebytes=">, Alias<ldstcombine_max_storebytes_common>;
164+
defm ldstcombine_max_loadbytes : CommonSeparate<"ldstcombine_max-loadbytes">;
165+
defm : CommonJoined<"ldstcombine-max-loadbytes=">, Alias<ldstcombine_max_loadbytes_common>;
166+
157167
// }} Backend internal options
158168

159169
// Internal options from source translation {{

0 commit comments

Comments
 (0)