Skip to content

Commit 77fb673

Browse files
jgu222igcbot
authored and committed
GEP canon is off only for OCL
Revert "turning GEP canon off" for non-OCL shaders for now, as turning off GEP canon causes a perf regression on non-OCL shaders. This is a partial revert: GEP canon remains off for OCL.
1 parent c8f734d commit 77fb673

File tree

2 files changed

+22
-6
lines changed

2 files changed

+22
-6
lines changed

IGC/Compiler/CISACodeGen/MemOpt.cpp

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,22 @@ namespace {
321321
return true;
322322
}
323323

324+
bool EnableCanonicalizeGEP() const { // Returns true when GEP canonicalization should be applied, as selected by the MemOptGEPCanon flag.
325+
switch (IGC_GET_FLAG_VALUE(MemOptGEPCanon)) { // Flag values per its description: 0 = enable, 1 = disable, 2 = disable only for OCL.
326+
case 1: // Disabled unconditionally.
327+
return false;
328+
case 2: // Disabled only for OpenCL shaders; every other shader type falls through to `return true`.
329+
{
330+
if (CGC && CGC->type == ShaderType::OPENCL_SHADER) // A null CGC is treated as "not OpenCL" (NOTE(review): CGC is presumably the codegen context - confirm).
331+
return false;
332+
break;
333+
}
334+
default: // 0 and any unrecognized value leave canonicalization enabled.
335+
break;
336+
}
337+
return true; // Enabled: flag is 0/unknown, or flag is 2 and the shader is not OpenCL.
338+
}
339+
324340
/// Canonicalize the calculation of 64-bit pointer by performing the
325341
/// following transformations to help SCEV to identify the constant offset
326342
/// between pointers.
@@ -526,7 +542,7 @@ bool MemOpt::runOnFunction(Function& F) {
526542
if (MemRefs.size() < 2)
527543
continue;
528544

529-
if (IGC_IS_FLAG_ENABLED(EnableMemOptGEPCanon)) {
545+
if (EnableCanonicalizeGEP()) {
530546
// Canonicalize 64-bit GEP to help SCEV find constant offset by
531547
// distributing `zext`/`sext` over safe expressions.
532548
for (auto& M : MemRefs)
@@ -553,7 +569,7 @@ bool MemOpt::runOnFunction(Function& F) {
553569
}
554570
}
555571

556-
if (IGC_IS_FLAG_ENABLED(EnableMemOptGEPCanon)) {
572+
if (EnableCanonicalizeGEP()) {
557573
// Optimize 64-bit GEP to reduce strength by factoring out `zext`/`sext`
558574
// over safe expressions.
559575
for (auto I : MemRefsToOptimize)
@@ -1137,7 +1153,7 @@ bool MemOpt::mergeLoad(LoadInst* LeadingLoad,
11371153
return false;
11381154
const SCEV* LeadingLastIdx = nullptr; // set on-demand
11391155
bool DoCmpOnLastIdx = false;
1140-
if (IGC_IS_FLAG_DISABLED(EnableMemOptGEPCanon)) {
1156+
if (!EnableCanonicalizeGEP()) {
11411157
auto aGEP = dyn_cast<GetElementPtrInst>(LeadingLoad->getPointerOperand());
11421158
if (aGEP && aGEP->hasIndices()) {
11431159
// index starts from 1
@@ -1305,8 +1321,8 @@ bool MemOpt::mergeLoad(LoadInst* LeadingLoad,
13051321
MaxElts = profitVec[k++];
13061322
}
13071323

1308-
if (IGC_IS_FLAG_ENABLED(EnableMemOptGEPCanon)) {
1309-
// Guard under the key to distinguish new code (EnableMemOptGEPCanon=0) from the old.
1324+
if (EnableCanonicalizeGEP()) {
1325+
// Guard under the key to distinguish new code (GEPCanon is off) from the old.
13101326
// Note: not sure about the reason for the following check.
13111327
if (NumElts == 3 && (LeadingLoadScalarType->isIntegerTy(16) || LeadingLoadScalarType->isHalfTy())) {
13121328
return false;

IGC/common/igc_flags.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@ DECLARE_IGC_REGKEY(bool, EnableHSSinglePatchDispatch, false, "Setting this to
436436
DECLARE_IGC_REGKEY(bool, DisableGPGPUIndirectPayload, false, "Disable OCL indirect GPGPU payload", false)
437437
DECLARE_IGC_REGKEY(bool, DisableDSDualPatch, false, "Setting it to true with enable Single and Dual Patch dispatch mode for Domain Shader", false)
438438
DECLARE_IGC_REGKEY(bool, DisableMemOpt, false, "Disable MemOpt, merging load/store", true)
439-
DECLARE_IGC_REGKEY(bool, EnableMemOptGEPCanon, false, "[test] Enable GEP canonicalization in MemOpt", true)
439+
DECLARE_IGC_REGKEY(DWORD, MemOptGEPCanon, 2, "[test] GEP canonicalization in MemOpt. 0 : enable; 1: disable; 2: disable only for OCL;", true)
440440
DECLARE_IGC_REGKEY(bool, DisableMemOpt2, false, "Disable MemOpt2", false)
441441
DECLARE_IGC_REGKEY(DWORD, EnableLdStCombine, 1, "Enable load/store combine pass if set to 1 or 2 (intend to replace memopt)", true)
442442
DECLARE_IGC_REGKEY(DWORD, MaxStoreVectorSizeInBytes, 0, "[LdStCombine] the max non-uniform vector size for the coalesced store. 0: compiler choice (default, 16(4DW)); others: 4/8/16/32", true)

0 commit comments

Comments
 (0)