Skip to content

Commit 62d5afa

Browse files
jgu222Zuul
authored and committed
Changes in code.
Change-Id: I68a598a82f10474a26a56597ce9388dfdcdb7f61
1 parent 0fa6294 commit 62d5afa

File tree

4 files changed

+54
-3
lines changed

4 files changed

+54
-3
lines changed

IGC/Compiler/CISACodeGen/DriverInfo.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,9 @@ namespace IGC
252252
/// Allow aggressive vector value aliasing
253253
virtual bool EnableVecAliasing() const { return false; }
254254

255+
/// Prevent MAD optimization if the result is used by floor (directly or through fabs)
256+
virtual bool PreventMadforRound() const { return false; }
257+
255258

256259

257260
};

IGC/Compiler/CISACodeGen/PatternMatchPass.cpp

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1797,7 +1797,7 @@ namespace IGC
17971797
return I.getType()->isFloatingPointTy();
17981798
};
17991799

1800-
if (isFpMad(I) && (m_ctx->getModuleMetaData()->isPrecise || m_ctx->getModuleMetaData()->compOpt.disableMathRefactoring))
1800+
if (isFpMad(I) && m_ctx->getModuleMetaData()->isPrecise)
18011801
{
18021802
return false;
18031803
}
@@ -1819,6 +1819,12 @@ namespace IGC
18191819
return false;
18201820
}
18211821

1822+
bool preventMadForRound = false;
1823+
if (IGC_IS_FLAG_ENABLED(EnableMadRoundDepCheck) || m_ctx->m_DriverInfo.PreventMadforRound())
1824+
{
1825+
preventMadForRound = true;
1826+
}
1827+
18221828
bool found = false;
18231829
llvm::Value* sources[3];
18241830
e_modifier src_mod[3];
@@ -1900,6 +1906,47 @@ namespace IGC
19001906
return false;
19011907
}
19021908

1909+
if (found && preventMadForRound)
1910+
{
1911+
/*=============================================================================
1912+
This checks for the following patterns:
1913+
1914+
mul x a b
1915+
add y x c
1916+
floor z y
1917+
1918+
----or---
1919+
1920+
mul x a b
1921+
add y x c
1922+
floor z abs(y)
1923+
1924+
If the instruction matches either of the two patterns, the MAD optimization is skipped because
1925+
a small precision difference between mul+add and mad can be amplified by floor
1926+
(currently only enabled for the floor operation; add ceiling in the future if required)
1927+
=============================================================================*/
1928+
1929+
for (auto iter = I.user_begin(); iter != I.user_end(); iter++)
1930+
{
1931+
if (IntrinsicInst * source = dyn_cast<IntrinsicInst>(*iter))
1932+
{
1933+
if (source->getIntrinsicID() == Intrinsic::fabs)
1934+
{
1935+
for (auto it = source->user_begin(); it != source->user_end(); it++)
1936+
{
1937+
IntrinsicInst* source_fabs = dyn_cast<IntrinsicInst>(*it);
1938+
if (source_fabs && source_fabs->getIntrinsicID() == Intrinsic::floor)
1939+
return false;
1940+
}
1941+
}
1942+
if (source->getIntrinsicID() == Intrinsic::floor)
1943+
{
1944+
return false;
1945+
}
1946+
}
1947+
}
1948+
}
1949+
19031950
if (found)
19041951
{
19051952
MadPattern* pattern = new (m_allocator) MadPattern();

IGC/common/MDFrameWork.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,6 @@ namespace IGC
201201
bool UniformWGS = false;
202202
bool disableVertexComponentPacking = false;
203203
bool PreferBindlessImages = false;
204-
bool disableMathRefactoring = false;
205204
};
206205

207206
struct ComputeShaderInfo

IGC/common/igc_flags.def

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,8 @@ DECLARE_IGC_REGKEY(DWORD, RetryManagerFirstStateId, 0, "For debugging pu
186186
DECLARE_IGC_REGKEY(bool, DisableSendSrcDstOverlapWA, false, "Disable Send Source/destination overlap WA which is enabled for GEN10/GEN11 and whenever Wddm2Svm is set in WATable")
187187
DECLARE_IGC_REGKEY(debugString, DisablePassToggles, 0, "Disable each IGC pass by setting the bit. HEXADECIMAL ONLY!. Ex: C0 is to disable pass 6 and pass 7.")
188188
DECLARE_IGC_REGKEY(bool, ForceStatelessForQueueT, true, "In OCL, force to use stateless memory to hold queue_t*. This is a legacy feature to be removed.")
189+
DECLARE_IGC_REGKEY(bool, EnableMadRoundDepCheck, false, "Enable Floor Dependency on Mad Check, Check was added since the floor was extrapolating the mad precision difference.")
190+
189191

190192
DECLARE_IGC_GROUP("Shader dumping")
191193
DECLARE_IGC_REGKEY(bool, EnableCosDump, false, "Enable cos dump")
@@ -290,7 +292,7 @@ DECLARE_IGC_REGKEY(DWORD, ConstantPromotionCmpSelSize, 4, "Array size threshold
290292
DECLARE_IGC_REGKEY(bool, EnableVariableReuse, true, "Enable local variable reuse")
291293
DECLARE_IGC_REGKEY(bool, EnableVariableAlias, true, "Enable variable aliases (part of VariableReuse Pass, but separate functionality)")
292294
DECLARE_IGC_REGKEY(DWORD, EnableVATemp, 0, "[temp]Enable variable aliases sub-optimization, once it is stable, remove this key")
293-
DECLARE_IGC_REGKEY(DWORD, VATemp, 1, "[temp]New code to replace code under EnableVATemp. Once stable, remove this.")
295+
DECLARE_IGC_REGKEY(DWORD, VATemp, 0, "[temp]New code to replace code under EnableVATemp. Once stable, remove this.")
294296
DECLARE_IGC_REGKEY(bool, EnableExtractMask, false, "When enabled, it is mostly for reducing response size of send messages.")
295297
DECLARE_IGC_REGKEY(DWORD, VariableReuseByteSize, 64, "The byte size threshold for variable reuse")
296298
DECLARE_IGC_REGKEY(bool, EnableGather4cpoWA, true, "Enable WA transforming gather4cpo/gather4po into gather4c/gather4")

0 commit comments

Comments
 (0)