Skip to content

Commit 7aae34a

Browse files
krystian-andrzejewski and igcbot
authored and committed
Allowing to combine canonicalization and saturation
Considering mix mode operations for flushing denorms to zero
1 parent 484e05a commit 7aae34a

File tree

3 files changed

+107
-30
lines changed

3 files changed

+107
-30
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8836,10 +8836,6 @@ void EmitPass::EmitIntrinsicMessage(llvm::IntrinsicInst* inst)
88368836
emitSqrt(inst);
88378837
break;
88388838

8839-
case Intrinsic::canonicalize:
8840-
emitCanonicalize(inst);
8841-
break;
8842-
88438839
default:
88448840
inst->print(IGC::Debug::ods());
88458841
IGC_ASSERT_MESSAGE(0, "unknown intrinsic");
@@ -16849,7 +16845,7 @@ void EmitPass::emitFrc(llvm::GenIntrinsicInst* inst)
1684916845
m_encoder->Frc(m_destination, src0);
1685016846
}
1685116847

16852-
void IGC::EmitPass::emitCanonicalize(llvm::Instruction* inst)
16848+
void IGC::EmitPass::emitCanonicalize(llvm::Instruction* inst, const DstModifier& modifier)
1685316849
{
1685416850
// Force to flush denormal fp value to zero. Select one of two possible solutions:
1685516851
// 1. add inputVal, -0.0
@@ -16861,10 +16857,11 @@ void IGC::EmitPass::emitCanonicalize(llvm::Instruction* inst)
1686116857
bool flushVal = pCodeGenContext->m_floatDenormMode16 == ::IGC::FLOAT_DENORM_FLUSH_TO_ZERO && inst->getType()->isHalfTy();
1686216858
flushVal = flushVal || (pCodeGenContext->m_floatDenormMode32 == ::IGC::FLOAT_DENORM_FLUSH_TO_ZERO && inst->getType()->isFloatTy());
1686316859
flushVal = flushVal || (pCodeGenContext->m_floatDenormMode64 == ::IGC::FLOAT_DENORM_FLUSH_TO_ZERO && inst->getType()->isDoubleTy());
16864-
if (flushVal)
16860+
if (flushVal || modifier.sat)
1686516861
{
1686616862
CVariable* inputVal = GetSymbol(inst->getOperand(0));
1686716863
CVariable* negativeZero = m_currShader->GetScalarConstant(llvm::ConstantFP::get(inst->getType(), -0.0));
16864+
m_encoder->SetDstModifier(modifier);
1686816865
m_encoder->Add(m_destination, inputVal, negativeZero);
1686916866
}
1687016867
}

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,7 @@ class EmitPass : public llvm::FunctionPass
416416
void emitAddPair(CVariable* Dst, CVariable* Src0, CVariable* Src1);
417417

418418
void emitSqrt(llvm::Instruction* inst);
419-
void emitCanonicalize(llvm::Instruction* inst);
419+
void emitCanonicalize(llvm::Instruction* inst, const DstModifier& modifier);
420420
void emitRsq(llvm::Instruction* inst);
421421
void emitFrc(llvm::GenIntrinsicInst* inst);
422422

IGC/Compiler/CISACodeGen/PatternMatchPass.cpp

Lines changed: 103 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -954,7 +954,7 @@ namespace IGC
954954
switch (I.getOpcode())
955955
{
956956
case Instruction::FSub:
957-
match = MatchFloor(I) ||
957+
match = MatchFloor(I) ||
958958
MatchFrc(I) ||
959959
MatchLrp(I) ||
960960
MatchPredAdd(I) ||
@@ -999,7 +999,7 @@ namespace IGC
999999
MatchModifier(I);
10001000
break;
10011001
case Instruction::FAdd:
1002-
match =
1002+
match =
10031003
MatchLrp(I) ||
10041004
MatchPredAdd(I) ||
10051005
MatchMad(I) ||
@@ -1220,6 +1220,9 @@ namespace IGC
12201220
case Intrinsic::fshr:
12211221
match = MatchFunnelShiftRotate(I);
12221222
break;
1223+
case Intrinsic::canonicalize:
1224+
match = MatchCanonicalizeInstruction(I);
1225+
break;
12231226
default:
12241227
match = MatchSingleInstruction(I);
12251228
// no pattern for the rest of the intrinsics
@@ -1626,9 +1629,9 @@ namespace IGC
16261629
}
16271630
return found;
16281631
}
1629-
1632+
16301633
/*
1631-
below pass handles x - frac(x) = floor(x) pattern. Refer below :
1634+
below pass handles x - frac(x) = floor(x) pattern. Refer below :
16321635
16331636
frc (8|M0) r20.0<1>:f r19.0<8;8,1>:f {Compacted, @1}
16341637
add (8|M0) (ge)f0.1 r19.0<1>:f r19.0<8;8,1>:f -r20.0<8;8,1>:f {@1}
@@ -3101,43 +3104,120 @@ namespace IGC
31013104
{
31023105
struct CanonicalizeInstPattern : Pattern
31033106
{
3104-
CanonicalizeInstPattern(llvm::Instruction* pInst, bool isNeeded) : m_pInst(pInst), m_IsNeeded(isNeeded) {}
3107+
CanonicalizeInstPattern(llvm::Instruction* pInst) : m_pInst(pInst) {}
31053108

31063109
llvm::Instruction* m_pInst;
3107-
bool m_IsNeeded;
3110+
Pattern* m_pPattern = nullptr;
31083111

31093112
virtual void Emit(EmitPass* pass, const DstModifier& modifier)
31103113
{
3111-
IGC_ASSERT(modifier.sat == false && modifier.flag == nullptr);
3112-
if (m_IsNeeded)
3114+
if (m_pPattern)
31133115
{
3114-
pass->emitCanonicalize(m_pInst);
3116+
m_pPattern->Emit(pass, modifier);
3117+
}
3118+
else
3119+
{
3120+
pass->emitCanonicalize(m_pInst, modifier);
31153121
}
31163122
}
31173123
};
31183124

3119-
IGC_ASSERT(I.getNumOperands() == 1);
3120-
bool isNeeded = true;
31213125

31223126
// FAdd, FSub, FMul, FDiv instructions flush subnormals to zero.
31233127
// However, mix mode and math instructions preserve subnormals.
31243128
// Other instructions also preserve subnormals.
3125-
if (llvm::BinaryOperator * pBianaryOperator = llvm::dyn_cast<llvm::BinaryOperator>(I.getOperand(0)))
3129+
// An FSat intrinsic instruction can be emitted e.g. as FAdd, so such an
3130+
// instruction should be inspected recursively.
3131+
std::function<bool(llvm::Value*)> DetermineIfMixMode;
3132+
DetermineIfMixMode = [&DetermineIfMixMode, this](llvm::Value* operand) -> bool
31263133
{
3127-
switch (pBianaryOperator->getOpcode())
3134+
bool isMixModePossible = false;
3135+
if (m_Platform.supportMixMode())
31283136
{
3129-
case llvm::BinaryOperator::BinaryOps::FAdd:
3130-
case llvm::BinaryOperator::BinaryOps::FMul:
3131-
case llvm::BinaryOperator::BinaryOps::FSub:
3132-
case llvm::BinaryOperator::BinaryOps::FDiv:
3133-
isNeeded = false;
3134-
default:
3135-
break;
3137+
if (llvm::BinaryOperator* pBianaryOperator = llvm::dyn_cast<llvm::BinaryOperator>(operand))
3138+
{
3139+
// The switch statement stops the recursion where it is not needed.
3140+
// The recursion exists because these operations may be combined into mad instructions.
3141+
switch (pBianaryOperator->getOpcode())
3142+
{
3143+
case llvm::BinaryOperator::BinaryOps::FAdd:
3144+
case llvm::BinaryOperator::BinaryOps::FMul:
3145+
case llvm::BinaryOperator::BinaryOps::FSub:
3146+
isMixModePossible = pBianaryOperator->getType()->isDoubleTy() == false &&
3147+
(DetermineIfMixMode(pBianaryOperator->getOperand(0)) || DetermineIfMixMode(pBianaryOperator->getOperand(1)));
3148+
break;
3149+
default:
3150+
break;
3151+
}
3152+
}
3153+
else if (isa<FPTruncInst>(operand))
3154+
{
3155+
FPTruncInst* fptruncInst = llvm::cast<FPTruncInst>(operand);
3156+
isMixModePossible = fptruncInst->getSrcTy()->isDoubleTy() == false;
3157+
}
3158+
else if (isa<FPExtInst>(operand))
3159+
{
3160+
FPExtInst* fpextInst = llvm::cast<FPExtInst>(operand);
3161+
isMixModePossible = fpextInst->getDestTy()->isDoubleTy() == false;
3162+
}
31363163
}
3137-
}
3164+
return isMixModePossible;
3165+
};
31383166

3139-
CanonicalizeInstPattern* pattern = new (m_allocator) CanonicalizeInstPattern(&I, isNeeded);
3140-
MarkAsSource(I.getOperand(0));
3167+
std::function<bool(llvm::Value*)> DetermineIfNeeded;
3168+
DetermineIfNeeded = [&DetermineIfNeeded, &DetermineIfMixMode](llvm::Value* operand) -> bool
3169+
{
3170+
bool isNeeded = true;
3171+
if (llvm::BinaryOperator* pBianaryOperator = llvm::dyn_cast<llvm::BinaryOperator>(operand))
3172+
{
3173+
// the switch instruction is to consider only the operations
3174+
// which support flushing denorms to zero.
3175+
switch (pBianaryOperator->getOpcode())
3176+
{
3177+
case llvm::BinaryOperator::BinaryOps::FAdd:
3178+
case llvm::BinaryOperator::BinaryOps::FMul:
3179+
case llvm::BinaryOperator::BinaryOps::FSub:
3180+
case llvm::BinaryOperator::BinaryOps::FDiv:
3181+
isNeeded = DetermineIfMixMode(pBianaryOperator);
3182+
break;
3183+
default:
3184+
break;
3185+
}
3186+
}
3187+
else if(GenIntrinsicInst* intrin = dyn_cast<GenIntrinsicInst>(operand))
3188+
{
3189+
switch (intrin->getIntrinsicID())
3190+
{
3191+
case GenISAIntrinsic::GenISA_fsat:
3192+
isNeeded = DetermineIfNeeded(intrin->getOperand(0));
3193+
break;
3194+
default:
3195+
break;
3196+
}
3197+
}
3198+
else if (IntrinsicInst* intrin = dyn_cast<IntrinsicInst>(operand))
3199+
{
3200+
switch (intrin->getIntrinsicID())
3201+
{
3202+
case Intrinsic::canonicalize:
3203+
isNeeded = DetermineIfNeeded(intrin->getOperand(0));
3204+
break;
3205+
default:
3206+
break;
3207+
}
3208+
}
3209+
return isNeeded;
3210+
};
3211+
3212+
CanonicalizeInstPattern* pattern = new (m_allocator) CanonicalizeInstPattern(&I);
3213+
if (DetermineIfNeeded(I.getOperand(0)))
3214+
{
3215+
MarkAsSource(I.getOperand(0));
3216+
}
3217+
else
3218+
{
3219+
pattern->m_pPattern = Match(*llvm::cast<llvm::Instruction>(I.getOperand(0)));
3220+
}
31413221

31423222
AddPattern(pattern);
31433223
return true;

0 commit comments

Comments
 (0)