Skip to content

Commit 83b66d7

Browse files
pkwasnie-inteligcbot
authored and committed
FPRoundingModeCoalescingImpl pass
Adds a new pass that attempts to reduce the number of times the FP rounding mode is switched by moving and grouping together instructions that use the same rounding mode. The new pass is inserted after optimizations and before the emitter. Instructions are reordered only within the same basic block, with an optional limit on the distance an instruction may be moved.
1 parent 18ae7d4 commit 83b66d7

File tree

13 files changed

+1214
-214
lines changed

13 files changed

+1214
-214
lines changed

IGC/Compiler/CISACodeGen/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ set(IGC_BUILD__SRC__CISACodeGen_Common
8080
"${CMAKE_CURRENT_SOURCE_DIR}/RegisterPressureEstimate.cpp"
8181
"${CMAKE_CURRENT_SOURCE_DIR}/ResolvePredefinedConstant.cpp"
8282
"${CMAKE_CURRENT_SOURCE_DIR}/ResourceLoopAnalysis.cpp"
83+
"${CMAKE_CURRENT_SOURCE_DIR}/FPRoundingModeCoalescing.cpp"
8384
"${CMAKE_CURRENT_SOURCE_DIR}/RuntimeValueLegalizationPass.cpp"
8485
"${CMAKE_CURRENT_SOURCE_DIR}/SLMConstProp.cpp"
8586
"${CMAKE_CURRENT_SOURCE_DIR}/ScalarizerCodeGen.cpp"
@@ -179,6 +180,7 @@ set(IGC_BUILD__HDR__CISACodeGen_Common
179180
"${CMAKE_CURRENT_SOURCE_DIR}/RegisterPressureEstimate.hpp"
180181
"${CMAKE_CURRENT_SOURCE_DIR}/ResolvePredefinedConstant.h"
181182
"${CMAKE_CURRENT_SOURCE_DIR}/ResourceLoopAnalysis.h"
183+
"${CMAKE_CURRENT_SOURCE_DIR}/FPRoundingModeCoalescing.hpp"
182184
"${CMAKE_CURRENT_SOURCE_DIR}/RuntimeValueLegalizationPass.h"
183185
"${CMAKE_CURRENT_SOURCE_DIR}/SLMConstProp.hpp"
184186
"${CMAKE_CURRENT_SOURCE_DIR}/ScalarizerCodeGen.hpp"

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 5 additions & 208 deletions
Original file line numberDiff line numberDiff line change
@@ -16085,174 +16085,6 @@ void EmitPass::emitAluConditionMod(Pattern* aluPattern, Instruction* alu, CmpIns
1608516085
}
1608616086

1608716087

16088-
ERoundingMode EmitPass::GetRoundingMode_FPCvtInt(Instruction* pInst)
16089-
{
16090-
if (isa<FPToSIInst>(pInst) || isa <FPToUIInst>(pInst))
16091-
{
16092-
const ERoundingMode defaultRoundingMode_FPCvtInt = static_cast<ERoundingMode>(
16093-
m_pCtx->getModuleMetaData()->compOpt.FloatCvtIntRoundingMode);
16094-
return defaultRoundingMode_FPCvtInt;
16095-
}
16096-
16097-
if (GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(pInst))
16098-
{
16099-
switch (GII->getIntrinsicID())
16100-
{
16101-
default:
16102-
break;
16103-
case GenISAIntrinsic::GenISA_ftoui_rtn:
16104-
case GenISAIntrinsic::GenISA_ftoi_rtn:
16105-
return ERoundingMode::ROUND_TO_NEGATIVE;
16106-
case GenISAIntrinsic::GenISA_ftoui_rtp:
16107-
case GenISAIntrinsic::GenISA_ftoi_rtp:
16108-
return ERoundingMode::ROUND_TO_POSITIVE;
16109-
case GenISAIntrinsic::GenISA_ftoui_rte:
16110-
case GenISAIntrinsic::GenISA_ftoi_rte:
16111-
return ERoundingMode::ROUND_TO_NEAREST_EVEN;
16112-
}
16113-
}
16114-
// rounding not needed!
16115-
return ERoundingMode::ROUND_TO_ANY;
16116-
}
16117-
16118-
ERoundingMode EmitPass::GetRoundingMode_FP(Instruction* inst)
16119-
{
16120-
// Float rounding mode
16121-
ERoundingMode RM = static_cast<ERoundingMode>(m_pCtx->getModuleMetaData()->compOpt.FloatRoundingMode);
16122-
if (GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(inst))
16123-
{
16124-
switch (GII->getIntrinsicID())
16125-
{
16126-
case GenISAIntrinsic::GenISA_f32tof16_rtz:
16127-
case GenISAIntrinsic::GenISA_ftof_rtz:
16128-
case GenISAIntrinsic::GenISA_itof_rtz:
16129-
case GenISAIntrinsic::GenISA_uitof_rtz:
16130-
case GenISAIntrinsic::GenISA_add_rtz:
16131-
case GenISAIntrinsic::GenISA_mul_rtz:
16132-
case GenISAIntrinsic::GenISA_fma_rtz:
16133-
RM = ERoundingMode::ROUND_TO_ZERO;
16134-
break;
16135-
case GenISAIntrinsic::GenISA_ftof_rtn:
16136-
case GenISAIntrinsic::GenISA_itof_rtn:
16137-
case GenISAIntrinsic::GenISA_uitof_rtn:
16138-
case GenISAIntrinsic::GenISA_fma_rtn:
16139-
RM = ERoundingMode::ROUND_TO_NEGATIVE;
16140-
break;
16141-
case GenISAIntrinsic::GenISA_ftof_rtp:
16142-
case GenISAIntrinsic::GenISA_itof_rtp:
16143-
case GenISAIntrinsic::GenISA_uitof_rtp:
16144-
case GenISAIntrinsic::GenISA_fma_rtp:
16145-
RM = ERoundingMode::ROUND_TO_POSITIVE;
16146-
break;
16147-
case GenISAIntrinsic::GenISA_ftof_rte:
16148-
RM = ERoundingMode::ROUND_TO_NEAREST_EVEN;
16149-
break;
16150-
case GenISAIntrinsic::GenISA_ftobf:
16151-
case GenISAIntrinsic::GenISA_2fto2bf:
16152-
{
16153-
ConstantInt* rmVal;
16154-
if (GII->getIntrinsicID() == GenISAIntrinsic::GenISA_2fto2bf) {
16155-
rmVal = cast<ConstantInt>(GII->getArgOperand(2));
16156-
}
16157-
else {
16158-
rmVal = cast<ConstantInt>(GII->getArgOperand(1));
16159-
}
16160-
RM = (ERoundingMode)rmVal->getZExtValue();
16161-
break;
16162-
}
16163-
case GenISAIntrinsic::GenISA_hftobf8:
16164-
{
16165-
ConstantInt* rmVal = cast<ConstantInt>(GII->getArgOperand(1));
16166-
RM = (ERoundingMode)rmVal->getZExtValue();
16167-
break;
16168-
}
16169-
default:
16170-
break;
16171-
}
16172-
}
16173-
return RM;
16174-
}
16175-
16176-
bool EmitPass::ignoreRoundingMode(llvm::Instruction* inst) const
16177-
{
16178-
auto isFZero = [](Value* V) {
16179-
if (ConstantFP* FCST = dyn_cast<ConstantFP>(V))
16180-
{
16181-
return FCST->isZero();
16182-
}
16183-
return false;
16184-
};
16185-
16186-
if (isa<InsertElementInst>(inst) ||
16187-
isa<ExtractElementInst>(inst) ||
16188-
isa<BitCastInst>(inst) ||
16189-
isa<ICmpInst>(inst) ||
16190-
isa<FPExtInst>(inst) ||
16191-
isa<FCmpInst>(inst) ||
16192-
isa<SelectInst>(inst) ||
16193-
isa<TruncInst>(inst) ||
16194-
isa<LoadInst>(inst) ||
16195-
isa<StoreInst>(inst))
16196-
{
16197-
// these are not affected by rounding mode.
16198-
return true;
16199-
}
16200-
16201-
if (BinaryOperator* BOP = dyn_cast<BinaryOperator>(inst))
16202-
{
16203-
if (BOP->getType()->isIntOrIntVectorTy()) {
16204-
// Integer binary op does not need rounding mode
16205-
return true;
16206-
}
16207-
16208-
// float operations on EM uses RTNE only and are not affected
16209-
// by rounding mode.
16210-
if (BOP->getType()->isFPOrFPVectorTy())
16211-
{
16212-
switch (BOP->getOpcode())
16213-
{
16214-
default:
16215-
break;
16216-
case Instruction::FDiv:
16217-
return true;
16218-
case Instruction::FSub:
16219-
// Negation is okay for any rounding mode
16220-
if (isFZero(BOP->getOperand(0))) {
16221-
return true;
16222-
}
16223-
break;
16224-
}
16225-
}
16226-
}
16227-
if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(inst))
16228-
{
16229-
switch (II->getIntrinsicID())
16230-
{
16231-
default:
16232-
break;
16233-
case IGCLLVM::Intrinsic::exp2:
16234-
case IGCLLVM::Intrinsic::sqrt:
16235-
return true;
16236-
}
16237-
}
16238-
16239-
if (GenIntrinsicInst * GII = dyn_cast<GenIntrinsicInst>(inst))
16240-
{
16241-
GenISAIntrinsic::ID id = GII->getIntrinsicID();
16242-
switch (id)
16243-
{
16244-
case GenISAIntrinsic::GenISA_bftof:
16245-
case GenISAIntrinsic::GenISA_bf8tohf:
16246-
case GenISAIntrinsic::GenISA_tf32tof:
16247-
return true;
16248-
default:
16249-
break;
16250-
}
16251-
}
16252-
// add more instr as needed
16253-
return false;
16254-
}
16255-
1625616088
void EmitPass::initDefaultRoundingMode()
1625716089
{
1625816090
const ERoundingMode defaultRM_FP = static_cast<ERoundingMode>(m_pCtx->getModuleMetaData()->compOpt.FloatRoundingMode);
@@ -16304,41 +16136,6 @@ void EmitPass::SetRoundingMode_FPCvtInt(ERoundingMode newRM_FPCvtInt)
1630416136
}
1630516137
}
1630616138

16307-
// Return true if inst needs specific rounding mode; false otherwise.
16308-
//
16309-
// Currently, only gen intrinsic needs rounding mode other than the default.
16310-
bool EmitPass::setRMExplicitly(Instruction* inst)
16311-
{
16312-
if (GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(inst))
16313-
{
16314-
switch (GII->getIntrinsicID())
16315-
{
16316-
case GenISAIntrinsic::GenISA_f32tof16_rtz:
16317-
case GenISAIntrinsic::GenISA_ftof_rtz:
16318-
case GenISAIntrinsic::GenISA_itof_rtz:
16319-
case GenISAIntrinsic::GenISA_uitof_rtz:
16320-
case GenISAIntrinsic::GenISA_add_rtz:
16321-
case GenISAIntrinsic::GenISA_mul_rtz:
16322-
case GenISAIntrinsic::GenISA_fma_rtz:
16323-
case GenISAIntrinsic::GenISA_fma_rtp:
16324-
case GenISAIntrinsic::GenISA_fma_rtn:
16325-
case GenISAIntrinsic::GenISA_ftof_rtn:
16326-
case GenISAIntrinsic::GenISA_itof_rtn:
16327-
case GenISAIntrinsic::GenISA_uitof_rtn:
16328-
case GenISAIntrinsic::GenISA_ftof_rtp:
16329-
case GenISAIntrinsic::GenISA_itof_rtp:
16330-
case GenISAIntrinsic::GenISA_uitof_rtp:
16331-
case GenISAIntrinsic::GenISA_ftobf:
16332-
case GenISAIntrinsic::GenISA_2fto2bf:
16333-
case GenISAIntrinsic::GenISA_hftobf8:
16334-
return true;
16335-
default:
16336-
break;
16337-
}
16338-
}
16339-
return false;
16340-
}
16341-
1634216139
void EmitPass::ResetRoundingMode(Instruction* inst)
1634316140
{
1634416141
// Reset rounding modes to default if they are not. However, if
@@ -16365,11 +16162,11 @@ void EmitPass::ResetRoundingMode(Instruction* inst)
1636516162
nextInst != nullptr;
1636616163
nextInst = nextInst->getNextNonDebugInstruction())
1636716164
{
16368-
if (ignoreRoundingMode(nextInst))
16165+
if (ignoresRoundingMode(nextInst))
1636916166
{
1637016167
continue;
1637116168
}
16372-
if (setRMExplicitly(nextInst))
16169+
if (setsRMExplicitly(nextInst))
1637316170
{
1637416171
// As nextInst will set RM explicitly, no need to go further.
1637516172
break;
@@ -16378,7 +16175,7 @@ void EmitPass::ResetRoundingMode(Instruction* inst)
1637816175
// At this point, a default RM is needed. For FPCvtInt, we know
1637916176
// precisely whether FPCvtInt RM is needed or not; but for FP, we
1638016177
// do it conservatively as we do not scan all instructions here.
16381-
ERoundingMode intRM = GetRoundingMode_FPCvtInt(nextInst);
16178+
ERoundingMode intRM = GetRoundingMode_FPCvtInt(m_pCtx->getModuleMetaData(), nextInst);
1638216179

1638316180
// If it is not ROUND_TO_ANY, it uses FPCvtInt RM;
1638416181
// otherwise, it does not use FPCvtInt RM.
@@ -16463,7 +16260,7 @@ void EmitPass::emitf32tof16_rtz(llvm::GenIntrinsicInst* inst)
1646316260
void EmitPass::emitfitof(llvm::GenIntrinsicInst* inst)
1646416261
{
1646516262
CVariable* src = GetSymbol(inst->getOperand(0));
16466-
ERoundingMode RM = GetRoundingMode_FP(inst);
16263+
ERoundingMode RM = GetRoundingMode_FP(m_pCtx->getModuleMetaData(), inst);
1646716264
CVariable* dst = m_destination;
1646816265

1646916266
GenISAIntrinsic::ID id = inst->getIntrinsicID();
@@ -16558,7 +16355,7 @@ void EmitPass::emitftoi(llvm::GenIntrinsicInst* inst)
1655816355
IGC_ASSERT_MESSAGE(inst->getOperand(0)->getType()->isFloatingPointTy(), "Unsupported type");
1655916356
CVariable* src = GetSymbol(inst->getOperand(0));
1656016357
CVariable* dst = m_destination;
16561-
ERoundingMode RM = GetRoundingMode_FPCvtInt(inst);
16358+
ERoundingMode RM = GetRoundingMode_FPCvtInt(m_pCtx->getModuleMetaData(), inst);
1656216359
IGC_ASSERT_MESSAGE(RM != ERoundingMode::ROUND_TO_ANY, "Not valid FP->int rounding mode!");
1656316360

1656416361
GenISAIntrinsic::ID id = inst->getIntrinsicID();

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -959,16 +959,10 @@ class EmitPass : public llvm::FunctionPass
959959
int getGRFSize() const { return m_currShader->getGRFSize(); }
960960

961961
void initDefaultRoundingMode();
962-
ERoundingMode GetRoundingMode_FPCvtInt(llvm::Instruction* pInst);
963-
ERoundingMode GetRoundingMode_FP(llvm::Instruction* inst);
964962
void SetRoundingMode_FP(ERoundingMode RM_FP);
965963
void SetRoundingMode_FPCvtInt(ERoundingMode RM_FPCvtInt);
966-
bool setRMExplicitly(llvm::Instruction* inst);
967964
void ResetRoundingMode(llvm::Instruction* inst);
968965

969-
// returns true if the instruction does not care about the rounding mode settings
970-
bool ignoreRoundingMode(llvm::Instruction* inst) const;
971-
972966
// A64 load/store with HWA that make sure the offset hi part is the same per LS call
973967
// addrUniform: if the load/store address is uniform, we can skip A64 WA
974968
void emitGatherA64(llvm::Value* loadInst, CVariable* dst, CVariable* offset, unsigned elemSize, unsigned numElems, bool addrUniform);

0 commit comments

Comments
 (0)