@@ -37,7 +37,7 @@ using namespace IGC::IGCMD;
37
37
38
38
namespace IGC {
// Forward declaration of the fdiv expansion helper whose definition appears
// further down in this listing. The diff replaces the single-argument form
// with one that also takes the shader type; the definition compares that
// value against ShaderType::OPENCL_SHADER to enable an OpenCL-only select.
39
39
40
- bool expandFDIVInstructions (llvm::Function& F );
40
+ bool expandFDIVInstructions (llvm::Function &F, ShaderType ShaderTy );
41
41
42
42
} // namespace IGC
43
43
@@ -108,7 +108,7 @@ bool Legalization::runOnFunction(Function& F)
108
108
109
109
// Legalize fdiv if any
110
110
if (!m_ctx->platform .hasFDIV ())
111
- expandFDIVInstructions (F);
111
+ expandFDIVInstructions (F, m_ctx-> type );
112
112
return true ;
113
113
}
114
114
@@ -2806,8 +2806,7 @@ static bool needsNoScaling(Value* Val)
2806
2806
// S = 2^(-32) if exp(y) >= 200,
2807
2807
// S = 1.0f otherwise
2808
2808
//
2809
- bool IGC::expandFDIVInstructions (llvm::Function& F)
2810
- {
2809
+ bool IGC::expandFDIVInstructions (llvm::Function &F, ShaderType ShaderTy) {
2811
2810
bool Changed = false ;
2812
2811
for (auto & BB : F.getBasicBlockList ()) {
2813
2812
for (auto Iter = BB.begin (); Iter != BB.end ();) {
@@ -2855,28 +2854,36 @@ bool IGC::expandFDIVInstructions(llvm::Function& F)
2855
2854
V = Builder.CreateFMul (Y, X);
2856
2855
}
2857
2856
else {
2857
+ Value* YAsInt32 = Builder.CreateBitCast (Y, Builder.getInt32Ty ());
2858
+ Value* YExp = Builder.CreateAnd (YAsInt32, Builder.getInt32 (0x7f800000 ));
2859
+
2858
2860
float S32 = uint64_t (1 ) << 32 ;
2859
2861
ConstantFP* C0 = ConstantFP::get (Ctx, APFloat (S32));
2860
2862
ConstantFP* C1 = ConstantFP::get (Ctx, APFloat (1 .0f ));
2861
2863
ConstantFP* C2 = ConstantFP::get (Ctx, APFloat (1 .0f / S32));
2862
2864
2863
- Value* Exp = Builder.CreateAnd (
2864
- Builder.CreateBitCast (Y, Builder.getInt32Ty ()),
2865
- Builder.getInt32 (0x7f800000 ));
2866
-
2867
- // Check if B's exponent is 0, scale up.
2868
- Value* P1 = Builder.CreateICmpEQ (Exp, Builder.getInt32 (0 ));
2869
- Value* Scale = Builder.CreateSelect (P1, C0, C1);
2870
-
2871
- // Check if B's exponent >= 200, scale down.
2872
- Value* P2 = Builder.CreateICmpUGE (Exp, Builder.getInt32 (200 << 23 ));
2873
- Scale = Builder.CreateSelect (P2, C2, Scale);
2865
+ // Determine the appropriate scale based on Y's exponent.
2866
+ Value* ScaleUp = Builder.CreateSelect (Builder.CreateICmpEQ (YExp, Builder.getInt32 (0 )), C0, C1);
2867
+ Value* Scale = Builder.CreateSelect (Builder.CreateICmpUGE (YExp, Builder.getInt32 (200 << 23 )), C2, ScaleUp);
2874
2868
2875
2869
// Compute rcp(y * S) * x * S
2876
- V = Builder.CreateFMul (Y, Scale);
2877
- V = Builder.CreateFDiv (C1, V );
2878
- V = Builder.CreateFMul (V , X);
2870
+ Value *ScaledY = Builder.CreateFMul (Y, Scale);
2871
+ ScaledY = Builder.CreateFDiv (C1, ScaledY );
2872
+ V = Builder.CreateFMul (ScaledY , X);
2879
2873
V = Builder.CreateFMul (V, Scale);
2874
+
2875
+ // In case of OpenCL kernels, select 1.0f as the result for better precision when x == y (ordered compare, so never for NaN)
2876
+ // and y's exponent and mantissa fields are both nonzero. This rules out y being +/-0, subnormal or +/-Inf, and also exact powers of two.
2877
+ if (ShaderTy == ShaderType::OPENCL_SHADER) {
2878
+ Value* CmpXY = Builder.CreateFCmpOEQ (X, Y);
2879
+ Value* YMantissa = Builder.CreateAnd (YAsInt32, Builder.getInt32 (0x007fffff ));
2880
+ Value* CmpYExpZero = Builder.CreateICmpEQ (YExp, Builder.getInt32 (0 ));
2881
+ Value* CmpYMantissaZero = Builder.CreateICmpEQ (YMantissa, Builder.getInt32 (0 ));
2882
+ Value* CmpYIsZeroOrSubnormal = Builder.CreateOr (CmpYExpZero, CmpYMantissaZero);
2883
+ Value* CmpYIsNotZeroOrSubnormal = Builder.CreateNot (CmpYIsZeroOrSubnormal);
2884
+ V = Builder.CreateSelect (Builder.CreateAnd (CmpXY, CmpYIsNotZeroOrSubnormal),
2885
+ ConstantFP::get (Ctx, APFloat (1 .0f )), V);
2886
+ }
2880
2887
}
2881
2888
2882
2889
Inst->replaceAllUsesWith (V);
@@ -2898,6 +2905,7 @@ namespace IGC {
2898
2905
// Reports this pass's analysis requirements: it marks the CFG as preserved
// and (added by this diff) requires CodeGenContextWrapper.
void getAnalysisUsage (AnalysisUsage& AU) const override
2899
2906
{
2900
2907
AU.setPreservesCFG ();
2908
// NOTE(review): presumably needed because runOnFunction further down in this
// listing calls getAnalysis<CodeGenContextWrapper>() -- confirm both belong
// to the same pass class.
+ AU.addRequired <CodeGenContextWrapper>();
2901
2909
}
2902
2910
};
2903
2911
@@ -2920,6 +2928,7 @@ GenFDIVEmulation::GenFDIVEmulation()
2920
2928
2921
2929
// Pass entry point for F: forwards to expandFDIVInstructions and returns its
// result. After this diff the call also passes the shader type read from the
// code-gen context.
bool GenFDIVEmulation::runOnFunction (Function& F)
2922
2930
{
2931
// Fetch the code-gen context through the CodeGenContextWrapper analysis.
// NOTE(review): m_ctx is a local variable here despite the member-style
// "m_" prefix.
+ IGC::CodeGenContext* m_ctx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext ();
2923
2932
// Always emulate fdiv instructions.
2924
- return expandFDIVInstructions (F);
2933
+ return expandFDIVInstructions (F, m_ctx-> type );
2925
2934
}
0 commit comments