Avoid y*(1/x) for double precision type

pkwasnie-intel · igcbot · commit 8d12a0f6e421 · 2023-12-11T11:53:36.000-05:00
Avoid y*(1/x) for double precision type.
diff --git a/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp b/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp
@@ -4089,7 +4089,10 @@ void EmitPass::BinaryUnary(llvm::Instruction* inst, const SSource source[2], con
         break;
     case Instruction::FDiv:
     {
-        if (inst->getType()->isDoubleTy() && !inst->hasApproxFunc())
+        bool canUseFast = inst->hasApproxFunc() ||
+             (inst->hasAllowReciprocal() && !isOne(source[0].value));
+
+        if (inst->getType()->isDoubleTy() && !canUseFast)
         {   // default : ieee fdiv
             EmitSimpleAlu(llvm_ieee_divide, source, modifier);
         }
diff --git a/IGC/Compiler/CISACodeGen/PatternMatchPass.cpp b/IGC/Compiler/CISACodeGen/PatternMatchPass.cpp
@@ -1185,6 +1185,9 @@ namespace IGC
                 MatchModifier(I);
             break;
         case Instruction::FMul:
+            match = MatchArcpFdiv(I) ||
+                MatchModifier(I);
+            break;
         case Instruction::URem:
         case Instruction::SRem:
         case Instruction::FRem:
@@ -4886,6 +4889,77 @@ namespace IGC
         return found;
     }
 
+    bool CodeGenPatternMatch::MatchArcpFdiv(llvm::BinaryOperator& I)
+    {
+        // Matches a double-typed, arcp-flagged I whose operand is a single-use (1.0 / divisor); returns true once a pattern is added.
+        using namespace llvm::PatternMatch;
+        // Pattern that forwards the two captured sources { dividend, divisor } to EmitPass::FDiv.
+        struct ArcpFdivPattern : public Pattern
+        {
+            SSource sources[2];
+            virtual void Emit(EmitPass* pass, const DstModifier& modifier)
+            {
+                pass->FDiv(sources, modifier);
+            }
+        };
+        // Only double-typed instructions that carry the allow-reciprocal (arcp) flag are candidates.
+        if (!I.getType()->isDoubleTy() || !I.hasAllowReciprocal())
+            return false;
+
+        // Look for a single-use fdiv with a constant 1.0 dividend (1.0 / divisor) on either operand of I.
+        Instruction* fdiv = nullptr;
+        Value* dividend = nullptr,  * divisor = nullptr;
+
+        auto fdivPattern = m_OneUse(m_FDiv(m_FPOne(), m_Value(divisor)));
+
+        if (match(I.getOperand(0), fdivPattern))
+        {
+            fdiv = dyn_cast<Instruction>(I.getOperand(0));
+            dividend = I.getOperand(1);
+        }
+        else if (match(I.getOperand(1), fdivPattern))
+        {
+            fdiv = dyn_cast<Instruction>(I.getOperand(1));
+            dividend = I.getOperand(0);
+        }
+
+        if (!fdiv || !fdiv->hasAllowReciprocal())
+            return false;
+
+        // Pattern found: the other operand of I becomes the dividend, the fdiv's divisor is reused as-is.
+        ArcpFdivPattern* pattern = new (m_allocator)ArcpFdivPattern();
+        Value* sources[2] = { dividend, divisor };
+        e_modifier src_mod[2] = {};
+        // NOTE(review): presumably a canonicalize on a source is redundant when the fdiv flushes denormals on input - confirm.
+        if (FlushesDenormsOnInput(*fdiv))
+        {
+            sources[0] = SkipCanonicalize(sources[0]);
+            sources[1] = SkipCanonicalize(sources[1]);
+        }
+
+        GetModifier(*sources[0], src_mod[0], sources[0]);
+        GetModifier(*sources[1], src_mod[1], sources[1]);
+
+        pattern->sources[0] = GetSource(sources[0], src_mod[0], false, IsSourceOfSample(&I));
+        pattern->sources[1] = GetSource(sources[1], src_mod[1], false, IsSourceOfSample(&I));
+
+        // Add whichever sources qualify to the constant pool of I's block and flag them as coming from it.
+        if (isCandidateForConstantPool(sources[0]))
+        {
+            AddToConstantPool(I.getParent(), sources[0]);
+            pattern->sources[0].fromConstantPool = true;
+        }
+        if (isCandidateForConstantPool(sources[1]))
+        {
+            AddToConstantPool(I.getParent(), sources[1]);
+            pattern->sources[1].fromConstantPool = true;
+        }
+
+        AddPattern(pattern);
+
+        return true;
+    }
+
     bool CodeGenPatternMatch::MatchGradient(llvm::GenIntrinsicInst& I)
     {
         struct GradientPattern : public Pattern
diff --git a/IGC/Compiler/CISACodeGen/PatternMatchPass.hpp b/IGC/Compiler/CISACodeGen/PatternMatchPass.hpp
@@ -225,6 +225,7 @@ namespace IGC
         bool MatchDp4a(llvm::GenIntrinsicInst& I);
         bool MatchLogicAlu(llvm::BinaryOperator& I);
         bool MatchRsqrt(llvm::BinaryOperator& I);
+        bool MatchArcpFdiv(llvm::BinaryOperator& I);
         bool MatchBlockReadWritePointer(llvm::GenIntrinsicInst& I);
         bool MatchGradient(llvm::GenIntrinsicInst& I);
         bool MatchSampleDerivative(llvm::GenIntrinsicInst& I);
diff --git a/IGC/Compiler/CustomSafeOptPass.cpp b/IGC/Compiler/CustomSafeOptPass.cpp
@@ -5073,6 +5073,7 @@ bool GenStrengthReduction::processInst(Instruction* Inst)
                     }
                     Inv = BinaryOperator::CreateFDiv(Src0, Src1, "", insertBefore);
                     Inv->setFastMathFlags(Inst->getFastMathFlags());
+                    Inv->setDebugLoc(Inst->getDebugLoc());
                 }
 
                 Instruction* Mul = BinaryOperator::CreateFMul(I->getOperand(0), Inv, "", I);
@@ -5087,6 +5088,9 @@ bool GenStrengthReduction::processInst(Instruction* Inst)
 
         if (!Inv)
         {
+            if (Inst->getType()->isDoubleTy())
+                return false;
+
             // Only a single use of 1 / Src1. Create Inv right before the use.
             Inv = BinaryOperator::CreateFDiv(Src0, Src1, "", Inst);
             Inv->setFastMathFlags(Inst->getFastMathFlags());
diff --git a/IGC/Compiler/tests/CustomSafeOptPass/fdiv_arcp.ll b/IGC/Compiler/tests/CustomSafeOptPass/fdiv_arcp.ll
@@ -0,0 +1,83 @@
+;=========================== begin_copyright_notice ============================
+;
+; Copyright (C) 2023 Intel Corporation
+;
+; SPDX-License-Identifier: MIT
+;
+;============================ end_copyright_notice =============================
+;
+; RUN: igc_opt -platformpvc -debugify -GenStrengthReduction -check-debugify -S < %s 2>&1 | FileCheck %s
+
+; Debug-info related check
+; CHECK-NOT: WARNING
+; CHECK: CheckModuleDebugify: PASS
+
+
+define spir_kernel void @test_fdiv_half(half addrspace(1)* %a, half addrspace(1)* %b) {
+entry: ; arcp fdiv on half: expected output is the reciprocal of %1 (0xH3C00 is half 1.0) multiplied by %0.
+; CHECK-LABEL: test_fdiv_half
+; CHECK:    %0 = load half, half addrspace(1)* %a, align 2
+; CHECK:    %1 = load half, half addrspace(1)* %b, align 2
+; CHECK:    %2 = fdiv arcp half 0xH3C00, %1
+; CHECK:    %3 = fmul arcp half %0, %2
+; CHECK:    store half %3, half addrspace(1)* %a, align 2
+; CHECK:    ret void
+  %0 = load half, half addrspace(1)* %a, align 2
+  %1 = load half, half addrspace(1)* %b, align 2
+  %conv1 = fdiv arcp half  %0, %1
+  store half %conv1, half addrspace(1)* %a, align 2
+  ret void
+}
+
+define spir_kernel void @test_fdiv_float(float addrspace(1)* %a, float addrspace(1)* %b) {
+entry: ; arcp fdiv on float: expected output is the reciprocal 1.0 / %1 multiplied by %0.
+; CHECK-LABEL: test_fdiv_float
+; CHECK:    %0 = load float, float addrspace(1)* %a, align 4
+; CHECK:    %1 = load float, float addrspace(1)* %b, align 4
+; CHECK:    %2 = fdiv arcp float 1.000000e+00, %1
+; CHECK:    %3 = fmul arcp float %0, %2
+; CHECK:    store float %3, float addrspace(1)* %a, align 4
+; CHECK:    ret void
+  %0 = load float, float addrspace(1)* %a, align 4
+  %1 = load float, float addrspace(1)* %b, align 4
+  %conv1 = fdiv arcp float  %0, %1
+  store float %conv1, float addrspace(1)* %a, align 4
+  ret void
+}
+
+define spir_kernel void @test_fdiv_double_once(double addrspace(1)* %a, double addrspace(1)* %b) {
+entry: ; a lone arcp fdiv on double is expected to be left unchanged (no reciprocal-and-multiply split).
+; CHECK-LABEL: test_fdiv_double_once
+; CHECK:    %0 = load double, double addrspace(1)* %a, align 8
+; CHECK:    %1 = load double, double addrspace(1)* %b, align 8
+; CHECK:    %conv1 = fdiv arcp double  %0, %1
+; CHECK:    store double %conv1, double addrspace(1)* %a, align 8
+; CHECK:    ret void
+  %0 = load double, double addrspace(1)* %a, align 8
+  %1 = load double, double addrspace(1)* %b, align 8
+  %conv1 = fdiv arcp double  %0, %1
+  store double %conv1, double addrspace(1)* %a, align 8
+  ret void
+}
+
+define spir_kernel void @test_fdiv_double_twice(double addrspace(1)* %a, double addrspace(1)* %b, double addrspace(1)* %c) {
+entry: ; two arcp double fdivs share divisor %2: expected output has one reciprocal of %2 feeding two fmuls; the non-arcp fdiv stays.
+; CHECK-LABEL: test_fdiv_double_twice
+; CHECK:    %0 = load double, double addrspace(1)* %a, align 8
+; CHECK:    %1 = load double, double addrspace(1)* %b, align 8
+; CHECK:    %2 = load double, double addrspace(1)* %c, align 8
+; CHECK:    %3 = fdiv arcp double 1.000000e+00, %2
+; CHECK:    %4 = fmul arcp double %0, %3
+; CHECK:    %5 = fmul arcp double %1, %3
+; CHECK:    %add1 = fdiv double %4, %5
+; CHECK:    store double %add1, double addrspace(1)* %a, align 8
+; CHECK:    ret void
+  %0 = load double, double addrspace(1)* %a, align 8
+  %1 = load double, double addrspace(1)* %b, align 8
+  %2 = load double, double addrspace(1)* %c, align 8
+  %conv1 = fdiv arcp double  %0, %2
+  %conv2 = fdiv arcp double  %1, %2
+  %add1 = fdiv double %conv1, %conv2
+  store double %add1, double addrspace(1)* %a, align 8
+  ret void
+}

Original file line number	Diff line number	Diff line change
`@@ -4089,7 +4089,10 @@ void EmitPass::BinaryUnary(llvm::Instruction* inst, const SSource source[2], con`
`4089`	`4089`	`break;`
`4090`	`4090`	`case Instruction::FDiv:`
`4091`	`4091`	`{`
`4092`		`- if (inst->getType()->isDoubleTy() && !inst->hasApproxFunc())`
	`4092`	`+ bool canUseFast = inst->hasApproxFunc() \|\|`
	`4093`	`+ (inst->hasAllowReciprocal() && !isOne(source[0].value));`
	`4094`	`+`
	`4095`	`+ if (inst->getType()->isDoubleTy() && !canUseFast)`
`4093`	`4096`	`{ // default : ieee fdiv`
`4094`	`4097`	`EmitSimpleAlu(llvm_ieee_divide, source, modifier);`
`4095`	`4098`	`}`
Original file line number	Diff line number	Diff line change
`@@ -5073,6 +5073,7 @@ bool GenStrengthReduction::processInst(Instruction* Inst)`
`5073`	`5073`	`}`
`5074`	`5074`	`Inv = BinaryOperator::CreateFDiv(Src0, Src1, "", insertBefore);`
`5075`	`5075`	`Inv->setFastMathFlags(Inst->getFastMathFlags());`
	`5076`	`+ Inv->setDebugLoc(Inst->getDebugLoc());`
`5076`	`5077`	`}`
`5077`	`5078`
`5078`	`5079`	`Instruction* Mul = BinaryOperator::CreateFMul(I->getOperand(0), Inv, "", I);`
`@@ -5087,6 +5088,9 @@ bool GenStrengthReduction::processInst(Instruction* Inst)`
`5087`	`5088`
`5088`	`5089`	`if (!Inv)`
`5089`	`5090`	`{`
	`5091`	`+ if (Inst->getType()->isDoubleTy())`
	`5092`	`+ return false;`
	`5093`	`+`
`5090`	`5094`	`// Only a single use of 1 / Src1. Create Inv right before the use.`
`5091`	`5095`	`Inv = BinaryOperator::CreateFDiv(Src0, Src1, "", Inst);`
`5092`	`5096`	`Inv->setFastMathFlags(Inst->getFastMathFlags());`