Skip to content

Commit 8d12a0f

Browse files
pkwasnie-intel authored and igcbot committed
avoid y*(1/x) for double precision type
Avoid y*(1/x) for double precision type.
1 parent 77fb673 commit 8d12a0f

File tree

5 files changed

+166
-1
lines changed

5 files changed

+166
-1
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4089,7 +4089,10 @@ void EmitPass::BinaryUnary(llvm::Instruction* inst, const SSource source[2], con
40894089
break;
40904090
case Instruction::FDiv:
40914091
{
4092-
if (inst->getType()->isDoubleTy() && !inst->hasApproxFunc())
4092+
bool canUseFast = inst->hasApproxFunc() ||
4093+
(inst->hasAllowReciprocal() && !isOne(source[0].value));
4094+
4095+
if (inst->getType()->isDoubleTy() && !canUseFast)
40934096
{ // default : ieee fdiv
40944097
EmitSimpleAlu(llvm_ieee_divide, source, modifier);
40954098
}

IGC/Compiler/CISACodeGen/PatternMatchPass.cpp

Lines changed: 74 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1185,6 +1185,9 @@ namespace IGC
11851185
MatchModifier(I);
11861186
break;
11871187
case Instruction::FMul:
1188+
match = MatchArcpFdiv(I) ||
1189+
MatchModifier(I);
1190+
break;
11881191
case Instruction::URem:
11891192
case Instruction::SRem:
11901193
case Instruction::FRem:
@@ -4886,6 +4889,77 @@ namespace IGC
48864889
return found;
48874890
}
48884891

4892+
bool CodeGenPatternMatch::MatchArcpFdiv(llvm::BinaryOperator& I)
4893+
{
4894+
4895+
using namespace llvm::PatternMatch;
4896+
4897+
struct ArcpFdivPattern : public Pattern
4898+
{
4899+
SSource sources[2];
4900+
virtual void Emit(EmitPass* pass, const DstModifier& modifier)
4901+
{
4902+
pass->FDiv(sources, modifier);
4903+
}
4904+
};
4905+
4906+
if (!I.getType()->isDoubleTy() || !I.hasAllowReciprocal())
4907+
return false;
4908+
4909+
// Look for fdiv.
4910+
Instruction* fdiv = nullptr;
4911+
Value* dividend = nullptr, * divisor = nullptr;
4912+
4913+
auto fdivPattern = m_OneUse(m_FDiv(m_FPOne(), m_Value(divisor)));
4914+
4915+
if (match(I.getOperand(0), fdivPattern))
4916+
{
4917+
fdiv = dyn_cast<Instruction>(I.getOperand(0));
4918+
dividend = I.getOperand(1);
4919+
}
4920+
else if (match(I.getOperand(1), fdivPattern))
4921+
{
4922+
fdiv = dyn_cast<Instruction>(I.getOperand(1));
4923+
dividend = I.getOperand(0);
4924+
}
4925+
4926+
if (!fdiv || !fdiv->hasAllowReciprocal())
4927+
return false;
4928+
4929+
// Pattern found.
4930+
ArcpFdivPattern* pattern = new (m_allocator)ArcpFdivPattern();
4931+
Value* sources[2] = { dividend, divisor };
4932+
e_modifier src_mod[2] = {};
4933+
4934+
if (FlushesDenormsOnInput(*fdiv))
4935+
{
4936+
sources[0] = SkipCanonicalize(sources[0]);
4937+
sources[1] = SkipCanonicalize(sources[1]);
4938+
}
4939+
4940+
GetModifier(*sources[0], src_mod[0], sources[0]);
4941+
GetModifier(*sources[1], src_mod[1], sources[1]);
4942+
4943+
pattern->sources[0] = GetSource(sources[0], src_mod[0], false, IsSourceOfSample(&I));
4944+
pattern->sources[1] = GetSource(sources[1], src_mod[1], false, IsSourceOfSample(&I));
4945+
4946+
// Try to add to constant pool whatever possible.
4947+
if (isCandidateForConstantPool(sources[0]))
4948+
{
4949+
AddToConstantPool(I.getParent(), sources[0]);
4950+
pattern->sources[0].fromConstantPool = true;
4951+
}
4952+
if (isCandidateForConstantPool(sources[1]))
4953+
{
4954+
AddToConstantPool(I.getParent(), sources[1]);
4955+
pattern->sources[1].fromConstantPool = true;
4956+
}
4957+
4958+
AddPattern(pattern);
4959+
4960+
return true;
4961+
}
4962+
48894963
bool CodeGenPatternMatch::MatchGradient(llvm::GenIntrinsicInst& I)
48904964
{
48914965
struct GradientPattern : public Pattern

IGC/Compiler/CISACodeGen/PatternMatchPass.hpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -225,6 +225,7 @@ namespace IGC
225225
bool MatchDp4a(llvm::GenIntrinsicInst& I);
226226
bool MatchLogicAlu(llvm::BinaryOperator& I);
227227
bool MatchRsqrt(llvm::BinaryOperator& I);
228+
bool MatchArcpFdiv(llvm::BinaryOperator& I);
228229
bool MatchBlockReadWritePointer(llvm::GenIntrinsicInst& I);
229230
bool MatchGradient(llvm::GenIntrinsicInst& I);
230231
bool MatchSampleDerivative(llvm::GenIntrinsicInst& I);

IGC/Compiler/CustomSafeOptPass.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5073,6 +5073,7 @@ bool GenStrengthReduction::processInst(Instruction* Inst)
50735073
}
50745074
Inv = BinaryOperator::CreateFDiv(Src0, Src1, "", insertBefore);
50755075
Inv->setFastMathFlags(Inst->getFastMathFlags());
5076+
Inv->setDebugLoc(Inst->getDebugLoc());
50765077
}
50775078

50785079
Instruction* Mul = BinaryOperator::CreateFMul(I->getOperand(0), Inv, "", I);
@@ -5087,6 +5088,9 @@ bool GenStrengthReduction::processInst(Instruction* Inst)
50875088

50885089
if (!Inv)
50895090
{
5091+
if (Inst->getType()->isDoubleTy())
5092+
return false;
5093+
50905094
// Only a single use of 1 / Src1. Create Inv right before the use.
50915095
Inv = BinaryOperator::CreateFDiv(Src0, Src1, "", Inst);
50925096
Inv->setFastMathFlags(Inst->getFastMathFlags());
Lines changed: 83 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,83 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2023 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; RUN: igc_opt -platformpvc -debugify -GenStrengthReduction -check-debugify -S < %s 2>&1 | FileCheck %s
10+
11+
; Debug-info related check
12+
; CHECK-NOT: WARNING
13+
; CHECK: CheckModuleDebugify: PASS
14+
15+
16+
define spir_kernel void @test_fdiv_half(half addrspace(1)* %a, half addrspace(1)* %b) {
17+
entry:
18+
; CHECK-LABEL: test_fdiv_half
19+
; CHECK: %0 = load half, half addrspace(1)* %a, align 2
20+
; CHECK: %1 = load half, half addrspace(1)* %b, align 2
21+
; CHECK: %2 = fdiv arcp half 0xH3C00, %1
22+
; CHECK: %3 = fmul arcp half %0, %2
23+
; CHECK: store half %3, half addrspace(1)* %a, align 2
24+
; CHECK: ret void
25+
%0 = load half, half addrspace(1)* %a, align 2
26+
%1 = load half, half addrspace(1)* %b, align 2
27+
%conv1 = fdiv arcp half %0, %1
28+
store half %conv1, half addrspace(1)* %a, align 2
29+
ret void
30+
}
31+
32+
define spir_kernel void @test_fdiv_float(float addrspace(1)* %a, float addrspace(1)* %b) {
33+
entry:
34+
; CHECK-LABEL: test_fdiv_float
35+
; CHECK: %0 = load float, float addrspace(1)* %a, align 4
36+
; CHECK: %1 = load float, float addrspace(1)* %b, align 4
37+
; CHECK: %2 = fdiv arcp float 1.000000e+00, %1
38+
; CHECK: %3 = fmul arcp float %0, %2
39+
; CHECK: store float %3, float addrspace(1)* %a, align 4
40+
; CHECK: ret void
41+
%0 = load float, float addrspace(1)* %a, align 4
42+
%1 = load float, float addrspace(1)* %b, align 4
43+
%conv1 = fdiv arcp float %0, %1
44+
store float %conv1, float addrspace(1)* %a, align 4
45+
ret void
46+
}
47+
48+
define spir_kernel void @test_fdiv_double_once(double addrspace(1)* %a, double addrspace(1)* %b) {
49+
entry:
50+
; CHECK-LABEL: test_fdiv_double_once
51+
; CHECK: %0 = load double, double addrspace(1)* %a, align 8
52+
; CHECK: %1 = load double, double addrspace(1)* %b, align 8
53+
; CHECK: %conv1 = fdiv arcp double %0, %1
54+
; CHECK: store double %conv1, double addrspace(1)* %a, align 8
55+
; CHECK: ret void
56+
%0 = load double, double addrspace(1)* %a, align 8
57+
%1 = load double, double addrspace(1)* %b, align 8
58+
%conv1 = fdiv arcp double %0, %1
59+
store double %conv1, double addrspace(1)* %a, align 8
60+
ret void
61+
}
62+
63+
define spir_kernel void @test_fdiv_double_twice(double addrspace(1)* %a, double addrspace(1)* %b, double addrspace(1)* %c) {
64+
entry:
65+
; CHECK-LABEL: test_fdiv_double_twice
66+
; CHECK: %0 = load double, double addrspace(1)* %a, align 8
67+
; CHECK: %1 = load double, double addrspace(1)* %b, align 8
68+
; CHECK: %2 = load double, double addrspace(1)* %c, align 8
69+
; CHECK: %3 = fdiv arcp double 1.000000e+00, %2
70+
; CHECK: %4 = fmul arcp double %0, %3
71+
; CHECK: %5 = fmul arcp double %1, %3
72+
; CHECK: %add1 = fdiv double %4, %5
73+
; CHECK: store double %add1, double addrspace(1)* %a, align 8
74+
; CHECK: ret void
75+
%0 = load double, double addrspace(1)* %a, align 8
76+
%1 = load double, double addrspace(1)* %b, align 8
77+
%2 = load double, double addrspace(1)* %c, align 8
78+
%conv1 = fdiv arcp double %0, %2
79+
%conv2 = fdiv arcp double %1, %2
80+
%add1 = fdiv double %conv1, %conv2
81+
store double %add1, double addrspace(1)* %a, align 8
82+
ret void
83+
}

0 commit comments

Comments
 (0)