Skip to content

Commit 4f16957

Browse files
committed
[GlobalISel][AArch64][AMDGPU] Expand FPOWI into a series of multiplications
SelectionDAG already converts FPOWI with a constant exponent into multiplications; this patch introduces the same optimization into GlobalISel.
1 parent 2ca8c85 commit 4f16957

File tree

6 files changed

+1129
-299
lines changed

6 files changed

+1129
-299
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -865,6 +865,10 @@ class CombinerHelper {
865865
/// By default, it erases the instruction def'd on \p MO from the function.
866866
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo);
867867

868+
/// Expands the G_FPOWI \p MI, whose exponent is the constant \p Exponent,
869+
/// into a series of multiplications, followed by a division of 1.0 by the
870+
/// product if the exponent is negative. A zero exponent yields 1.0.
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent);
871+
868872
/// Combine insert vector element OOB.
869873
bool matchInsertVectorElementOOB(MachineInstr &MI, BuildFnTy &MatchInfo);
870874

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1636,6 +1636,12 @@ def sub_of_vscale : GICombineRule<
16361636
[{ return Helper.matchSubOfVScale(${root}, ${matchinfo}); }]),
16371637
(apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
16381638

1639+
// Matches a G_FPOWI whose integer exponent operand is defined by a G_CONSTANT
// and hands the sign-extended constant to Helper.applyExpandFPowI, which
// rewrites the G_FPOWI rooted at $root.
def expand_const_fpowi : GICombineRule<
1640+
(defs root:$root),
1641+
(match (G_CONSTANT $int, $imm),
1642+
(G_FPOWI $dst, $float, $int):$root),
1643+
(apply [{ Helper.applyExpandFPowI(*${root}, ${imm}.getCImm()->getSExtValue()); }])>;
1644+
16391645
// match_extract_of_element and insert_vector_elt_oob must be the first!
16401646
def vector_ops_combines: GICombineGroup<[
16411647
match_extract_of_element_undef_vector,
@@ -1786,7 +1792,7 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
17861792
def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
17871793
overlapping_and, mulo_by_2, mulo_by_0,
17881794
adde_to_addo,
1789-
combine_minmax_nan]>;
1795+
combine_minmax_nan, expand_const_fpowi]>;
17901796

17911797
def known_bits_simplifications : GICombineGroup<[
17921798
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7356,6 +7356,39 @@ void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
73567356
Root->eraseFromParent();
73577357
}
73587358

7359+
/// Rewrite the G_FPOWI \p MI, whose exponent is the known constant
/// \p Exponent, into explicit floating-point arithmetic:
///   - Exponent == 0: the destination is defined as the constant 1.0.
///   - Exponent  > 0: Base is multiplied by itself |Exponent| - 1 times.
///   - Exponent  < 0: as above for |Exponent|, then 1.0 is divided by the
///                    product.
/// The instruction flags of \p MI are copied onto every generated G_FMUL and
/// G_FDIV. \p MI is erased on every path.
void CombinerHelper::applyExpandFPowI(MachineInstr &MI, int64_t Exponent) {
  auto [Dst, Base] = MI.getFirst2Regs();
  LLT Ty = MRI.getType(Dst);
  int64_t ExpVal = Exponent;

  if (ExpVal == 0) {
    // powi(x, 0) == 1.0: define Dst directly with the constant.
    Builder.buildFConstant(Dst, 1.0);
    // Erase (not merely unlink) the replaced instruction, exactly as the
    // non-zero path below does.
    MI.eraseFromParent();
    return;
  }

  // Work with the magnitude; the sign is handled after the multiply chain.
  if (ExpVal < 0)
    ExpVal = -ExpVal;

  // Build x^|Exponent| as a linear chain of |Exponent| - 1 multiplies by Base.
  // This is not the binary decomposition (square-and-multiply) scheme, which
  // would need fewer multiplies for larger exponents, but it is really simple
  // and, for small exponents, much better than a libcall.
  // NOTE(review): the chain length is not bounded here, and negating the most
  // negative int64_t above would overflow; this relies on the exponent being
  // small in practice.
  SrcOp Res = Base;
  while (--ExpVal > 0)
    Res = Builder.buildFMul(Ty, Res, Base, MI.getFlags());

  // If the original exponent was negative, invert the result, producing
  // 1/(x*x*x).
  if (Exponent < 0)
    Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), Res,
                            MI.getFlags());

  Builder.buildCopy(Dst, Res);
  MI.eraseFromParent();
}
7391+
73597392
bool CombinerHelper::matchSextOfTrunc(const MachineOperand &MO,
73607393
BuildFnTy &MatchInfo) {
73617394
GSext *Sext = cast<GSext>(getDefIgnoringCopies(MO.getReg(), MRI));

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7142,8 +7142,6 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
71427142
return UnableToLegalize;
71437143
}
71447144

7145-
// TODO: If RHS is a constant SelectionDAGBuilder expands this into a
7146-
// multiplication tree.
71477145
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
71487146
auto [Dst, Src0, Src1] = MI.getFirst3Regs();
71497147
LLT Ty = MRI.getType(Dst);
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
3+
4+
---
5+
name: fpowi_s64_zero
6+
body: |
7+
bb.0:
8+
liveins: $d0, $w0
9+
10+
; CHECK-LABEL: name: fpowi_s64_zero
11+
; CHECK: liveins: $d0, $w0
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
14+
; CHECK-NEXT: $d0 = COPY [[C]](s64)
15+
%0:_(s64) = COPY $d0
16+
%1:_(s32) = COPY $w0
17+
%2:_(s32) = G_CONSTANT i32 0
18+
%3:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %2(s32)
19+
$d0 = COPY %3(s64)
20+
...
21+
22+
---
23+
name: fpowi_s32_zero
24+
body: |
25+
bb.0:
26+
liveins: $d0, $w0
27+
28+
; CHECK-LABEL: name: fpowi_s32_zero
29+
; CHECK: liveins: $d0, $w0
30+
; CHECK-NEXT: {{ $}}
31+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
32+
; CHECK-NEXT: $s0 = COPY [[C]](s32)
33+
%0:_(s32) = COPY $s0
34+
%1:_(s32) = COPY $w0
35+
%2:_(s32) = G_CONSTANT i32 0
36+
%3:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %2(s32)
37+
$s0 = COPY %3(s32)
38+
...
39+
40+
---
41+
name: fpowi_positive
42+
body: |
43+
bb.0:
44+
liveins: $d0, $w0
45+
46+
; CHECK-LABEL: name: fpowi_positive
47+
; CHECK: liveins: $d0, $w0
48+
; CHECK-NEXT: {{ $}}
49+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
50+
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FMUL [[COPY]], [[COPY]]
51+
; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FMUL [[FMUL]], [[COPY]]
52+
; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FMUL [[FMUL1]], [[COPY]]
53+
; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FMUL [[FMUL2]], [[COPY]]
54+
; CHECK-NEXT: $d0 = COPY [[FMUL3]](s64)
55+
%0:_(s64) = COPY $d0
56+
%1:_(s32) = COPY $w0
57+
%2:_(s32) = G_CONSTANT i32 5
58+
%3:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %2(s32)
59+
$d0 = COPY %3(s64)
60+
...
61+
62+
---
63+
name: fpowi_s64_negative
64+
body: |
65+
bb.0:
66+
liveins: $d0, $w0
67+
68+
; CHECK-LABEL: name: fpowi_s64_negative
69+
; CHECK: liveins: $d0, $w0
70+
; CHECK-NEXT: {{ $}}
71+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
72+
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FMUL [[COPY]], [[COPY]]
73+
; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FMUL [[FMUL]], [[COPY]]
74+
; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FMUL [[FMUL1]], [[COPY]]
75+
; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FMUL [[FMUL2]], [[COPY]]
76+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
77+
; CHECK-NEXT: [[FDIV:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FDIV [[C]], [[FMUL3]]
78+
; CHECK-NEXT: $d0 = COPY [[FDIV]](s64)
79+
%0:_(s64) = COPY $d0
80+
%1:_(s32) = COPY $w0
81+
%2:_(s32) = G_CONSTANT i32 -5
82+
%3:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %2(s32)
83+
$d0 = COPY %3(s64)
84+
...
85+
86+
---
87+
name: fpowi_s32_negative
88+
body: |
89+
bb.0:
90+
liveins: $d0, $w0
91+
92+
; CHECK-LABEL: name: fpowi_s32_negative
93+
; CHECK: liveins: $d0, $w0
94+
; CHECK-NEXT: {{ $}}
95+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
96+
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL [[COPY]], [[COPY]]
97+
; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL [[FMUL]], [[COPY]]
98+
; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL [[FMUL1]], [[COPY]]
99+
; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL [[FMUL2]], [[COPY]]
100+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
101+
; CHECK-NEXT: [[FDIV:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FDIV [[C]], [[FMUL3]]
102+
; CHECK-NEXT: $s0 = COPY [[FDIV]](s32)
103+
%0:_(s32) = COPY $s0
104+
%1:_(s32) = COPY $w0
105+
%2:_(s32) = G_CONSTANT i32 -5
106+
%3:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %2(s32)
107+
$s0 = COPY %3(s32)
108+
...
109+
110+
---
111+
name: fpowi_libcall
112+
body: |
113+
bb.0:
114+
liveins: $d0, $w0
115+
116+
; CHECK-LABEL: name: fpowi_libcall
117+
; CHECK: liveins: $d0, $w0
118+
; CHECK-NEXT: {{ $}}
119+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
120+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
121+
; CHECK-NEXT: [[FPOWI:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FPOWI [[COPY]], [[COPY1]](s32)
122+
; CHECK-NEXT: $d0 = COPY [[FPOWI]](s64)
123+
%0:_(s64) = COPY $d0
124+
%1:_(s32) = COPY $w0
125+
%2:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %1(s32)
126+
$d0 = COPY %2(s64)
127+
...

0 commit comments

Comments
 (0)