Skip to content

Commit 937d79b

Browse files
authored
[GlobalISel][AArch64][AMDGPU] Expand FPOWI into series of multiplication (#95217)
SelectionDAG already converts FPOWI into a series of optimized multiplications; this patch introduces the same optimization into GlobalISel.
1 parent 5db3579 commit 937d79b

File tree

7 files changed

+413
-300
lines changed

7 files changed

+413
-300
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -865,6 +865,13 @@ class CombinerHelper {
865865
/// By default, it erases the instruction def'd on \p MO from the function.
866866
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo);
867867

868+
/// Match FPOWI if it's beneficial to expand it into a series of multiplications.
869+
bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent);
870+
871+
/// Expands FPOWI into a series of multiplications and a division if the
872+
/// exponent is negative.
873+
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent);
874+
868875
/// Combine insert vector element OOB.
869876
bool matchInsertVectorElementOOB(MachineInstr &MI, BuildFnTy &MatchInfo);
870877

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1636,6 +1636,13 @@ def sub_of_vscale : GICombineRule<
16361636
[{ return Helper.matchSubOfVScale(${root}, ${matchinfo}); }]),
16371637
(apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
16381638

1639+
// Rewrite a G_FPOWI whose exponent operand is defined by a G_CONSTANT. The
// constant's sign-extended value is handed to Helper.matchFPowIExpansion to
// decide whether to fire, and to Helper.applyExpandFPowI to do the rewrite.
def expand_const_fpowi : GICombineRule<
1640+
(defs root:$root),
1641+
(match (G_CONSTANT $int, $imm),
1642+
(G_FPOWI $dst, $float, $int):$root,
1643+
[{ return Helper.matchFPowIExpansion(*${root}, ${imm}.getCImm()->getSExtValue()); }]),
1644+
(apply [{ Helper.applyExpandFPowI(*${root}, ${imm}.getCImm()->getSExtValue()); }])>;
1645+
16391646
// match_extract_of_element and insert_vector_elt_oob must be the first!
16401647
def vector_ops_combines: GICombineGroup<[
16411648
match_extract_of_element_undef_vector,
@@ -1786,7 +1793,7 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
17861793
def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
17871794
overlapping_and, mulo_by_2, mulo_by_0,
17881795
adde_to_addo,
1789-
combine_minmax_nan]>;
1796+
combine_minmax_nan, expand_const_fpowi]>;
17901797

17911798
def known_bits_simplifications : GICombineGroup<[
17921799
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7349,6 +7349,54 @@ void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
73497349
Root->eraseFromParent();
73507350
}
73517351

7352+
/// Decide whether the G_FPOWI \p MI with constant exponent \p Exponent should
/// be expanded. The decision is delegated to the target lowering's
/// isBeneficialToExpandPowI hook, which is also told whether the enclosing
/// function carries the optsize attribute.
bool CombinerHelper::matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) {
  const auto &TLI = getTargetLowering();
  const bool MinimizeSize = MI.getMF()->getFunction().hasOptSize();
  return TLI.isBeneficialToExpandPowI(Exponent, MinimizeSize);
}
7356+
7357+
/// Rewrite the G_FPOWI \p MI, whose exponent is the constant \p Exponent, as
/// an explicit chain of G_FMULs. A negative exponent additionally divides 1.0
/// by the product with a G_FDIV that carries MI's flags. A zero exponent folds
/// to the constant 1.0. In every case the result is defined into MI's
/// destination register and \p MI is erased.
void CombinerHelper::applyExpandFPowI(MachineInstr &MI, int64_t Exponent) {
  auto [Dst, Base] = MI.getFirst2Regs();
  LLT Ty = MRI.getType(Dst);

  if (Exponent == 0) {
    Builder.buildFConstant(Dst, 1.0);
    // Erase, rather than merely unlink, the replaced instruction so that it is
    // deleted just like on the non-zero path below.
    MI.eraseFromParent();
    return;
  }

  // Magnitude of the exponent. Negate in unsigned arithmetic so the most
  // negative int64_t value does not trigger signed-overflow UB.
  uint64_t ExpVal = Exponent < 0 ? 0 - static_cast<uint64_t>(Exponent)
                                 : static_cast<uint64_t>(Exponent);

  // We use the simple binary decomposition method from SelectionDAG ExpandPowI
  // to generate the multiply sequence. There are more optimal ways to do this
  // (for example, powi(x,15) generates one more multiply than it should), but
  // this has the benefit of being both really simple and much better than a
  // libcall.
  std::optional<SrcOp> Res;
  SrcOp CurSquare = Base;
  // ExpVal is non-zero here, so at least one bit is set and Res is always
  // populated before the loop exits.
  while (true) {
    if (ExpVal & 1) {
      if (!Res)
        Res = CurSquare;
      else
        Res = Builder.buildFMul(Ty, *Res, CurSquare);
    }

    ExpVal >>= 1;
    // Stop before squaring again once every set bit has been consumed, so no
    // unused G_FMUL is emitted after the last useful one.
    if (ExpVal == 0)
      break;

    CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
  }

  // If the original exponent was negative, invert the result, producing
  // 1/(x*x*x).
  if (Exponent < 0)
    Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
                            MI.getFlags());

  Builder.buildCopy(Dst, *Res);
  MI.eraseFromParent();
}
7399+
73527400
bool CombinerHelper::matchSextOfTrunc(const MachineOperand &MO,
73537401
BuildFnTy &MatchInfo) {
73547402
GSext *Sext = cast<GSext>(getDefIgnoringCopies(MO.getReg(), MRI));

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7140,8 +7140,6 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
71407140
return UnableToLegalize;
71417141
}
71427142

7143-
// TODO: If RHS is a constant SelectionDAGBuilder expands this into a
7144-
// multiplication tree.
71457143
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
71467144
auto [Dst, Src0, Src1] = MI.getFirst3Regs();
71477145
LLT Ty = MRI.getType(Dst);
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
3+
; RUN: llc < %s -mtriple=aarch64-unknown-unknown -global-isel | FileCheck %s
4+
5+
define double @pow_optsize(double %x) nounwind optsize {
6+
; CHECK-LABEL: pow_optsize:
7+
; CHECK: // %bb.0: // %entry
8+
; CHECK-NEXT: mov w0, #15 // =0xf
9+
; CHECK-NEXT: b __powidf2
10+
entry:
11+
%0 = call double @llvm.powi.f64.i32(double %x, i32 15)
12+
ret double %0
13+
}
14+
15+
define double @pow_optsize_expand(double %x) nounwind optsize {
16+
; CHECK-LABEL: pow_optsize_expand:
17+
; CHECK: // %bb.0: // %entry
18+
; CHECK-NEXT: fmul d0, d0, d0
19+
; CHECK-NEXT: fmul d0, d0, d0
20+
; CHECK-NEXT: fmul d0, d0, d0
21+
; CHECK-NEXT: fmul d0, d0, d0
22+
; CHECK-NEXT: ret
23+
entry:
24+
%0 = call double @llvm.powi.f64.i32(double %x, i32 16)
25+
ret double %0
26+
}
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
3+
4+
---
5+
name: fpowi_s64_zero
6+
body: |
7+
bb.0:
8+
liveins: $d0, $w0
9+
10+
; CHECK-LABEL: name: fpowi_s64_zero
11+
; CHECK: liveins: $d0, $w0
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
14+
; CHECK-NEXT: $d0 = COPY [[C]](s64)
15+
%0:_(s64) = COPY $d0
16+
%1:_(s32) = COPY $w0
17+
%2:_(s32) = G_CONSTANT i32 0
18+
%3:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %2(s32)
19+
$d0 = COPY %3(s64)
20+
...
21+
22+
---
23+
name: fpowi_s32_zero
24+
body: |
25+
bb.0:
26+
liveins: $d0, $w0
27+
28+
; CHECK-LABEL: name: fpowi_s32_zero
29+
; CHECK: liveins: $d0, $w0
30+
; CHECK-NEXT: {{ $}}
31+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
32+
; CHECK-NEXT: $s0 = COPY [[C]](s32)
33+
%0:_(s32) = COPY $s0
34+
%1:_(s32) = COPY $w0
35+
%2:_(s32) = G_CONSTANT i32 0
36+
%3:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %2(s32)
37+
$s0 = COPY %3(s32)
38+
...
39+
40+
---
41+
name: fpowi_positive
42+
body: |
43+
bb.0:
44+
liveins: $d0, $w0
45+
46+
; CHECK-LABEL: name: fpowi_positive
47+
; CHECK: liveins: $d0, $w0
48+
; CHECK-NEXT: {{ $}}
49+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
50+
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY]]
51+
; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[FMUL]], [[FMUL]]
52+
; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[FMUL1]]
53+
; CHECK-NEXT: $d0 = COPY [[FMUL2]](s64)
54+
%0:_(s64) = COPY $d0
55+
%1:_(s32) = COPY $w0
56+
%2:_(s32) = G_CONSTANT i32 5
57+
%3:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %2(s32)
58+
$d0 = COPY %3(s64)
59+
...
60+
61+
---
62+
name: fpowi_s64_negative
63+
body: |
64+
bb.0:
65+
liveins: $d0, $w0
66+
67+
; CHECK-LABEL: name: fpowi_s64_negative
68+
; CHECK: liveins: $d0, $w0
69+
; CHECK-NEXT: {{ $}}
70+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
71+
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY]]
72+
; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[FMUL]], [[FMUL]]
73+
; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[FMUL1]]
74+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
75+
; CHECK-NEXT: [[FDIV:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FDIV [[C]], [[FMUL2]]
76+
; CHECK-NEXT: $d0 = COPY [[FDIV]](s64)
77+
%0:_(s64) = COPY $d0
78+
%1:_(s32) = COPY $w0
79+
%2:_(s32) = G_CONSTANT i32 -5
80+
%3:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %2(s32)
81+
$d0 = COPY %3(s64)
82+
...
83+
84+
---
85+
name: fpowi_s32_negative
86+
body: |
87+
bb.0:
88+
liveins: $d0, $w0
89+
90+
; CHECK-LABEL: name: fpowi_s32_negative
91+
; CHECK: liveins: $d0, $w0
92+
; CHECK-NEXT: {{ $}}
93+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
94+
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY]]
95+
; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[FMUL]]
96+
; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[FMUL1]]
97+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
98+
; CHECK-NEXT: [[FDIV:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FDIV [[C]], [[FMUL2]]
99+
; CHECK-NEXT: $s0 = COPY [[FDIV]](s32)
100+
%0:_(s32) = COPY $s0
101+
%1:_(s32) = COPY $w0
102+
%2:_(s32) = G_CONSTANT i32 -5
103+
%3:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %2(s32)
104+
$s0 = COPY %3(s32)
105+
...
106+
107+
---
108+
name: fpowi_libcall
109+
body: |
110+
bb.0:
111+
liveins: $d0, $w0
112+
113+
; CHECK-LABEL: name: fpowi_libcall
114+
; CHECK: liveins: $d0, $w0
115+
; CHECK-NEXT: {{ $}}
116+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
117+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
118+
; CHECK-NEXT: [[FPOWI:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FPOWI [[COPY]], [[COPY1]](s32)
119+
; CHECK-NEXT: $d0 = COPY [[FPOWI]](s64)
120+
%0:_(s64) = COPY $d0
121+
%1:_(s32) = COPY $w0
122+
%2:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %1(s32)
123+
$d0 = COPY %2(s64)
124+
...

0 commit comments

Comments
 (0)