Skip to content

Commit 63d01dc

Browse files
committed
[GlobalISel][AArch64][AMDGPU] Lower FPOWI into a series of multiplications
SelectionDAG already converts FPOWI into multiplications; this patch introduces the same optimization into GlobalISel.
1 parent 2ca8c85 commit 63d01dc

File tree

7 files changed

+1182
-307
lines changed

7 files changed

+1182
-307
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7142,14 +7142,50 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
71427142
return UnableToLegalize;
71437143
}
71447144

7145-
// TODO: If RHS is a constant SelectionDAGBuilder expands this into a
7146-
// multiplication tree.
71477145
/// Lower a G_FPOWI whose exponent is a known integer constant into an explicit
/// chain of G_FMULs, followed by a G_FDIV of 1.0 by the product when the
/// exponent is negative.
///
/// \returns UnableToLegalize, leaving \p MI untouched, when the exponent is not
/// a constant, so the caller can pick another strategy. Otherwise the
/// replacement instructions are built, \p MI is erased and Legalized is
/// returned.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
  auto [Dst, Base, Exp] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Dst);

  // Use the helper's own MRI member throughout; a second local reference with
  // the same name would only shadow it.
  std::optional<int64_t> ConstantExpValue = getIConstantVRegSExtVal(Exp, MRI);
  if (!ConstantExpValue)
    return UnableToLegalize;

  const int64_t ExpVal = *ConstantExpValue;

  // x^0 folds to the constant 1.0 regardless of the base.
  if (ExpVal == 0) {
    MIRBuilder.buildFConstant(Dst, 1.0);
    // eraseFromParent() rather than removeFromParent(): the latter only
    // unlinks the instruction from its block without deleting it, which would
    // leak the replaced G_FPOWI.
    MI.eraseFromParent();
    return Legalized;
  }

  // Take the magnitude in unsigned arithmetic so that the most negative
  // int64_t value does not overflow when negated.
  const uint64_t Magnitude = ExpVal < 0 ? 0 - static_cast<uint64_t>(ExpVal)
                                        : static_cast<uint64_t>(ExpVal);

  Register Res = MRI.createGenericVirtualRegister(Ty);
  MIRBuilder.buildCopy(Res, Base);

  // Multiply the running product by the base (Magnitude - 1) more times.
  // NOTE(review): this emits one G_FMUL per step, so the expansion grows
  // linearly with the exponent and is unbounded for large constants; consider
  // square-and-multiply and/or a size limit with an UnableToLegalize bail-out.
  for (uint64_t Step = 1; Step < Magnitude; ++Step) {
    Register Tmp = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildFMul(Tmp, Res, Base);
    Res = Tmp;
  }

  // A negative exponent means the reciprocal of the product, e.g. x^-3 is
  // 1/(x*x*x).
  if (ExpVal < 0) {
    Register One = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildFConstant(One, 1.0);

    Register Quotient = MRI.createGenericVirtualRegister(Ty);
    MIRBuilder.buildFDiv(Quotient, One, Res);
    Res = Quotient;
  }

  MIRBuilder.buildCopy(Dst, Res);
  MI.eraseFromParent();
  return Legalized;
}

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -274,10 +274,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
274274
// Regardless of FP16 support, widen 16-bit elements to 32-bits.
275275
.minScalar(0, s32)
276276
.libcallFor({s32, s64});
277-
getActionDefinitionsBuilder(G_FPOWI)
278-
.scalarize(0)
279-
.minScalar(0, s32)
280-
.libcallFor({{s32, s32}, {s64, s32}});
277+
getActionDefinitionsBuilder(G_FPOWI).scalarize(0).minScalar(0, s32).customFor(
278+
{{s32, s32}, {s64, s32}});
281279

282280
getActionDefinitionsBuilder(G_INSERT)
283281
.legalIf(all(typeInSet(0, {s32, s64, p0}),
@@ -1263,6 +1261,8 @@ bool AArch64LegalizerInfo::legalizeCustom(
12631261
case TargetOpcode::G_FSHL:
12641262
case TargetOpcode::G_FSHR:
12651263
return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper);
1264+
case TargetOpcode::G_FPOWI:
1265+
return legalizeFPowI(MI, LocObserver, Helper);
12661266
case TargetOpcode::G_ROTR:
12671267
return legalizeRotate(MI, MRI, Helper);
12681268
case TargetOpcode::G_CTPOP:
@@ -1344,6 +1344,15 @@ bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
13441344
return true;
13451345
}
13461346

1347+
/// Custom legalization for G_FPOWI: first ask the generic helper to lower the
/// instruction, and only if that does not report Legalized fall back to
/// emitting a libcall.
///
/// \returns true when either strategy reports Legalized.
bool AArch64LegalizerInfo::legalizeFPowI(MachineInstr &MI,
                                         LostDebugLocObserver &Observer,
                                         LegalizerHelper &Helper) const {
  // Short-circuit evaluation guarantees libcall() is attempted only when the
  // lowering did not succeed.
  return Helper.lowerFPOWI(MI) == LegalizerHelper::Legalized ||
         Helper.libcall(MI, Observer) == LegalizerHelper::Legalized;
}
1355+
13471356
bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
13481357
MachineRegisterInfo &MRI,
13491358
MachineIRBuilder &MIRBuilder) const {

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ class AArch64LegalizerInfo : public LegalizerInfo {
5656
MachineIRBuilder &MIRBuilder,
5757
GISelChangeObserver &Observer,
5858
LegalizerHelper &Helper) const;
59+
bool legalizeFPowI(MachineInstr &MI, LostDebugLocObserver &Observer,
60+
LegalizerHelper &Helper) const;
5961
bool legalizeCTPOP(MachineInstr &MI, MachineRegisterInfo &MRI,
6062
LegalizerHelper &Helper) const;
6163
bool legalizeAtomicCmpxchg128(MachineInstr &MI, MachineRegisterInfo &MRI,

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1216,8 +1216,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
12161216
.scalarize(0);
12171217

12181218
getActionDefinitionsBuilder(G_FPOWI)
1219-
.clampScalar(0, MinScalarFPTy, S32)
1220-
.lower();
1219+
.clampScalar(0, MinScalarFPTy, S32)
1220+
.custom();
12211221

12221222
auto &Log2Ops = getActionDefinitionsBuilder({G_FLOG2, G_FEXP2});
12231223
Log2Ops.customFor({S32});
@@ -2127,6 +2127,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(
21272127
return legalizeFExp(MI, B);
21282128
case TargetOpcode::G_FPOW:
21292129
return legalizeFPow(MI, B);
2130+
case TargetOpcode::G_FPOWI:
2131+
return legalizeFPowI(Helper, MI, B, LocObserver);
21302132
case TargetOpcode::G_FFLOOR:
21312133
return legalizeFFloor(MI, MRI, B);
21322134
case TargetOpcode::G_BUILD_VECTOR:
@@ -3731,6 +3733,22 @@ bool AMDGPULegalizerInfo::legalizeFPow(MachineInstr &MI,
37313733
return true;
37323734
}
37333735

3736+
/// Custom legalization for G_FPOWI: try the generic helper lowering first;
/// when it does not report Legalized, rewrite the instruction as a G_FPOW
/// whose exponent is the integer exponent converted with G_SITOFP.
///
/// \returns true in all cases; \p LocObserver is not used here.
bool AMDGPULegalizerInfo::legalizeFPowI(
    LegalizerHelper &Helper, MachineInstr &MI, MachineIRBuilder &B,
    LostDebugLocObserver &LocObserver) const {
  if (Helper.lowerFPOWI(MI) != LegalizerHelper::Legalized) {
    // Fallback: powi(x, n) -> pow(x, sitofp(n)), carrying over MI's flags.
    auto [Result, Mantissa, Power] = MI.getFirst3Regs();
    const LLT ResultTy = B.getMRI()->getType(Result);

    auto PowerAsFP = B.buildSITOFP(ResultTy, Power);
    B.buildFPow(Result, Mantissa, PowerAsFP, MI.getFlags());
    MI.eraseFromParent();
  }

  return true;
}
3751+
37343752
// Find a source register, ignoring any possible source modifiers.
37353753
static Register stripAnySourceMods(Register OrigSrc, MachineRegisterInfo &MRI) {
37363754
Register ModSrc = OrigSrc;

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
9595
unsigned Flags) const;
9696
bool legalizeFExp(MachineInstr &MI, MachineIRBuilder &B) const;
9797
bool legalizeFPow(MachineInstr &MI, MachineIRBuilder &B) const;
98+
bool legalizeFPowI(LegalizerHelper &Helper, MachineInstr &MI,
99+
MachineIRBuilder &B,
100+
LostDebugLocObserver &LocObserver) const;
98101
bool legalizeFFloor(MachineInstr &MI, MachineRegisterInfo &MRI,
99102
MachineIRBuilder &B) const;
100103

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=aarch64-- -run-pass=legalizer %s -o - | FileCheck %s
3+
4+
---
5+
name: fpowi_s64_zero
6+
body: |
7+
bb.0:
8+
liveins: $d0, $w0
9+
10+
; CHECK-LABEL: name: fpowi_s64_zero
11+
; CHECK: liveins: $d0, $w0
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
14+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
15+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
16+
; CHECK-NEXT: $d0 = COPY [[C]](s64)
17+
%0:_(s64) = COPY $d0
18+
%1:_(s32) = COPY $w0
19+
%2:_(s32) = G_CONSTANT i32 0
20+
%3:_(s64) = G_FPOWI %0, %2(s32)
21+
$d0 = COPY %3(s64)
22+
...
23+
24+
---
25+
name: fpowi_s32_zero
26+
body: |
27+
bb.0:
28+
liveins: $d0, $w0
29+
30+
; CHECK-LABEL: name: fpowi_s32_zero
31+
; CHECK: liveins: $d0, $w0
32+
; CHECK-NEXT: {{ $}}
33+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
34+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
35+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
36+
; CHECK-NEXT: $s0 = COPY [[C]](s32)
37+
%0:_(s32) = COPY $s0
38+
%1:_(s32) = COPY $w0
39+
%2:_(s32) = G_CONSTANT i32 0
40+
%3:_(s32) = G_FPOWI %0, %2(s32)
41+
$s0 = COPY %3(s32)
42+
...
43+
44+
---
45+
name: fpowi_positive
46+
body: |
47+
bb.0:
48+
liveins: $d0, $w0
49+
50+
; CHECK-LABEL: name: fpowi_positive
51+
; CHECK: liveins: $d0, $w0
52+
; CHECK-NEXT: {{ $}}
53+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
54+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
55+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
56+
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY2]], [[COPY]]
57+
; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[FMUL]], [[COPY]]
58+
; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s64) = G_FMUL [[FMUL1]], [[COPY]]
59+
; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s64) = G_FMUL [[FMUL2]], [[COPY]]
60+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[FMUL3]](s64)
61+
; CHECK-NEXT: $d0 = COPY [[COPY3]](s64)
62+
%0:_(s64) = COPY $d0
63+
%1:_(s32) = COPY $w0
64+
%2:_(s32) = G_CONSTANT i32 5
65+
%3:_(s64) = G_FPOWI %0, %2(s32)
66+
$d0 = COPY %3(s64)
67+
...
68+
69+
---
70+
name: fpowi_s64_negative
71+
body: |
72+
bb.0:
73+
liveins: $d0, $w0
74+
75+
; CHECK-LABEL: name: fpowi_s64_negative
76+
; CHECK: liveins: $d0, $w0
77+
; CHECK-NEXT: {{ $}}
78+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
79+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
80+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
81+
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY2]], [[COPY]]
82+
; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[FMUL]], [[COPY]]
83+
; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s64) = G_FMUL [[FMUL1]], [[COPY]]
84+
; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s64) = G_FMUL [[FMUL2]], [[COPY]]
85+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
86+
; CHECK-NEXT: [[FDIV:%[0-9]+]]:_(s64) = G_FDIV [[C]], [[FMUL3]]
87+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[FDIV]](s64)
88+
; CHECK-NEXT: $d0 = COPY [[COPY3]](s64)
89+
%0:_(s64) = COPY $d0
90+
%1:_(s32) = COPY $w0
91+
%2:_(s32) = G_CONSTANT i32 -5
92+
%3:_(s64) = G_FPOWI %0, %2(s32)
93+
$d0 = COPY %3(s64)
94+
...
95+
96+
---
97+
name: fpowi_s32_negative
98+
body: |
99+
bb.0:
100+
liveins: $d0, $w0
101+
102+
; CHECK-LABEL: name: fpowi_s32_negative
103+
; CHECK: liveins: $d0, $w0
104+
; CHECK-NEXT: {{ $}}
105+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
106+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
107+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
108+
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY2]], [[COPY]]
109+
; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[COPY]]
110+
; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[COPY]]
111+
; CHECK-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FMUL2]], [[COPY]]
112+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
113+
; CHECK-NEXT: [[FDIV:%[0-9]+]]:_(s32) = G_FDIV [[C]], [[FMUL3]]
114+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[FDIV]](s32)
115+
; CHECK-NEXT: $s0 = COPY [[COPY3]](s32)
116+
%0:_(s32) = COPY $s0
117+
%1:_(s32) = COPY $w0
118+
%2:_(s32) = G_CONSTANT i32 -5
119+
%3:_(s32) = G_FPOWI %0, %2(s32)
120+
$s0 = COPY %3(s32)
121+
...
122+
123+
---
124+
name: fpowi_libcall
125+
body: |
126+
bb.0:
127+
liveins: $d0, $w0
128+
129+
; CHECK-LABEL: name: fpowi_libcall
130+
; CHECK: liveins: $d0, $w0
131+
; CHECK-NEXT: {{ $}}
132+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
133+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
134+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
135+
; CHECK-NEXT: $d0 = COPY [[COPY]](s64)
136+
; CHECK-NEXT: $w0 = COPY [[COPY1]](s32)
137+
; CHECK-NEXT: BL &__powidf2, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $d0, implicit $w0, implicit-def $d0
138+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
139+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $d0
140+
; CHECK-NEXT: $d0 = COPY [[COPY2]](s64)
141+
%0:_(s64) = COPY $d0
142+
%1:_(s32) = COPY $w0
143+
%2:_(s64) = G_FPOWI %0, %1(s32)
144+
$d0 = COPY %2(s64)
145+
...

0 commit comments

Comments
 (0)