Skip to content

Commit cbd1782

Browse files
committed
AMDGPU/GlobalISel: Legalize sin/cos
llvm-svn: 370402
1 parent 1755617 commit cbd1782

File tree

4 files changed

+1125
-0
lines changed

4 files changed

+1125
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,12 +275,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
275275
auto &FPOpActions = getActionDefinitionsBuilder(
276276
{ G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA, G_FCANONICALIZE})
277277
.legalFor({S32, S64});
278+
auto &TrigActions = getActionDefinitionsBuilder({G_FSIN, G_FCOS})
279+
.customFor({S32, S64});
278280

279281
if (ST.has16BitInsts()) {
280282
if (ST.hasVOP3PInsts())
281283
FPOpActions.legalFor({S16, V2S16});
282284
else
283285
FPOpActions.legalFor({S16});
286+
287+
TrigActions.customFor({S16});
284288
}
285289

286290
auto &MinNumMaxNum = getActionDefinitionsBuilder({
@@ -306,10 +310,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
306310

307311
if (ST.hasVOP3PInsts())
308312
FPOpActions.clampMaxNumElements(0, S16, 2);
313+
309314
FPOpActions
310315
.scalarize(0)
311316
.clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
312317

318+
TrigActions
319+
.scalarize(0)
320+
.clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
321+
313322
if (ST.has16BitInsts()) {
314323
getActionDefinitionsBuilder(G_FSQRT)
315324
.legalFor({S32, S64, S16})
@@ -816,6 +825,9 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
816825
return legalizeExtractVectorElt(MI, MRI, MIRBuilder);
817826
case TargetOpcode::G_INSERT_VECTOR_ELT:
818827
return legalizeInsertVectorElt(MI, MRI, MIRBuilder);
828+
case TargetOpcode::G_FSIN:
829+
case TargetOpcode::G_FCOS:
830+
return legalizeSinCos(MI, MRI, MIRBuilder);
819831
default:
820832
return false;
821833
}
@@ -1231,6 +1243,35 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
12311243
return true;
12321244
}
12331245

1246+
/// Custom legalization for G_FSIN and G_FCOS.
///
/// Replaces \p MI with the amdgcn.sin / amdgcn.cos intrinsic applied to the
/// source operand multiplied by the constant 0.5 / pi. When the subtarget
/// reports hasTrigReducedRange(), the product is first passed through the
/// amdgcn.fract intrinsic. The flags of \p MI are propagated to the multiply,
/// the optional fract and the final trig intrinsic.
///
/// \param MI  The G_FSIN / G_FCOS instruction; erased before returning.
/// \param MRI Used to look up the type of the destination register.
/// \param B   Builder used to emit the replacement sequence at \p MI.
/// \returns true unconditionally.
bool AMDGPULegalizerInfo::legalizeSinCos(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  B.setInstr(MI);

  Register Result = MI.getOperand(0).getReg();
  Register Input = MI.getOperand(1).getReg();
  LLT ValTy = MRI.getType(Result);
  unsigned MIFlags = MI.getFlags();
  const bool IsSin = MI.getOpcode() == AMDGPU::G_FSIN;

  // Instructions are emitted in this order: constant, multiply, optional
  // fract, trig intrinsic. The multiply is needed on both paths, so it is
  // built once up front.
  auto InvTwoPi = B.buildFConstant(ValTy, 0.5 / M_PI);
  Register Scaled = B.buildFMul(ValTy, Input, InvTwoPi, MIFlags).getReg(0);

  if (ST.hasTrigReducedRange()) {
    // Feed the scaled value through amdgcn.fract before the trig intrinsic.
    Scaled = B.buildIntrinsic(Intrinsic::amdgcn_fract, {ValTy}, false)
      .addUse(Scaled)
      .setMIFlags(MIFlags).getReg(0);
  }

  Intrinsic::ID TrigIntrin =
    IsSin ? Intrinsic::amdgcn_sin : Intrinsic::amdgcn_cos;
  B.buildIntrinsic(TrigIntrin, makeArrayRef<Register>(Result), false)
    .addUse(Scaled)
    .setMIFlags(MIFlags);

  MI.eraseFromParent();
  return true;
}
1274+
12341275
// Return the use branch instruction, otherwise null if the usage is invalid.
12351276
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
12361277
MachineRegisterInfo &MRI) {

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ class AMDGPULegalizerInfo : public LegalizerInfo {
5555
MachineIRBuilder &MIRBuilder) const;
5656
bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
5757
MachineIRBuilder &MIRBuilder) const;
58+
/// Custom-legalize a G_FSIN or G_FCOS instruction \p MI (dispatched from
/// legalizeCustom). Returns true on success.
bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI,
59+
MachineIRBuilder &MIRBuilder) const;
5860

5961
Register getLiveInRegister(MachineRegisterInfo &MRI,
6062
Register Reg, LLT Ty) const;

0 commit comments

Comments
 (0)