@@ -275,12 +275,16 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
275
275
auto &FPOpActions = getActionDefinitionsBuilder (
276
276
{ G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA, G_FCANONICALIZE})
277
277
.legalFor ({S32, S64});
278
+ auto &TrigActions = getActionDefinitionsBuilder ({G_FSIN, G_FCOS})
279
+ .customFor ({S32, S64});
278
280
279
281
if (ST.has16BitInsts ()) {
280
282
if (ST.hasVOP3PInsts ())
281
283
FPOpActions.legalFor ({S16, V2S16});
282
284
else
283
285
FPOpActions.legalFor ({S16});
286
+
287
+ TrigActions.customFor ({S16});
284
288
}
285
289
286
290
auto &MinNumMaxNum = getActionDefinitionsBuilder ({
@@ -306,10 +310,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
306
310
307
311
if (ST.hasVOP3PInsts ())
308
312
FPOpActions.clampMaxNumElements (0 , S16, 2 );
313
+
309
314
FPOpActions
310
315
.scalarize (0 )
311
316
.clampScalar (0 , ST.has16BitInsts () ? S16 : S32, S64);
312
317
318
+ TrigActions
319
+ .scalarize (0 )
320
+ .clampScalar (0 , ST.has16BitInsts () ? S16 : S32, S64);
321
+
313
322
if (ST.has16BitInsts ()) {
314
323
getActionDefinitionsBuilder (G_FSQRT)
315
324
.legalFor ({S32, S64, S16})
@@ -816,6 +825,9 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
816
825
return legalizeExtractVectorElt (MI, MRI, MIRBuilder);
817
826
case TargetOpcode::G_INSERT_VECTOR_ELT:
818
827
return legalizeInsertVectorElt (MI, MRI, MIRBuilder);
828
+ case TargetOpcode::G_FSIN:
829
+ case TargetOpcode::G_FCOS:
830
+ return legalizeSinCos (MI, MRI, MIRBuilder);
819
831
default :
820
832
return false ;
821
833
}
@@ -1231,6 +1243,35 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
1231
1243
return true ;
1232
1244
}
1233
1245
1246
+ bool AMDGPULegalizerInfo::legalizeSinCos (
1247
+ MachineInstr &MI, MachineRegisterInfo &MRI,
1248
+ MachineIRBuilder &B) const {
1249
+ B.setInstr (MI);
1250
+
1251
+ Register DstReg = MI.getOperand (0 ).getReg ();
1252
+ Register SrcReg = MI.getOperand (1 ).getReg ();
1253
+ LLT Ty = MRI.getType (DstReg);
1254
+ unsigned Flags = MI.getFlags ();
1255
+
1256
+ Register TrigVal;
1257
+ auto OneOver2Pi = B.buildFConstant (Ty, 0.5 / M_PI);
1258
+ if (ST.hasTrigReducedRange ()) {
1259
+ auto MulVal = B.buildFMul (Ty, SrcReg, OneOver2Pi, Flags);
1260
+ TrigVal = B.buildIntrinsic (Intrinsic::amdgcn_fract, {Ty}, false )
1261
+ .addUse (MulVal.getReg (0 ))
1262
+ .setMIFlags (Flags).getReg (0 );
1263
+ } else
1264
+ TrigVal = B.buildFMul (Ty, SrcReg, OneOver2Pi, Flags).getReg (0 );
1265
+
1266
+ Intrinsic::ID TrigIntrin = MI.getOpcode () == AMDGPU::G_FSIN ?
1267
+ Intrinsic::amdgcn_sin : Intrinsic::amdgcn_cos;
1268
+ B.buildIntrinsic (TrigIntrin, makeArrayRef<Register>(DstReg), false )
1269
+ .addUse (TrigVal)
1270
+ .setMIFlags (Flags);
1271
+ MI.eraseFromParent ();
1272
+ return true ;
1273
+ }
1274
+
1234
1275
// Return the use branch instruction, otherwise null if the usage is invalid.
1235
1276
static MachineInstr *verifyCFIntrinsic (MachineInstr &MI,
1236
1277
MachineRegisterInfo &MRI) {
0 commit comments