Skip to content

Commit 2f4d0ac

Browse files
changpeng authored and pravinjagtap committed
AMDGPU: Use pattern to select instruction for intrinsic llvm.fptrunc.round (llvm#105761)
Use GCNPat instead of Custom Lowering to select instructions for intrinsic llvm.fptrunc.round. "SupportedRoundMode : TImmLeaf" is used as a predicate to select only when the rounding mode is supported. "as_hw_round_mode : SDNodeXForm" is developed to translate the round modes to the corresponding ones that hardware recognizes.
1 parent 1e8ff34 commit 2f4d0ac

16 files changed

+128
-161
lines changed

llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ def : GINodeEquiv<G_FFLOOR, ffloor>;
161161
def : GINodeEquiv<G_FRINT, frint>;
162162
def : GINodeEquiv<G_FNEARBYINT, fnearbyint>;
163163
def : GINodeEquiv<G_INTRINSIC_TRUNC, ftrunc>;
164+
def : GINodeEquiv<G_INTRINSIC_FPTRUNC_ROUND, fptrunc_round>;
164165
def : GINodeEquiv<G_INTRINSIC_ROUND, fround>;
165166
def : GINodeEquiv<G_INTRINSIC_ROUNDEVEN, froundeven>;
166167
def : GINodeEquiv<G_INTRINSIC_LRINT, lrint>;

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,9 @@ def SDTFPUnaryOp : SDTypeProfile<1, 1, [ // fneg, fsqrt, etc
158158
def SDTFPRoundOp : SDTypeProfile<1, 1, [ // fpround
159159
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>
160160
]>;
161+
def SDTFPTruncRoundOp : SDTypeProfile<1, 2, [
162+
SDTCisFP<0>, SDTCisFP<1>, SDTCisInt<2>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>
163+
]>;
161164
def SDTFPExtendOp : SDTypeProfile<1, 1, [ // fpextend
162165
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1>
163166
]>;
@@ -545,6 +548,8 @@ def llround : SDNode<"ISD::LLROUND" , SDTFPToIntOp>;
545548
def lrint : SDNode<"ISD::LRINT" , SDTFPToIntOp>;
546549
def llrint : SDNode<"ISD::LLRINT" , SDTFPToIntOp>;
547550

551+
def fptrunc_round : SDNode<"ISD::FPTRUNC_ROUND", SDTFPTruncRoundOp>;
552+
548553
def fpround : SDNode<"ISD::FP_ROUND" , SDTFPRoundOp>;
549554
def fpextend : SDNode<"ISD::FP_EXTEND" , SDTFPExtendOp>;
550555
def fcopysign : SDNode<"ISD::FCOPYSIGN" , SDTFPSignOp>;

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,8 +297,6 @@ def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_UBYTE, SIsbuffer_load_ubyte>;
297297
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_SSHORT, SIsbuffer_load_short>;
298298
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_USHORT, SIsbuffer_load_ushort>;
299299

300-
def : GINodeEquiv<G_FPTRUNC_ROUND, SIfptrunc_round>;
301-
302300
class GISelSop2Pat <
303301
SDPatternOperator node,
304302
Instruction inst,
@@ -417,3 +415,6 @@ def gi_frameindex_to_targetframeindex : GICustomOperandRenderer<"renderFrameInde
417415

418416
def gi_fp_pow2_to_exponent : GICustomOperandRenderer<"renderFPPow2ToExponent">,
419417
GISDNodeXFormEquiv<FPPow2ToExponentXForm>;
418+
419+
def gi_as_hw_round_mode : GICustomOperandRenderer<"renderRoundMode">,
420+
GISDNodeXFormEquiv<as_hw_round_mode>;

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5508,7 +5508,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
55085508
NODE_NAME_CASE(CONST_DATA_PTR)
55095509
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
55105510
NODE_NAME_CASE(LDS)
5511-
NODE_NAME_CASE(FPTRUNC_ROUND)
55125511
NODE_NAME_CASE(DUMMY_CHAIN)
55135512
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
55145513
NODE_NAME_CASE(LOAD_D16_HI)

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,6 @@ enum NodeType : unsigned {
555555
CONST_DATA_PTR,
556556
PC_ADD_REL_OFFSET,
557557
LDS,
558-
FPTRUNC_ROUND,
559558

560559
DUMMY_CHAIN,
561560
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5701,6 +5701,16 @@ void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB,
57015701
MIB.addImm(ExpVal);
57025702
}
57035703

5704+
void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,
5705+
const MachineInstr &MI,
5706+
int OpIdx) const {
5707+
// "round.towardzero" -> TowardZero 0 -> FP_ROUND_ROUND_TO_ZERO 3
5708+
// "round.tonearest" -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
5709+
// "round.upward" -> TowardPositive 2 -> FP_ROUND_ROUND_TO_INF 1
5710+
// "round.downward" -> TowardNegative 3 -> FP_ROUND_ROUND_TO_NEGINF 2
5711+
MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);
5712+
}
5713+
57045714
bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
57055715
return TII.isInlineConstant(Imm);
57065716
}

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
351351
void renderFPPow2ToExponent(MachineInstrBuilder &MIB, const MachineInstr &MI,
352352
int OpIdx) const;
353353

354+
void renderRoundMode(MachineInstrBuilder &MIB, const MachineInstr &MI,
355+
int OpIdx) const;
356+
354357
bool isInlineImmediate(const APInt &Imm) const;
355358
bool isInlineImmediate(const APFloat &Imm) const;
356359

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1137,7 +1137,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
11371137
.lower();
11381138

11391139
getActionDefinitionsBuilder(G_INTRINSIC_FPTRUNC_ROUND)
1140-
.customFor({S16, S32})
1140+
.legalFor({S16, S32})
11411141
.scalarize(0)
11421142
.lower();
11431143

@@ -2179,8 +2179,6 @@ bool AMDGPULegalizerInfo::legalizeCustom(
21792179
return legalizeCTLZ_CTTZ(MI, MRI, B);
21802180
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
21812181
return legalizeCTLZ_ZERO_UNDEF(MI, MRI, B);
2182-
case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
2183-
return legalizeFPTruncRound(MI, B);
21842182
case TargetOpcode::G_STACKSAVE:
21852183
return legalizeStackSave(MI, B);
21862184
case TargetOpcode::G_GET_FPENV:
@@ -7112,35 +7110,6 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
71127110
return true;
71137111
}
71147112

7115-
bool AMDGPULegalizerInfo::legalizeFPTruncRound(MachineInstr &MI,
7116-
MachineIRBuilder &B) const {
7117-
MachineRegisterInfo &MRI = *B.getMRI();
7118-
Register Src = MI.getOperand(1).getReg();
7119-
if (MRI.getType(Src) != LLT::scalar(32))
7120-
return false;
7121-
7122-
// Only support towardzero, tonearest, upward and downward.
7123-
int RoundMode = MI.getOperand(2).getImm();
7124-
if (RoundMode != (int)RoundingMode::TowardZero &&
7125-
RoundMode != (int)RoundingMode::NearestTiesToEven &&
7126-
RoundMode != (int)RoundingMode::TowardPositive &&
7127-
RoundMode != (int)RoundingMode::TowardNegative)
7128-
return false;
7129-
7130-
// "round.towardzero" -> TowardZero 0 -> FP_ROUND_ROUND_TO_ZERO 3
7131-
// "round.tonearest" -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
7132-
// "round.upward" -> TowardPositive 2 -> FP_ROUND_ROUND_TO_INF 1
7133-
// "round.downward -> TowardNegative 3 -> FP_ROUND_ROUND_TO_NEGINF 2
7134-
unsigned HW_Mode = (RoundMode + 3) % 4;
7135-
B.buildInstr(AMDGPU::G_FPTRUNC_ROUND)
7136-
.addDef(MI.getOperand(0).getReg())
7137-
.addUse(Src)
7138-
.addImm(HW_Mode);
7139-
7140-
MI.eraseFromParent();
7141-
return true;
7142-
}
7143-
71447113
bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI,
71457114
MachineIRBuilder &B) const {
71467115
const SITargetLowering *TLI = ST.getTargetLowering();

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,6 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
212212

213213
bool legalizeBVHIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
214214

215-
bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const;
216215
bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const;
217216
bool legalizeWaveID(MachineInstr &MI, MachineIRBuilder &B) const;
218217

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5251,7 +5251,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
52515251
OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
52525252
break;
52535253
}
5254-
case AMDGPU::G_FPTRUNC_ROUND:
5254+
case AMDGPU::G_INTRINSIC_FPTRUNC_ROUND:
52555255
return getDefaultMappingVOP(MI);
52565256
case AMDGPU::G_PREFETCH:
52575257
OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -597,7 +597,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
597597

598598
// F16 - VOP1 Actions.
599599
setOperationAction({ISD::FP_ROUND, ISD::STRICT_FP_ROUND, ISD::FCOS,
600-
ISD::FSIN, ISD::FROUND, ISD::FPTRUNC_ROUND},
600+
ISD::FSIN, ISD::FROUND},
601601
MVT::f16, Custom);
602602

603603
setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::f16, Promote);
@@ -5810,8 +5810,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
58105810
case ISD::FP_ROUND:
58115811
case ISD::STRICT_FP_ROUND:
58125812
return lowerFP_ROUND(Op, DAG);
5813-
case ISD::FPTRUNC_ROUND:
5814-
return lowerFPTRUNC_ROUND(Op, DAG);
58155813
case ISD::TRAP:
58165814
return lowerTRAP(Op, DAG);
58175815
case ISD::DEBUGTRAP:
@@ -6661,30 +6659,6 @@ SDValue SITargetLowering::getFPExtOrFPRound(SelectionDAG &DAG,
66616659
DAG.getTargetConstant(0, DL, MVT::i32));
66626660
}
66636661

6664-
SDValue SITargetLowering::lowerFPTRUNC_ROUND(SDValue Op,
6665-
SelectionDAG &DAG) const {
6666-
if (Op.getOperand(0)->getValueType(0) != MVT::f32)
6667-
return SDValue();
6668-
6669-
// Only support towardzero, tonearest, upward and downward.
6670-
int RoundMode = Op.getConstantOperandVal(1);
6671-
if (RoundMode != (int)RoundingMode::TowardZero &&
6672-
RoundMode != (int)RoundingMode::NearestTiesToEven &&
6673-
RoundMode != (int)RoundingMode::TowardPositive &&
6674-
RoundMode != (int)RoundingMode::TowardNegative)
6675-
return SDValue();
6676-
6677-
// "round.towardzero" -> TowardZero 0 -> FP_ROUND_ROUND_TO_ZERO 3
6678-
// "round.tonearest" -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
6679-
// "round.upward" -> TowardPositive 2 -> FP_ROUND_ROUND_TO_INF 1
6680-
// "round.downward -> TowardNegative 3 -> FP_ROUND_ROUND_TO_NEGINF 2
6681-
unsigned HW_Mode = (RoundMode + 3) % 4;
6682-
SDLoc DL(Op);
6683-
SDValue RoundFlag = DAG.getTargetConstant(HW_Mode, DL, MVT::i32);
6684-
return DAG.getNode(AMDGPUISD::FPTRUNC_ROUND, DL, Op.getNode()->getVTList(),
6685-
Op->getOperand(0), RoundFlag);
6686-
}
6687-
66886662
SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
66896663
assert(Op.getValueType() == MVT::f16 &&
66906664
"Do not know how to custom lower FP_ROUND for non-f16 type");

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
145145

146146
/// Custom lowering for ISD::FP_ROUND for MVT::f16.
147147
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
148-
SDValue lowerFPTRUNC_ROUND(SDValue Op, SelectionDAG &DAG) const;
149148
SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
150149
SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
151150
SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -304,12 +304,6 @@ def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
304304
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
305305
>;
306306

307-
def SDTFPRoundModeOp : SDTypeProfile<1, 2, [
308-
SDTCisFP<0>, SDTCisFP<1>, SDTCisInt<2>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>
309-
]>;
310-
311-
def SIfptrunc_round : SDNode<"AMDGPUISD::FPTRUNC_ROUND", SDTFPRoundModeOp>;
312-
313307
//===----------------------------------------------------------------------===//
314308
// ValueType helpers
315309
//===----------------------------------------------------------------------===//
@@ -796,6 +790,22 @@ return CurDAG->getTargetConstant(
796790
N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
797791
}]>;
798792

793+
def as_hw_round_mode : SDNodeXForm<timm, [{
794+
// "round.towardzero" -> TowardZero 0 -> FP_ROUND_ROUND_TO_ZERO 3
795+
// "round.tonearest" -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
796+
// "round.upward" -> TowardPositive 2 -> FP_ROUND_ROUND_TO_INF 1
797+
// "round.downward" -> TowardNegative 3 -> FP_ROUND_ROUND_TO_NEGINF 2
798+
return CurDAG->getTargetConstant((N->getSExtValue() + 3) % 4, SDLoc(N),
799+
MVT::i32);
800+
}]>;
801+
802+
def SupportedRoundMode : TImmLeaf<i32, [{
803+
return Imm == (int)RoundingMode::TowardZero ||
804+
Imm == (int)RoundingMode::NearestTiesToEven ||
805+
Imm == (int)RoundingMode::TowardPositive ||
806+
Imm == (int)RoundingMode::TowardNegative;
807+
}]>;
808+
799809
class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
800810
uint64_t Imm = N->getZExtValue();
801811
unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -229,10 +229,12 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
229229
// in the ModeRegister pass.
230230
let Uses = [MODE, EXEC] in {
231231
def FPTRUNC_ROUND_F16_F32_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
232-
(ins VGPR_32:$src0, i32imm:$round),
233-
[(set f16:$vdst, (SIfptrunc_round f32:$src0, i32:$round))]>;
232+
(ins VGPR_32:$src0, i32imm:$round)>;
234233
} // End Uses = [MODE, EXEC]
235234

235+
def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
236+
(FPTRUNC_ROUND_F16_F32_PSEUDO $src0, (as_hw_round_mode $round))>;
237+
236238
// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
237239
// restoring it after we're done.
238240
let Defs = [SCC], isConvergent = 1 in {
@@ -4013,11 +4015,6 @@ def G_SI_CALL : AMDGPUGenericInstruction {
40134015
let isConvergent = 1;
40144016
}
40154017

4016-
def G_FPTRUNC_ROUND : AMDGPUGenericInstruction {
4017-
let OutOperandList = (outs type0:$vdst);
4018-
let InOperandList = (ins type1:$src0, untyped_imm_0:$round);
4019-
let hasSideEffects = 0;
4020-
}
40214018

40224019
//============================================================================//
40234020
// Dummy Instructions

llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1030 -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefixes=SDAG-FAIL
2-
; RUN: not --crash llc -global-isel -mtriple=amdgcn -mcpu=gfx1030 -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefix=GISEL-FAIL
1+
; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1030 -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefix=FAIL
2+
; RUN: not --crash llc -global-isel -mtriple=amdgcn -mcpu=gfx1030 -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefix=FAIL
33

44
define amdgpu_gs void @test_fptrunc_round_f64(double %a, ptr addrspace(1) %out) {
5-
; SDAG-FAIL: LLVM ERROR: Cannot select
6-
; GISEL-FAIL: unable to legalize instruction
5+
; FAIL: LLVM ERROR: Cannot select
76
%res = call half @llvm.fptrunc.round.f16.f64(double %a, metadata !"round.upward")
87
store half %res, ptr addrspace(1) %out, align 4
98
ret void

0 commit comments

Comments
 (0)