Skip to content

AMDGPU: Use pattern to select instruction for intrinsic llvm.fptrunc.round #105761

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ def : GINodeEquiv<G_FFLOOR, ffloor>;
def : GINodeEquiv<G_FRINT, frint>;
def : GINodeEquiv<G_FNEARBYINT, fnearbyint>;
def : GINodeEquiv<G_INTRINSIC_TRUNC, ftrunc>;
def : GINodeEquiv<G_INTRINSIC_FPTRUNC_ROUND, fptrunc_round>;
def : GINodeEquiv<G_INTRINSIC_ROUND, fround>;
def : GINodeEquiv<G_INTRINSIC_ROUNDEVEN, froundeven>;
def : GINodeEquiv<G_INTRINSIC_LRINT, lrint>;
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/Target/TargetSelectionDAG.td
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ def SDTFPUnaryOp : SDTypeProfile<1, 1, [ // fneg, fsqrt, etc
def SDTFPRoundOp : SDTypeProfile<1, 1, [ // fpround
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>
]>;
def SDTFPTruncRoundOp : SDTypeProfile<1, 2, [ // fptrunc_round
SDTCisFP<0>, SDTCisFP<1>, SDTCisInt<2>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>
]>;
def SDTFPExtendOp : SDTypeProfile<1, 1, [ // fpextend
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1>
]>;
Expand Down Expand Up @@ -552,6 +555,8 @@ def llround : SDNode<"ISD::LLROUND" , SDTFPToIntOp>;
def lrint : SDNode<"ISD::LRINT" , SDTFPToIntOp>;
def llrint : SDNode<"ISD::LLRINT" , SDTFPToIntOp>;

def fptrunc_round : SDNode<"ISD::FPTRUNC_ROUND", SDTFPTruncRoundOp>;

def fpround : SDNode<"ISD::FP_ROUND" , SDTFPRoundOp>;
def fpextend : SDNode<"ISD::FP_EXTEND" , SDTFPExtendOp>;
def fcopysign : SDNode<"ISD::FCOPYSIGN" , SDTFPSignOp>;
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -297,8 +297,6 @@ def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_UBYTE, SIsbuffer_load_ubyte>;
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_SSHORT, SIsbuffer_load_short>;
def : GINodeEquiv<G_AMDGPU_S_BUFFER_LOAD_USHORT, SIsbuffer_load_ushort>;

def : GINodeEquiv<G_FPTRUNC_ROUND, SIfptrunc_round>;

class GISelSop2Pat <
SDPatternOperator node,
Instruction inst,
Expand Down Expand Up @@ -419,3 +417,6 @@ def gi_frameindex_to_targetframeindex : GICustomOperandRenderer<"renderFrameInde

def gi_fp_pow2_to_exponent : GICustomOperandRenderer<"renderFPPow2ToExponent">,
GISDNodeXFormEquiv<FPPow2ToExponentXForm>;

// GlobalISel equivalent of the as_hw_round_mode SDNodeXForm: the operand is
// rendered by the instruction selector's renderRoundMode method.
def gi_as_hw_round_mode : GICustomOperandRenderer<"renderRoundMode">,
GISDNodeXFormEquiv<as_hw_round_mode>;
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5511,7 +5511,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CONST_DATA_PTR)
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
NODE_NAME_CASE(LDS)
NODE_NAME_CASE(FPTRUNC_ROUND)
NODE_NAME_CASE(DUMMY_CHAIN)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(LOAD_D16_HI)
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,6 @@ enum NodeType : unsigned {
CONST_DATA_PTR,
PC_ADD_REL_OFFSET,
LDS,
FPTRUNC_ROUND,

DUMMY_CHAIN,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5594,6 +5594,16 @@ void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB,
MIB.addImm(ExpVal);
}

/// Custom operand renderer for the rounding-mode immediate.
///
/// Reads the immediate stored in operand \p OpIdx of \p MI, remaps it with
/// (mode + 3) % 4, and appends the result to \p MIB as an immediate operand.
/// The remapping corresponds to the following table:
///
///   "round.towardzero" -> TowardZero        0 -> FP_ROUND_ROUND_TO_ZERO    3
///   "round.tonearest"  -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
///   "round.upward"     -> TowardPositive    2 -> FP_ROUND_ROUND_TO_INF     1
///   "round.downward"   -> TowardNegative    3 -> FP_ROUND_ROUND_TO_NEGINF  2
void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  const auto IRMode = MI.getOperand(OpIdx).getImm();
  const auto HWMode = (IRMode + 3) % 4;
  MIB.addImm(HWMode);
}

bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
return TII.isInlineConstant(Imm);
}
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
void renderFPPow2ToExponent(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;

void renderRoundMode(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;

bool isInlineImmediate(const APInt &Imm) const;
bool isInlineImmediate(const APFloat &Imm) const;

Expand Down
33 changes: 1 addition & 32 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1137,7 +1137,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.lower();

getActionDefinitionsBuilder(G_INTRINSIC_FPTRUNC_ROUND)
.customFor({S16, S32})
.legalFor({S16, S32})
.scalarize(0)
.lower();

Expand Down Expand Up @@ -2179,8 +2179,6 @@ bool AMDGPULegalizerInfo::legalizeCustom(
return legalizeCTLZ_CTTZ(MI, MRI, B);
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
return legalizeCTLZ_ZERO_UNDEF(MI, MRI, B);
case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
return legalizeFPTruncRound(MI, B);
case TargetOpcode::G_STACKSAVE:
return legalizeStackSave(MI, B);
case TargetOpcode::G_GET_FPENV:
Expand Down Expand Up @@ -7093,35 +7091,6 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
return true;
}

/// Custom legalization of G_INTRINSIC_FPTRUNC_ROUND.
///
/// Replaces \p MI with a G_FPTRUNC_ROUND instruction whose immediate operand
/// is the remapped rounding mode, then erases \p MI.
///
/// \returns false (leaving \p MI untouched) when the source register is not a
/// 32-bit scalar or the rounding mode is not one of towardzero, tonearest,
/// upward or downward; true after the replacement has been built.
bool AMDGPULegalizerInfo::legalizeFPTruncRound(MachineInstr &MI,
                                               MachineIRBuilder &B) const {
  MachineRegisterInfo &MRI = *B.getMRI();

  const Register SrcReg = MI.getOperand(1).getReg();
  const bool SrcIs32BitScalar = MRI.getType(SrcReg) == LLT::scalar(32);
  if (!SrcIs32BitScalar)
    return false;

  // Only towardzero, tonearest, upward and downward are supported.
  const int IRMode = MI.getOperand(2).getImm();
  const bool IsSupportedMode =
      IRMode == static_cast<int>(RoundingMode::TowardZero) ||
      IRMode == static_cast<int>(RoundingMode::NearestTiesToEven) ||
      IRMode == static_cast<int>(RoundingMode::TowardPositive) ||
      IRMode == static_cast<int>(RoundingMode::TowardNegative);
  if (!IsSupportedMode)
    return false;

  // Rounding-mode remapping applied below:
  //   "round.towardzero" -> TowardZero        0 -> FP_ROUND_ROUND_TO_ZERO    3
  //   "round.tonearest"  -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
  //   "round.upward"     -> TowardPositive    2 -> FP_ROUND_ROUND_TO_INF     1
  //   "round.downward"   -> TowardNegative    3 -> FP_ROUND_ROUND_TO_NEGINF  2
  const unsigned HWMode = (IRMode + 3) % 4;

  const Register DstReg = MI.getOperand(0).getReg();
  B.buildInstr(AMDGPU::G_FPTRUNC_ROUND)
      .addDef(DstReg)
      .addUse(SrcReg)
      .addImm(HWMode);

  // The generic instruction has been replaced; drop it.
  MI.eraseFromParent();
  return true;
}

bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI,
MachineIRBuilder &B) const {
const SITargetLowering *TLI = ST.getTargetLowering();
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,6 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {

bool legalizeBVHIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;

bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeWaveID(MachineInstr &MI, MachineIRBuilder &B) const;

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5255,7 +5255,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
break;
}
case AMDGPU::G_FPTRUNC_ROUND:
case AMDGPU::G_INTRINSIC_FPTRUNC_ROUND:
return getDefaultMappingVOP(MI);
case AMDGPU::G_PREFETCH:
OpdsMapping[0] = getSGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
Expand Down
28 changes: 1 addition & 27 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,

// F16 - VOP1 Actions.
setOperationAction({ISD::FP_ROUND, ISD::STRICT_FP_ROUND, ISD::FCOS,
ISD::FSIN, ISD::FROUND, ISD::FPTRUNC_ROUND},
ISD::FSIN, ISD::FROUND},
MVT::f16, Custom);

setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::f16, Promote);
Expand Down Expand Up @@ -5797,8 +5797,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:
return lowerFP_ROUND(Op, DAG);
case ISD::FPTRUNC_ROUND:
return lowerFPTRUNC_ROUND(Op, DAG);
case ISD::TRAP:
return lowerTRAP(Op, DAG);
case ISD::DEBUGTRAP:
Expand Down Expand Up @@ -6648,30 +6646,6 @@ SDValue SITargetLowering::getFPExtOrFPRound(SelectionDAG &DAG,
DAG.getTargetConstant(0, DL, MVT::i32));
}

/// Custom lowering for ISD::FPTRUNC_ROUND.
///
/// Builds an AMDGPUISD::FPTRUNC_ROUND node that carries the source value and
/// the remapped rounding mode as an i32 target constant.
///
/// \returns an empty SDValue when the source is not f32 or the rounding mode
/// is not one of towardzero, tonearest, upward or downward; otherwise the
/// newly created node.
SDValue SITargetLowering::lowerFPTRUNC_ROUND(SDValue Op,
                                             SelectionDAG &DAG) const {
  const bool SrcIsF32 = Op.getOperand(0)->getValueType(0) == MVT::f32;
  if (!SrcIsF32)
    return SDValue();

  // Only towardzero, tonearest, upward and downward are supported.
  const int IRMode = Op.getConstantOperandVal(1);
  const bool IsSupportedMode =
      IRMode == static_cast<int>(RoundingMode::TowardZero) ||
      IRMode == static_cast<int>(RoundingMode::NearestTiesToEven) ||
      IRMode == static_cast<int>(RoundingMode::TowardPositive) ||
      IRMode == static_cast<int>(RoundingMode::TowardNegative);
  if (!IsSupportedMode)
    return SDValue();

  // Rounding-mode remapping applied below:
  //   "round.towardzero" -> TowardZero        0 -> FP_ROUND_ROUND_TO_ZERO    3
  //   "round.tonearest"  -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
  //   "round.upward"     -> TowardPositive    2 -> FP_ROUND_ROUND_TO_INF     1
  //   "round.downward"   -> TowardNegative    3 -> FP_ROUND_ROUND_TO_NEGINF  2
  const unsigned HWMode = (IRMode + 3) % 4;

  SDLoc DL(Op);
  SDValue RoundFlag = DAG.getTargetConstant(HWMode, DL, MVT::i32);
  return DAG.getNode(AMDGPUISD::FPTRUNC_ROUND, DL, Op.getNode()->getVTList(),
                     Op->getOperand(0), RoundFlag);
}

SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::f16 &&
"Do not know how to custom lower FP_ROUND for non-f16 type");
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {

/// Custom lowering for ISD::FP_ROUND for MVT::f16.
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFPTRUNC_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const;
Expand Down
22 changes: 16 additions & 6 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -304,12 +304,6 @@ def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

def SDTFPRoundModeOp : SDTypeProfile<1, 2, [
SDTCisFP<0>, SDTCisFP<1>, SDTCisInt<2>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>
]>;

def SIfptrunc_round : SDNode<"AMDGPUISD::FPTRUNC_ROUND", SDTFPRoundModeOp>;

//===----------------------------------------------------------------------===//
// ValueType helpers
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -796,6 +790,22 @@ return CurDAG->getTargetConstant(
N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;

// Transforms a target immediate holding a rounding mode into an i32 target
// constant equal to (mode + 3) % 4, following the table inside the fragment.
def as_hw_round_mode : SDNodeXForm<timm, [{
// "round.towardzero" -> TowardZero 0 -> FP_ROUND_ROUND_TO_ZERO 3
// "round.tonearest" -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
// "round.upward" -> TowardPositive 2 -> FP_ROUND_ROUND_TO_INF 1
// "round.downward" -> TowardNegative 3 -> FP_ROUND_ROUND_TO_NEGINF 2
return CurDAG->getTargetConstant((N->getSExtValue() + 3) % 4, SDLoc(N),
MVT::i32);
}]>;

// Matches an i32 target immediate whose value is one of the four rounding
// modes listed below (TowardZero, NearestTiesToEven, TowardPositive,
// TowardNegative); any other value does not match.
def SupportedRoundMode : TImmLeaf<i32, [{
return Imm == (int)RoundingMode::TowardZero ||
Imm == (int)RoundingMode::NearestTiesToEven ||
Imm == (int)RoundingMode::TowardPositive ||
Imm == (int)RoundingMode::TowardNegative;
}]>;

class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
uint64_t Imm = N->getZExtValue();
unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
Expand Down
11 changes: 4 additions & 7 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -229,10 +229,12 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
// in the ModeRegister pass.
let Uses = [MODE, EXEC] in {
def FPTRUNC_ROUND_F16_F32_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VGPR_32:$src0, i32imm:$round),
[(set f16:$vdst, (SIfptrunc_round f32:$src0, i32:$round))]>;
(ins VGPR_32:$src0, i32imm:$round)>;
} // End Uses = [MODE, EXEC]

// Select an f32 -> f16 fptrunc_round whose rounding-mode immediate satisfies
// SupportedRoundMode to FPTRUNC_ROUND_F16_F32_PSEUDO, passing the rounding
// mode through the as_hw_round_mode transform.
def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
(FPTRUNC_ROUND_F16_F32_PSEUDO $src0, (as_hw_round_mode $round))>;

// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
// restoring it after we're done.
let Defs = [SCC], isConvergent = 1 in {
Expand Down Expand Up @@ -4055,11 +4057,6 @@ def G_SI_CALL : AMDGPUGenericInstruction {
let isConvergent = 1;
}

def G_FPTRUNC_ROUND : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$vdst);
let InOperandList = (ins type1:$src0, untyped_imm_0:$round);
let hasSideEffects = 0;
}

//============================================================================//
// Dummy Instructions
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1030 -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefixes=SDAG-FAIL
; RUN: not --crash llc -global-isel -mtriple=amdgcn -mcpu=gfx1030 -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefix=GISEL-FAIL
; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1030 -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefix=FAIL
; RUN: not --crash llc -global-isel -mtriple=amdgcn -mcpu=gfx1030 -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefix=FAIL

define amdgpu_gs void @test_fptrunc_round_f64(double %a, ptr addrspace(1) %out) {
; SDAG-FAIL: LLVM ERROR: Cannot select
; GISEL-FAIL: unable to legalize instruction
; FAIL: LLVM ERROR: Cannot select
%res = call half @llvm.fptrunc.round.f16.f64(double %a, metadata !"round.upward")
store half %res, ptr addrspace(1) %out, align 4
ret void
Expand Down
Loading
Loading