AMDGPU: Use pattern to select instruction for intrinsic llvm.fptrunc.round #105761
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel
Author: Changpeng Fang (changpeng)
Changes: Use GCNPat instead of Custom Lowering to select instructions for the intrinsic llvm.fptrunc.round. "SupportedRoundMode : TImmLeaf" is used as a predicate to select only when the rounding mode is supported by the hardware. "as_hw_round_mode : SDNodeXForm" translates the rounding modes to the corresponding ones that the hardware recognizes.
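As a usage sketch (not part of the patch): the call below assumes the scalar f16/f32 overload spelling and the amdgpu_gs calling convention used by the existing tests, and the comments restate the rounding-mode mapping that as_hw_round_mode computes as (mode + 3) % 4.

; Rounding-mode mapping applied by as_hw_round_mode ((mode + 3) % 4):
;   "round.towardzero"  TowardZero         0 -> hardware 3
;   "round.tonearest"   NearestTiesToEven  1 -> hardware 0
;   "round.upward"      TowardPositive     2 -> hardware 1
;   "round.downward"    TowardNegative     3 -> hardware 2
define amdgpu_gs half @fptrunc_round_upward_example(float %a) {
  %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
  ret half %res
}

declare half @llvm.fptrunc.round.f16.f32(float, metadata)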
Patch is 29.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/105761.diff 19 Files Affected:
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 9fb6de49fb2055..80e3c90d346244 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -343,7 +343,7 @@ HANDLE_TARGET_OPCODE(G_FREEZE)
HANDLE_TARGET_OPCODE(G_CONSTANT_FOLD_BARRIER)
// INTRINSIC fptrunc_round intrinsic.
-HANDLE_TARGET_OPCODE(G_INTRINSIC_FPTRUNC_ROUND)
+HANDLE_TARGET_OPCODE(G_FPTRUNC_ROUND)
/// INTRINSIC trunc intrinsic.
HANDLE_TARGET_OPCODE(G_INTRINSIC_TRUNC)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 36a0a087ba457c..439600d940ed3e 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -1141,7 +1141,7 @@ def G_RESET_FPMODE : GenericInstruction {
//------------------------------------------------------------------------------
// Opcodes for LLVM Intrinsics
//------------------------------------------------------------------------------
-def G_INTRINSIC_FPTRUNC_ROUND : GenericInstruction {
+def G_FPTRUNC_ROUND : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src1, i32imm:$round_mode);
let hasSideEffects = false;
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index f44af78cded46d..2f7315b48a03c8 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2522,8 +2522,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// Add the Rounding mode as an integer
MIRBuilder
- .buildInstr(TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND,
- {getOrCreateVReg(CI)},
+ .buildInstr(TargetOpcode::G_FPTRUNC_ROUND, {getOrCreateVReg(CI)},
{getOrCreateVReg(*CI.getArgOperand(0))}, Flags)
.addImm((int)*RoundMode);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3fece81df1f2fd..8224551ac76061 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5041,7 +5041,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
case G_BITCAST:
return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
- case G_INTRINSIC_FPTRUNC_ROUND:
+ case G_FPTRUNC_ROUND:
return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
default:
return UnableToLegalize;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 60dcb118542785..851c646b1cb333 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6957,8 +6957,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Result;
Result = DAG.getNode(
ISD::FPTRUNC_ROUND, sdl, VT, getValue(I.getArgOperand(0)),
- DAG.getTargetConstant((int)*RoundMode, sdl,
- TLI.getPointerTy(DAG.getDataLayout())));
+ DAG.getTargetConstant((int)*RoundMode, sdl, MVT::i32));
setValue(&I, Result);
return;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 8bee84b8a87f27..2fcb5727cb5e97 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -419,3 +419,6 @@ def gi_frameindex_to_targetframeindex : GICustomOperandRenderer<"renderFrameInde
def gi_fp_pow2_to_exponent : GICustomOperandRenderer<"renderFPPow2ToExponent">,
GISDNodeXFormEquiv<FPPow2ToExponentXForm>;
+
+def gi_as_hw_round_mode : GICustomOperandRenderer<"renderRoundMode">,
+ GISDNodeXFormEquiv<as_hw_round_mode>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index e57c8f8b7b4835..4ace7d08ea59ae 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -5500,7 +5500,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CONST_DATA_PTR)
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
NODE_NAME_CASE(LDS)
- NODE_NAME_CASE(FPTRUNC_ROUND)
NODE_NAME_CASE(DUMMY_CHAIN)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(LOAD_D16_HI)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 59f640ea99de3e..dd9d97bd593bda 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -553,7 +553,6 @@ enum NodeType : unsigned {
CONST_DATA_PTR,
PC_ADD_REL_OFFSET,
LDS,
- FPTRUNC_ROUND,
DUMMY_CHAIN,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 05ed1b322c0d1b..369165f82643c6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -5595,6 +5595,12 @@ void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB,
MIB.addImm(ExpVal);
}
+void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);
+}
+
bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
return TII.isInlineConstant(Imm);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 69806b240cf2bc..40c6cf8fd3b51f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -359,6 +359,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
void renderFPPow2ToExponent(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
+ void renderRoundMode(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+
bool isInlineImmediate(const APInt &Imm) const;
bool isInlineImmediate(const APFloat &Imm) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 4fd917f5ea7fa8..eaf540003ec6fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1136,8 +1136,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.lower();
- getActionDefinitionsBuilder(G_INTRINSIC_FPTRUNC_ROUND)
- .customFor({S16, S32})
+ getActionDefinitionsBuilder(G_FPTRUNC_ROUND)
+ .legalFor({S16, S32})
.scalarize(0)
.lower();
@@ -2179,8 +2179,6 @@ bool AMDGPULegalizerInfo::legalizeCustom(
return legalizeCTLZ_CTTZ(MI, MRI, B);
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
return legalizeCTLZ_ZERO_UNDEF(MI, MRI, B);
- case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
- return legalizeFPTruncRound(MI, B);
case TargetOpcode::G_STACKSAVE:
return legalizeStackSave(MI, B);
case TargetOpcode::G_GET_FPENV:
@@ -7093,35 +7091,6 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
return true;
}
-bool AMDGPULegalizerInfo::legalizeFPTruncRound(MachineInstr &MI,
- MachineIRBuilder &B) const {
- MachineRegisterInfo &MRI = *B.getMRI();
- Register Src = MI.getOperand(1).getReg();
- if (MRI.getType(Src) != LLT::scalar(32))
- return false;
-
- // Only support towardzero, tonearest, upward and downward.
- int RoundMode = MI.getOperand(2).getImm();
- if (RoundMode != (int)RoundingMode::TowardZero &&
- RoundMode != (int)RoundingMode::NearestTiesToEven &&
- RoundMode != (int)RoundingMode::TowardPositive &&
- RoundMode != (int)RoundingMode::TowardNegative)
- return false;
-
- // "round.towardzero" -> TowardZero 0 -> FP_ROUND_ROUND_TO_ZERO 3
- // "round.tonearest" -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
- // "round.upward" -> TowardPositive 2 -> FP_ROUND_ROUND_TO_INF 1
- // "round.downward -> TowardNegative 3 -> FP_ROUND_ROUND_TO_NEGINF 2
- unsigned HW_Mode = (RoundMode + 3) % 4;
- B.buildInstr(AMDGPU::G_FPTRUNC_ROUND)
- .addDef(MI.getOperand(0).getReg())
- .addUse(Src)
- .addImm(HW_Mode);
-
- MI.eraseFromParent();
- return true;
-}
-
bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI,
MachineIRBuilder &B) const {
const SITargetLowering *TLI = ST.getTargetLowering();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index db1c5874093a71..a815e87a7da35f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -212,7 +212,6 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
bool legalizeBVHIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
- bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeWaveID(MachineInstr &MI, MachineIRBuilder &B) const;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c954c0aa71f734..e50bc2a3c42ef7 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -598,8 +598,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// F16 - VOP1 Actions.
setOperationAction({ISD::FP_ROUND, ISD::STRICT_FP_ROUND, ISD::FCOS,
- ISD::FSIN, ISD::FROUND, ISD::FPTRUNC_ROUND},
- MVT::f16, Custom);
+ ISD::FSIN, ISD::FROUND}, MVT::f16, Custom);
setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::f16, Promote);
setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::bf16, Promote);
@@ -5796,8 +5795,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:
return lowerFP_ROUND(Op, DAG);
- case ISD::FPTRUNC_ROUND:
- return lowerFPTRUNC_ROUND(Op, DAG);
case ISD::TRAP:
return lowerTRAP(Op, DAG);
case ISD::DEBUGTRAP:
@@ -6647,30 +6644,6 @@ SDValue SITargetLowering::getFPExtOrFPRound(SelectionDAG &DAG,
DAG.getTargetConstant(0, DL, MVT::i32));
}
-SDValue SITargetLowering::lowerFPTRUNC_ROUND(SDValue Op,
- SelectionDAG &DAG) const {
- if (Op.getOperand(0)->getValueType(0) != MVT::f32)
- return SDValue();
-
- // Only support towardzero, tonearest, upward and downward.
- int RoundMode = Op.getConstantOperandVal(1);
- if (RoundMode != (int)RoundingMode::TowardZero &&
- RoundMode != (int)RoundingMode::NearestTiesToEven &&
- RoundMode != (int)RoundingMode::TowardPositive &&
- RoundMode != (int)RoundingMode::TowardNegative)
- return SDValue();
-
- // "round.towardzero" -> TowardZero 0 -> FP_ROUND_ROUND_TO_ZERO 3
- // "round.tonearest" -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
- // "round.upward" -> TowardPositive 2 -> FP_ROUND_ROUND_TO_INF 1
- // "round.downward -> TowardNegative 3 -> FP_ROUND_ROUND_TO_NEGINF 2
- unsigned HW_Mode = (RoundMode + 3) % 4;
- SDLoc DL(Op);
- SDValue RoundFlag = DAG.getTargetConstant(HW_Mode, DL, MVT::i32);
- return DAG.getNode(AMDGPUISD::FPTRUNC_ROUND, DL, Op.getNode()->getVTList(),
- Op->getOperand(0), RoundFlag);
-}
-
SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::f16 &&
"Do not know how to custom lower FP_ROUND for non-f16 type");
@@ -12830,7 +12803,7 @@ bool SITargetLowering::isCanonicalized(Register Reg, const MachineFunction &MF,
case AMDGPU::G_FFLOOR:
case AMDGPU::G_FRINT:
case AMDGPU::G_FNEARBYINT:
- case AMDGPU::G_INTRINSIC_FPTRUNC_ROUND:
+ case AMDGPU::G_FPTRUNC_ROUND:
case AMDGPU::G_INTRINSIC_TRUNC:
case AMDGPU::G_INTRINSIC_ROUNDEVEN:
case AMDGPU::G_FMA:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index eed4b3e79cdeee..1f198a92c0fa6a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -145,7 +145,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
/// Custom lowering for ISD::FP_ROUND for MVT::f16.
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerFPTRUNC_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 2b54429dc9a03f..1690e90957a707 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -308,7 +308,7 @@ def SDTFPRoundModeOp : SDTypeProfile<1, 2, [
SDTCisFP<0>, SDTCisFP<1>, SDTCisInt<2>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>
]>;
-def SIfptrunc_round : SDNode<"AMDGPUISD::FPTRUNC_ROUND", SDTFPRoundModeOp>;
+def SIfptrunc_round : SDNode<"ISD::FPTRUNC_ROUND", SDTFPRoundModeOp>;
//===----------------------------------------------------------------------===//
// ValueType helpers
@@ -796,6 +796,18 @@ return CurDAG->getTargetConstant(
N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;
+def as_hw_round_mode : SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant((N->getSExtValue() + 3) % 4, SDLoc(N),
+ MVT::i32);
+}]>;
+
+def SupportedRoundMode : TImmLeaf<i32, [{
+ return Imm == (int)RoundingMode::TowardZero ||
+ Imm == (int)RoundingMode::NearestTiesToEven ||
+ Imm == (int)RoundingMode::TowardPositive ||
+ Imm == (int)RoundingMode::TowardNegative;
+}]>;
+
class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
uint64_t Imm = N->getZExtValue();
unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index e7831d00a3a4a8..22abaf4448a3af 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -229,10 +229,12 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
// in the ModeRegister pass.
let Uses = [MODE, EXEC] in {
def FPTRUNC_ROUND_F16_F32_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
- (ins VGPR_32:$src0, i32imm:$round),
- [(set f16:$vdst, (SIfptrunc_round f32:$src0, i32:$round))]>;
+ (ins VGPR_32:$src0, i32imm:$round)>;
} // End Uses = [MODE, EXEC]
+def : GCNPat <(f16 (SIfptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
+ (FPTRUNC_ROUND_F16_F32_PSEUDO $src0, (as_hw_round_mode $round))>;
+
// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
// restoring it after we're done.
let Defs = [SCC], isConvergent = 1 in {
@@ -4064,11 +4066,6 @@ def G_SI_CALL : AMDGPUGenericInstruction {
let isConvergent = 1;
}
-def G_FPTRUNC_ROUND : AMDGPUGenericInstruction {
- let OutOperandList = (outs type0:$vdst);
- let InOperandList = (ins type1:$src0, untyped_imm_0:$round);
- let hasSideEffects = 0;
-}
//============================================================================//
// Dummy Instructions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 87a415b45cca9a..5d0fd91566bcca 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -145,7 +145,7 @@
# DEBUG-NEXT: .. the first uncovered imm index: {{[0-9]+}}, OK
#
-# DEBUG-NEXT: G_INTRINSIC_FPTRUNC_ROUND (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: G_FPTRUNC_ROUND (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
#
diff --git a/llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll
index 9fa3eb22a554a8..6454e04d2fa432 100644
--- a/llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll
@@ -1,9 +1,8 @@
-; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1030 -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefixes=SDAG-FAIL
-; RUN: not --crash llc -global-isel -mtriple=amdgcn -mcpu=gfx1030 -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefix=GISEL-FAIL
+; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1030 -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefix=FAIL
+; RUN: not --crash llc -global-isel -mtriple=amdgcn -mcpu=gfx1030 -o /dev/null %s 2>&1 | FileCheck %s --ignore-case --check-prefix=FAIL
define amdgpu_gs void @test_fptrunc_round_f64(double %a, ptr addrspace(1) %out) {
-; SDAG-FAIL: LLVM ERROR: Cannot select
-; GISEL-FAIL: unable to legalize instruction
+; FAIL: LLVM ERROR: Cannot select
%res = call half @llvm.fptrunc.round.f64(double %a, metadata !"round.upward")
store half %res, ptr addrspace(1) %out, align 4
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
index 71d0ee524bab73..54ed6f1eb42820 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
@@ -176,8 +176,7 @@ define amdgpu_gs <2 x half> @v_fptrunc_round_v2f32_to_v2f16_upward(<2 x float> %
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
; GISEL-NEXT: ; return to shader part epilog
%res = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %a, metadata !"round.upward")
ret <2 x half> %res
@@ -197,8 +196,7 @@ define amdgpu_gs <2 x half> @v_fptrunc_round_v2f32_to_v2f16_downward(<2 x float>
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
; GISEL-NEXT: ; return to shader part epilog
%res = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %a, metadata !"round.downward")
ret <2 x half> %res
@@ -228,23 +226,18 @@ define amdgpu_gs void @v_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x
; GISEL: ; %bb.0:
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GISEL-NEXT: v_cvt_f16_f32_e32 v6, v2
; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
-; GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 1
+; GISEL-NEXT: v_cvt_f16_f32_e32 v6, v2
; GISEL-NEXT: v_cvt_f16_f32_e32 v7, v3
-; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
+; GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
-; GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
-; GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
-; GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GISEL-NEXT: v_lshl_or_b32 v1, v7, 16, v6
-; GISEL-NEXT: v_lshl_or_b32 v2, v3, 16, v2
+; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GISEL-NEXT: v_cvt_f16_f32_e32 v2, v3
+; GISEL-NEXT: v_pack_b32_f16 v3, v6, v7
+; GISEL-NEXT: v_pack_b32_f16 v1, v1, v2
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, ...
[truncated]
✅ With the latest revision this PR passed the C/C++ code formatter.
I could find neither the old nor the new name of FPTRUNC_ROUND in https://llvm.org/docs/GlobalISel/GenericOpcode.html. Possibly this list does not include opcodes for llvm intrinsics?
Many llvm intrinsics that have been raised to named opcodes are in this list, e.g. vscale.
I don't like secret opcodes that are not vendor opcodes. If it is a standard named opcode, I prefer to advertise your achievements.
Yes, it should always be good to document when we introduce something new. Now I don't do the rename of the opcode in this patch; it should be a separate patch to document it. By the way, https://llvm.org/docs/GlobalISel/GenericOpcode.html only lists a subset of the opcodes defined in llvm/include/llvm/Target/GenericOpcodes.td
No worries, you are not adding new opcodes. The latter is a sad state; that's why I always ask to advertise achievements when new opcodes are introduced.
ping |
define amdgpu_gs void @test_fptrunc_round_f64(double %a, ptr addrspace(1) %out) {
; SDAG-FAIL: LLVM ERROR: Cannot select
; GISEL-FAIL: unable to legalize instruction
; FAIL: LLVM ERROR: Cannot select
%res = call half @llvm.fptrunc.round.f64(double %a, metadata !"round.upward")
This isn't using the correct suffix on the intrinsic; it's missing the mangled result type. I guess this was an existing bug; can you precommit the fix for that?
Also using the f32->f16 case would be more representative
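A sketch of the corrected test, assuming the missing result type is mangled in as .f16.f64 (by analogy with the v2f16.v2f32 overloads used elsewhere in this patch):

define amdgpu_gs void @test_fptrunc_round_f64(double %a, ptr addrspace(1) %out) {
; FAIL: LLVM ERROR: Cannot select
  %res = call half @llvm.fptrunc.round.f16.f64(double %a, metadata !"round.upward")
  store half %res, ptr addrspace(1) %out, align 4
  ret void
}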
Fixed the suffix.
Plan to use a separate patch to add more tests on unsupported rounding modes and types.
We need this immediate type to be consistent. This is the pre-commit for #105761
lgtm with the node equiv moved to the generic place
Use GCNPat instead of Custom Lowering to select instructions for intrinsic llvm.fptrunc.round. "SupportedRoundMode : TImmLeaf" is used as a predicate to select only when the rounding mode is supported by the hardware. "as_hw_round_mode : SDNodeXForm" is developed to translate the round modes to the corresponding ones that hardware recognizes.
Thanks.
…round (llvm#105761) Use GCNPat instead of Custom Lowering to select instructions for intrinsic llvm.fptrunc.round. "SupportedRoundMode : TImmLeaf" is used as a predicate to select only when the rounding mode is supported. "as_hw_round_mode : SDNodeXForm" is developed to translate the round modes to the corresponding ones that hardware recognizes.
…6424) We need this immediate type to be consistent. This is the pre-commit for llvm#105761