Skip to content

Commit f01f082

Browse files
authored
AMDGPU: Make v2f32 -> v2f16 legal when target supports v_cvt_pk_f16_f32 (llvm#139956)
If a target supports the v_cvt_pk_f16_f32 instruction, the v2f32 -> v2f16 conversion should be legal. However, SelectionDAG does not allow us to specify the source type in the legalization rules. To work around this, we mark FP_ROUND as Custom for v2f16 and then treat v2f32 -> v2f16 as legal during custom lowering. Fixes: SWDEV-532608 -- the expected v_cvt_pk_f16_f32 was not generated.
1 parent a4eb0db commit f01f082

File tree

2 files changed

+24
-1
lines changed

2 files changed

+24
-1
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -919,6 +919,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
919919
setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Legal);
920920
}
921921

922+
if (Subtarget->hasCvtPkF16F32Inst())
923+
setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
924+
922925
setTargetDAGCombine({ISD::ADD,
923926
ISD::UADDO_CARRY,
924927
ISD::SUB,
@@ -6899,10 +6902,16 @@ SDValue SITargetLowering::getFPExtOrFPRound(SelectionDAG &DAG, SDValue Op,
68996902
SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
69006903
SDValue Src = Op.getOperand(0);
69016904
EVT SrcVT = Src.getValueType();
6905+
EVT DstVT = Op.getValueType();
6906+
6907+
if (DstVT == MVT::v2f16) {
6908+
assert(Subtarget->hasCvtPkF16F32Inst() && "support v_cvt_pk_f16_f32");
6909+
return SrcVT == MVT::v2f32 ? Op : SDValue();
6910+
}
6911+
69026912
if (SrcVT.getScalarType() != MVT::f64)
69036913
return Op;
69046914

6905-
EVT DstVT = Op.getValueType();
69066915
SDLoc DL(Op);
69076916
if (DstVT == MVT::f16) {
69086917
// TODO: Handle strictfp

llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.path.ll renamed to llvm/test/CodeGen/AMDGPU/fptrunc.v2f16.no.fast.math.ll

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,20 @@ define <2 x half> @v_test_cvt_v2f32_v2f16(<2 x float> %src) {
1212
ret <2 x half> %res
1313
}
1414

15+
define half @fptrunc_v2f32_v2f16_then_extract(<2 x float> %src) {
16+
; GFX950-LABEL: fptrunc_v2f32_v2f16_then_extract:
17+
; GFX950: ; %bb.0:
18+
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19+
; GFX950-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
20+
; GFX950-NEXT: v_add_f16_sdwa v0, v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
21+
; GFX950-NEXT: s_setpc_b64 s[30:31]
22+
%vec_half = fptrunc <2 x float> %src to <2 x half>
23+
%first = extractelement <2 x half> %vec_half, i64 1
24+
%second = extractelement <2 x half> %vec_half, i64 0
25+
%res = fadd half %first, %second
26+
ret half %res
27+
}
28+
1529
define <2 x half> @v_test_cvt_v2f64_v2f16(<2 x double> %src) {
1630
; GFX950-SDAG-LABEL: v_test_cvt_v2f64_v2f16:
1731
; GFX950-SDAG: ; %bb.0:

0 commit comments

Comments
 (0)