Skip to content

Commit 50082d6

Browse files
authored
DAG: Fix widening of fptrunc_round vectors (#89918)
1 parent 21ef187 commit 50082d6

File tree

3 files changed

+51
-12
lines changed

3 files changed

+51
-12
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -984,7 +984,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
984984
SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N);
985985
SDValue WidenVecRes_XRINT(SDNode *N);
986986
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
987-
SDValue WidenVecRes_IS_FPCLASS(SDNode *N);
987+
SDValue WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N);
988988
SDValue WidenVecRes_ExpOp(SDNode *N);
989989
SDValue WidenVecRes_Unary(SDNode *N);
990990
SDValue WidenVecRes_InregOp(SDNode *N);

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4242,7 +4242,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
42424242
break;
42434243

42444244
case ISD::IS_FPCLASS:
4245-
Res = WidenVecRes_IS_FPCLASS(N);
4245+
case ISD::FPTRUNC_ROUND:
4246+
Res = WidenVecRes_UnarySameEltsWithScalarArg(N);
42464247
break;
42474248

42484249
case ISD::FLDEXP:
@@ -5004,7 +5005,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
50045005
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
50055006
}
50065007

5007-
SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) {
5008+
/// Result and first source operand are different scalar types, but must have
5009+
/// the same number of elements. There is an additional control argument which
5010+
/// should be passed through unchanged.
5011+
SDValue DAGTypeLegalizer::WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N) {
50085012
SDValue FpValue = N->getOperand(0);
50095013
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
50105014
if (getTypeAction(FpValue.getValueType()) != TargetLowering::TypeWidenVector)

llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll

Lines changed: 44 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -266,16 +266,51 @@ define amdgpu_gs void @s_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x
266266
ret void
267267
}
268268

269-
; FIXME
270-
; define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_upward(<3 x float> %a) {
271-
; %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.upward")
272-
; ret <3 x half> %res
273-
; }
269+
define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_upward(<3 x float> %a) {
270+
; SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward:
271+
; SDAG: ; %bb.0:
272+
; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
273+
; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
274+
; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
275+
; SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
276+
; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2
277+
; SDAG-NEXT: ; return to shader part epilog
278+
;
279+
; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward:
280+
; GISEL: ; %bb.0:
281+
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
282+
; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
283+
; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
284+
; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
285+
; GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
286+
; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
287+
; GISEL-NEXT: ; return to shader part epilog
288+
%res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.upward")
289+
ret <3 x half> %res
290+
}
274291

275-
; define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_downward(<3 x float> %a) {
276-
; %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.downward")
277-
; ret <3 x half> %res
278-
; }
292+
define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_downward(<3 x float> %a) {
293+
; SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward:
294+
; SDAG: ; %bb.0:
295+
; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
296+
; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
297+
; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
298+
; SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
299+
; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2
300+
; SDAG-NEXT: ; return to shader part epilog
301+
;
302+
; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward:
303+
; GISEL: ; %bb.0:
304+
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
305+
; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
306+
; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
307+
; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
308+
; GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
309+
; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
310+
; GISEL-NEXT: ; return to shader part epilog
311+
%res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.downward")
312+
ret <3 x half> %res
313+
}
279314

280315
define amdgpu_gs <4 x half> @v_fptrunc_round_v4f32_to_v4f16_upward(<4 x float> %a) {
281316
; SDAG-LABEL: v_fptrunc_round_v4f32_to_v4f16_upward:

0 commit comments

Comments
 (0)