Skip to content

Commit c1d7377

Browse files
committed
Revert "AMDGPU: Fix the double rounding issue in v2f64 -> v2f16 conversion (llvm#135659)"
This reverts commit 8b46b98.
1 parent 472eecd commit c1d7377

File tree

3 files changed

+12
-9
lines changed

3 files changed

+12
-9
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1054,7 +1054,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
10541054

10551055
auto &FPTruncActions = getActionDefinitionsBuilder(G_FPTRUNC);
10561056
if (ST.hasCvtPkF16F32Inst())
1057-
FPTruncActions.legalFor({{S32, S64}, {S16, S32}, {V2S16, V2S32}});
1057+
FPTruncActions.legalFor(
1058+
{{S32, S64}, {S16, S32}, {V2S16, V2S32}, {V2S16, V2S64}});
10581059
else
10591060
FPTruncActions.legalFor({{S32, S64}, {S16, S32}});
10601061
FPTruncActions.scalarize(0).lower();

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -915,6 +915,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
915915
setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Legal);
916916
}
917917

918+
if (Subtarget->hasCvtPkF16F32Inst()) {
919+
setOperationAction(ISD::FP_ROUND, MVT::v2f16, Legal);
920+
}
921+
918922
setTargetDAGCombine({ISD::ADD,
919923
ISD::UADDO_CARRY,
920924
ISD::SUB,

llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -654,9 +654,7 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
654654
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
655655
; GFX950-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
656656
; GFX950-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
657-
; GFX950-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2
658-
; GFX950-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
659-
; GFX950-SDAG-NEXT: v_lshl_or_b32 v0, v1, 16, v0
657+
; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v0, v0, v2
660658
; GFX950-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
661659
; GFX950-SDAG-NEXT: s_endpgm
662660
;
@@ -668,11 +666,11 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
668666
; GFX950-GISEL-NEXT: s_mov_b32 s2, -1
669667
; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000
670668
; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
671-
; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
672-
; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
673-
; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
674-
; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
675-
; GFX950-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
669+
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
670+
; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7]
671+
; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
672+
; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
673+
; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v0, v0, v2
676674
; GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
677675
; GFX950-GISEL-NEXT: s_endpgm
678676
;

0 commit comments

Comments
 (0)