Skip to content

AMDGPU: Add f64 to f32 support for llvm.fptrunc.round #107481

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -230,11 +230,17 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
// Pseudo instructions for fptrunc_round. Each takes the value to truncate in
// $src0 and the requested rounding mode as the immediate $round, and is
// declared as using MODE and EXEC.
let Uses = [MODE, EXEC] in {
// f32 -> f16 truncation with an explicit rounding mode.
def FPTRUNC_ROUND_F16_F32_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VGPR_32:$src0, i32imm:$round)>;

// f64 -> f32 truncation with an explicit rounding mode. The source is a
// VReg_64 operand and the result a single VGPR_32.
// SIModeRegister::getInstructionMode reads the $round immediate (operand 2),
// removes that operand and rewrites this pseudo to V_CVT_F32_F64_e32.
def FPTRUNC_ROUND_F32_F64_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VReg_64:$src0, i32imm:$round)>;
} // End Uses = [MODE, EXEC]

// Select an f32 -> f16 fptrunc_round whose i32 rounding-mode operand matches
// SupportedRoundMode to FPTRUNC_ROUND_F16_F32_PSEUDO, passing the mode through
// as_hw_round_mode.
def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
(FPTRUNC_ROUND_F16_F32_PSEUDO $src0, (as_hw_round_mode $round))>;

// Select an f64 -> f32 fptrunc_round whose i32 rounding-mode operand matches
// SupportedRoundMode to FPTRUNC_ROUND_F32_F64_PSEUDO, passing the mode through
// as_hw_round_mode. Rounding modes that do not match SupportedRoundMode are
// not covered by this pattern.
def : GCNPat <(f32 (fptrunc_round f64:$src0, (i32 SupportedRoundMode:$round))),
(FPTRUNC_ROUND_F32_F64_PSEUDO $src0, (as_hw_round_mode $round))>;

// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
// restoring it after we're done.
let Defs = [SCC], isConvergent = 1 in {
Expand Down
12 changes: 9 additions & 3 deletions llvm/lib/Target/AMDGPU/SIModeRegister.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,8 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
const SIInstrInfo *TII) {
unsigned Opcode = MI.getOpcode();
if (TII->usesFPDPRounding(MI) ||
Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO) {
Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||
Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) {
switch (Opcode) {
case AMDGPU::V_INTERP_P1LL_F16:
case AMDGPU::V_INTERP_P1LV_F16:
Expand All @@ -189,8 +190,13 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
B.addImm(0); // omod
} else
MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
return Status(FP_ROUND_MODE_DP(3),
FP_ROUND_MODE_DP(Mode));
return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
}
case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {
unsigned Mode = MI.getOperand(2).getImm();
MI.removeOperand(2);
MI.setDesc(TII->get(AMDGPU::V_CVT_F32_F64_e32));
return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
}
default:
return DefaultStatus;
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s

; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-FAIL %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-FAIL %s

; TODO: check for GISEL when bfloat is supported.
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f32-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F32-FAIL %s
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F64-FAIL %s

; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=TONEARESTAWAY-FAIL %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=TONEARESTAWAY-FAIL %s
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s

; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s

;--- f16-f64-err.ll
define amdgpu_gs void @test_fptrunc_round_f16_f64(double %a, ptr addrspace(1) %out) {
Expand All @@ -21,14 +21,6 @@ define amdgpu_gs void @test_fptrunc_round_f16_f64(double %a, ptr addrspace(1) %o
ret void
}

;--- f32-f64-err.ll
; Negative test: expects instruction selection to fail ("Cannot select") for an
; f64 -> f32 llvm.fptrunc.round call with round.downward.
; NOTE(review): the enclosing hunk header (-21,14 +21,6) suggests this function
; is on the removed side of the diff; the same intrinsic call is expected to
; select v_cvt_f32_f64_e32 in llvm.fptrunc.round.ll. Confirm before relying on it.
define amdgpu_gs void @test_fptrunc_round_f32_f64(double %a, ptr addrspace(1) %out) {
; F32-F64-FAIL: LLVM ERROR: Cannot select
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.downward")
store float %res, ptr addrspace(1) %out, align 4
ret void
}

;--- bf16-f32-err.ll
define amdgpu_gs void @test_fptrunc_round_bf16_f32(float %a, ptr addrspace(1) %out) {
; BF16-F32-FAIL: LLVM ERROR: Cannot select
Expand All @@ -47,8 +39,16 @@ define amdgpu_gs void @test_fptrunc_round_bf16_f64(double %a, ptr addrspace(1) %

;--- f16-f32-tonearestaway-err.ll
; Negative test: an f32 -> f16 llvm.fptrunc.round call with
; round.tonearestaway is expected to fail instruction selection with
; "Cannot select" (the RUN lines invoke llc under `not --crash`).
define amdgpu_gs void @test_fptrunc_round_f16_f32_tonearestaway(float %a, ptr addrspace(1) %out) {
; TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
; F16-F32-TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
%res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.tonearestaway")
store half %res, ptr addrspace(1) %out, align 2
ret void
}

;--- f32-f64-tonearestaway-err.ll
; Negative test: an f64 -> f32 llvm.fptrunc.round call with
; round.tonearestaway is expected to fail instruction selection with
; "Cannot select" (the RUN lines invoke llc under `not --crash`).
define amdgpu_gs void @test_fptrunc_round_f32_f64_tonearestaway(double %a, ptr addrspace(1) %out) {
; F32-F64-TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.tonearestaway")
store float %res, ptr addrspace(1) %out, align 4
ret void
}
39 changes: 39 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
Original file line number Diff line number Diff line change
Expand Up @@ -516,3 +516,42 @@ define amdgpu_gs <8 x half> @v_fptrunc_round_v8f32_to_v8f16_downward(<8 x float>
%res = call <8 x half> @llvm.fptrunc.round.v8f16.v8f32(<8 x float> %a, metadata !"round.downward")
ret <8 x half> %res
}

; f64 -> f32 truncation with round.tonearest. The expected output is a single
; v_cvt_f32_f64_e32 from v[0:1] into v0, with no s_setreg instruction before it.
define amdgpu_gs float @v_fptrunc_round_f64_to_f32_tonearest(double %a) {
; CHECK-LABEL: v_fptrunc_round_f64_to_f32_tonearest:
; CHECK: ; %bb.0:
; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; CHECK-NEXT: ; return to shader part epilog
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.tonearest")
ret float %res
}

; f64 -> f32 truncation with round.upward. The expected output writes 1 to
; hwreg(HW_REG_MODE, 2, 1) with s_setreg_imm32_b32 immediately before the
; v_cvt_f32_f64_e32 conversion.
define amdgpu_gs float @v_fptrunc_round_f64_to_f32_upward(double %a) {
; CHECK-LABEL: v_fptrunc_round_f64_to_f32_upward:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; CHECK-NEXT: ; return to shader part epilog
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.upward")
ret float %res
}

; f64 -> f32 truncation with round.downward. The expected output writes 1 to
; hwreg(HW_REG_MODE, 3, 1) with s_setreg_imm32_b32 immediately before the
; v_cvt_f32_f64_e32 conversion.
define amdgpu_gs float @v_fptrunc_round_f64_to_f32_downward(double %a) {
; CHECK-LABEL: v_fptrunc_round_f64_to_f32_downward:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; CHECK-NEXT: ; return to shader part epilog
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.downward")
ret float %res
}

; f64 -> f32 truncation with round.towardzero. The expected output writes 3 to
; hwreg(HW_REG_MODE, 2, 2) with s_setreg_imm32_b32 immediately before the
; v_cvt_f32_f64_e32 conversion.
define amdgpu_gs float @v_fptrunc_round_f64_to_f32_towardzero(double %a) {
; CHECK-LABEL: v_fptrunc_round_f64_to_f32_towardzero:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; CHECK-NEXT: ; return to shader part epilog
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.towardzero")
ret float %res
}
Loading