Skip to content

Commit 24267a7

Browse files
authored
AMDGPU: Add f64 to f32 support for llvm.fptrunc.round (llvm#107481)
1 parent: dcfa147 · commit: 24267a7

File tree

4 files changed

+68
-17
lines changed

4 files changed

+68
-17
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -230,11 +230,17 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
230230
let Uses = [MODE, EXEC] in {
231231
def FPTRUNC_ROUND_F16_F32_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
232232
(ins VGPR_32:$src0, i32imm:$round)>;
233+
234+
def FPTRUNC_ROUND_F32_F64_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
235+
(ins VReg_64:$src0, i32imm:$round)>;
233236
} // End Uses = [MODE, EXEC]
234237

235238
def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
236239
(FPTRUNC_ROUND_F16_F32_PSEUDO $src0, (as_hw_round_mode $round))>;
237240

241+
def : GCNPat <(f32 (fptrunc_round f64:$src0, (i32 SupportedRoundMode:$round))),
242+
(FPTRUNC_ROUND_F32_F64_PSEUDO $src0, (as_hw_round_mode $round))>;
243+
238244
// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
239245
// restoring it after we're done.
240246
let Defs = [SCC], isConvergent = 1 in {

llvm/lib/Target/AMDGPU/SIModeRegister.cpp

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,8 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
165165
const SIInstrInfo *TII) {
166166
unsigned Opcode = MI.getOpcode();
167167
if (TII->usesFPDPRounding(MI) ||
168-
Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO) {
168+
Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||
169+
Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) {
169170
switch (Opcode) {
170171
case AMDGPU::V_INTERP_P1LL_F16:
171172
case AMDGPU::V_INTERP_P1LV_F16:
@@ -189,8 +190,13 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
189190
B.addImm(0); // omod
190191
} else
191192
MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
192-
return Status(FP_ROUND_MODE_DP(3),
193-
FP_ROUND_MODE_DP(Mode));
193+
return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
194+
}
195+
case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {
196+
unsigned Mode = MI.getOperand(2).getImm();
197+
MI.removeOperand(2);
198+
MI.setDesc(TII->get(AMDGPU::V_CVT_F32_F64_e32));
199+
return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
194200
}
195201
default:
196202
return DefaultStatus;

llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -3,15 +3,15 @@
33
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s
44
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s
55

6-
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-FAIL %s
7-
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-FAIL %s
8-
96
; TODO: check for GISEL when bfloat is supported.
107
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f32-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F32-FAIL %s
118
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F64-FAIL %s
129

13-
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=TONEARESTAWAY-FAIL %s
14-
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=TONEARESTAWAY-FAIL %s
10+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s
11+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s
12+
13+
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s
14+
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s
1515

1616
;--- f16-f64-err.ll
1717
define amdgpu_gs void @test_fptrunc_round_f16_f64(double %a, ptr addrspace(1) %out) {
@@ -21,14 +21,6 @@ define amdgpu_gs void @test_fptrunc_round_f16_f64(double %a, ptr addrspace(1) %o
2121
ret void
2222
}
2323

24-
;--- f32-f64-err.ll
25-
define amdgpu_gs void @test_fptrunc_round_f32_f64(double %a, ptr addrspace(1) %out) {
26-
; F32-F64-FAIL: LLVM ERROR: Cannot select
27-
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.downward")
28-
store float %res, ptr addrspace(1) %out, align 4
29-
ret void
30-
}
31-
3224
;--- bf16-f32-err.ll
3325
define amdgpu_gs void @test_fptrunc_round_bf16_f32(float %a, ptr addrspace(1) %out) {
3426
; BF16-F32-FAIL: LLVM ERROR: Cannot select
@@ -47,8 +39,16 @@ define amdgpu_gs void @test_fptrunc_round_bf16_f64(double %a, ptr addrspace(1) %
4739

4840
;--- f16-f32-tonearestaway-err.ll
4941
define amdgpu_gs void @test_fptrunc_round_f16_f32_tonearestaway(float %a, ptr addrspace(1) %out) {
50-
; TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
42+
; F16-F32-TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
5143
%res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.tonearestaway")
5244
store half %res, ptr addrspace(1) %out, align 2
5345
ret void
5446
}
47+
48+
;--- f32-f64-tonearestaway-err.ll
49+
define amdgpu_gs void @test_fptrunc_round_f32_f64_tonearestaway(double %a, ptr addrspace(1) %out) {
50+
; F32-F64-TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
51+
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.tonearestaway")
52+
store float %res, ptr addrspace(1) %out, align 4
53+
ret void
54+
}

llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -516,3 +516,42 @@ define amdgpu_gs <8 x half> @v_fptrunc_round_v8f32_to_v8f16_downward(<8 x float>
516516
%res = call <8 x half> @llvm.fptrunc.round.v8f16.v8f32(<8 x float> %a, metadata !"round.downward")
517517
ret <8 x half> %res
518518
}
519+
520+
define amdgpu_gs float @v_fptrunc_round_f64_to_f32_tonearest(double %a) {
521+
; CHECK-LABEL: v_fptrunc_round_f64_to_f32_tonearest:
522+
; CHECK: ; %bb.0:
523+
; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
524+
; CHECK-NEXT: ; return to shader part epilog
525+
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.tonearest")
526+
ret float %res
527+
}
528+
529+
define amdgpu_gs float @v_fptrunc_round_f64_to_f32_upward(double %a) {
530+
; CHECK-LABEL: v_fptrunc_round_f64_to_f32_upward:
531+
; CHECK: ; %bb.0:
532+
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
533+
; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
534+
; CHECK-NEXT: ; return to shader part epilog
535+
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.upward")
536+
ret float %res
537+
}
538+
539+
define amdgpu_gs float @v_fptrunc_round_f64_to_f32_downward(double %a) {
540+
; CHECK-LABEL: v_fptrunc_round_f64_to_f32_downward:
541+
; CHECK: ; %bb.0:
542+
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
543+
; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
544+
; CHECK-NEXT: ; return to shader part epilog
545+
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.downward")
546+
ret float %res
547+
}
548+
549+
define amdgpu_gs float @v_fptrunc_round_f64_to_f32_towardzero(double %a) {
550+
; CHECK-LABEL: v_fptrunc_round_f64_to_f32_towardzero:
551+
; CHECK: ; %bb.0:
552+
; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
553+
; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
554+
; CHECK-NEXT: ; return to shader part epilog
555+
%res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.towardzero")
556+
ret float %res
557+
}

0 commit comments

Comments
 (0)