Skip to content

AMDGPU: Add f64 to f32 support for llvm.fptrunc.round #107481

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
changpeng merged 1 commit into llvm:main from the rounding branch
Sep 6, 2024

Conversation

changpeng
Copy link
Contributor

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Sep 5, 2024

@llvm/pr-subscribers-backend-amdgpu

Author: Changpeng Fang (changpeng)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/107481.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+6)
  • (modified) llvm/lib/Target/AMDGPU/SIModeRegister.cpp (+9-3)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll (+14-14)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll (+39)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 69e1b9a38324f2..c0154645b391df 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -230,11 +230,17 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
 let Uses = [MODE, EXEC] in {
 def FPTRUNC_ROUND_F16_F32_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
   (ins VGPR_32:$src0, i32imm:$round)>;
+
+def FPTRUNC_ROUND_F32_F64_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
+  (ins VReg_64:$src0, i32imm:$round)>;
 } // End Uses = [MODE, EXEC]
 
 def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
      (FPTRUNC_ROUND_F16_F32_PSEUDO $src0, (as_hw_round_mode $round))>;
 
+def : GCNPat <(f32 (fptrunc_round f64:$src0, (i32 SupportedRoundMode:$round))),
+     (FPTRUNC_ROUND_F32_F64_PSEUDO $src0, (as_hw_round_mode $round))>;
+
 // Invert the exec mask and overwrite the inactive lanes of dst with inactive,
 // restoring it after we're done.
 let Defs = [SCC], isConvergent = 1 in {
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index a590c6560942cf..6bcf9757d29457 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -165,7 +165,8 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
                                           const SIInstrInfo *TII) {
   unsigned Opcode = MI.getOpcode();
   if (TII->usesFPDPRounding(MI) ||
-      Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO) {
+      Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||
+      Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) {
     switch (Opcode) {
     case AMDGPU::V_INTERP_P1LL_F16:
     case AMDGPU::V_INTERP_P1LV_F16:
@@ -189,8 +190,13 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
         B.addImm(0); // omod
       } else
         MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
-      return Status(FP_ROUND_MODE_DP(3),
-                    FP_ROUND_MODE_DP(Mode));
+      return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
+    }
+    case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {
+      unsigned Mode = MI.getOperand(2).getImm();
+      MI.removeOperand(2);
+      MI.setDesc(TII->get(AMDGPU::V_CVT_F32_F64_e32));
+      return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
     }
     default:
       return DefaultStatus;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
index 291fe00a6177bd..21fe1ce4dc1d6f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
@@ -3,15 +3,15 @@
 ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s
 ; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s
 
-; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-FAIL %s
-; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-FAIL %s
-
 ; TODO: check for GISEL when bfloat is supported.
 ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f32-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F32-FAIL %s
 ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F64-FAIL %s
 
-; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=TONEARESTAWAY-FAIL %s
-; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=TONEARESTAWAY-FAIL %s
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s
+; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s
+
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s
+; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s
 
 ;--- f16-f64-err.ll
 define amdgpu_gs void @test_fptrunc_round_f16_f64(double %a, ptr addrspace(1) %out) {
@@ -21,14 +21,6 @@ define amdgpu_gs void @test_fptrunc_round_f16_f64(double %a, ptr addrspace(1) %o
   ret void
 }
 
-;--- f32-f64-err.ll
-define amdgpu_gs void @test_fptrunc_round_f32_f64(double %a, ptr addrspace(1) %out) {
-; F32-F64-FAIL: LLVM ERROR: Cannot select
-  %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.downward")
-  store float %res, ptr addrspace(1) %out, align 4
-  ret void
-}
-
 ;--- bf16-f32-err.ll
 define amdgpu_gs void @test_fptrunc_round_bf16_f32(float %a, ptr addrspace(1) %out) {
 ; BF16-F32-FAIL: LLVM ERROR: Cannot select
@@ -47,8 +39,16 @@ define amdgpu_gs void @test_fptrunc_round_bf16_f64(double %a, ptr addrspace(1) %
 
 ;--- f16-f32-tonearestaway-err.ll
 define amdgpu_gs void @test_fptrunc_round_f16_f32_tonearestaway(float %a, ptr addrspace(1) %out) {
-; TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
+; F16-F32-TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
   %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.tonearestaway")
   store half %res, ptr addrspace(1) %out, align 2
   ret void
 }
+
+;--- f32-f64-tonearestaway-err.ll
+define amdgpu_gs void @test_fptrunc_round_f32_f64_tonearestaway(double %a, ptr addrspace(1) %out) {
+; F32-F64-TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
+  %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.tonearestaway")
+  store float %res, ptr addrspace(1) %out, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
index 54ed6f1eb42820..3d9ce6e79d9d28 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
@@ -516,3 +516,42 @@ define amdgpu_gs <8 x half> @v_fptrunc_round_v8f32_to_v8f16_downward(<8 x float>
   %res = call <8 x half> @llvm.fptrunc.round.v8f16.v8f32(<8 x float> %a, metadata !"round.downward")
   ret <8 x half> %res
 }
+
+define amdgpu_gs float @v_fptrunc_round_f64_to_f32_tonearest(double %a) {
+; CHECK-LABEL: v_fptrunc_round_f64_to_f32_tonearest:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
+; CHECK-NEXT:    ; return to shader part epilog
+  %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.tonearest")
+  ret float %res
+}
+
+define amdgpu_gs float @v_fptrunc_round_f64_to_f32_upward(double %a) {
+; CHECK-LABEL: v_fptrunc_round_f64_to_f32_upward:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; CHECK-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
+; CHECK-NEXT:    ; return to shader part epilog
+  %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.upward")
+  ret float %res
+}
+
+define amdgpu_gs float @v_fptrunc_round_f64_to_f32_downward(double %a) {
+; CHECK-LABEL: v_fptrunc_round_f64_to_f32_downward:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; CHECK-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
+; CHECK-NEXT:    ; return to shader part epilog
+  %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.downward")
+  ret float %res
+}
+
+define amdgpu_gs float @v_fptrunc_round_f64_to_f32_towardzero(double %a) {
+; CHECK-LABEL: v_fptrunc_round_f64_to_f32_towardzero:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
+; CHECK-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
+; CHECK-NEXT:    ; return to shader part epilog
+  %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.towardzero")
+  ret float %res
+}

@changpeng changpeng merged commit 24267a7 into llvm:main Sep 6, 2024
10 checks passed
@changpeng changpeng deleted the rounding branch September 6, 2024 05:57
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants