-
Notifications
You must be signed in to change notification settings - Fork 14.3k
AMDGPU: Add f64 to f32 support for llvm.fptrunc.round #107481
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-backend-amdgpu
Author: Changpeng Fang (changpeng)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/107481.diff
4 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 69e1b9a38324f2..c0154645b391df 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -230,11 +230,17 @@ def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
let Uses = [MODE, EXEC] in {
def FPTRUNC_ROUND_F16_F32_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VGPR_32:$src0, i32imm:$round)>;
+
+def FPTRUNC_ROUND_F32_F64_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
+ (ins VReg_64:$src0, i32imm:$round)>;
} // End Uses = [MODE, EXEC]
def : GCNPat <(f16 (fptrunc_round f32:$src0, (i32 SupportedRoundMode:$round))),
(FPTRUNC_ROUND_F16_F32_PSEUDO $src0, (as_hw_round_mode $round))>;
+def : GCNPat <(f32 (fptrunc_round f64:$src0, (i32 SupportedRoundMode:$round))),
+ (FPTRUNC_ROUND_F32_F64_PSEUDO $src0, (as_hw_round_mode $round))>;
+
// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
// restoring it after we're done.
let Defs = [SCC], isConvergent = 1 in {
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index a590c6560942cf..6bcf9757d29457 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -165,7 +165,8 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
const SIInstrInfo *TII) {
unsigned Opcode = MI.getOpcode();
if (TII->usesFPDPRounding(MI) ||
- Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO) {
+ Opcode == AMDGPU::FPTRUNC_ROUND_F16_F32_PSEUDO ||
+ Opcode == AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO) {
switch (Opcode) {
case AMDGPU::V_INTERP_P1LL_F16:
case AMDGPU::V_INTERP_P1LV_F16:
@@ -189,8 +190,13 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
B.addImm(0); // omod
} else
MI.setDesc(TII->get(AMDGPU::V_CVT_F16_F32_e32));
- return Status(FP_ROUND_MODE_DP(3),
- FP_ROUND_MODE_DP(Mode));
+ return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
+ }
+ case AMDGPU::FPTRUNC_ROUND_F32_F64_PSEUDO: {
+ unsigned Mode = MI.getOperand(2).getImm();
+ MI.removeOperand(2);
+ MI.setDesc(TII->get(AMDGPU::V_CVT_F32_F64_e32));
+ return Status(FP_ROUND_MODE_DP(3), FP_ROUND_MODE_DP(Mode));
}
default:
return DefaultStatus;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
index 291fe00a6177bd..21fe1ce4dc1d6f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
@@ -3,15 +3,15 @@
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s
-; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-FAIL %s
-; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-FAIL %s
-
; TODO: check for GISEL when bfloat is supported.
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f32-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F32-FAIL %s
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F64-FAIL %s
-; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=TONEARESTAWAY-FAIL %s
-; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=TONEARESTAWAY-FAIL %s
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s
+; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s
+
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s
+; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s
;--- f16-f64-err.ll
define amdgpu_gs void @test_fptrunc_round_f16_f64(double %a, ptr addrspace(1) %out) {
@@ -21,14 +21,6 @@ define amdgpu_gs void @test_fptrunc_round_f16_f64(double %a, ptr addrspace(1) %o
ret void
}
-;--- f32-f64-err.ll
-define amdgpu_gs void @test_fptrunc_round_f32_f64(double %a, ptr addrspace(1) %out) {
-; F32-F64-FAIL: LLVM ERROR: Cannot select
- %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.downward")
- store float %res, ptr addrspace(1) %out, align 4
- ret void
-}
-
;--- bf16-f32-err.ll
define amdgpu_gs void @test_fptrunc_round_bf16_f32(float %a, ptr addrspace(1) %out) {
; BF16-F32-FAIL: LLVM ERROR: Cannot select
@@ -47,8 +39,16 @@ define amdgpu_gs void @test_fptrunc_round_bf16_f64(double %a, ptr addrspace(1) %
;--- f16-f32-tonearestaway-err.ll
define amdgpu_gs void @test_fptrunc_round_f16_f32_tonearestaway(float %a, ptr addrspace(1) %out) {
-; TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
+; F16-F32-TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
%res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.tonearestaway")
store half %res, ptr addrspace(1) %out, align 2
ret void
}
+
+;--- f32-f64-tonearestaway-err.ll
+define amdgpu_gs void @test_fptrunc_round_f32_f64_tonearestaway(double %a, ptr addrspace(1) %out) {
+; F32-F64-TONEARESTAWAY-FAIL: LLVM ERROR: Cannot select
+ %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.tonearestaway")
+ store float %res, ptr addrspace(1) %out, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
index 54ed6f1eb42820..3d9ce6e79d9d28 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
@@ -516,3 +516,42 @@ define amdgpu_gs <8 x half> @v_fptrunc_round_v8f32_to_v8f16_downward(<8 x float>
%res = call <8 x half> @llvm.fptrunc.round.v8f16.v8f32(<8 x float> %a, metadata !"round.downward")
ret <8 x half> %res
}
+
+define amdgpu_gs float @v_fptrunc_round_f64_to_f32_tonearest(double %a) {
+; CHECK-LABEL: v_fptrunc_round_f64_to_f32_tonearest:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.tonearest")
+ ret float %res
+}
+
+define amdgpu_gs float @v_fptrunc_round_f64_to_f32_upward(double %a) {
+; CHECK-LABEL: v_fptrunc_round_f64_to_f32_upward:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.upward")
+ ret float %res
+}
+
+define amdgpu_gs float @v_fptrunc_round_f64_to_f32_downward(double %a) {
+; CHECK-LABEL: v_fptrunc_round_f64_to_f32_downward:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.downward")
+ ret float %res
+}
+
+define amdgpu_gs float @v_fptrunc_round_f64_to_f32_towardzero(double %a) {
+; CHECK-LABEL: v_fptrunc_round_f64_to_f32_towardzero:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
+; CHECK-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; CHECK-NEXT: ; return to shader part epilog
+ %res = call float @llvm.fptrunc.round.f32.f64(double %a, metadata !"round.towardzero")
+ ret float %res
+}
|
arsenm
approved these changes
Sep 6, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.