Skip to content

Commit 2579ac6

Browse files
committed
[AMDGPU] Preserve src0 modifiers, clamp, omod, and MI flags when lowering V_S_*_F16 to VALU in moveToVALUImpl
1 parent d9f7979 commit 2579ac6

File tree

3 files changed

+161
-4
lines changed

3 files changed

+161
-4
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7744,11 +7744,12 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
77447744
? &AMDGPU::VGPR_16RegClass
77457745
: &AMDGPU::VGPR_32RegClass);
77467746
auto NewInstr = BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
7747-
.addImm(0) // src0_modifiers
7747+
.add(Inst.getOperand(1)) // src0_modifiers
77487748
.add(Inst.getOperand(2))
7749-
.addImm(0) // clamp
7750-
.addImm(0); // omod
7751-
if (ST.useRealTrue16Insts())
7749+
.add(Inst.getOperand(3)) // clamp
7750+
.add(Inst.getOperand(4)) // omod
7751+
.setMIFlags(Inst.getFlags());
7752+
if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::op_sel))
77527753
NewInstr.addImm(0); // opsel0
77537754
MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst);
77547755
legalizeOperandsVALUt16(*NewInstr, MRI);
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
3+
4+
---
5+
name: v_s_exp_f16
6+
body: |
7+
bb.0.entry:
8+
; GCN-LABEL: name: v_s_exp_f16
9+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
10+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
11+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
12+
; GCN-NEXT: [[V_EXP_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_EXP_F16_fake16_e64 1, [[V_CVT_F32_U32_e64_]], 1, 1, implicit $mode, implicit $exec
13+
%0:vgpr_32 = IMPLICIT_DEF
14+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
15+
%2:sreg_32 = COPY %1:vgpr_32
16+
%3:sreg_32_xexec = V_S_EXP_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
17+
...
18+
19+
---
20+
name: v_s_log_f16
21+
body: |
22+
bb.0.entry:
23+
; GCN-LABEL: name: v_s_log_f16
24+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
25+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
26+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
27+
; GCN-NEXT: [[V_LOG_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_LOG_F16_fake16_e64 1, [[V_CVT_F32_U32_e64_]], 1, 1, implicit $mode, implicit $exec
28+
%0:vgpr_32 = IMPLICIT_DEF
29+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
30+
%2:sreg_32 = COPY %1:vgpr_32
31+
%3:sreg_32_xexec = V_S_LOG_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
32+
...
33+
34+
---
35+
name: v_s_rcp_f16
36+
body: |
37+
bb.0.entry:
38+
; GCN-LABEL: name: v_s_rcp_f16
39+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
40+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
41+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
42+
; GCN-NEXT: [[V_RCP_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_RCP_F16_fake16_e64 1, [[V_CVT_F32_U32_e64_]], 1, 1, implicit $mode, implicit $exec
43+
%0:vgpr_32 = IMPLICIT_DEF
44+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
45+
%2:sreg_32 = COPY %1:vgpr_32
46+
%3:sreg_32_xexec = V_S_RCP_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
47+
...
48+
49+
---
50+
name: v_s_rsq_f16
51+
body: |
52+
bb.0.entry:
53+
; GCN-LABEL: name: v_s_rsq_f16
54+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
55+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
56+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
57+
; GCN-NEXT: [[V_RSQ_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_F16_fake16_e64 1, [[V_CVT_F32_U32_e64_]], 1, 1, implicit $mode, implicit $exec
58+
%0:vgpr_32 = IMPLICIT_DEF
59+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
60+
%2:sreg_32 = COPY %1:vgpr_32
61+
%3:sreg_32_xexec = V_S_RSQ_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
62+
...
63+
64+
---
65+
name: v_s_sqrt_f16
66+
body: |
67+
bb.0.entry:
68+
; GCN-LABEL: name: v_s_sqrt_f16
69+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
70+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
71+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
72+
; GCN-NEXT: [[V_SQRT_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_SQRT_F16_fake16_e64 1, [[V_CVT_F32_U32_e64_]], 1, 1, implicit $mode, implicit $exec
73+
%0:vgpr_32 = IMPLICIT_DEF
74+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
75+
%2:sreg_32 = COPY %1:vgpr_32
76+
%3:sreg_32_xexec = V_S_SQRT_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
77+
...
78+
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
3+
4+
---
5+
name: v_s_exp_f16
6+
body: |
7+
bb.0.entry:
8+
; GCN-LABEL: name: v_s_exp_f16
9+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
10+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
11+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
12+
; GCN-NEXT: [[V_EXP_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_EXP_F16_t16_e64 1, [[V_CVT_F32_U32_e64_]].lo16, 1, 1, 0, implicit $mode, implicit $exec
13+
%0:vgpr_32 = IMPLICIT_DEF
14+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
15+
%2:sreg_32 = COPY %1:vgpr_32
16+
%3:sreg_32_xexec = V_S_EXP_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
17+
...
18+
19+
---
20+
name: v_s_log_f16
21+
body: |
22+
bb.0.entry:
23+
; GCN-LABEL: name: v_s_log_f16
24+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
25+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
26+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
27+
; GCN-NEXT: [[V_LOG_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_LOG_F16_t16_e64 1, [[V_CVT_F32_U32_e64_]].lo16, 1, 1, 0, implicit $mode, implicit $exec
28+
%0:vgpr_32 = IMPLICIT_DEF
29+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
30+
%2:sreg_32 = COPY %1:vgpr_32
31+
%3:sreg_32_xexec = V_S_LOG_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
32+
...
33+
34+
---
35+
name: v_s_rcp_f16
36+
body: |
37+
bb.0.entry:
38+
; GCN-LABEL: name: v_s_rcp_f16
39+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
40+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
41+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
42+
; GCN-NEXT: [[V_RCP_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_RCP_F16_t16_e64 1, [[V_CVT_F32_U32_e64_]].lo16, 1, 1, 0, implicit $mode, implicit $exec
43+
%0:vgpr_32 = IMPLICIT_DEF
44+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
45+
%2:sreg_32 = COPY %1:vgpr_32
46+
%3:sreg_32_xexec = V_S_RCP_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
47+
...
48+
49+
---
50+
name: v_s_rsq_f16
51+
body: |
52+
bb.0.entry:
53+
; GCN-LABEL: name: v_s_rsq_f16
54+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
55+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
56+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
57+
; GCN-NEXT: [[V_RSQ_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_RSQ_F16_t16_e64 1, [[V_CVT_F32_U32_e64_]].lo16, 1, 1, 0, implicit $mode, implicit $exec
58+
%0:vgpr_32 = IMPLICIT_DEF
59+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
60+
%2:sreg_32 = COPY %1:vgpr_32
61+
%3:sreg_32_xexec = V_S_RSQ_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
62+
...
63+
64+
---
65+
name: v_s_sqrt_f16
66+
body: |
67+
bb.0.entry:
68+
; GCN-LABEL: name: v_s_sqrt_f16
69+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
70+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
71+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
72+
; GCN-NEXT: [[V_SQRT_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_SQRT_F16_t16_e64 1, [[V_CVT_F32_U32_e64_]].lo16, 1, 1, 0, implicit $mode, implicit $exec
73+
%0:vgpr_32 = IMPLICIT_DEF
74+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
75+
%2:sreg_32 = COPY %1:vgpr_32
76+
%3:sreg_32_xexec = V_S_SQRT_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
77+
...
78+

0 commit comments

Comments
 (0)