Skip to content

Commit e75e248

Browse files
authored
[AMDGPU][True16][Codegen] keep srcmod/clamp/omod from v_s_xxx_f16 when moved to VALU (#144781)
#141152 causes an issue in v_s_xxx_f16 lowering in both the true16 and fake16 flows. The V_S_XXX_F16 instructions are special: they have scalar inputs/outputs but are encoded in the VALU VOP3 format. We need to keep the srcmod/clamp/omod operands when lowering such an instruction to its corresponding VALU instruction with vector inputs/outputs.
1 parent f87b662 commit e75e248

File tree

5 files changed

+171
-14
lines changed

5 files changed

+171
-14
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7744,11 +7744,12 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
77447744
? &AMDGPU::VGPR_16RegClass
77457745
: &AMDGPU::VGPR_32RegClass);
77467746
auto NewInstr = BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
7747-
.addImm(0) // src0_modifiers
7747+
.add(Inst.getOperand(1)) // src0_modifiers
77487748
.add(Inst.getOperand(2))
7749-
.addImm(0) // clamp
7750-
.addImm(0); // omod
7751-
if (ST.useRealTrue16Insts())
7749+
.add(Inst.getOperand(3)) // clamp
7750+
.add(Inst.getOperand(4)) // omod
7751+
.setMIFlags(Inst.getFlags());
7752+
if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::op_sel))
77527753
NewInstr.addImm(0); // opsel0
77537754
MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst);
77547755
legalizeOperandsVALUt16(*NewInstr, MRI);
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
3+
4+
---
5+
name: v_s_exp_f16
6+
body: |
7+
bb.0.entry:
8+
; GCN-LABEL: name: v_s_exp_f16
9+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
10+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
11+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
12+
; GCN-NEXT: [[V_EXP_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_EXP_F16_fake16_e64 1, [[V_CVT_F32_U32_e64_]], 1, 1, implicit $mode, implicit $exec
13+
%0:vgpr_32 = IMPLICIT_DEF
14+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
15+
%2:sreg_32 = COPY %1:vgpr_32
16+
%3:sreg_32_xexec = V_S_EXP_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
17+
...
18+
19+
---
20+
name: v_s_log_f16
21+
body: |
22+
bb.0.entry:
23+
; GCN-LABEL: name: v_s_log_f16
24+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
25+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
26+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
27+
; GCN-NEXT: [[V_LOG_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_LOG_F16_fake16_e64 1, [[V_CVT_F32_U32_e64_]], 1, 1, implicit $mode, implicit $exec
28+
%0:vgpr_32 = IMPLICIT_DEF
29+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
30+
%2:sreg_32 = COPY %1:vgpr_32
31+
%3:sreg_32_xexec = V_S_LOG_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
32+
...
33+
34+
---
35+
name: v_s_rcp_f16
36+
body: |
37+
bb.0.entry:
38+
; GCN-LABEL: name: v_s_rcp_f16
39+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
40+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
41+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
42+
; GCN-NEXT: [[V_RCP_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_RCP_F16_fake16_e64 1, [[V_CVT_F32_U32_e64_]], 1, 1, implicit $mode, implicit $exec
43+
%0:vgpr_32 = IMPLICIT_DEF
44+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
45+
%2:sreg_32 = COPY %1:vgpr_32
46+
%3:sreg_32_xexec = V_S_RCP_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
47+
...
48+
49+
---
50+
name: v_s_rsq_f16
51+
body: |
52+
bb.0.entry:
53+
; GCN-LABEL: name: v_s_rsq_f16
54+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
55+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
56+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
57+
; GCN-NEXT: [[V_RSQ_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_F16_fake16_e64 1, [[V_CVT_F32_U32_e64_]], 1, 1, implicit $mode, implicit $exec
58+
%0:vgpr_32 = IMPLICIT_DEF
59+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
60+
%2:sreg_32 = COPY %1:vgpr_32
61+
%3:sreg_32_xexec = V_S_RSQ_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
62+
...
63+
64+
---
65+
name: v_s_sqrt_f16
66+
body: |
67+
bb.0.entry:
68+
; GCN-LABEL: name: v_s_sqrt_f16
69+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
70+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
71+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
72+
; GCN-NEXT: [[V_SQRT_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_SQRT_F16_fake16_e64 1, [[V_CVT_F32_U32_e64_]], 1, 1, implicit $mode, implicit $exec
73+
%0:vgpr_32 = IMPLICIT_DEF
74+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
75+
%2:sreg_32 = COPY %1:vgpr_32
76+
%3:sreg_32_xexec = V_S_SQRT_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
77+
...
78+
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
3+
4+
---
5+
name: v_s_exp_f16
6+
body: |
7+
bb.0.entry:
8+
; GCN-LABEL: name: v_s_exp_f16
9+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
10+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
11+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
12+
; GCN-NEXT: [[V_EXP_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_EXP_F16_t16_e64 1, [[V_CVT_F32_U32_e64_]].lo16, 1, 1, 0, implicit $mode, implicit $exec
13+
%0:vgpr_32 = IMPLICIT_DEF
14+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
15+
%2:sreg_32 = COPY %1:vgpr_32
16+
%3:sreg_32_xexec = V_S_EXP_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
17+
...
18+
19+
---
20+
name: v_s_log_f16
21+
body: |
22+
bb.0.entry:
23+
; GCN-LABEL: name: v_s_log_f16
24+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
25+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
26+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
27+
; GCN-NEXT: [[V_LOG_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_LOG_F16_t16_e64 1, [[V_CVT_F32_U32_e64_]].lo16, 1, 1, 0, implicit $mode, implicit $exec
28+
%0:vgpr_32 = IMPLICIT_DEF
29+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
30+
%2:sreg_32 = COPY %1:vgpr_32
31+
%3:sreg_32_xexec = V_S_LOG_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
32+
...
33+
34+
---
35+
name: v_s_rcp_f16
36+
body: |
37+
bb.0.entry:
38+
; GCN-LABEL: name: v_s_rcp_f16
39+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
40+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
41+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
42+
; GCN-NEXT: [[V_RCP_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_RCP_F16_t16_e64 1, [[V_CVT_F32_U32_e64_]].lo16, 1, 1, 0, implicit $mode, implicit $exec
43+
%0:vgpr_32 = IMPLICIT_DEF
44+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
45+
%2:sreg_32 = COPY %1:vgpr_32
46+
%3:sreg_32_xexec = V_S_RCP_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
47+
...
48+
49+
---
50+
name: v_s_rsq_f16
51+
body: |
52+
bb.0.entry:
53+
; GCN-LABEL: name: v_s_rsq_f16
54+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
55+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
56+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
57+
; GCN-NEXT: [[V_RSQ_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_RSQ_F16_t16_e64 1, [[V_CVT_F32_U32_e64_]].lo16, 1, 1, 0, implicit $mode, implicit $exec
58+
%0:vgpr_32 = IMPLICIT_DEF
59+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
60+
%2:sreg_32 = COPY %1:vgpr_32
61+
%3:sreg_32_xexec = V_S_RSQ_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
62+
...
63+
64+
---
65+
name: v_s_sqrt_f16
66+
body: |
67+
bb.0.entry:
68+
; GCN-LABEL: name: v_s_sqrt_f16
69+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
70+
; GCN-NEXT: [[V_CVT_F32_U32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
71+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
72+
; GCN-NEXT: [[V_SQRT_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_SQRT_F16_t16_e64 1, [[V_CVT_F32_U32_e64_]].lo16, 1, 1, 0, implicit $mode, implicit $exec
73+
%0:vgpr_32 = IMPLICIT_DEF
74+
%1:vgpr_32 = V_CVT_F32_U32_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
75+
%2:sreg_32 = COPY %1:vgpr_32
76+
%3:sreg_32_xexec = V_S_SQRT_F16_e64 1, %2:sreg_32, 1, 1, implicit $mode, implicit $exec
77+
...
78+

llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans-f16-fake16.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ define amdgpu_kernel void @exp_f16(ptr addrspace(1) %ptr) {
1111
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
1212
; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
1313
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
14-
; CHECK-NEXT: [[V_EXP_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_EXP_F16_fake16_e64 0, [[GLOBAL_LOAD_USHORT_SADDR]], 0, 0, implicit $mode, implicit $exec
14+
; CHECK-NEXT: [[V_EXP_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F16_fake16_e64 0, [[GLOBAL_LOAD_USHORT_SADDR]], 0, 0, implicit $mode, implicit $exec
1515
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xexec = IMPLICIT_DEF
1616
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_EXP_F16_fake16_e64_]]
1717
; CHECK-NEXT: GLOBAL_STORE_SHORT_SADDR [[V_MOV_B32_e32_]], killed [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s16) into %ir.ptr.load, addrspace 1)
@@ -32,7 +32,7 @@ define amdgpu_kernel void @log_f16(ptr addrspace(1) %ptr) {
3232
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
3333
; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
3434
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
35-
; CHECK-NEXT: [[V_LOG_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_LOG_F16_fake16_e64 0, [[GLOBAL_LOAD_USHORT_SADDR]], 0, 0, implicit $mode, implicit $exec
35+
; CHECK-NEXT: [[V_LOG_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_F16_fake16_e64 0, [[GLOBAL_LOAD_USHORT_SADDR]], 0, 0, implicit $mode, implicit $exec
3636
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xexec = IMPLICIT_DEF
3737
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_LOG_F16_fake16_e64_]]
3838
; CHECK-NEXT: GLOBAL_STORE_SHORT_SADDR [[V_MOV_B32_e32_]], killed [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s16) into %ir.ptr.load, addrspace 1)
@@ -53,7 +53,7 @@ define amdgpu_kernel void @rcp_f16(ptr addrspace(1) %ptr) {
5353
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
5454
; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
5555
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
56-
; CHECK-NEXT: [[V_RCP_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_RCP_F16_fake16_e64 0, [[GLOBAL_LOAD_USHORT_SADDR]], 0, 0, implicit $mode, implicit $exec
56+
; CHECK-NEXT: [[V_RCP_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F16_fake16_e64 0, [[GLOBAL_LOAD_USHORT_SADDR]], 0, 0, implicit $mode, implicit $exec
5757
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xexec = IMPLICIT_DEF
5858
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_RCP_F16_fake16_e64_]]
5959
; CHECK-NEXT: GLOBAL_STORE_SHORT_SADDR [[V_MOV_B32_e32_]], killed [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s16) into %ir.ptr.load, addrspace 1)
@@ -74,7 +74,7 @@ define amdgpu_kernel void @rsq_f16(ptr addrspace(1) %ptr) {
7474
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
7575
; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
7676
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
77-
; CHECK-NEXT: [[V_RSQ_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_RSQ_F16_fake16_e64 0, [[GLOBAL_LOAD_USHORT_SADDR]], 0, 0, implicit $mode, implicit $exec
77+
; CHECK-NEXT: [[V_RSQ_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F16_fake16_e64 0, [[GLOBAL_LOAD_USHORT_SADDR]], 0, 0, implicit $mode, implicit $exec
7878
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xexec = IMPLICIT_DEF
7979
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_RSQ_F16_fake16_e64_]]
8080
; CHECK-NEXT: GLOBAL_STORE_SHORT_SADDR [[V_MOV_B32_e32_]], killed [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s16) into %ir.ptr.load, addrspace 1)
@@ -95,7 +95,7 @@ define amdgpu_kernel void @sqrt_f16(ptr addrspace(1) %ptr) {
9595
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
9696
; CHECK-NEXT: [[GLOBAL_LOAD_USHORT_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
9797
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
98-
; CHECK-NEXT: [[V_SQRT_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_SQRT_F16_fake16_e64 0, [[GLOBAL_LOAD_USHORT_SADDR]], 0, 0, implicit $mode, implicit $exec
98+
; CHECK-NEXT: [[V_SQRT_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SQRT_F16_fake16_e64 0, [[GLOBAL_LOAD_USHORT_SADDR]], 0, 0, implicit $mode, implicit $exec
9999
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xexec = IMPLICIT_DEF
100100
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_SQRT_F16_fake16_e64_]]
101101
; CHECK-NEXT: GLOBAL_STORE_SHORT_SADDR [[V_MOV_B32_e32_]], killed [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s16) into %ir.ptr.load, addrspace 1)

llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans-f16-true16.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ define amdgpu_kernel void @exp_f16(ptr addrspace(1) %ptr) {
1212
; CHECK-NEXT: [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_:%[0-9]+]]:vgpr_16 = GLOBAL_LOAD_SHORT_D16_SADDR_t16 [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
1313
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
1414
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]], %subreg.lo16, [[DEF]], %subreg.hi16
15-
; CHECK-NEXT: [[V_EXP_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_EXP_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec
15+
; CHECK-NEXT: [[V_EXP_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_EXP_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec
1616
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
1717
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_EXP_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16
1818
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[REG_SEQUENCE1]]
@@ -35,7 +35,7 @@ define amdgpu_kernel void @log_f16(ptr addrspace(1) %ptr) {
3535
; CHECK-NEXT: [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_:%[0-9]+]]:vgpr_16 = GLOBAL_LOAD_SHORT_D16_SADDR_t16 [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
3636
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
3737
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]], %subreg.lo16, [[DEF]], %subreg.hi16
38-
; CHECK-NEXT: [[V_LOG_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_LOG_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec
38+
; CHECK-NEXT: [[V_LOG_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_LOG_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec
3939
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
4040
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_LOG_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16
4141
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[REG_SEQUENCE1]]
@@ -58,7 +58,7 @@ define amdgpu_kernel void @rcp_f16(ptr addrspace(1) %ptr) {
5858
; CHECK-NEXT: [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_:%[0-9]+]]:vgpr_16 = GLOBAL_LOAD_SHORT_D16_SADDR_t16 [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
5959
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
6060
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]], %subreg.lo16, [[DEF]], %subreg.hi16
61-
; CHECK-NEXT: [[V_RCP_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_RCP_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec
61+
; CHECK-NEXT: [[V_RCP_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_RCP_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec
6262
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
6363
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_RCP_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16
6464
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[REG_SEQUENCE1]]
@@ -81,7 +81,7 @@ define amdgpu_kernel void @rsq_f16(ptr addrspace(1) %ptr) {
8181
; CHECK-NEXT: [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_:%[0-9]+]]:vgpr_16 = GLOBAL_LOAD_SHORT_D16_SADDR_t16 [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
8282
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
8383
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]], %subreg.lo16, [[DEF]], %subreg.hi16
84-
; CHECK-NEXT: [[V_RSQ_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_RSQ_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec
84+
; CHECK-NEXT: [[V_RSQ_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_RSQ_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec
8585
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
8686
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_RSQ_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16
8787
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[REG_SEQUENCE1]]
@@ -104,7 +104,7 @@ define amdgpu_kernel void @sqrt_f16(ptr addrspace(1) %ptr) {
104104
; CHECK-NEXT: [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_:%[0-9]+]]:vgpr_16 = GLOBAL_LOAD_SHORT_D16_SADDR_t16 [[S_LOAD_DWORDX2_IMM]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s16) from %ir.ptr.load, addrspace 1)
105105
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
106106
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[GLOBAL_LOAD_SHORT_D16_SADDR_t16_]], %subreg.lo16, [[DEF]], %subreg.hi16
107-
; CHECK-NEXT: [[V_SQRT_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_SQRT_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec
107+
; CHECK-NEXT: [[V_SQRT_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_SQRT_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec
108108
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
109109
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_SQRT_F16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16
110110
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[REG_SEQUENCE1]]

0 commit comments

Comments
 (0)