2
2
; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 %s
3
3
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
4
4
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s
5
- ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX11 %s
5
+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefixes=GFX11,GFX11-TRUE16 %s
6
+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefixes=GFX11,GFX11-FAKE16 %s
6
7
7
8
; FIXME: GFX9 should be producing v_mad_u16 instead of v_mad_legacy_u16.
8
9
@@ -65,22 +66,44 @@ define amdgpu_kernel void @mad_u16(
65
66
; GFX10-NEXT: global_store_short v0, v1, s[8:9]
66
67
; GFX10-NEXT: s_endpgm
67
68
;
68
- ; GFX11-LABEL: mad_u16:
69
- ; GFX11: ; %bb.0: ; %entry
70
- ; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
71
- ; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0
72
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
73
- ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
74
- ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
75
- ; GFX11-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
76
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
77
- ; GFX11-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc
78
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
79
- ; GFX11-NEXT: global_load_u16 v0, v0, s[6:7] glc dlc
80
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
81
- ; GFX11-NEXT: v_mad_u16 v0, v1, v2, v0
82
- ; GFX11-NEXT: global_store_b16 v3, v0, s[0:1]
83
- ; GFX11-NEXT: s_endpgm
69
+ ; GFX11-TRUE16-LABEL: mad_u16:
70
+ ; GFX11-TRUE16: ; %bb.0: ; %entry
71
+ ; GFX11-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
72
+ ; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
73
+ ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
74
+ ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0
75
+ ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
76
+ ; GFX11-TRUE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
77
+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
78
+ ; GFX11-TRUE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc
79
+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
80
+ ; GFX11-TRUE16-NEXT: global_load_u16 v3, v0, s[6:7] glc dlc
81
+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
82
+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
83
+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
84
+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
85
+ ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
86
+ ; GFX11-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v1.l
87
+ ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
88
+ ; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
89
+ ; GFX11-TRUE16-NEXT: s_endpgm
90
+ ;
91
+ ; GFX11-FAKE16-LABEL: mad_u16:
92
+ ; GFX11-FAKE16: ; %bb.0: ; %entry
93
+ ; GFX11-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
94
+ ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0
95
+ ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
96
+ ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0
97
+ ; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
98
+ ; GFX11-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
99
+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
100
+ ; GFX11-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc
101
+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
102
+ ; GFX11-FAKE16-NEXT: global_load_u16 v0, v0, s[6:7] glc dlc
103
+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
104
+ ; GFX11-FAKE16-NEXT: v_mad_u16 v0, v1, v2, v0
105
+ ; GFX11-FAKE16-NEXT: global_store_b16 v3, v0, s[0:1]
106
+ ; GFX11-FAKE16-NEXT: s_endpgm
84
107
ptr addrspace (1 ) %r ,
85
108
ptr addrspace (1 ) %a ,
86
109
ptr addrspace (1 ) %b ,
@@ -121,11 +144,20 @@ define i16 @v_mad_u16(i16 %arg0, i16 %arg1, i16 %arg2) {
121
144
; GFX10-NEXT: v_mad_u16 v0, v0, v1, v2
122
145
; GFX10-NEXT: s_setpc_b64 s[30:31]
123
146
;
124
- ; GFX11-LABEL: v_mad_u16:
125
- ; GFX11: ; %bb.0:
126
- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127
- ; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2
128
- ; GFX11-NEXT: s_setpc_b64 s[30:31]
147
+ ; GFX11-TRUE16-LABEL: v_mad_u16:
148
+ ; GFX11-TRUE16: ; %bb.0:
149
+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150
+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
151
+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
152
+ ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
153
+ ; GFX11-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v1.l
154
+ ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
155
+ ;
156
+ ; GFX11-FAKE16-LABEL: v_mad_u16:
157
+ ; GFX11-FAKE16: ; %bb.0:
158
+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
159
+ ; GFX11-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v2
160
+ ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
129
161
%mul = mul i16 %arg0 , %arg1
130
162
%add = add i16 %mul , %arg2
131
163
ret i16 %add
@@ -151,13 +183,23 @@ define i32 @v_mad_u16_zext(i16 %arg0, i16 %arg1, i16 %arg2) {
151
183
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
152
184
; GFX10-NEXT: s_setpc_b64 s[30:31]
153
185
;
154
- ; GFX11-LABEL: v_mad_u16_zext:
155
- ; GFX11: ; %bb.0:
156
- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157
- ; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2
158
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
159
- ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
160
- ; GFX11-NEXT: s_setpc_b64 s[30:31]
186
+ ; GFX11-TRUE16-LABEL: v_mad_u16_zext:
187
+ ; GFX11-TRUE16: ; %bb.0:
188
+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
189
+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
190
+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
191
+ ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
192
+ ; GFX11-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v1.l
193
+ ; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
194
+ ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
195
+ ;
196
+ ; GFX11-FAKE16-LABEL: v_mad_u16_zext:
197
+ ; GFX11-FAKE16: ; %bb.0:
198
+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199
+ ; GFX11-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v2
200
+ ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
201
+ ; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
202
+ ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
161
203
%mul = mul i16 %arg0 , %arg1
162
204
%add = add i16 %mul , %arg2
163
205
%zext = zext i16 %add to i32
@@ -187,13 +229,23 @@ define i64 @v_mad_u16_zext64(i16 %arg0, i16 %arg1, i16 %arg2) {
187
229
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
188
230
; GFX10-NEXT: s_setpc_b64 s[30:31]
189
231
;
190
- ; GFX11-LABEL: v_mad_u16_zext64:
191
- ; GFX11: ; %bb.0:
192
- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193
- ; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2
194
- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
195
- ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0
196
- ; GFX11-NEXT: s_setpc_b64 s[30:31]
232
+ ; GFX11-TRUE16-LABEL: v_mad_u16_zext64:
233
+ ; GFX11-TRUE16: ; %bb.0:
234
+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
235
+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
236
+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
237
+ ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
238
+ ; GFX11-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v1.l
239
+ ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0
240
+ ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
241
+ ;
242
+ ; GFX11-FAKE16-LABEL: v_mad_u16_zext64:
243
+ ; GFX11-FAKE16: ; %bb.0:
244
+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245
+ ; GFX11-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v2
246
+ ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
247
+ ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0
248
+ ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
197
249
%mul = mul i16 %arg0 , %arg1
198
250
%add = add i16 %mul , %arg2
199
251
%zext = zext i16 %add to i64
0 commit comments