Skip to content

Commit d1139b3

Browse files
authored
[AMDGPU][True16][CodeGen] true16 codegen pats for v_mad_u16 (#124000)
Add true16 codegen patterns for v_mad_u16 (mul+add).
1 parent 4e81275 commit d1139b3

File tree

2 files changed

+91
-36
lines changed

2 files changed

+91
-36
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,9 @@ multiclass Ternary_i16_Pats_gfx9<SDPatternOperator op1, SDPatternOperator op2,
446446
>;
447447
}
448448

449+
let True16Predicate = UseRealTrue16Insts in {
450+
defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_t16_e64>;
451+
} // End True16Predicate = UseRealTrue16Insts
449452
let True16Predicate = UseFakeTrue16Insts in {
450453
defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_fake16_e64>;
451454
} // End True16Predicate = UseFakeTrue16Insts

llvm/test/CodeGen/AMDGPU/mad.u16.ll

Lines changed: 88 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 %s
33
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
44
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s
5-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX11 %s
5+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefixes=GFX11,GFX11-TRUE16 %s
6+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefixes=GFX11,GFX11-FAKE16 %s
67

78
; FIXME: GFX9 should be producing v_mad_u16 instead of v_mad_legacy_u16.
89

@@ -65,22 +66,44 @@ define amdgpu_kernel void @mad_u16(
6566
; GFX10-NEXT: global_store_short v0, v1, s[8:9]
6667
; GFX10-NEXT: s_endpgm
6768
;
68-
; GFX11-LABEL: mad_u16:
69-
; GFX11: ; %bb.0: ; %entry
70-
; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
71-
; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0
72-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
73-
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
74-
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
75-
; GFX11-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
76-
; GFX11-NEXT: s_waitcnt vmcnt(0)
77-
; GFX11-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc
78-
; GFX11-NEXT: s_waitcnt vmcnt(0)
79-
; GFX11-NEXT: global_load_u16 v0, v0, s[6:7] glc dlc
80-
; GFX11-NEXT: s_waitcnt vmcnt(0)
81-
; GFX11-NEXT: v_mad_u16 v0, v1, v2, v0
82-
; GFX11-NEXT: global_store_b16 v3, v0, s[0:1]
83-
; GFX11-NEXT: s_endpgm
69+
; GFX11-TRUE16-LABEL: mad_u16:
70+
; GFX11-TRUE16: ; %bb.0: ; %entry
71+
; GFX11-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
72+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
73+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
74+
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0
75+
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
76+
; GFX11-TRUE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
77+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
78+
; GFX11-TRUE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc
79+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
80+
; GFX11-TRUE16-NEXT: global_load_u16 v3, v0, s[6:7] glc dlc
81+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
82+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
83+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
84+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
85+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
86+
; GFX11-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v1.l
87+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
88+
; GFX11-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
89+
; GFX11-TRUE16-NEXT: s_endpgm
90+
;
91+
; GFX11-FAKE16-LABEL: mad_u16:
92+
; GFX11-FAKE16: ; %bb.0: ; %entry
93+
; GFX11-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
94+
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0
95+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
96+
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0
97+
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
98+
; GFX11-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
99+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
100+
; GFX11-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc
101+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
102+
; GFX11-FAKE16-NEXT: global_load_u16 v0, v0, s[6:7] glc dlc
103+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
104+
; GFX11-FAKE16-NEXT: v_mad_u16 v0, v1, v2, v0
105+
; GFX11-FAKE16-NEXT: global_store_b16 v3, v0, s[0:1]
106+
; GFX11-FAKE16-NEXT: s_endpgm
84107
ptr addrspace(1) %r,
85108
ptr addrspace(1) %a,
86109
ptr addrspace(1) %b,
@@ -121,11 +144,20 @@ define i16 @v_mad_u16(i16 %arg0, i16 %arg1, i16 %arg2) {
121144
; GFX10-NEXT: v_mad_u16 v0, v0, v1, v2
122145
; GFX10-NEXT: s_setpc_b64 s[30:31]
123146
;
124-
; GFX11-LABEL: v_mad_u16:
125-
; GFX11: ; %bb.0:
126-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127-
; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2
128-
; GFX11-NEXT: s_setpc_b64 s[30:31]
147+
; GFX11-TRUE16-LABEL: v_mad_u16:
148+
; GFX11-TRUE16: ; %bb.0:
149+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
151+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
152+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
153+
; GFX11-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v1.l
154+
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
155+
;
156+
; GFX11-FAKE16-LABEL: v_mad_u16:
157+
; GFX11-FAKE16: ; %bb.0:
158+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
159+
; GFX11-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v2
160+
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
129161
%mul = mul i16 %arg0, %arg1
130162
%add = add i16 %mul, %arg2
131163
ret i16 %add
@@ -151,13 +183,23 @@ define i32 @v_mad_u16_zext(i16 %arg0, i16 %arg1, i16 %arg2) {
151183
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
152184
; GFX10-NEXT: s_setpc_b64 s[30:31]
153185
;
154-
; GFX11-LABEL: v_mad_u16_zext:
155-
; GFX11: ; %bb.0:
156-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157-
; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2
158-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
159-
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
160-
; GFX11-NEXT: s_setpc_b64 s[30:31]
186+
; GFX11-TRUE16-LABEL: v_mad_u16_zext:
187+
; GFX11-TRUE16: ; %bb.0:
188+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
189+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
190+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
191+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
192+
; GFX11-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v1.l
193+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
194+
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
195+
;
196+
; GFX11-FAKE16-LABEL: v_mad_u16_zext:
197+
; GFX11-FAKE16: ; %bb.0:
198+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199+
; GFX11-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v2
200+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
201+
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
202+
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
161203
%mul = mul i16 %arg0, %arg1
162204
%add = add i16 %mul, %arg2
163205
%zext = zext i16 %add to i32
@@ -187,13 +229,23 @@ define i64 @v_mad_u16_zext64(i16 %arg0, i16 %arg1, i16 %arg2) {
187229
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
188230
; GFX10-NEXT: s_setpc_b64 s[30:31]
189231
;
190-
; GFX11-LABEL: v_mad_u16_zext64:
191-
; GFX11: ; %bb.0:
192-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193-
; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2
194-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
195-
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0
196-
; GFX11-NEXT: s_setpc_b64 s[30:31]
232+
; GFX11-TRUE16-LABEL: v_mad_u16_zext64:
233+
; GFX11-TRUE16: ; %bb.0:
234+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
235+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
236+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
237+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
238+
; GFX11-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v1.l
239+
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0
240+
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
241+
;
242+
; GFX11-FAKE16-LABEL: v_mad_u16_zext64:
243+
; GFX11-FAKE16: ; %bb.0:
244+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245+
; GFX11-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v2
246+
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
247+
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0
248+
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
197249
%mul = mul i16 %arg0, %arg1
198250
%add = add i16 %mul, %arg2
199251
%zext = zext i16 %add to i64

0 commit comments

Comments
 (0)