Skip to content

Commit d8e49cc

Browse files
committed
Fix memory predicates
1 parent ccc115c, commit d8e49cc

File tree

5 files changed

+60
-67
lines changed

5 files changed

+60
-67
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 4 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -504,42 +504,34 @@ def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextloadi16 node:$ptr)> {
504504

505505
def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
506506
let IsAtomic = 1;
507-
let MemoryVT = i8;
508507
}
509508

510509
def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
511510
let IsAtomic = 1;
512-
let MemoryVT = i16;
513511
}
514512

515513
def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
516514
let IsAtomic = 1;
517-
let MemoryVT = i32;
518515
}
519516

520517
def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
521518
let IsAtomic = 1;
522-
let MemoryVT = i64;
523519
}
524520

525-
def atomic_load_zext_8_#as : PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> {
521+
def atomic_load_zext_8_#as : PatFrag<(ops node:$ptr), (atomic_load_zext_8 node:$ptr)> {
526522
let IsAtomic = 1;
527-
let MemoryVT = i8;
528523
}
529524

530-
def atomic_load_sext_8_#as : PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> {
525+
def atomic_load_sext_8_#as : PatFrag<(ops node:$ptr), (atomic_load_sext_8 node:$ptr)> {
531526
let IsAtomic = 1;
532-
let MemoryVT = i8;
533527
}
534528

535-
def atomic_load_zext_16_#as : PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> {
529+
def atomic_load_zext_16_#as : PatFrag<(ops node:$ptr), (atomic_load_zext_16 node:$ptr)> {
536530
let IsAtomic = 1;
537-
let MemoryVT = i16;
538531
}
539532

540-
def atomic_load_sext_16_#as : PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> {
533+
def atomic_load_sext_16_#as : PatFrag<(ops node:$ptr), (atomic_load_sext_16 node:$ptr)> {
541534
let IsAtomic = 1;
542-
let MemoryVT = i16;
543535
}
544536

545537
} // End let AddressSpaces

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1373,6 +1373,7 @@ def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
13731373
def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
13741374
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
13751375
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
1376+
def : FlatLoadPat <FLAT_LOAD_SSHORT, atomic_load_sext_16_flat, i32>;
13761377
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
13771378

13781379
def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;

llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ define i32 @atomic_load_flat_monotonic_i16_zext_to_i32(ptr %ptr) {
117117
; GCN-LABEL: atomic_load_flat_monotonic_i16_zext_to_i32:
118118
; GCN: ; %bb.0:
119119
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120-
; GCN-NEXT: flat_load_ubyte v0, v[0:1] glc
120+
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
121121
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
122122
; GCN-NEXT: s_setpc_b64 s[30:31]
123123
%load = load atomic i16, ptr %ptr monotonic, align 2
@@ -129,7 +129,7 @@ define i32 @atomic_load_flat_monotonic_i16_sext_to_i32(ptr %ptr) {
129129
; GFX7-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32:
130130
; GFX7: ; %bb.0:
131131
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132-
; GFX7-NEXT: flat_load_sbyte v2, v[0:1] glc
132+
; GFX7-NEXT: flat_load_sshort v2, v[0:1] glc
133133
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
134134
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
135135
; GFX7-NEXT: v_mov_b32_e32 v0, v2
@@ -138,7 +138,7 @@ define i32 @atomic_load_flat_monotonic_i16_sext_to_i32(ptr %ptr) {
138138
; GFX8-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32:
139139
; GFX8: ; %bb.0:
140140
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141-
; GFX8-NEXT: flat_load_sbyte v2, v[0:1] glc
141+
; GFX8-NEXT: flat_load_sshort v2, v[0:1] glc
142142
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
143143
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
144144
; GFX8-NEXT: v_mov_b32_e32 v0, v2
@@ -147,7 +147,7 @@ define i32 @atomic_load_flat_monotonic_i16_sext_to_i32(ptr %ptr) {
147147
; GFX9-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32:
148148
; GFX9: ; %bb.0:
149149
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150-
; GFX9-NEXT: flat_load_sbyte v2, v[0:1] glc
150+
; GFX9-NEXT: flat_load_sshort v2, v[0:1] glc
151151
; GFX9-NEXT: flat_load_ushort v3, v[0:1] glc
152152
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
153153
; GFX9-NEXT: v_mov_b32_e32 v0, v2
@@ -183,7 +183,7 @@ define i32 @atomic_load_flat_monotonic_f16_zext_to_i32(ptr %ptr) {
183183
; GCN-LABEL: atomic_load_flat_monotonic_f16_zext_to_i32:
184184
; GCN: ; %bb.0:
185185
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186-
; GCN-NEXT: flat_load_ubyte v0, v[0:1] glc
186+
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
187187
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
188188
; GCN-NEXT: s_setpc_b64 s[30:31]
189189
%load = load atomic half, ptr %ptr monotonic, align 2
@@ -196,7 +196,7 @@ define i32 @atomic_load_flat_monotonic_bf16_zext_to_i32(ptr %ptr) {
196196
; GCN-LABEL: atomic_load_flat_monotonic_bf16_zext_to_i32:
197197
; GCN: ; %bb.0:
198198
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199-
; GCN-NEXT: flat_load_ubyte v0, v[0:1] glc
199+
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
200200
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
201201
; GCN-NEXT: s_setpc_b64 s[30:31]
202202
%load = load atomic bfloat, ptr %ptr monotonic, align 2
@@ -209,7 +209,7 @@ define i32 @atomic_load_flat_monotonic_i16_d16_hi_shift(ptr %ptr) {
209209
; GCN-LABEL: atomic_load_flat_monotonic_i16_d16_hi_shift:
210210
; GCN: ; %bb.0:
211211
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212-
; GCN-NEXT: flat_load_ubyte v0, v[0:1] glc
212+
; GCN-NEXT: flat_load_ushort v0, v[0:1] glc
213213
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
214214
; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0
215215
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -223,7 +223,7 @@ define <2 x i16> @atomic_load_flat_monotonic_i16_d16_hi_vector_insert(ptr %ptr,
223223
; GFX7-LABEL: atomic_load_flat_monotonic_i16_d16_hi_vector_insert:
224224
; GFX7: ; %bb.0:
225225
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
226-
; GFX7-NEXT: flat_load_ubyte v0, v[0:1] glc
226+
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
227227
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v3
228228
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
229229
; GFX7-NEXT: v_or_b32_e32 v1, v1, v2
@@ -237,7 +237,7 @@ define <2 x i16> @atomic_load_flat_monotonic_i16_d16_hi_vector_insert(ptr %ptr,
237237
; GFX8-LABEL: atomic_load_flat_monotonic_i16_d16_hi_vector_insert:
238238
; GFX8: ; %bb.0:
239239
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240-
; GFX8-NEXT: flat_load_ubyte v0, v[0:1] glc
240+
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
241241
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
242242
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
243243
; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
@@ -246,7 +246,7 @@ define <2 x i16> @atomic_load_flat_monotonic_i16_d16_hi_vector_insert(ptr %ptr,
246246
; GFX9-LABEL: atomic_load_flat_monotonic_i16_d16_hi_vector_insert:
247247
; GFX9: ; %bb.0:
248248
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
249-
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] glc
249+
; GFX9-NEXT: flat_load_ushort v0, v[0:1] glc
250250
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff
251251
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
252252
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
@@ -261,7 +261,7 @@ define i32 @atomic_load_flat_monotonic_i16_d16_lo_or(ptr %ptr, i16 %high) {
261261
; GFX7-LABEL: atomic_load_flat_monotonic_i16_d16_lo_or:
262262
; GFX7: ; %bb.0:
263263
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264-
; GFX7-NEXT: flat_load_ubyte v0, v[0:1] glc
264+
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
265265
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v2
266266
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
267267
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -271,7 +271,7 @@ define i32 @atomic_load_flat_monotonic_i16_d16_lo_or(ptr %ptr, i16 %high) {
271271
; GFX8-LABEL: atomic_load_flat_monotonic_i16_d16_lo_or:
272272
; GFX8: ; %bb.0:
273273
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
274-
; GFX8-NEXT: flat_load_ubyte v0, v[0:1] glc
274+
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
275275
; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v2
276276
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
277277
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -281,7 +281,7 @@ define i32 @atomic_load_flat_monotonic_i16_d16_lo_or(ptr %ptr, i16 %high) {
281281
; GFX9-LABEL: atomic_load_flat_monotonic_i16_d16_lo_or:
282282
; GFX9: ; %bb.0:
283283
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
284-
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] glc
284+
; GFX9-NEXT: flat_load_ushort v0, v[0:1] glc
285285
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v2
286286
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
287287
; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
@@ -298,7 +298,7 @@ define <2 x i16> @atomic_load_flat_monotonic_i16_d16_lo_vector_insert(ptr %ptr,
298298
; GFX7-LABEL: atomic_load_flat_monotonic_i16_d16_lo_vector_insert:
299299
; GFX7: ; %bb.0:
300300
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
301-
; GFX7-NEXT: flat_load_ubyte v0, v[0:1] glc
301+
; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
302302
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v3
303303
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
304304
; GFX7-NEXT: v_or_b32_e32 v1, v1, v2
@@ -311,7 +311,7 @@ define <2 x i16> @atomic_load_flat_monotonic_i16_d16_lo_vector_insert(ptr %ptr,
311311
; GFX8-LABEL: atomic_load_flat_monotonic_i16_d16_lo_vector_insert:
312312
; GFX8: ; %bb.0:
313313
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314-
; GFX8-NEXT: flat_load_ubyte v0, v[0:1] glc
314+
; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
315315
; GFX8-NEXT: v_and_b32_e32 v1, 0xffff0000, v2
316316
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
317317
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
@@ -320,7 +320,7 @@ define <2 x i16> @atomic_load_flat_monotonic_i16_d16_lo_vector_insert(ptr %ptr,
320320
; GFX9-LABEL: atomic_load_flat_monotonic_i16_d16_lo_vector_insert:
321321
; GFX9: ; %bb.0:
322322
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
323-
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] glc
323+
; GFX9-NEXT: flat_load_ushort v0, v[0:1] glc
324324
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff0000
325325
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
326326
; GFX9-NEXT: v_and_or_b32 v0, v2, v1, v0

0 commit comments

Comments
 (0)