Skip to content

Commit c771b67

Browse files
authored
[AMDGPU] Promote immediate offset to atomics (#94043)
1 parent d55e235 commit c771b67

File tree

2 files changed

+165
-7
lines changed

2 files changed

+165
-7
lines changed

llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2052,9 +2052,6 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
20522052
MemInfoMap &Visited,
20532053
SmallPtrSet<MachineInstr *, 4> &AnchorList) const {
20542054

2055-
if (!(MI.mayLoad() ^ MI.mayStore()))
2056-
return false;
2057-
20582055
if (!STM->hasFlatInstOffsets() || !SIInstrInfo::isFLAT(MI))
20592056
return false;
20602057

@@ -2065,10 +2062,6 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
20652062
unsigned AS = SIInstrInfo::isFLATGlobal(MI) ? AMDGPUAS::GLOBAL_ADDRESS
20662063
: AMDGPUAS::FLAT_ADDRESS;
20672064

2068-
if (MI.mayLoad() &&
2069-
TII->getNamedOperand(MI, AMDGPU::OpName::vdata) != nullptr)
2070-
return false;
2071-
20722065
if (AnchorList.count(&MI))
20732066
return false;
20742067

llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,3 +282,168 @@ body: |
282282
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
283283
FLAT_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec, implicit $flat_scr
284284
...
285+
286+
---
287+
# GCN-LABEL: name: diffoporder_add_global_atomic_cmpswap
288+
# GFX9: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 1000, 0,
289+
# GFX9: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
290+
291+
# GFX8: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
292+
# GFX8: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
293+
294+
name: diffoporder_add_global_atomic_cmpswap
295+
body: |
296+
bb.0.entry:
297+
298+
%0:vreg_64 = COPY $vgpr0_vgpr1
299+
300+
%1:sgpr_32 = S_MOV_B32 4000
301+
%2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
302+
%4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
303+
%6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
304+
GLOBAL_ATOMIC_CMPSWAP %6:vreg_64, %0:vreg_64, 0, 0, implicit $exec
305+
306+
%8:sgpr_32 = S_MOV_B32 3000
307+
%9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
308+
%11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
309+
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
310+
GLOBAL_ATOMIC_CMPSWAP %13:vreg_64, %0:vreg_64, 0, 0, implicit $exec
311+
...
312+
313+
---
314+
# GCN-LABEL: name: diffoporder_add_flat_atomic_cmpswap
315+
# GFX9: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 1000, 0,
316+
# GFX9: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
317+
318+
# GFX8: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
319+
# GFX8: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
320+
321+
name: diffoporder_add_flat_atomic_cmpswap
322+
body: |
323+
bb.0.entry:
324+
325+
%0:vreg_64 = COPY $vgpr0_vgpr1
326+
327+
%1:sgpr_32 = S_MOV_B32 4000
328+
%2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
329+
%4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
330+
%6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
331+
FLAT_ATOMIC_CMPSWAP %6:vreg_64, %0:vreg_64, 0, 0, implicit $exec, implicit $flat_scr
332+
333+
334+
%8:sgpr_32 = S_MOV_B32 3000
335+
%9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
336+
%11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
337+
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
338+
FLAT_ATOMIC_CMPSWAP %13:vreg_64, %0:vreg_64, 0, 0, implicit $exec, implicit $flat_scr
339+
...
340+
341+
---
342+
# GCN-LABEL: name: diffoporder_add_global_atomic_add
343+
# GFX9: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 1000, 0,
344+
# GFX9: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
345+
346+
# GFX8: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
347+
# GFX8: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
348+
349+
name: diffoporder_add_global_atomic_add
350+
body: |
351+
bb.0.entry:
352+
353+
%0:vreg_64 = COPY $vgpr0_vgpr1
354+
355+
%1:sgpr_32 = S_MOV_B32 4000
356+
%2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
357+
%4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
358+
%6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
359+
GLOBAL_ATOMIC_ADD %6:vreg_64, %0.sub0, 0, 0, implicit $exec
360+
361+
%8:sgpr_32 = S_MOV_B32 3000
362+
%9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
363+
%11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
364+
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
365+
GLOBAL_ATOMIC_ADD %13:vreg_64, %0.sub0, 0, 0, implicit $exec
366+
...
367+
368+
---
369+
# GCN-LABEL: name: diffoporder_add_flat_atomic_add
370+
# GFX9: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 1000, 0,
371+
# GFX9: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
372+
373+
# GFX8: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
374+
# GFX8: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
375+
376+
name: diffoporder_add_flat_atomic_add
377+
body: |
378+
bb.0.entry:
379+
380+
%0:vreg_64 = COPY $vgpr0_vgpr1
381+
382+
%1:sgpr_32 = S_MOV_B32 4000
383+
%2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
384+
%4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
385+
%6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
386+
FLAT_ATOMIC_ADD %6:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
387+
388+
389+
%8:sgpr_32 = S_MOV_B32 3000
390+
%9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
391+
%11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
392+
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
393+
FLAT_ATOMIC_ADD %13:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
394+
...
395+
396+
---
397+
# GCN-LABEL: name: diffoporder_add_global_atomic_add_rtn
398+
# GFX9: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 1000, 0,
399+
# GFX9: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
400+
401+
# GFX8: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
402+
# GFX8: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
403+
404+
name: diffoporder_add_global_atomic_add_rtn
405+
body: |
406+
bb.0.entry:
407+
408+
%0:vreg_64 = COPY $vgpr0_vgpr1
409+
410+
%1:sgpr_32 = S_MOV_B32 4000
411+
%2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
412+
%4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
413+
%6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
414+
%14:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN %6:vreg_64, %0.sub0, 0, 0, implicit $exec
415+
416+
%8:sgpr_32 = S_MOV_B32 3000
417+
%9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
418+
%11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
419+
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
420+
%15:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN %13:vreg_64, %0.sub0, 0, 0, implicit $exec
421+
...
422+
423+
---
424+
# GCN-LABEL: name: diffoporder_add_flat_atomic_add_rtn
425+
# GFX9: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 1000, 0,
426+
# GFX9: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
427+
428+
# GFX8: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
429+
# GFX8: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
430+
431+
name: diffoporder_add_flat_atomic_add_rtn
432+
body: |
433+
bb.0.entry:
434+
435+
%0:vreg_64 = COPY $vgpr0_vgpr1
436+
437+
%1:sgpr_32 = S_MOV_B32 4000
438+
%2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
439+
%4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
440+
%6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
441+
%14:vgpr_32 = FLAT_ATOMIC_ADD_RTN %6:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
442+
443+
444+
%8:sgpr_32 = S_MOV_B32 3000
445+
%9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
446+
%11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
447+
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
448+
%15:vgpr_32 = FLAT_ATOMIC_ADD_RTN %13:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
449+
...

0 commit comments

Comments (0)