Skip to content

Commit 1bb4306

Browse files
authored
PeepholeOpt: Allow introducing subregister uses on reg_sequence (llvm#127052)
This reverts d246cc6. We now handle composing subregister extracts through reg_sequence.
1 parent 93b2e47 commit 1bb4306

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

61 files changed

+3675
-3944
lines changed

llvm/lib/CodeGen/PeepholeOptimizer.cpp

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -421,12 +421,6 @@ class RegSequenceRewriter : public Rewriter {
421421
}
422422

423423
bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
424-
// Do not introduce new subregister uses in a reg_sequence. Until composing
425-
// subregister indices is supported while folding, we're just blocking
426-
// folding of subregister copies later in the function.
427-
if (NewSubReg)
428-
return false;
429-
430424
MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
431425
MO.setReg(NewReg);
432426
MO.setSubReg(NewSubReg);

llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll

Lines changed: 112 additions & 112 deletions
Original file line number | Diff line number | Diff line change
@@ -1635,7 +1635,6 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
16351635
; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0
16361636
; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v1, vcc
16371637
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s16, v3, 0
1638-
; GFX9-NEXT: v_mov_b32_e32 v7, s11
16391638
; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s16, v4, v[1:2]
16401639
; GFX9-NEXT: v_mul_hi_u32 v6, v3, v0
16411640
; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s17, v3, v[1:2]
@@ -1683,149 +1682,150 @@ define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1
16831682
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v5, 0
16841683
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
16851684
; GFX9-NEXT: v_add_u32_e32 v3, v4, v3
1686-
; GFX9-NEXT: v_add3_u32 v6, v3, v2, v6
1687-
; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v6, v[1:2]
1688-
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s10, v0
1689-
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s9, v5, v[1:2]
1685+
; GFX9-NEXT: v_add3_u32 v3, v3, v2, v6
1686+
; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s8, v3, v[1:2]
1687+
; GFX9-NEXT: v_mov_b32_e32 v6, s11
1688+
; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, s10, v0
1689+
; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s9, v5, v[1:2]
16901690
; GFX9-NEXT: v_mov_b32_e32 v4, s9
16911691
; GFX9-NEXT: s_ashr_i32 s10, s3, 31
1692-
; GFX9-NEXT: v_subb_co_u32_e64 v1, s[0:1], v7, v2, vcc
1693-
; GFX9-NEXT: v_sub_u32_e32 v2, s11, v2
1694-
; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v1
1695-
; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1]
1696-
; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v0
1697-
; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v4, vcc
1698-
; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1]
1699-
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v1
1700-
; GFX9-NEXT: v_subrev_co_u32_e32 v8, vcc, s8, v0
1701-
; GFX9-NEXT: v_cndmask_b32_e64 v7, v3, v7, s[0:1]
1702-
; GFX9-NEXT: v_subbrev_co_u32_e64 v9, s[0:1], 0, v2, vcc
1703-
; GFX9-NEXT: v_add_co_u32_e64 v10, s[0:1], 1, v5
1704-
; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v6, s[0:1]
1705-
; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v9
1706-
; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[0:1]
1707-
; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v8
1692+
; GFX9-NEXT: v_subb_co_u32_e64 v6, s[0:1], v6, v1, vcc
1693+
; GFX9-NEXT: v_sub_u32_e32 v0, s11, v1
1694+
; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v6
1695+
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1]
1696+
; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v7
1697+
; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v4, vcc
1698+
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[0:1]
1699+
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v6
1700+
; GFX9-NEXT: v_subrev_co_u32_e32 v9, vcc, s8, v7
1701+
; GFX9-NEXT: v_cndmask_b32_e64 v8, v1, v2, s[0:1]
1702+
; GFX9-NEXT: v_subbrev_co_u32_e64 v10, s[0:1], 0, v0, vcc
1703+
; GFX9-NEXT: v_add_co_u32_e64 v2, s[0:1], 1, v5
1704+
; GFX9-NEXT: v_addc_co_u32_e64 v11, s[0:1], 0, v3, s[0:1]
1705+
; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s9, v10
1706+
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[0:1]
1707+
; GFX9-NEXT: v_cmp_le_u32_e64 s[0:1], s8, v9
17081708
; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[0:1]
1709-
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v9
1710-
; GFX9-NEXT: v_cndmask_b32_e64 v12, v3, v12, s[0:1]
1711-
; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v10
1709+
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], s9, v10
1710+
; GFX9-NEXT: v_cndmask_b32_e64 v12, v1, v12, s[0:1]
1711+
; GFX9-NEXT: v_add_co_u32_e64 v13, s[0:1], 1, v2
17121712
; GFX9-NEXT: v_addc_co_u32_e64 v14, s[0:1], 0, v11, s[0:1]
17131713
; GFX9-NEXT: s_add_u32 s0, s18, s6
17141714
; GFX9-NEXT: s_addc_u32 s1, s19, s6
17151715
; GFX9-NEXT: s_add_u32 s2, s2, s10
17161716
; GFX9-NEXT: s_mov_b32 s11, s10
17171717
; GFX9-NEXT: s_addc_u32 s3, s3, s10
17181718
; GFX9-NEXT: s_xor_b64 s[2:3], s[2:3], s[10:11]
1719-
; GFX9-NEXT: v_cvt_f32_u32_e32 v3, s3
1719+
; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s3
17201720
; GFX9-NEXT: v_cvt_f32_u32_e32 v15, s2
1721-
; GFX9-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v4, vcc
1722-
; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3
1723-
; GFX9-NEXT: v_add_f32_e32 v3, v3, v15
1724-
; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3
1725-
; GFX9-NEXT: v_subrev_co_u32_e32 v15, vcc, s8, v8
1726-
; GFX9-NEXT: v_subbrev_co_u32_e32 v16, vcc, 0, v2, vcc
1727-
; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v3
1728-
; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
1729-
; GFX9-NEXT: v_trunc_f32_e32 v4, v3
1730-
; GFX9-NEXT: v_mul_f32_e32 v3, 0xcf800000, v4
1731-
; GFX9-NEXT: v_add_f32_e32 v2, v3, v2
1732-
; GFX9-NEXT: v_cvt_u32_f32_e32 v17, v2
1721+
; GFX9-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v4, vcc
1722+
; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1
1723+
; GFX9-NEXT: v_add_f32_e32 v1, v1, v15
1724+
; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1
1725+
; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s8, v9
1726+
; GFX9-NEXT: v_subbrev_co_u32_e32 v15, vcc, 0, v0, vcc
1727+
; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v1
1728+
; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
1729+
; GFX9-NEXT: v_trunc_f32_e32 v16, v1
1730+
; GFX9-NEXT: v_mul_f32_e32 v1, 0xcf800000, v16
1731+
; GFX9-NEXT: v_add_f32_e32 v0, v1, v0
1732+
; GFX9-NEXT: v_cvt_u32_f32_e32 v17, v0
17331733
; GFX9-NEXT: s_xor_b64 s[8:9], s[0:1], s[6:7]
17341734
; GFX9-NEXT: s_sub_u32 s5, 0, s2
17351735
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12
1736-
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s5, v17, 0
1737-
; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v4
1736+
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s5, v17, 0
1737+
; GFX9-NEXT: v_cndmask_b32_e32 v12, v2, v13, vcc
1738+
; GFX9-NEXT: v_cvt_u32_f32_e32 v13, v16
17381739
; GFX9-NEXT: s_subb_u32 s20, 0, s3
17391740
; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v14, vcc
1740-
; GFX9-NEXT: v_cndmask_b32_e32 v10, v10, v13, vcc
1741-
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s5, v12, v[3:4]
1742-
; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v7
1743-
; GFX9-NEXT: v_mul_lo_u32 v7, v12, v2
1744-
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[18:19], s20, v17, v[3:4]
1745-
; GFX9-NEXT: v_cndmask_b32_e64 v4, v6, v11, s[0:1]
1746-
; GFX9-NEXT: v_cndmask_b32_e32 v6, v8, v15, vcc
1747-
; GFX9-NEXT: v_mul_lo_u32 v8, v17, v3
1748-
; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[0:1]
1749-
; GFX9-NEXT: v_mul_hi_u32 v10, v17, v2
1750-
; GFX9-NEXT: v_cndmask_b32_e32 v9, v9, v16, vcc
1751-
; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v8
1752-
; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1753-
; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v10
1754-
; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1755-
; GFX9-NEXT: v_mul_lo_u32 v10, v12, v3
1756-
; GFX9-NEXT: v_mul_hi_u32 v2, v12, v2
1757-
; GFX9-NEXT: v_add_u32_e32 v7, v8, v7
1758-
; GFX9-NEXT: v_mul_hi_u32 v8, v17, v3
1759-
; GFX9-NEXT: v_mul_hi_u32 v3, v12, v3
1760-
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v10, v2
1741+
; GFX9-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
1742+
; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s5, v13, v[1:2]
1743+
; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v8
1744+
; GFX9-NEXT: v_cndmask_b32_e64 v8, v3, v11, s[0:1]
1745+
; GFX9-NEXT: v_mad_u64_u32 v[1:2], s[18:19], s20, v17, v[1:2]
1746+
; GFX9-NEXT: v_mul_lo_u32 v2, v13, v0
1747+
; GFX9-NEXT: v_cndmask_b32_e32 v9, v10, v15, vcc
1748+
; GFX9-NEXT: v_mul_lo_u32 v3, v17, v1
1749+
; GFX9-NEXT: v_mul_hi_u32 v10, v17, v0
1750+
; GFX9-NEXT: v_mul_hi_u32 v0, v13, v0
1751+
; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[0:1]
1752+
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3
1753+
; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
1754+
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v10
1755+
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1756+
; GFX9-NEXT: v_mul_lo_u32 v10, v13, v1
1757+
; GFX9-NEXT: v_add_u32_e32 v2, v3, v2
1758+
; GFX9-NEXT: v_mul_hi_u32 v3, v17, v1
1759+
; GFX9-NEXT: v_mul_hi_u32 v1, v13, v1
1760+
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v10, v0
17611761
; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1762-
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v8
1763-
; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1764-
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7
1765-
; GFX9-NEXT: v_add_u32_e32 v8, v10, v8
1766-
; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1767-
; GFX9-NEXT: v_add3_u32 v3, v8, v7, v3
1768-
; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v17, v2
1769-
; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v12, v3, vcc
1770-
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[18:19], s5, v7, 0
1771-
; GFX9-NEXT: v_cndmask_b32_e64 v6, v0, v6, s[0:1]
1772-
; GFX9-NEXT: v_cndmask_b32_e64 v9, v1, v9, s[0:1]
1762+
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v3
1763+
; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
1764+
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
1765+
; GFX9-NEXT: v_add_u32_e32 v3, v10, v3
1766+
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1767+
; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v17, v0
1768+
; GFX9-NEXT: v_add3_u32 v1, v3, v2, v1
1769+
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[18:19], s5, v10, 0
1770+
; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, v13, v1, vcc
17731771
; GFX9-NEXT: v_mov_b32_e32 v0, v3
1774-
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s5, v8, v[0:1]
1775-
; GFX9-NEXT: v_xor_b32_e32 v10, s17, v4
1772+
; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v4, s[0:1]
1773+
; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[0:1]
1774+
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s5, v11, v[0:1]
17761775
; GFX9-NEXT: v_xor_b32_e32 v5, s16, v5
1777-
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s20, v7, v[0:1]
1778-
; GFX9-NEXT: v_mov_b32_e32 v11, s17
1776+
; GFX9-NEXT: v_xor_b32_e32 v8, s17, v8
1777+
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s20, v10, v[0:1]
1778+
; GFX9-NEXT: v_mov_b32_e32 v9, s17
17791779
; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, s16, v5
1780-
; GFX9-NEXT: v_xor_b32_e32 v4, s4, v6
1781-
; GFX9-NEXT: v_mul_lo_u32 v5, v8, v2
1782-
; GFX9-NEXT: v_mul_lo_u32 v6, v7, v3
1783-
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v10, v11, vcc
1784-
; GFX9-NEXT: v_mul_hi_u32 v10, v7, v2
1785-
; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v6
1786-
; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1787-
; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v10
1780+
; GFX9-NEXT: v_xor_b32_e32 v4, s4, v7
1781+
; GFX9-NEXT: v_mul_lo_u32 v5, v11, v2
1782+
; GFX9-NEXT: v_mul_lo_u32 v7, v10, v3
1783+
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v8, v9, vcc
1784+
; GFX9-NEXT: v_mul_hi_u32 v8, v10, v2
1785+
; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v7
1786+
; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1787+
; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v8
17881788
; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
1789-
; GFX9-NEXT: v_mul_lo_u32 v10, v8, v3
1790-
; GFX9-NEXT: v_mul_hi_u32 v2, v8, v2
1791-
; GFX9-NEXT: v_add_u32_e32 v5, v6, v5
1792-
; GFX9-NEXT: v_mul_hi_u32 v6, v7, v3
1793-
; GFX9-NEXT: v_mul_hi_u32 v3, v8, v3
1794-
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v10, v2
1795-
; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1796-
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6
1797-
; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1789+
; GFX9-NEXT: v_mul_lo_u32 v8, v11, v3
1790+
; GFX9-NEXT: v_mul_hi_u32 v2, v11, v2
1791+
; GFX9-NEXT: v_add_u32_e32 v5, v7, v5
1792+
; GFX9-NEXT: v_mul_hi_u32 v7, v10, v3
1793+
; GFX9-NEXT: v_mul_hi_u32 v3, v11, v3
1794+
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v8, v2
1795+
; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1796+
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7
1797+
; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
17981798
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5
1799-
; GFX9-NEXT: v_add_u32_e32 v6, v10, v6
1799+
; GFX9-NEXT: v_add_u32_e32 v7, v8, v7
18001800
; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
1801-
; GFX9-NEXT: v_add3_u32 v3, v6, v5, v3
1802-
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v7, v2
1803-
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v8, v3, vcc
1801+
; GFX9-NEXT: v_add3_u32 v3, v7, v5, v3
1802+
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v10, v2
1803+
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v11, v3, vcc
18041804
; GFX9-NEXT: v_mul_lo_u32 v5, s9, v2
1805-
; GFX9-NEXT: v_mul_lo_u32 v6, s8, v3
1806-
; GFX9-NEXT: v_mul_hi_u32 v8, s8, v2
1805+
; GFX9-NEXT: v_mul_lo_u32 v7, s8, v3
1806+
; GFX9-NEXT: v_mul_hi_u32 v9, s8, v2
18071807
; GFX9-NEXT: v_mul_hi_u32 v2, s9, v2
18081808
; GFX9-NEXT: v_mul_hi_u32 v12, s9, v3
1809-
; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v6
1810-
; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1811-
; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v8
1809+
; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v7
1810+
; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1811+
; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v5, v9
18121812
; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
1813-
; GFX9-NEXT: v_mul_lo_u32 v8, s9, v3
1814-
; GFX9-NEXT: v_add_u32_e32 v5, v6, v5
1815-
; GFX9-NEXT: v_mul_hi_u32 v6, s8, v3
1816-
; GFX9-NEXT: v_xor_b32_e32 v9, s4, v9
1817-
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v8, v2
1818-
; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1819-
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6
1820-
; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1813+
; GFX9-NEXT: v_mul_lo_u32 v9, s9, v3
1814+
; GFX9-NEXT: v_add_u32_e32 v5, v7, v5
1815+
; GFX9-NEXT: v_mul_hi_u32 v7, s8, v3
1816+
; GFX9-NEXT: v_xor_b32_e32 v6, s4, v6
1817+
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v9, v2
1818+
; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1819+
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7
1820+
; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
18211821
; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v2, v5
18221822
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[0:1], s2, v10, 0
1823+
; GFX9-NEXT: v_mov_b32_e32 v8, s4
18231824
; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1824-
; GFX9-NEXT: v_add_u32_e32 v6, v8, v6
1825-
; GFX9-NEXT: v_mov_b32_e32 v7, s4
18261825
; GFX9-NEXT: v_subrev_co_u32_e32 v4, vcc, s4, v4
1826+
; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v6, v8, vcc
1827+
; GFX9-NEXT: v_add_u32_e32 v6, v9, v7
18271828
; GFX9-NEXT: v_add3_u32 v8, v6, v11, v12
1828-
; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v9, v7, vcc
18291829
; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[0:1], s2, v8, v[3:4]
18301830
; GFX9-NEXT: v_mov_b32_e32 v9, s9
18311831
; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, s8, v2

llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll

Lines changed: 28 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -2218,31 +2218,31 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
22182218
; GFX1264-NEXT: s_clause 0x1
22192219
; GFX1264-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
22202220
; GFX1264-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
2221-
; GFX1264-NEXT: s_mov_b64 s[6:7], exec
2222-
; GFX1264-NEXT: s_mov_b32 s11, 0
2223-
; GFX1264-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0
22242221
; GFX1264-NEXT: s_mov_b64 s[8:9], exec
2222+
; GFX1264-NEXT: s_mov_b32 s11, 0
2223+
; GFX1264-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0
2224+
; GFX1264-NEXT: s_mov_b64 s[6:7], exec
22252225
; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2226-
; GFX1264-NEXT: v_mbcnt_hi_u32_b32 v2, s7, v0
2226+
; GFX1264-NEXT: v_mbcnt_hi_u32_b32 v2, s9, v0
22272227
; GFX1264-NEXT: ; implicit-def: $vgpr0_vgpr1
22282228
; GFX1264-NEXT: v_cmpx_eq_u32_e32 0, v2
22292229
; GFX1264-NEXT: s_cbranch_execz .LBB4_2
22302230
; GFX1264-NEXT: ; %bb.1:
2231-
; GFX1264-NEXT: s_bcnt1_i32_b64 s10, s[6:7]
2232-
; GFX1264-NEXT: s_mov_b32 s15, 0x31016000
2231+
; GFX1264-NEXT: s_bcnt1_i32_b64 s10, s[8:9]
22332232
; GFX1264-NEXT: s_wait_kmcnt 0x0
2234-
; GFX1264-NEXT: s_mul_u64 s[6:7], s[4:5], s[10:11]
2235-
; GFX1264-NEXT: s_mov_b32 s14, -1
2233+
; GFX1264-NEXT: s_mul_u64 s[8:9], s[4:5], s[10:11]
2234+
; GFX1264-NEXT: s_mov_b32 s11, 0x31016000
22362235
; GFX1264-NEXT: s_wait_alu 0xfffe
2237-
; GFX1264-NEXT: v_mov_b32_e32 v0, s6
2238-
; GFX1264-NEXT: v_mov_b32_e32 v1, s7
2239-
; GFX1264-NEXT: s_mov_b32 s12, s2
2240-
; GFX1264-NEXT: s_mov_b32 s13, s3
2241-
; GFX1264-NEXT: buffer_atomic_add_u64 v[0:1], off, s[12:15], null th:TH_ATOMIC_RETURN scope:SCOPE_DEV
2236+
; GFX1264-NEXT: v_mov_b32_e32 v0, s8
2237+
; GFX1264-NEXT: v_mov_b32_e32 v1, s9
2238+
; GFX1264-NEXT: s_mov_b32 s10, -1
2239+
; GFX1264-NEXT: s_mov_b32 s8, s2
2240+
; GFX1264-NEXT: s_mov_b32 s9, s3
2241+
; GFX1264-NEXT: buffer_atomic_add_u64 v[0:1], off, s[8:11], null th:TH_ATOMIC_RETURN scope:SCOPE_DEV
22422242
; GFX1264-NEXT: s_wait_loadcnt 0x0
22432243
; GFX1264-NEXT: global_inv scope:SCOPE_DEV
22442244
; GFX1264-NEXT: .LBB4_2:
2245-
; GFX1264-NEXT: s_or_b64 exec, exec, s[8:9]
2245+
; GFX1264-NEXT: s_or_b64 exec, exec, s[6:7]
22462246
; GFX1264-NEXT: s_wait_kmcnt 0x0
22472247
; GFX1264-NEXT: v_readfirstlane_b32 s3, v1
22482248
; GFX1264-NEXT: v_readfirstlane_b32 s2, v0
@@ -5800,31 +5800,31 @@ define amdgpu_kernel void @sub_i64_uniform(ptr addrspace(1) %out, ptr addrspace(
58005800
; GFX1264-NEXT: s_clause 0x1
58015801
; GFX1264-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
58025802
; GFX1264-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
5803-
; GFX1264-NEXT: s_mov_b64 s[6:7], exec
5804-
; GFX1264-NEXT: s_mov_b32 s11, 0
5805-
; GFX1264-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0
58065803
; GFX1264-NEXT: s_mov_b64 s[8:9], exec
5804+
; GFX1264-NEXT: s_mov_b32 s11, 0
5805+
; GFX1264-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0
5806+
; GFX1264-NEXT: s_mov_b64 s[6:7], exec
58075807
; GFX1264-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5808-
; GFX1264-NEXT: v_mbcnt_hi_u32_b32 v2, s7, v0
5808+
; GFX1264-NEXT: v_mbcnt_hi_u32_b32 v2, s9, v0
58095809
; GFX1264-NEXT: ; implicit-def: $vgpr0_vgpr1
58105810
; GFX1264-NEXT: v_cmpx_eq_u32_e32 0, v2
58115811
; GFX1264-NEXT: s_cbranch_execz .LBB10_2
58125812
; GFX1264-NEXT: ; %bb.1:
5813-
; GFX1264-NEXT: s_bcnt1_i32_b64 s10, s[6:7]
5814-
; GFX1264-NEXT: s_mov_b32 s15, 0x31016000
5813+
; GFX1264-NEXT: s_bcnt1_i32_b64 s10, s[8:9]
58155814
; GFX1264-NEXT: s_wait_kmcnt 0x0
5816-
; GFX1264-NEXT: s_mul_u64 s[6:7], s[4:5], s[10:11]
5817-
; GFX1264-NEXT: s_mov_b32 s14, -1
5815+
; GFX1264-NEXT: s_mul_u64 s[8:9], s[4:5], s[10:11]
5816+
; GFX1264-NEXT: s_mov_b32 s11, 0x31016000
58185817
; GFX1264-NEXT: s_wait_alu 0xfffe
5819-
; GFX1264-NEXT: v_mov_b32_e32 v0, s6
5820-
; GFX1264-NEXT: v_mov_b32_e32 v1, s7
5821-
; GFX1264-NEXT: s_mov_b32 s12, s2
5822-
; GFX1264-NEXT: s_mov_b32 s13, s3
5823-
; GFX1264-NEXT: buffer_atomic_sub_u64 v[0:1], off, s[12:15], null th:TH_ATOMIC_RETURN scope:SCOPE_DEV
5818+
; GFX1264-NEXT: v_mov_b32_e32 v0, s8
5819+
; GFX1264-NEXT: v_mov_b32_e32 v1, s9
5820+
; GFX1264-NEXT: s_mov_b32 s10, -1
5821+
; GFX1264-NEXT: s_mov_b32 s8, s2
5822+
; GFX1264-NEXT: s_mov_b32 s9, s3
5823+
; GFX1264-NEXT: buffer_atomic_sub_u64 v[0:1], off, s[8:11], null th:TH_ATOMIC_RETURN scope:SCOPE_DEV
58245824
; GFX1264-NEXT: s_wait_loadcnt 0x0
58255825
; GFX1264-NEXT: global_inv scope:SCOPE_DEV
58265826
; GFX1264-NEXT: .LBB10_2:
5827-
; GFX1264-NEXT: s_or_b64 exec, exec, s[8:9]
5827+
; GFX1264-NEXT: s_or_b64 exec, exec, s[6:7]
58285828
; GFX1264-NEXT: s_wait_kmcnt 0x0
58295829
; GFX1264-NEXT: v_mad_co_u64_u32 v[3:4], null, s4, v2, 0
58305830
; GFX1264-NEXT: v_readfirstlane_b32 s2, v0

0 commit comments

Comments
 (0)