Skip to content

Commit ca33649

Browse files
committed
Revert "[AMDGPU] Still set up the two SGPRs for queue ptr even it is COV5 (#112403)"
This reverts commit e215a1e because it broke both the HIP and OpenMP buildbots.
1 parent b99d411 commit ca33649

File tree

571 files changed

+133357
-124046
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

571 files changed

+133357
-124046
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,9 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
465465
CCInfo.AllocateReg(DispatchPtrReg);
466466
}
467467

468-
if (UserSGPRInfo.hasQueuePtr()) {
468+
const Module *M = MF.getFunction().getParent();
469+
if (UserSGPRInfo.hasQueuePtr() &&
470+
AMDGPU::getAMDHSACodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
469471
Register QueuePtrReg = Info.addQueuePtr(TRI);
470472
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
471473
CCInfo.AllocateReg(QueuePtrReg);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2376,7 +2376,9 @@ void SITargetLowering::allocateSpecialInputSGPRs(
23762376
if (UserSGPRInfo.hasDispatchPtr())
23772377
allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);
23782378

2379-
if (UserSGPRInfo.hasQueuePtr())
2379+
const Module *M = MF.getFunction().getParent();
2380+
if (UserSGPRInfo.hasQueuePtr() &&
2381+
AMDGPU::getAMDHSACodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5)
23802382
allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);
23812383

23822384
// Implicit arg ptr takes the place of the kernarg segment pointer. This is a
@@ -2427,7 +2429,9 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
24272429
CCInfo.AllocateReg(DispatchPtrReg);
24282430
}
24292431

2430-
if (UserSGPRInfo.hasQueuePtr()) {
2432+
const Module *M = MF.getFunction().getParent();
2433+
if (UserSGPRInfo.hasQueuePtr() &&
2434+
AMDGPU::getAMDHSACodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
24312435
Register QueuePtrReg = Info.addQueuePtr(TRI);
24322436
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
24332437
CCInfo.AllocateReg(QueuePtrReg);

llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,28 +6,28 @@ define amdgpu_kernel void @s_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
66
; GFX11-LABEL: s_add_u64:
77
; GFX11: ; %bb.0: ; %entry
88
; GFX11-NEXT: s_clause 0x1
9-
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
10-
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
9+
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
10+
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
1111
; GFX11-NEXT: v_mov_b32_e32 v2, 0
1212
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
13-
; GFX11-NEXT: s_add_u32 s2, s2, s4
14-
; GFX11-NEXT: s_addc_u32 s3, s3, s5
13+
; GFX11-NEXT: s_add_u32 s0, s6, s0
14+
; GFX11-NEXT: s_addc_u32 s1, s7, s1
1515
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
16-
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
17-
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
16+
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
17+
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
1818
; GFX11-NEXT: s_endpgm
1919
;
2020
; GFX12-LABEL: s_add_u64:
2121
; GFX12: ; %bb.0: ; %entry
2222
; GFX12-NEXT: s_clause 0x1
23-
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
24-
; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
23+
; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
24+
; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
2525
; GFX12-NEXT: v_mov_b32_e32 v2, 0
2626
; GFX12-NEXT: s_wait_kmcnt 0x0
27-
; GFX12-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[4:5]
27+
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[6:7], s[0:1]
2828
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
29-
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
30-
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
29+
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
30+
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[4:5]
3131
; GFX12-NEXT: s_endpgm
3232
entry:
3333
%add = add i64 %a, %b
@@ -52,28 +52,28 @@ define amdgpu_kernel void @s_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
5252
; GFX11-LABEL: s_sub_u64:
5353
; GFX11: ; %bb.0: ; %entry
5454
; GFX11-NEXT: s_clause 0x1
55-
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
56-
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
55+
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
56+
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
5757
; GFX11-NEXT: v_mov_b32_e32 v2, 0
5858
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
59-
; GFX11-NEXT: s_sub_u32 s2, s2, s4
60-
; GFX11-NEXT: s_subb_u32 s3, s3, s5
59+
; GFX11-NEXT: s_sub_u32 s0, s6, s0
60+
; GFX11-NEXT: s_subb_u32 s1, s7, s1
6161
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
62-
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
63-
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
62+
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
63+
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
6464
; GFX11-NEXT: s_endpgm
6565
;
6666
; GFX12-LABEL: s_sub_u64:
6767
; GFX12: ; %bb.0: ; %entry
6868
; GFX12-NEXT: s_clause 0x1
69-
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
70-
; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
69+
; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
70+
; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
7171
; GFX12-NEXT: v_mov_b32_e32 v2, 0
7272
; GFX12-NEXT: s_wait_kmcnt 0x0
73-
; GFX12-NEXT: s_sub_nc_u64 s[2:3], s[2:3], s[4:5]
73+
; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[6:7], s[0:1]
7474
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
75-
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
76-
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
75+
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
76+
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[4:5]
7777
; GFX12-NEXT: s_endpgm
7878
entry:
7979
%sub = sub i64 %a, %b

0 commit comments

Comments
 (0)