Skip to content

Commit e215a1e

Browse files
authored
[AMDGPU] Still set up the two SGPRs for queue ptr even if it is COV5 (#112403)
1 parent d30a6dc commit e215a1e

File tree

571 files changed

+124086
-133397
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

571 files changed

+124086
-133397
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -465,9 +465,7 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
465465
CCInfo.AllocateReg(DispatchPtrReg);
466466
}
467467

468-
const Module *M = MF.getFunction().getParent();
469-
if (UserSGPRInfo.hasQueuePtr() &&
470-
AMDGPU::getAMDHSACodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
468+
if (UserSGPRInfo.hasQueuePtr()) {
471469
Register QueuePtrReg = Info.addQueuePtr(TRI);
472470
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
473471
CCInfo.AllocateReg(QueuePtrReg);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2376,9 +2376,7 @@ void SITargetLowering::allocateSpecialInputSGPRs(
23762376
if (UserSGPRInfo.hasDispatchPtr())
23772377
allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);
23782378

2379-
const Module *M = MF.getFunction().getParent();
2380-
if (UserSGPRInfo.hasQueuePtr() &&
2381-
AMDGPU::getAMDHSACodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5)
2379+
if (UserSGPRInfo.hasQueuePtr())
23822380
allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);
23832381

23842382
// Implicit arg ptr takes the place of the kernarg segment pointer. This is a
@@ -2429,9 +2427,7 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
24292427
CCInfo.AllocateReg(DispatchPtrReg);
24302428
}
24312429

2432-
const Module *M = MF.getFunction().getParent();
2433-
if (UserSGPRInfo.hasQueuePtr() &&
2434-
AMDGPU::getAMDHSACodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
2430+
if (UserSGPRInfo.hasQueuePtr()) {
24352431
Register QueuePtrReg = Info.addQueuePtr(TRI);
24362432
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
24372433
CCInfo.AllocateReg(QueuePtrReg);

llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,28 +6,28 @@ define amdgpu_kernel void @s_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
66
; GFX11-LABEL: s_add_u64:
77
; GFX11: ; %bb.0: ; %entry
88
; GFX11-NEXT: s_clause 0x1
9-
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
10-
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
9+
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
10+
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
1111
; GFX11-NEXT: v_mov_b32_e32 v2, 0
1212
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
13-
; GFX11-NEXT: s_add_u32 s0, s6, s0
14-
; GFX11-NEXT: s_addc_u32 s1, s7, s1
13+
; GFX11-NEXT: s_add_u32 s2, s2, s4
14+
; GFX11-NEXT: s_addc_u32 s3, s3, s5
1515
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
16-
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
17-
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
16+
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
17+
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
1818
; GFX11-NEXT: s_endpgm
1919
;
2020
; GFX12-LABEL: s_add_u64:
2121
; GFX12: ; %bb.0: ; %entry
2222
; GFX12-NEXT: s_clause 0x1
23-
; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
24-
; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
23+
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
24+
; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
2525
; GFX12-NEXT: v_mov_b32_e32 v2, 0
2626
; GFX12-NEXT: s_wait_kmcnt 0x0
27-
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[6:7], s[0:1]
27+
; GFX12-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[4:5]
2828
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
29-
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
30-
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[4:5]
29+
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
30+
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
3131
; GFX12-NEXT: s_endpgm
3232
entry:
3333
%add = add i64 %a, %b
@@ -52,28 +52,28 @@ define amdgpu_kernel void @s_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
5252
; GFX11-LABEL: s_sub_u64:
5353
; GFX11: ; %bb.0: ; %entry
5454
; GFX11-NEXT: s_clause 0x1
55-
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
56-
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
55+
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
56+
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
5757
; GFX11-NEXT: v_mov_b32_e32 v2, 0
5858
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
59-
; GFX11-NEXT: s_sub_u32 s0, s6, s0
60-
; GFX11-NEXT: s_subb_u32 s1, s7, s1
59+
; GFX11-NEXT: s_sub_u32 s2, s2, s4
60+
; GFX11-NEXT: s_subb_u32 s3, s3, s5
6161
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
62-
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
63-
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
62+
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
63+
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
6464
; GFX11-NEXT: s_endpgm
6565
;
6666
; GFX12-LABEL: s_sub_u64:
6767
; GFX12: ; %bb.0: ; %entry
6868
; GFX12-NEXT: s_clause 0x1
69-
; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
70-
; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
69+
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
70+
; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
7171
; GFX12-NEXT: v_mov_b32_e32 v2, 0
7272
; GFX12-NEXT: s_wait_kmcnt 0x0
73-
; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[6:7], s[0:1]
73+
; GFX12-NEXT: s_sub_nc_u64 s[2:3], s[2:3], s[4:5]
7474
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
75-
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
76-
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[4:5]
75+
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
76+
; GFX12-NEXT: global_store_b64 v2, v[0:1], s[0:1]
7777
; GFX12-NEXT: s_endpgm
7878
entry:
7979
%sub = sub i64 %a, %b

0 commit comments

Comments
 (0)