Skip to content

[AMDGPU] Disallow negative s_load offsets in isLegalAddressingMode #91327

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1610,6 +1610,16 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return false;
}

if ((AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
AM.BaseOffs < 0) {
// Scalar (non-buffer) loads can only use a negative offset if
// soffset+offset is non-negative. Since the compiler can only prove that
// in a few special cases, it is safer to claim that negative offsets are
// not supported.
return false;
}

if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
return true;

Expand Down
89 changes: 62 additions & 27 deletions llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
Original file line number Diff line number Diff line change
Expand Up @@ -279,33 +279,19 @@ end:
}

define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add the constant 32-bit test (6)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

; GFX678-LABEL: test_sink_smem_offset_neg400:
; GFX678: ; %bb.0: ; %entry
; GFX678-NEXT: s_add_u32 s0, s0, 0xfffffe70
; GFX678-NEXT: s_addc_u32 s1, s1, -1
; GFX678-NEXT: .LBB5_1: ; %loop
; GFX678-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX678-NEXT: s_waitcnt lgkmcnt(0)
; GFX678-NEXT: s_load_dword s3, s[0:1], 0x0
; GFX678-NEXT: s_add_i32 s2, s2, -1
; GFX678-NEXT: s_cmp_lg_u32 s2, 0
; GFX678-NEXT: s_cbranch_scc1 .LBB5_1
; GFX678-NEXT: ; %bb.2: ; %end
; GFX678-NEXT: s_endpgm
;
; GFX9-LABEL: test_sink_smem_offset_neg400:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: .LBB5_1: ; %loop
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_add_i32 s2, s2, -1
; GFX9-NEXT: s_add_u32 s4, s0, 0xfffffe70
; GFX9-NEXT: s_addc_u32 s5, s1, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s3, s[4:5], 0x0
; GFX9-NEXT: s_cmp_lg_u32 s2, 0
; GFX9-NEXT: s_cbranch_scc1 .LBB5_1
; GFX9-NEXT: ; %bb.2: ; %end
; GFX9-NEXT: s_endpgm
; GFX6789-LABEL: test_sink_smem_offset_neg400:
; GFX6789: ; %bb.0: ; %entry
; GFX6789-NEXT: s_add_u32 s0, s0, 0xfffffe70
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For GFX9 this calculation is done outside the loop. With just #89165 applied, this calculation would be done inside the loop.

; GFX6789-NEXT: s_addc_u32 s1, s1, -1
; GFX6789-NEXT: .LBB5_1: ; %loop
; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
; GFX6789-NEXT: s_load_dword s3, s[0:1], 0x0
; GFX6789-NEXT: s_add_i32 s2, s2, -1
; GFX6789-NEXT: s_cmp_lg_u32 s2, 0
; GFX6789-NEXT: s_cbranch_scc1 .LBB5_1
; GFX6789-NEXT: ; %bb.2: ; %end
; GFX6789-NEXT: s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_neg400:
; GFX12: ; %bb.0: ; %entry
Expand Down Expand Up @@ -337,3 +323,52 @@ loop:
end:
ret void
}

; Same for address space 6, constant 32-bit.
define amdgpu_cs void @test_sink_smem_offset_neg400_32bit(ptr addrspace(6) inreg %ptr, i32 inreg %val) {
; GFX6789-LABEL: test_sink_smem_offset_neg400_32bit:
; GFX6789: ; %bb.0: ; %entry
; GFX6789-NEXT: s_add_i32 s2, s0, 0xfffffe70
; GFX6789-NEXT: s_mov_b32 s3, 0
; GFX6789-NEXT: .LBB6_1: ; %loop
; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
; GFX6789-NEXT: s_load_dword s0, s[2:3], 0x0
; GFX6789-NEXT: s_add_i32 s1, s1, -1
; GFX6789-NEXT: s_cmp_lg_u32 s1, 0
; GFX6789-NEXT: s_cbranch_scc1 .LBB6_1
; GFX6789-NEXT: ; %bb.2: ; %end
; GFX6789-NEXT: s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_neg400_32bit:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_add_co_i32 s2, s0, 0xfffffe70
; GFX12-NEXT: s_mov_b32 s3, 0
; GFX12-NEXT: .LBB6_1: ; %loop
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s0, s[2:3], 0x0
; GFX12-NEXT: s_add_co_i32 s1, s1, -1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_cmp_lg_u32 s1, 0
; GFX12-NEXT: s_cbranch_scc1 .LBB6_1
; GFX12-NEXT: ; %bb.2: ; %end
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(6) %ptr, i64 -400
br label %loop

loop:
%count = phi i32 [ %dec, %loop ], [ %val, %entry ]
%dec = sub i32 %count, 1
%load = load volatile i32, ptr addrspace(6) %gep
%cond = icmp eq i32 %dec, 0
br i1 %cond, label %end, label %loop

end:
ret void
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX678: {{.*}}
; GFX9: {{.*}}
Loading