Skip to content

Commit c32a256

Browse files
jayfoad authored and AlexisPerry committed
[AMDGPU] Disallow negative s_load offsets in isLegalAddressingMode (llvm#91327)
1 parent: ce5090d; commit: c32a256

File tree

2 files changed

+72
-27
lines changed

2 files changed

+72
-27
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1610,6 +1610,16 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
16101610
return false;
16111611
}
16121612

1613+
if ((AS == AMDGPUAS::CONSTANT_ADDRESS ||
1614+
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
1615+
AM.BaseOffs < 0) {
1616+
// Scalar (non-buffer) loads can only use a negative offset if
1617+
// soffset+offset is non-negative. Since the compiler can only prove that
1618+
// in a few special cases, it is safer to claim that negative offsets are
1619+
// not supported.
1620+
return false;
1621+
}
1622+
16131623
if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
16141624
return true;
16151625

llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll

Lines changed: 62 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -279,33 +279,19 @@ end:
279279
}
280280

281281
define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
282-
; GFX678-LABEL: test_sink_smem_offset_neg400:
283-
; GFX678: ; %bb.0: ; %entry
284-
; GFX678-NEXT: s_add_u32 s0, s0, 0xfffffe70
285-
; GFX678-NEXT: s_addc_u32 s1, s1, -1
286-
; GFX678-NEXT: .LBB5_1: ; %loop
287-
; GFX678-NEXT: ; =>This Inner Loop Header: Depth=1
288-
; GFX678-NEXT: s_waitcnt lgkmcnt(0)
289-
; GFX678-NEXT: s_load_dword s3, s[0:1], 0x0
290-
; GFX678-NEXT: s_add_i32 s2, s2, -1
291-
; GFX678-NEXT: s_cmp_lg_u32 s2, 0
292-
; GFX678-NEXT: s_cbranch_scc1 .LBB5_1
293-
; GFX678-NEXT: ; %bb.2: ; %end
294-
; GFX678-NEXT: s_endpgm
295-
;
296-
; GFX9-LABEL: test_sink_smem_offset_neg400:
297-
; GFX9: ; %bb.0: ; %entry
298-
; GFX9-NEXT: .LBB5_1: ; %loop
299-
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
300-
; GFX9-NEXT: s_add_i32 s2, s2, -1
301-
; GFX9-NEXT: s_add_u32 s4, s0, 0xfffffe70
302-
; GFX9-NEXT: s_addc_u32 s5, s1, -1
303-
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
304-
; GFX9-NEXT: s_load_dword s3, s[4:5], 0x0
305-
; GFX9-NEXT: s_cmp_lg_u32 s2, 0
306-
; GFX9-NEXT: s_cbranch_scc1 .LBB5_1
307-
; GFX9-NEXT: ; %bb.2: ; %end
308-
; GFX9-NEXT: s_endpgm
282+
; GFX6789-LABEL: test_sink_smem_offset_neg400:
283+
; GFX6789: ; %bb.0: ; %entry
284+
; GFX6789-NEXT: s_add_u32 s0, s0, 0xfffffe70
285+
; GFX6789-NEXT: s_addc_u32 s1, s1, -1
286+
; GFX6789-NEXT: .LBB5_1: ; %loop
287+
; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
288+
; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
289+
; GFX6789-NEXT: s_load_dword s3, s[0:1], 0x0
290+
; GFX6789-NEXT: s_add_i32 s2, s2, -1
291+
; GFX6789-NEXT: s_cmp_lg_u32 s2, 0
292+
; GFX6789-NEXT: s_cbranch_scc1 .LBB5_1
293+
; GFX6789-NEXT: ; %bb.2: ; %end
294+
; GFX6789-NEXT: s_endpgm
309295
;
310296
; GFX12-LABEL: test_sink_smem_offset_neg400:
311297
; GFX12: ; %bb.0: ; %entry
@@ -337,3 +323,52 @@ loop:
337323
end:
338324
ret void
339325
}
326+
327+
; Same for address space 6, constant 32-bit.
328+
define amdgpu_cs void @test_sink_smem_offset_neg400_32bit(ptr addrspace(6) inreg %ptr, i32 inreg %val) {
329+
; GFX6789-LABEL: test_sink_smem_offset_neg400_32bit:
330+
; GFX6789: ; %bb.0: ; %entry
331+
; GFX6789-NEXT: s_add_i32 s2, s0, 0xfffffe70
332+
; GFX6789-NEXT: s_mov_b32 s3, 0
333+
; GFX6789-NEXT: .LBB6_1: ; %loop
334+
; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
335+
; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
336+
; GFX6789-NEXT: s_load_dword s0, s[2:3], 0x0
337+
; GFX6789-NEXT: s_add_i32 s1, s1, -1
338+
; GFX6789-NEXT: s_cmp_lg_u32 s1, 0
339+
; GFX6789-NEXT: s_cbranch_scc1 .LBB6_1
340+
; GFX6789-NEXT: ; %bb.2: ; %end
341+
; GFX6789-NEXT: s_endpgm
342+
;
343+
; GFX12-LABEL: test_sink_smem_offset_neg400_32bit:
344+
; GFX12: ; %bb.0: ; %entry
345+
; GFX12-NEXT: s_add_co_i32 s2, s0, 0xfffffe70
346+
; GFX12-NEXT: s_mov_b32 s3, 0
347+
; GFX12-NEXT: .LBB6_1: ; %loop
348+
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
349+
; GFX12-NEXT: s_wait_kmcnt 0x0
350+
; GFX12-NEXT: s_load_b32 s0, s[2:3], 0x0
351+
; GFX12-NEXT: s_add_co_i32 s1, s1, -1
352+
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
353+
; GFX12-NEXT: s_cmp_lg_u32 s1, 0
354+
; GFX12-NEXT: s_cbranch_scc1 .LBB6_1
355+
; GFX12-NEXT: ; %bb.2: ; %end
356+
; GFX12-NEXT: s_endpgm
357+
entry:
358+
%gep = getelementptr i8, ptr addrspace(6) %ptr, i64 -400
359+
br label %loop
360+
361+
loop:
362+
%count = phi i32 [ %dec, %loop ], [ %val, %entry ]
363+
%dec = sub i32 %count, 1
364+
%load = load volatile i32, ptr addrspace(6) %gep
365+
%cond = icmp eq i32 %dec, 0
366+
br i1 %cond, label %end, label %loop
367+
368+
end:
369+
ret void
370+
}
371+
372+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
373+
; GFX678: {{.*}}
374+
; GFX9: {{.*}}

0 commit comments

Comments
 (0)