Skip to content

Commit 6869425

Browse files
authored
AMDGPU: Use getSignedTargetConstant for ImmOffset in SelectScratchSVAddr (#121978)
ImmOffset is signed, so passing a negative ImmOffset to getTargetConstant hits an assert; use getSignedTargetConstant instead. Fixes: SWDEV-506453
1 parent 666eee0 commit 6869425

File tree

3 files changed

+219
-1
lines changed

3 files changed

+219
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1997,7 +1997,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
19971997
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
19981998
return false;
19991999
SAddr = SelectSAddrFI(CurDAG, SAddr);
2000-
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2000+
Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
20012001
return true;
20022002
}
20032003

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll

Lines changed: 110 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3990,6 +3990,116 @@ bb:
39903990
ret void
39913991
}
39923992

3993+
define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(ptr addrspace(5) inreg %sgpr_base, i32 inreg %sidx, i32 %vidx) {
3994+
; GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
3995+
; GFX9: ; %bb.0: ; %bb
3996+
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
3997+
; GFX9-NEXT: v_add_u32_e32 v0, s3, v0
3998+
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
3999+
; GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
4000+
; GFX9-NEXT: v_mov_b32_e32 v1, 15
4001+
; GFX9-NEXT: scratch_store_dword v0, v1, off
4002+
; GFX9-NEXT: s_waitcnt vmcnt(0)
4003+
; GFX9-NEXT: s_endpgm
4004+
;
4005+
; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4006+
; GFX10: ; %bb.0: ; %bb
4007+
; GFX10-NEXT: s_add_u32 s0, s0, s5
4008+
; GFX10-NEXT: s_addc_u32 s1, s1, 0
4009+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4010+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4011+
; GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0
4012+
; GFX10-NEXT: v_mov_b32_e32 v1, 15
4013+
; GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0
4014+
; GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16
4015+
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4016+
; GFX10-NEXT: s_endpgm
4017+
;
4018+
; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4019+
; GFX940: ; %bb.0: ; %bb
4020+
; GFX940-NEXT: v_add_u32_e32 v0, s1, v0
4021+
; GFX940-NEXT: v_add3_u32 v0, s0, v0, -16
4022+
; GFX940-NEXT: v_mov_b32_e32 v1, 15
4023+
; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1
4024+
; GFX940-NEXT: s_waitcnt vmcnt(0)
4025+
; GFX940-NEXT: s_endpgm
4026+
;
4027+
; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4028+
; GFX11: ; %bb.0: ; %bb
4029+
; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4030+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4031+
; GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
4032+
; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
4033+
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4034+
; GFX11-NEXT: s_endpgm
4035+
;
4036+
; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4037+
; GFX12: ; %bb.0: ; %bb
4038+
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4039+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4040+
; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4041+
; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4042+
; GFX12-NEXT: s_wait_storecnt 0x0
4043+
; GFX12-NEXT: s_endpgm
4044+
;
4045+
; UNALIGNED_GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4046+
; UNALIGNED_GFX9: ; %bb.0: ; %bb
4047+
; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
4048+
; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s3, v0
4049+
; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
4050+
; UNALIGNED_GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
4051+
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 15
4052+
; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off
4053+
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
4054+
; UNALIGNED_GFX9-NEXT: s_endpgm
4055+
;
4056+
; UNALIGNED_GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4057+
; UNALIGNED_GFX10: ; %bb.0: ; %bb
4058+
; UNALIGNED_GFX10-NEXT: s_add_u32 s0, s0, s5
4059+
; UNALIGNED_GFX10-NEXT: s_addc_u32 s1, s1, 0
4060+
; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4061+
; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4062+
; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0
4063+
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
4064+
; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0
4065+
; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16
4066+
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4067+
; UNALIGNED_GFX10-NEXT: s_endpgm
4068+
;
4069+
; UNALIGNED_GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4070+
; UNALIGNED_GFX940: ; %bb.0: ; %bb
4071+
; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v0, s1, v0
4072+
; UNALIGNED_GFX940-NEXT: v_add3_u32 v0, s0, v0, -16
4073+
; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 15
4074+
; UNALIGNED_GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1
4075+
; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0)
4076+
; UNALIGNED_GFX940-NEXT: s_endpgm
4077+
;
4078+
; UNALIGNED_GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4079+
; UNALIGNED_GFX11: ; %bb.0: ; %bb
4080+
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4081+
; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4082+
; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
4083+
; UNALIGNED_GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
4084+
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4085+
; UNALIGNED_GFX11-NEXT: s_endpgm
4086+
;
4087+
; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4088+
; UNALIGNED_GFX12: ; %bb.0: ; %bb
4089+
; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4090+
; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4091+
; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4092+
; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4093+
; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
4094+
; UNALIGNED_GFX12-NEXT: s_endpgm
4095+
bb:
4096+
%add1 = add nsw i32 %sidx, %vidx
4097+
%add2 = add nsw i32 %add1, -16
4098+
%gep = getelementptr inbounds [16 x i8], ptr addrspace(5) %sgpr_base, i32 0, i32 %add2
4099+
store volatile i32 15, ptr addrspace(5) %gep, align 4
4100+
ret void
4101+
}
4102+
39934103
define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %scevgep) {
39944104
; GFX9-LABEL: sgpr_base_negative_offset:
39954105
; GFX9: ; %bb.0: ; %entry

llvm/test/CodeGen/AMDGPU/flat-scratch.ll

Lines changed: 108 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5249,6 +5249,114 @@ bb:
52495249
ret void
52505250
}
52515251

5252+
define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(ptr addrspace(5) inreg %sgpr_base, i32 inreg %sidx, i32 %vidx) {
5253+
; GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
5254+
; GFX9: ; %bb.0: ; %bb
5255+
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
5256+
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
5257+
; GFX9-NEXT: s_add_i32 s2, s2, s3
5258+
; GFX9-NEXT: v_add_u32_e32 v0, s2, v0
5259+
; GFX9-NEXT: v_add_u32_e32 v0, -16, v0
5260+
; GFX9-NEXT: v_mov_b32_e32 v1, 15
5261+
; GFX9-NEXT: scratch_store_dword v0, v1, off
5262+
; GFX9-NEXT: s_waitcnt vmcnt(0)
5263+
; GFX9-NEXT: s_endpgm
5264+
;
5265+
; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
5266+
; GFX10: ; %bb.0: ; %bb
5267+
; GFX10-NEXT: s_add_u32 s0, s0, s5
5268+
; GFX10-NEXT: s_addc_u32 s1, s1, 0
5269+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
5270+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
5271+
; GFX10-NEXT: v_add3_u32 v0, s2, s3, v0
5272+
; GFX10-NEXT: v_mov_b32_e32 v1, 15
5273+
; GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16
5274+
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
5275+
; GFX10-NEXT: s_endpgm
5276+
;
5277+
; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
5278+
; GFX11: ; %bb.0: ; %bb
5279+
; GFX11-NEXT: v_add3_u32 v0, s0, s1, v0
5280+
; GFX11-NEXT: v_mov_b32_e32 v1, 15
5281+
; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
5282+
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
5283+
; GFX11-NEXT: s_endpgm
5284+
;
5285+
; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
5286+
; GFX12: ; %bb.0: ; %bb
5287+
; GFX12-NEXT: v_mov_b32_e32 v1, 15
5288+
; GFX12-NEXT: s_add_co_i32 s0, s0, s1
5289+
; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:-16 scope:SCOPE_SYS
5290+
; GFX12-NEXT: s_wait_storecnt 0x0
5291+
; GFX12-NEXT: s_endpgm
5292+
;
5293+
; GFX9-PAL-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
5294+
; GFX9-PAL: ; %bb.0: ; %bb
5295+
; GFX9-PAL-NEXT: s_getpc_b64 s[2:3]
5296+
; GFX9-PAL-NEXT: s_mov_b32 s2, s8
5297+
; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
5298+
; GFX9-PAL-NEXT: v_mov_b32_e32 v1, 15
5299+
; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0)
5300+
; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff
5301+
; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s5
5302+
; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
5303+
; GFX9-PAL-NEXT: s_add_i32 s0, s0, s1
5304+
; GFX9-PAL-NEXT: v_add_u32_e32 v0, s0, v0
5305+
; GFX9-PAL-NEXT: v_add_u32_e32 v0, -16, v0
5306+
; GFX9-PAL-NEXT: scratch_store_dword v0, v1, off
5307+
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
5308+
; GFX9-PAL-NEXT: s_endpgm
5309+
;
5310+
; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
5311+
; GFX940: ; %bb.0: ; %bb
5312+
; GFX940-NEXT: s_add_i32 s0, s0, s1
5313+
; GFX940-NEXT: v_add_u32_e32 v0, s0, v0
5314+
; GFX940-NEXT: v_add_u32_e32 v0, -16, v0
5315+
; GFX940-NEXT: v_mov_b32_e32 v1, 15
5316+
; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1
5317+
; GFX940-NEXT: s_waitcnt vmcnt(0)
5318+
; GFX940-NEXT: s_endpgm
5319+
;
5320+
; GFX10-PAL-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
5321+
; GFX10-PAL: ; %bb.0: ; %bb
5322+
; GFX10-PAL-NEXT: s_getpc_b64 s[2:3]
5323+
; GFX10-PAL-NEXT: s_mov_b32 s2, s8
5324+
; GFX10-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
5325+
; GFX10-PAL-NEXT: s_waitcnt lgkmcnt(0)
5326+
; GFX10-PAL-NEXT: s_and_b32 s3, s3, 0xffff
5327+
; GFX10-PAL-NEXT: s_add_u32 s2, s2, s5
5328+
; GFX10-PAL-NEXT: s_addc_u32 s3, s3, 0
5329+
; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
5330+
; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
5331+
; GFX10-PAL-NEXT: v_add3_u32 v0, s0, s1, v0
5332+
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
5333+
; GFX10-PAL-NEXT: scratch_store_dword v0, v1, off offset:-16
5334+
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
5335+
; GFX10-PAL-NEXT: s_endpgm
5336+
;
5337+
; GFX11-PAL-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
5338+
; GFX11-PAL: ; %bb.0: ; %bb
5339+
; GFX11-PAL-NEXT: v_add3_u32 v0, s0, s1, v0
5340+
; GFX11-PAL-NEXT: v_mov_b32_e32 v1, 15
5341+
; GFX11-PAL-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
5342+
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
5343+
; GFX11-PAL-NEXT: s_endpgm
5344+
;
5345+
; GFX12-PAL-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
5346+
; GFX12-PAL: ; %bb.0: ; %bb
5347+
; GFX12-PAL-NEXT: v_mov_b32_e32 v1, 15
5348+
; GFX12-PAL-NEXT: s_add_co_i32 s0, s0, s1
5349+
; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, s0 offset:-16 scope:SCOPE_SYS
5350+
; GFX12-PAL-NEXT: s_wait_storecnt 0x0
5351+
; GFX12-PAL-NEXT: s_endpgm
5352+
bb:
5353+
%add1 = add nsw i32 %sidx, %vidx
5354+
%add2 = add nsw i32 %add1, -16
5355+
%gep = getelementptr inbounds [16 x i8], ptr addrspace(5) %sgpr_base, i32 0, i32 %add2
5356+
store volatile i32 15, ptr addrspace(5) %gep, align 4
5357+
ret void
5358+
}
5359+
52525360
define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %scevgep) {
52535361
; GFX9-LABEL: sgpr_base_negative_offset:
52545362
; GFX9: ; %bb.0: ; %entry

0 commit comments

Comments
 (0)