Skip to content

Commit 1d7f33a

Browse files
authored
AMDGPU: Use getSignedTargetConstant for ImmOffset in SelectScratchSVAddr (llvm#121978) (llvm#752)
ImmOffset is signed, and we will hit an assert with a negative ImmOffset when getTargetConstant is used. Cherry-pick 6869425 to amd-mainline to fix SWDEV-504260. NOTE: We cherry-pick two additional utility functions in SelectionDAG.h to make this cherry-pick possible: getSignedConstant and getSignedTargetConstant.
2 parents 9412f74 + 62533f7 commit 1d7f33a

File tree

4 files changed

+672
-2
lines changed

4 files changed

+672
-2
lines changed

llvm/include/llvm/CodeGen/SelectionDAG.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -680,7 +680,11 @@ class SelectionDAG {
680680
bool isTarget = false, bool isOpaque = false);
681681
SDValue getConstant(const APInt &Val, const SDLoc &DL, EVT VT,
682682
bool isTarget = false, bool isOpaque = false);
683-
683+
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isT,
684+
bool isO) {
685+
unsigned Size = VT.getScalarSizeInBits();
686+
return getConstant(APInt(Size, Val, /*isSigned=*/true), DL, VT, isT, isO);
687+
}
684688
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget = false,
685689
bool IsOpaque = false) {
686690
return getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT,
@@ -708,6 +712,10 @@ class SelectionDAG {
708712
bool isOpaque = false) {
709713
return getConstant(Val, DL, VT, true, isOpaque);
710714
}
715+
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT,
716+
bool isOpaque = false) {
717+
return getSignedConstant(Val, DL, VT, true, isOpaque);
718+
}
711719

712720
/// Create a true or false constant of type \p VT using the target's
713721
/// BooleanContent for type \p OpVT.

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2000,7 +2000,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
20002000
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
20012001
return false;
20022002
SAddr = SelectSAddrFI(CurDAG, SAddr);
2003-
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
2003+
Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i16);
20042004
return true;
20052005
}
20062006

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4000,6 +4000,116 @@ bb:
40004000
ret void
40014001
}
40024002

4003+
define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset(ptr addrspace(5) inreg %sgpr_base, i32 inreg %sidx, i32 %vidx) {
4004+
; GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4005+
; GFX9: ; %bb.0: ; %bb
4006+
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
4007+
; GFX9-NEXT: v_add_u32_e32 v0, s3, v0
4008+
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
4009+
; GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
4010+
; GFX9-NEXT: v_mov_b32_e32 v1, 15
4011+
; GFX9-NEXT: scratch_store_dword v0, v1, off
4012+
; GFX9-NEXT: s_waitcnt vmcnt(0)
4013+
; GFX9-NEXT: s_endpgm
4014+
;
4015+
; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4016+
; GFX10: ; %bb.0: ; %bb
4017+
; GFX10-NEXT: s_add_u32 s0, s0, s5
4018+
; GFX10-NEXT: s_addc_u32 s1, s1, 0
4019+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4020+
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4021+
; GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0
4022+
; GFX10-NEXT: v_mov_b32_e32 v1, 15
4023+
; GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0
4024+
; GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16
4025+
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4026+
; GFX10-NEXT: s_endpgm
4027+
;
4028+
; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4029+
; GFX940: ; %bb.0: ; %bb
4030+
; GFX940-NEXT: v_add_u32_e32 v0, s1, v0
4031+
; GFX940-NEXT: v_add3_u32 v0, s0, v0, -16
4032+
; GFX940-NEXT: v_mov_b32_e32 v1, 15
4033+
; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1
4034+
; GFX940-NEXT: s_waitcnt vmcnt(0)
4035+
; GFX940-NEXT: s_endpgm
4036+
;
4037+
; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4038+
; GFX11: ; %bb.0: ; %bb
4039+
; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4040+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4041+
; GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
4042+
; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
4043+
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4044+
; GFX11-NEXT: s_endpgm
4045+
;
4046+
; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4047+
; GFX12: ; %bb.0: ; %bb
4048+
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4049+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4050+
; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4051+
; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4052+
; GFX12-NEXT: s_wait_storecnt 0x0
4053+
; GFX12-NEXT: s_endpgm
4054+
;
4055+
; UNALIGNED_GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4056+
; UNALIGNED_GFX9: ; %bb.0: ; %bb
4057+
; UNALIGNED_GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5
4058+
; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v0, s3, v0
4059+
; UNALIGNED_GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
4060+
; UNALIGNED_GFX9-NEXT: v_add3_u32 v0, s2, v0, -16
4061+
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 15
4062+
; UNALIGNED_GFX9-NEXT: scratch_store_dword v0, v1, off
4063+
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
4064+
; UNALIGNED_GFX9-NEXT: s_endpgm
4065+
;
4066+
; UNALIGNED_GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4067+
; UNALIGNED_GFX10: ; %bb.0: ; %bb
4068+
; UNALIGNED_GFX10-NEXT: s_add_u32 s0, s0, s5
4069+
; UNALIGNED_GFX10-NEXT: s_addc_u32 s1, s1, 0
4070+
; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
4071+
; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
4072+
; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0
4073+
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
4074+
; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v0, s2, v0
4075+
; UNALIGNED_GFX10-NEXT: scratch_store_dword v0, v1, off offset:-16
4076+
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4077+
; UNALIGNED_GFX10-NEXT: s_endpgm
4078+
;
4079+
; UNALIGNED_GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4080+
; UNALIGNED_GFX940: ; %bb.0: ; %bb
4081+
; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v0, s1, v0
4082+
; UNALIGNED_GFX940-NEXT: v_add3_u32 v0, s0, v0, -16
4083+
; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 15
4084+
; UNALIGNED_GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1
4085+
; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0)
4086+
; UNALIGNED_GFX940-NEXT: s_endpgm
4087+
;
4088+
; UNALIGNED_GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4089+
; UNALIGNED_GFX11: ; %bb.0: ; %bb
4090+
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4091+
; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4092+
; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
4093+
; UNALIGNED_GFX11-NEXT: scratch_store_b32 v0, v1, off offset:-16 dlc
4094+
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
4095+
; UNALIGNED_GFX11-NEXT: s_endpgm
4096+
;
4097+
; UNALIGNED_GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_negative_imm_offset:
4098+
; UNALIGNED_GFX12: ; %bb.0: ; %bb
4099+
; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0
4100+
; UNALIGNED_GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
4101+
; UNALIGNED_GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0
4102+
; UNALIGNED_GFX12-NEXT: scratch_store_b32 v0, v1, off offset:-16 scope:SCOPE_SYS
4103+
; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0
4104+
; UNALIGNED_GFX12-NEXT: s_endpgm
4105+
bb:
4106+
%add1 = add nsw i32 %sidx, %vidx
4107+
%add2 = add nsw i32 %add1, -16
4108+
%gep = getelementptr inbounds [16 x i8], ptr addrspace(5) %sgpr_base, i32 0, i32 %add2
4109+
store volatile i32 15, ptr addrspace(5) %gep, align 4
4110+
ret void
4111+
}
4112+
40034113
define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %scevgep) {
40044114
; GFX9-LABEL: sgpr_base_negative_offset:
40054115
; GFX9: ; %bb.0: ; %entry

0 commit comments

Comments
 (0)