Skip to content

Commit 83fe851

Browse files
AMDGPU: Fix inst-selection of large scratch offsets with sgpr base (#110256)
Use i32 instead of i16 for the offset so that it is not interpreted as a negative 16-bit offset.
1 parent 93eaa99 commit 83fe851

File tree

2 files changed

+9
-9
lines changed

2 files changed

+9
-9
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1911,7 +1911,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
19111911
0);
19121912
}
19131913

1914-
Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
1914+
Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32);
19151915

19161916
return true;
19171917
}
@@ -1966,7 +1966,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
19661966
return false;
19671967
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
19681968
return false;
1969-
Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1969+
Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
19701970
return true;
19711971
}
19721972
}
@@ -1999,7 +1999,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
19991999
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
20002000
return false;
20012001
SAddr = SelectSAddrFI(CurDAG, SAddr);
2002-
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
2002+
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
20032003
return true;
20042004
}
20052005

llvm/test/CodeGen/AMDGPU/flat-scratch.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4926,7 +4926,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
49264926
;
49274927
; GFX12-LABEL: sgpr_base_large_offset:
49284928
; GFX12: ; %bb.0: ; %entry
4929-
; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:-24
4929+
; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:65512
49304930
; GFX12-NEXT: s_wait_loadcnt 0x0
49314931
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
49324932
; GFX12-NEXT: s_nop 0
@@ -4985,7 +4985,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
49854985
;
49864986
; GFX12-PAL-LABEL: sgpr_base_large_offset:
49874987
; GFX12-PAL: ; %bb.0: ; %entry
4988-
; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:-24
4988+
; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:65512
49894989
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
49904990
; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off
49914991
; GFX12-PAL-NEXT: s_nop 0
@@ -5038,7 +5038,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
50385038
; GFX12: ; %bb.0: ; %entry
50395039
; GFX12-NEXT: v_mov_b32_e32 v2, 0x1000000
50405040
; GFX12-NEXT: s_and_b32 s0, s0, -4
5041-
; GFX12-NEXT: scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
5041+
; GFX12-NEXT: scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS
50425042
; GFX12-NEXT: s_wait_loadcnt 0x0
50435043
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
50445044
; GFX12-NEXT: s_nop 0
@@ -5103,7 +5103,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
51035103
; GFX12-PAL: ; %bb.0: ; %entry
51045104
; GFX12-PAL-NEXT: v_mov_b32_e32 v2, 0x1000000
51055105
; GFX12-PAL-NEXT: s_and_b32 s0, s0, -4
5106-
; GFX12-PAL-NEXT: scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
5106+
; GFX12-PAL-NEXT: scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS
51075107
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
51085108
; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off
51095109
; GFX12-PAL-NEXT: s_nop 0
@@ -5159,7 +5159,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
51595159
; GFX12: ; %bb.0: ; %bb
51605160
; GFX12-NEXT: v_mov_b32_e32 v1, 15
51615161
; GFX12-NEXT: s_add_co_i32 s0, s0, s1
5162-
; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
5162+
; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
51635163
; GFX12-NEXT: s_wait_storecnt 0x0
51645164
; GFX12-NEXT: s_endpgm
51655165
;
@@ -5221,7 +5221,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
52215221
; GFX12-PAL: ; %bb.0: ; %bb
52225222
; GFX12-PAL-NEXT: v_mov_b32_e32 v1, 15
52235223
; GFX12-PAL-NEXT: s_add_co_i32 s0, s0, s1
5224-
; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
5224+
; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
52255225
; GFX12-PAL-NEXT: s_wait_storecnt 0x0
52265226
; GFX12-PAL-NEXT: s_endpgm
52275227
bb:

0 commit comments

Comments
 (0)