Skip to content

Commit dd3d8e4

Browse files
petar-avramovic authored and llvmbot committed
AMDGPU: Fix inst-selection of large scratch offsets with sgpr base (#110256)
Use i32 instead of i16 for the offset operand so that a large offset is not interpreted as a negative 16-bit offset. (cherry picked from commit 83fe851)
1 parent 4477e7b commit dd3d8e4

File tree

2 files changed

+9
-9
lines changed

2 files changed

+9
-9
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1911,7 +1911,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
19111911
0);
19121912
}
19131913

1914-
Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
1914+
Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32);
19151915

19161916
return true;
19171917
}
@@ -1967,7 +1967,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
19671967
return false;
19681968
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
19691969
return false;
1970-
Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1970+
Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
19711971
return true;
19721972
}
19731973
}
@@ -2000,7 +2000,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
20002000
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
20012001
return false;
20022002
SAddr = SelectSAddrFI(CurDAG, SAddr);
2003-
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
2003+
Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
20042004
return true;
20052005
}
20062006

llvm/test/CodeGen/AMDGPU/flat-scratch.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4956,7 +4956,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
49564956
;
49574957
; GFX12-LABEL: sgpr_base_large_offset:
49584958
; GFX12: ; %bb.0: ; %entry
4959-
; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:-24
4959+
; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:65512
49604960
; GFX12-NEXT: s_wait_loadcnt 0x0
49614961
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
49624962
; GFX12-NEXT: s_nop 0
@@ -5015,7 +5015,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspa
50155015
;
50165016
; GFX12-PAL-LABEL: sgpr_base_large_offset:
50175017
; GFX12-PAL: ; %bb.0: ; %entry
5018-
; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:-24
5018+
; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:65512
50195019
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
50205020
; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off
50215021
; GFX12-PAL-NEXT: s_nop 0
@@ -5068,7 +5068,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
50685068
; GFX12: ; %bb.0: ; %entry
50695069
; GFX12-NEXT: v_mov_b32_e32 v2, 0x1000000
50705070
; GFX12-NEXT: s_and_b32 s0, s0, -4
5071-
; GFX12-NEXT: scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
5071+
; GFX12-NEXT: scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS
50725072
; GFX12-NEXT: s_wait_loadcnt 0x0
50735073
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
50745074
; GFX12-NEXT: s_nop 0
@@ -5133,7 +5133,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr a
51335133
; GFX12-PAL: ; %bb.0: ; %entry
51345134
; GFX12-PAL-NEXT: v_mov_b32_e32 v2, 0x1000000
51355135
; GFX12-PAL-NEXT: s_and_b32 s0, s0, -4
5136-
; GFX12-PAL-NEXT: scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
5136+
; GFX12-PAL-NEXT: scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS
51375137
; GFX12-PAL-NEXT: s_wait_loadcnt 0x0
51385138
; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off
51395139
; GFX12-PAL-NEXT: s_nop 0
@@ -5189,7 +5189,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
51895189
; GFX12: ; %bb.0: ; %bb
51905190
; GFX12-NEXT: v_mov_b32_e32 v1, 15
51915191
; GFX12-NEXT: s_add_co_i32 s0, s0, s1
5192-
; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
5192+
; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
51935193
; GFX12-NEXT: s_wait_storecnt 0x0
51945194
; GFX12-NEXT: s_endpgm
51955195
;
@@ -5251,7 +5251,7 @@ define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
52515251
; GFX12-PAL: ; %bb.0: ; %bb
52525252
; GFX12-PAL-NEXT: v_mov_b32_e32 v1, 15
52535253
; GFX12-PAL-NEXT: s_add_co_i32 s0, s0, s1
5254-
; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
5254+
; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
52555255
; GFX12-PAL-NEXT: s_wait_storecnt 0x0
52565256
; GFX12-PAL-NEXT: s_endpgm
52575257
bb:

0 commit comments

Comments
 (0)