Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit ea06ecf

Browse files
author
Marek Olsak
committed
AMDGPU: Remove the s_buffer workaround for GFX9 chips
Summary:
I checked the AMD closed source compiler and the workaround is only needed when x3 is emulated as x4, which we don't do in LLVM. SMEM x3 opcodes don't exist, and instead there is a possibility to use x4 with the last component being unused. If the last component is out of buffer bounds and falls on the next 4K page, the hw hangs.

Reviewers: arsenm, nhaehnle

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye

Differential Revision: https://reviews.llvm.org/D42756

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@324486 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 634098b commit ea06ecf

File tree

3 files changed

+4
-19
lines changed

3 files changed

+4
-19
lines changed

lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -333,14 +333,6 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
333333
return HasMadMixInsts;
334334
}
335335

336-
bool hasSBufferLoadStoreAtomicDwordxN() const {
337-
// Only use the "x1" variants on GFX9 or don't use the buffer variants.
338-
// For x2 and higher variants, if the accessed region spans 2 VM pages and
339-
// the second page is unmapped, the hw hangs.
340-
// TODO: There is one future GFX9 chip that doesn't have this bug.
341-
return getGeneration() != GFX9;
342-
}
343-
344336
bool hasCARRY() const {
345337
return (getGeneration() >= EVERGREEN);
346338
}

lib/Target/AMDGPU/SILoadStoreOptimizer.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -853,9 +853,8 @@ bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
853853

854854
continue;
855855
}
856-
if (STM->hasSBufferLoadStoreAtomicDwordxN() &&
857-
(Opc == AMDGPU::S_BUFFER_LOAD_DWORD_IMM ||
858-
Opc == AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM)) {
856+
if (Opc == AMDGPU::S_BUFFER_LOAD_DWORD_IMM ||
857+
Opc == AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM) {
859858
// EltSize is in units of the offset encoding.
860859
CI.InstClass = S_BUFFER_LOAD_IMM;
861860
CI.EltSize = AMDGPU::getSMRDEncodedOffset(*STM, 4);

test/CodeGen/AMDGPU/smrd.ll

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -217,14 +217,8 @@ main_body:
217217
; GCN-NEXT: %bb.
218218
; SICI-NEXT: s_buffer_load_dwordx4 s[{{[0-9]}}:{{[0-9]}}], s[0:3], 0x1
219219
; SICI-NEXT: s_buffer_load_dwordx2 s[{{[0-9]}}:{{[0-9]}}], s[0:3], 0x7
220-
; VI-NEXT: s_buffer_load_dwordx4 s[{{[0-9]}}:{{[0-9]}}], s[0:3], 0x4
221-
; VI-NEXT: s_buffer_load_dwordx2 s[{{[0-9]}}:{{[0-9]}}], s[0:3], 0x1c
222-
; GFX9-NEXT: s_buffer_load_dword s{{[0-9]}}
223-
; GFX9-NEXT: s_buffer_load_dword s{{[0-9]}}
224-
; GFX9-NEXT: s_buffer_load_dword s{{[0-9]}}
225-
; GFX9-NEXT: s_buffer_load_dword s{{[0-9]}}
226-
; GFX9-NEXT: s_buffer_load_dword s{{[0-9]}}
227-
; GFX9-NEXT: s_buffer_load_dword s{{[0-9]}}
220+
; VIGFX9-NEXT: s_buffer_load_dwordx4 s[{{[0-9]}}:{{[0-9]}}], s[0:3], 0x4
221+
; VIGFX9-NEXT: s_buffer_load_dwordx2 s[{{[0-9]}}:{{[0-9]}}], s[0:3], 0x1c
228222
define amdgpu_ps void @smrd_imm_merged(<4 x i32> inreg %desc) #0 {
229223
main_body:
230224
%r1 = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 4)

0 commit comments

Comments
 (0)