-
Notifications
You must be signed in to change notification settings - Fork 14.3k
AMDGPU: Fix handling of negative scratch offset #122135
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: None (macurtis-amd) Changes: Full diff: https://github.com/llvm/llvm-project/pull/122135.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index d9eaf82c521404..27e9018d68a03e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1997,7 +1997,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
return false;
SAddr = SelectSAddrFI(CurDAG, SAddr);
- Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
+ Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/bug-neg-scratch-offset.ll b/llvm/test/CodeGen/AMDGPU/bug-neg-scratch-offset.ll
new file mode 100644
index 00000000000000..9141b4fd188ce1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/bug-neg-scratch-offset.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -stop-after=amdgpu-isel -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+target triple = "amdgcn-amd-amdhsa"
+
+%union.anon.41 = type { [4 x i64] }
+%union.anon.2 = type { i8 }
+
+define fastcc void @_ZN10PrimitivesI12rccl_bfloat810FuncMinMaxIS0_E13FanAsymmetricILi1ELi1EELi1E10ProtoLL128Li0EE9localCopyEPS0_S7_i(i32 %0, i64 %idx.ext62.i.i) {
+entry:
+ %1 = alloca %union.anon.41, i32 0, align 8, addrspace(5)
+ %add.ptr63.i.i3 = getelementptr %union.anon.2, ptr null, i64 %idx.ext62.i.i
+ br label %for.body69.i.i.epil3
+
+for.body69.i.i.epil3: ; preds = %for.body69.i.i.epil3, %entry
+ %i.0117.i.i.epil4 = phi i32 [ %inc.i.i.7.epil, %for.body69.i.i.epil3 ], [ %0, %entry ]
+ %conv65.i.i.epil5 = zext i32 %i.0117.i.i.epil4 to i64
+ %arrayidx73.i.i.epil6 = getelementptr [32 x i8], ptr addrspace(5) %1, i32 0, i32 %i.0117.i.i.epil4
+ %add.ptr75.i.i.epil7 = getelementptr i8, ptr %add.ptr63.i.i3, i64 %conv65.i.i.epil5
+ %2 = load <4 x i8>, ptr addrspace(5) %arrayidx73.i.i.epil6, align 8
+ store <4 x i8> %2, ptr %add.ptr75.i.i.epil7, align 1
+ %inc.i.i.3.epil = or disjoint i32 %i.0117.i.i.epil4, 1
+ %conv65.i.i.4.epil = zext i32 %inc.i.i.3.epil to i64
+ %arrayidx73.i.i.4.epil = getelementptr [32 x i8], ptr addrspace(5) %1, i32 0, i32 %inc.i.i.3.epil
+ %add.ptr75.i.i.4.epil = getelementptr i8, ptr %add.ptr63.i.i3, i64 %conv65.i.i.4.epil
+ %3 = load <4 x i8>, ptr addrspace(5) %arrayidx73.i.i.4.epil, align 4
+ store <4 x i8> %3, ptr %add.ptr75.i.i.4.epil, align 1
+ %inc.i.i.7.epil = add nuw i32 %i.0117.i.i.epil4, 1
+ br label %for.body69.i.i.epil3
+
+for.body69.i.i.epil3.for.cond.cleanup68.loopexit.i.i.unr-lcssa_crit_edge: ; No predecessors!
+ %conv65.i.i.epil = zext i32 %inc.i.i.7.epil to i64
+ ret void
+}
+
+; CHECK: SCRATCH_LOAD_DWORD_SVS %{{[0-9]+}}, %{{[0-9]+}}, -1
|
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" | ||
target triple = "amdgcn-amd-amdhsa" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These two lines are not needed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did I travel in time? I thought I saw a very similar PR yesterday.
I did not. See #121978.
I'm too slow ... closing.
No description provided.