Skip to content

Commit 7b2bcdb

Browse files
author
SahilPatidar
committed
amdgpu_gfx functions do not use s0-s3 for inreg SGPR arguments on targets using scratch instructions for stack #78226
1 parent f410f74 commit 7b2bcdb

File tree

3 files changed

+15
-13
lines changed

3 files changed

+15
-13
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -715,10 +715,6 @@ bool AMDGPUCallLowering::lowerFormalArguments(
715715
if (!IsEntryFunc && !IsGraphics) {
716716
// For the fixed ABI, pass workitem IDs in the last argument register.
717717
TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
718-
719-
if (!Subtarget.enableFlatScratch())
720-
CCInfo.AllocateReg(Info->getScratchRSrcReg());
721-
TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
722718
}
723719

724720
IncomingValueAssigner Assigner(AssignFn);
@@ -732,9 +728,14 @@ bool AMDGPUCallLowering::lowerFormalArguments(
732728
uint64_t StackSize = Assigner.StackSize;
733729

734730
// Start adding system SGPRs.
735-
if (IsEntryFunc)
731+
if (IsEntryFunc) {
736732
TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsGraphics);
737-
733+
} else {
734+
if (!Subtarget.enableFlatScratch())
735+
CCInfo.AllocateReg(Info->getScratchRSrcReg());
736+
if (!IsGraphics)
737+
TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
738+
}
738739
// When we tail call, we need to check if the callee's arguments will fit on
739740
// the caller's stack. So, whenever we lower formal arguments, we should keep
740741
// track of this information, since we might lower a tail call in this

llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ def CC_SI_Gfx : CallingConv<[
2323
// 33 is reserved for the frame pointer
2424
// 34 is reserved for the base pointer
2525
CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<[
26+
SGPR0, SGPR1, SGPR2, SGPR3,
2627
SGPR4, SGPR5, SGPR6, SGPR7,
2728
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
2829
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2837,12 +2837,6 @@ SDValue SITargetLowering::LowerFormalArguments(
28372837
} else if (!IsGraphics) {
28382838
// For the fixed ABI, pass workitem IDs in the last argument register.
28392839
allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
2840-
2841-
// FIXME: Sink this into allocateSpecialInputSGPRs
2842-
if (!Subtarget->enableFlatScratch())
2843-
CCInfo.AllocateReg(Info->getScratchRSrcReg());
2844-
2845-
allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
28462840
}
28472841

28482842
if (!IsKernel) {
@@ -3046,8 +3040,14 @@ SDValue SITargetLowering::LowerFormalArguments(
30463040
}
30473041

30483042
// Start adding system SGPRs.
3049-
if (IsEntryFunc)
3043+
if (IsEntryFunc) {
30503044
allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsGraphics);
3045+
} else {
3046+
if (!Subtarget->enableFlatScratch())
3047+
CCInfo.AllocateReg(Info->getScratchRSrcReg());
3048+
if (!IsGraphics)
3049+
allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
3050+
}
30513051

30523052
auto &ArgUsageInfo =
30533053
DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();

0 commit comments

Comments
 (0)