Skip to content

Commit 7f25f52

Browse files
Authored and committed by SahilPatidar
amdgpu_gfx functions do not use s0-s3 for inreg SGPR arguments on targets using scratch instructions for stack #78226
1 parent fcb6737 commit 7f25f52

File tree

3 files changed: 15 additions and 13 deletions

llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -715,10 +715,6 @@ bool AMDGPUCallLowering::lowerFormalArguments(
715715
if (!IsEntryFunc && !IsGraphics) {
716716
// For the fixed ABI, pass workitem IDs in the last argument register.
717717
TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
718-
719-
if (!Subtarget.enableFlatScratch())
720-
CCInfo.AllocateReg(Info->getScratchRSrcReg());
721-
TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
722718
}
723719

724720
IncomingValueAssigner Assigner(AssignFn);
@@ -732,9 +728,14 @@ bool AMDGPUCallLowering::lowerFormalArguments(
732728
uint64_t StackSize = Assigner.StackSize;
733729

734730
// Start adding system SGPRs.
735-
if (IsEntryFunc)
731+
if (IsEntryFunc) {
736732
TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsGraphics);
737-
733+
} else {
734+
if (!Subtarget.enableFlatScratch())
735+
CCInfo.AllocateReg(Info->getScratchRSrcReg());
736+
if (!IsGraphics)
737+
TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
738+
}
738739
// When we tail call, we need to check if the callee's arguments will fit on
739740
// the caller's stack. So, whenever we lower formal arguments, we should keep
740741
// track of this information, since we might lower a tail call in this

llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ def CC_SI_Gfx : CallingConv<[
2323
// 33 is reserved for the frame pointer
2424
// 34 is reserved for the base pointer
2525
CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<[
26+
SGPR0, SGPR1, SGPR2, SGPR3,
2627
SGPR4, SGPR5, SGPR6, SGPR7,
2728
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
2829
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2784,12 +2784,6 @@ SDValue SITargetLowering::LowerFormalArguments(
27842784
} else if (!IsGraphics) {
27852785
// For the fixed ABI, pass workitem IDs in the last argument register.
27862786
allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
2787-
2788-
// FIXME: Sink this into allocateSpecialInputSGPRs
2789-
if (!Subtarget->enableFlatScratch())
2790-
CCInfo.AllocateReg(Info->getScratchRSrcReg());
2791-
2792-
allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
27932787
}
27942788

27952789
if (!IsKernel) {
@@ -2993,8 +2987,14 @@ SDValue SITargetLowering::LowerFormalArguments(
29932987
}
29942988

29952989
// Start adding system SGPRs.
2996-
if (IsEntryFunc)
2990+
if (IsEntryFunc) {
29972991
allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsGraphics);
2992+
} else {
2993+
if (!Subtarget->enableFlatScratch())
2994+
CCInfo.AllocateReg(Info->getScratchRSrcReg());
2995+
if (!IsGraphics)
2996+
allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
2997+
}
29982998

29992999
auto &ArgUsageInfo =
30003000
DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();

0 commit comments

Comments
 (0)