Skip to content

Commit 690f5b7

Browse files
committed
[AMDGPU] Fix function calls with flat scratch
When flat scratch is used, the stack pointer needs to be added to the address when writing call arguments to the stack. For buffer instructions, this addition was done during instruction selection, in SelectMUBUFScratchOffen and SelectMUBUFScratchOffset. Move it to call argument lowering instead, as is already done in GlobalISel. Differential Revision: https://reviews.llvm.org/D103166
1 parent 6133b60 commit 690f5b7

8 files changed

+140
-201
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 46 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1497,11 +1497,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
14971497
return false;
14981498
}
14991499

1500-
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
1501-
auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
1502-
return PSV && PSV->isStack();
1503-
}
1504-
15051500
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
15061501
SDLoc DL(N);
15071502

@@ -1538,13 +1533,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
15381533
AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
15391534
VAddr = SDValue(MovHighBits, 0);
15401535

1541-
// In a call sequence, stores to the argument stack area are relative to the
1542-
// stack pointer.
1543-
const MachinePointerInfo &PtrInfo
1544-
= cast<MemSDNode>(Parent)->getPointerInfo();
1545-
SOffset = isStackPtrRelative(PtrInfo)
1546-
? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
1547-
: CurDAG->getTargetConstant(0, DL, MVT::i32);
1536+
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
15481537
ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
15491538
return true;
15501539
}
@@ -1587,28 +1576,52 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
15871576
return true;
15881577
}
15891578

1579+
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
1580+
if (Val.getOpcode() != ISD::CopyFromReg)
1581+
return false;
1582+
auto RC =
1583+
TRI.getPhysRegClass(cast<RegisterSDNode>(Val.getOperand(1))->getReg());
1584+
return RC && TRI.isSGPRClass(RC);
1585+
}
1586+
15901587
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
15911588
SDValue Addr,
15921589
SDValue &SRsrc,
15931590
SDValue &SOffset,
15941591
SDValue &Offset) const {
1595-
ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
1596-
if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1597-
return false;
1598-
1599-
SDLoc DL(Addr);
1592+
const SIRegisterInfo *TRI =
1593+
static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
16001594
MachineFunction &MF = CurDAG->getMachineFunction();
16011595
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1596+
SDLoc DL(Addr);
16021597

1603-
SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1598+
// CopyFromReg <sgpr>
1599+
if (IsCopyFromSGPR(*TRI, Addr)) {
1600+
SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1601+
SOffset = Addr;
1602+
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1603+
return true;
1604+
}
16041605

1605-
const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
1606+
ConstantSDNode *CAddr;
1607+
if (Addr.getOpcode() == ISD::ADD) {
1608+
// Add (CopyFromReg <sgpr>) <constant>
1609+
CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
1610+
if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1611+
return false;
1612+
if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
1613+
return false;
16061614

1607-
// FIXME: Get from MachinePointerInfo? We should only be using the frame
1608-
// offset if we know this is in a call sequence.
1609-
SOffset = isStackPtrRelative(PtrInfo)
1610-
? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
1611-
: CurDAG->getTargetConstant(0, DL, MVT::i32);
1615+
SOffset = Addr.getOperand(0);
1616+
} else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
1617+
SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
1618+
// <constant>
1619+
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1620+
} else {
1621+
return false;
1622+
}
1623+
1624+
SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
16121625

16131626
Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
16141627
return true;
@@ -1890,19 +1903,21 @@ static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
18901903
}
18911904

18921905
// Match (32-bit SGPR base) + sext(imm offset)
1893-
bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *N,
1894-
SDValue Addr,
1906+
bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
18951907
SDValue &SAddr,
18961908
SDValue &Offset) const {
18971909
if (Addr->isDivergent())
18981910
return false;
18991911

1900-
SAddr = Addr;
1912+
SDLoc DL(Addr);
1913+
19011914
int64_t COffsetVal = 0;
19021915

19031916
if (CurDAG->isBaseWithConstantOffset(Addr)) {
19041917
COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
19051918
SAddr = Addr.getOperand(0);
1919+
} else {
1920+
SAddr = Addr;
19061921
}
19071922

19081923
SAddr = SelectSAddrFI(CurDAG, SAddr);
@@ -1917,14 +1932,15 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *N,
19171932

19181933
COffsetVal = SplitImmOffset;
19191934

1920-
SDLoc DL(N);
19211935
SDValue AddOffset =
1922-
getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1936+
SAddr.getOpcode() == ISD::TargetFrameIndex
1937+
? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
1938+
: CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
19231939
SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, DL, MVT::i32,
19241940
SAddr, AddOffset), 0);
19251941
}
19261942

1927-
Offset = CurDAG->getTargetConstant(COffsetVal, SDLoc(), MVT::i16);
1943+
Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
19281944

19291945
return true;
19301946
}

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4156,8 +4156,13 @@ SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG,
41564156
int64_t Offset) const {
41574157
MachineFunction &MF = DAG.getMachineFunction();
41584158
MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset);
4159+
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
41594160

41604161
SDValue Ptr = DAG.getConstant(Offset, SL, MVT::i32);
4162+
// Stores to the argument stack area are relative to the stack pointer.
4163+
SDValue SP =
4164+
DAG.getCopyFromReg(Chain, SL, Info->getStackPtrOffsetReg(), MVT::i32);
4165+
Ptr = DAG.getNode(ISD::ADD, SL, MVT::i32, SP, Ptr);
41614166
SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, Align(4),
41624167
MachineMemOperand::MODereferenceable);
41634168
return Store;

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3692,11 +3692,6 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
36923692
}};
36933693
}
36943694

3695-
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
3696-
auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
3697-
return PSV && PSV->isStack();
3698-
}
3699-
37003695
InstructionSelector::ComplexRendererFns
37013696
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
37023697
MachineInstr *MI = Root.getParent();
@@ -3818,18 +3813,13 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
38183813

38193814
const MachineFunction *MF = MBB->getParent();
38203815
const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
3821-
const MachineMemOperand *MMO = *MI->memoperands_begin();
3822-
const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
38233816

38243817
return {{
38253818
[=](MachineInstrBuilder &MIB) { // rsrc
38263819
MIB.addReg(Info->getScratchRSrcReg());
38273820
},
38283821
[=](MachineInstrBuilder &MIB) { // soffset
3829-
if (isStackPtrRelative(PtrInfo))
3830-
MIB.addReg(Info->getStackPtrOffsetReg());
3831-
else
3832-
MIB.addImm(0);
3822+
MIB.addImm(0);
38333823
},
38343824
[=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
38353825
}};

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3123,7 +3123,10 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
31233123
// locations, which are supposed to be immutable?
31243124
Chain = addTokenForArgument(Chain, DAG, MFI, FI);
31253125
} else {
3126-
DstAddr = PtrOff;
3126+
// Stores to the argument stack area are relative to the stack pointer.
3127+
SDValue SP = DAG.getCopyFromReg(Chain, DL, Info->getStackPtrOffsetReg(),
3128+
MVT::i32);
3129+
DstAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, SP, PtrOff);
31273130
DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
31283131
Alignment =
31293132
commonAlignment(Subtarget->getStackAlignment(), LocMemOffset);

llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -494,11 +494,11 @@ define void @too_many_args_use_workitem_id_x_byval(
494494

495495
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
496496
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
497-
; GCN-DAG: s_movk_i32 s32, 0x400
498497

499498
; GCN: buffer_store_dword [[K]], off, s[0:3], 0 offset:4
500-
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4
501499
; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0 offset:4
500+
; GCN: s_movk_i32 s32, 0x400
501+
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4
502502

503503
; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
504504
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],

llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -609,10 +609,10 @@ define void @too_many_args_use_workitem_id_x_byval(
609609
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
610610
; VARABI: enable_vgpr_workitem_id = 0
611611
; VARABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
612-
; VARABI: s_movk_i32 s32, 0x400{{$}}
613612
; VARABI: buffer_store_dword [[K]], off, s[0:3], 0 offset:4
614-
; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4
615613
; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0 offset:4
614+
; VARABI: s_movk_i32 s32, 0x400{{$}}
615+
; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4
616616

617617
; VARABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
618618
; VARABI: v_mov_b32_e32 [[RELOAD_BYVAL]],
@@ -656,8 +656,8 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1
656656
; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
657657
; VARABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
658658
; VARABI: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
659-
; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4
660659
; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
660+
; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4
661661
; VARABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
662662
; VARABI: v_mov_b32_e32 [[RELOAD_BYVAL]],
663663
; VARABI: s_swappc_b64

0 commit comments

Comments
 (0)