Skip to content

Commit 0486129

Browse files
author
git apple-llvm automerger
committed
Merge commit '4fc5b01ca123' from apple/main into swift/next
2 parents 2118cc3 + 4fc5b01 commit 0486129

8 files changed

+140
-201
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 46 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1497,11 +1497,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
14971497
return false;
14981498
}
14991499

1500-
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
1501-
auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
1502-
return PSV && PSV->isStack();
1503-
}
1504-
15051500
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
15061501
SDLoc DL(N);
15071502

@@ -1538,13 +1533,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
15381533
AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
15391534
VAddr = SDValue(MovHighBits, 0);
15401535

1541-
// In a call sequence, stores to the argument stack area are relative to the
1542-
// stack pointer.
1543-
const MachinePointerInfo &PtrInfo
1544-
= cast<MemSDNode>(Parent)->getPointerInfo();
1545-
SOffset = isStackPtrRelative(PtrInfo)
1546-
? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
1547-
: CurDAG->getTargetConstant(0, DL, MVT::i32);
1536+
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
15481537
ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
15491538
return true;
15501539
}
@@ -1587,28 +1576,52 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
15871576
return true;
15881577
}
15891578

1579+
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
1580+
if (Val.getOpcode() != ISD::CopyFromReg)
1581+
return false;
1582+
auto RC =
1583+
TRI.getPhysRegClass(cast<RegisterSDNode>(Val.getOperand(1))->getReg());
1584+
return RC && TRI.isSGPRClass(RC);
1585+
}
1586+
15901587
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
15911588
SDValue Addr,
15921589
SDValue &SRsrc,
15931590
SDValue &SOffset,
15941591
SDValue &Offset) const {
1595-
ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
1596-
if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1597-
return false;
1598-
1599-
SDLoc DL(Addr);
1592+
const SIRegisterInfo *TRI =
1593+
static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
16001594
MachineFunction &MF = CurDAG->getMachineFunction();
16011595
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1596+
SDLoc DL(Addr);
16021597

1603-
SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1598+
// CopyFromReg <sgpr>
1599+
if (IsCopyFromSGPR(*TRI, Addr)) {
1600+
SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1601+
SOffset = Addr;
1602+
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1603+
return true;
1604+
}
16041605

1605-
const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
1606+
ConstantSDNode *CAddr;
1607+
if (Addr.getOpcode() == ISD::ADD) {
1608+
// Add (CopyFromReg <sgpr>) <constant>
1609+
CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
1610+
if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1611+
return false;
1612+
if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
1613+
return false;
16061614

1607-
// FIXME: Get from MachinePointerInfo? We should only be using the frame
1608-
// offset if we know this is in a call sequence.
1609-
SOffset = isStackPtrRelative(PtrInfo)
1610-
? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
1611-
: CurDAG->getTargetConstant(0, DL, MVT::i32);
1615+
SOffset = Addr.getOperand(0);
1616+
} else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
1617+
SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
1618+
// <constant>
1619+
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1620+
} else {
1621+
return false;
1622+
}
1623+
1624+
SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
16121625

16131626
Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
16141627
return true;
@@ -1890,19 +1903,21 @@ static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
18901903
}
18911904

18921905
// Match (32-bit SGPR base) + sext(imm offset)
1893-
bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *N,
1894-
SDValue Addr,
1906+
bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
18951907
SDValue &SAddr,
18961908
SDValue &Offset) const {
18971909
if (Addr->isDivergent())
18981910
return false;
18991911

1900-
SAddr = Addr;
1912+
SDLoc DL(Addr);
1913+
19011914
int64_t COffsetVal = 0;
19021915

19031916
if (CurDAG->isBaseWithConstantOffset(Addr)) {
19041917
COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
19051918
SAddr = Addr.getOperand(0);
1919+
} else {
1920+
SAddr = Addr;
19061921
}
19071922

19081923
SAddr = SelectSAddrFI(CurDAG, SAddr);
@@ -1917,14 +1932,15 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *N,
19171932

19181933
COffsetVal = SplitImmOffset;
19191934

1920-
SDLoc DL(N);
19211935
SDValue AddOffset =
1922-
getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1936+
SAddr.getOpcode() == ISD::TargetFrameIndex
1937+
? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
1938+
: CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
19231939
SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_U32, DL, MVT::i32,
19241940
SAddr, AddOffset), 0);
19251941
}
19261942

1927-
Offset = CurDAG->getTargetConstant(COffsetVal, SDLoc(), MVT::i16);
1943+
Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
19281944

19291945
return true;
19301946
}

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4156,8 +4156,13 @@ SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG,
41564156
int64_t Offset) const {
41574157
MachineFunction &MF = DAG.getMachineFunction();
41584158
MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset);
4159+
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
41594160

41604161
SDValue Ptr = DAG.getConstant(Offset, SL, MVT::i32);
4162+
// Stores to the argument stack area are relative to the stack pointer.
4163+
SDValue SP =
4164+
DAG.getCopyFromReg(Chain, SL, Info->getStackPtrOffsetReg(), MVT::i32);
4165+
Ptr = DAG.getNode(ISD::ADD, SL, MVT::i32, SP, Ptr);
41614166
SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, Align(4),
41624167
MachineMemOperand::MODereferenceable);
41634168
return Store;

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 1 addition & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -3692,11 +3692,6 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
36923692
}};
36933693
}
36943694

3695-
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
3696-
auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
3697-
return PSV && PSV->isStack();
3698-
}
3699-
37003695
InstructionSelector::ComplexRendererFns
37013696
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
37023697
MachineInstr *MI = Root.getParent();
@@ -3818,18 +3813,13 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
38183813

38193814
const MachineFunction *MF = MBB->getParent();
38203815
const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
3821-
const MachineMemOperand *MMO = *MI->memoperands_begin();
3822-
const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
38233816

38243817
return {{
38253818
[=](MachineInstrBuilder &MIB) { // rsrc
38263819
MIB.addReg(Info->getScratchRSrcReg());
38273820
},
38283821
[=](MachineInstrBuilder &MIB) { // soffset
3829-
if (isStackPtrRelative(PtrInfo))
3830-
MIB.addReg(Info->getStackPtrOffsetReg());
3831-
else
3832-
MIB.addImm(0);
3822+
MIB.addImm(0);
38333823
},
38343824
[=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
38353825
}};

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3123,7 +3123,10 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
31233123
// locations, which are supposed to be immutable?
31243124
Chain = addTokenForArgument(Chain, DAG, MFI, FI);
31253125
} else {
3126-
DstAddr = PtrOff;
3126+
// Stores to the argument stack area are relative to the stack pointer.
3127+
SDValue SP = DAG.getCopyFromReg(Chain, DL, Info->getStackPtrOffsetReg(),
3128+
MVT::i32);
3129+
DstAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, SP, PtrOff);
31273130
DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
31283131
Alignment =
31293132
commonAlignment(Subtarget->getStackAlignment(), LocMemOffset);

llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -494,11 +494,11 @@ define void @too_many_args_use_workitem_id_x_byval(
494494

495495
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
496496
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
497-
; GCN-DAG: s_movk_i32 s32, 0x400
498497

499498
; GCN: buffer_store_dword [[K]], off, s[0:3], 0 offset:4
500-
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4
501499
; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0 offset:4
500+
; GCN: s_movk_i32 s32, 0x400
501+
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4
502502

503503
; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
504504
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],

llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -609,10 +609,10 @@ define void @too_many_args_use_workitem_id_x_byval(
609609
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
610610
; VARABI: enable_vgpr_workitem_id = 0
611611
; VARABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
612-
; VARABI: s_movk_i32 s32, 0x400{{$}}
613612
; VARABI: buffer_store_dword [[K]], off, s[0:3], 0 offset:4
614-
; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4
615613
; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0 offset:4
614+
; VARABI: s_movk_i32 s32, 0x400{{$}}
615+
; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4
616616

617617
; VARABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
618618
; VARABI: v_mov_b32_e32 [[RELOAD_BYVAL]],
@@ -656,8 +656,8 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1
656656
; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
657657
; VARABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
658658
; VARABI: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
659-
; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4
660659
; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
660+
; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4
661661
; VARABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
662662
; VARABI: v_mov_b32_e32 [[RELOAD_BYVAL]],
663663
; VARABI: s_swappc_b64

0 commit comments

Comments
 (0)