Skip to content

Commit ab2fde2

Browse files
author
Jun Wang
committed
Add an additional CopyToReg and CopyFromReg for the CopyFromReg
for the i1 return value.
1 parent e86e0f5 commit ab2fde2

File tree

2 files changed

+32
-9
lines changed

2 files changed

+32
-9
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3230,6 +3230,21 @@ SDValue SITargetLowering::LowerCallResult(
32303230
Val = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InGlue);
32313231
Chain = Val.getValue(1);
32323232
InGlue = Val.getValue(2);
3233+
3234+
// For i1 return value allocated to an SGPR, we want the dst reg for the
3235+
// above CopyFromReg not to be of VReg_1 when emitting machine code.
3236+
// This requires creating an additional CopyToReg followed by another
3237+
// CopyFromReg.
3238+
if (RVLocs.size() == 1 && VA.getLocVT() == MVT::i1) {
3239+
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
3240+
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
3241+
3242+
if (TRI->isSGPRReg(MRI, VA.getLocReg())) {
3243+
Register TmpVReg = MRI.createVirtualRegister(TRI->getBoolRC());
3244+
SDValue TmpCopyTo = DAG.getCopyToReg(Chain, DL, TmpVReg, Val);
3245+
Val = DAG.getCopyFromReg(TmpCopyTo, DL, TmpVReg, MVT::i1);
3246+
}
3247+
}
32333248
} else if (VA.isMemLoc()) {
32343249
report_fatal_error("TODO: return values in memory");
32353250
} else
@@ -15863,6 +15878,21 @@ static bool isCopyFromRegOfInlineAsm(const SDNode *N) {
1586315878
return false;
1586415879
}
1586515880

15881+
/// Returns true when \p N, which must be a CopyFromReg node, is reached from a
/// CALLSEQ_END through the operand-0 sequence
///   CALLSEQ_END -> CopyFromReg -> CopyToReg -> CopyFromReg (N).
/// This appears to match the extra CopyToReg/CopyFromReg pair that
/// LowerCallResult emits for an i1 return value held in an SGPR.
/// Returns false as soon as any link in that sequence has a different opcode.
///
/// The only use visible here is inside an assert in
/// isSDNodeSourceOfDivergence, which is presumably why the function is marked
/// LLVM_ATTRIBUTE_UNUSED (no use remains when asserts are compiled out).
LLVM_ATTRIBUTE_UNUSED
15882+
static bool isCopyFromRegForI1Return(const SDNode *N) {
15883+
// Precondition: callers must pass a CopyFromReg node.
assert(N->getOpcode() == ISD::CopyFromReg);
15884+
// Step 1: operand 0 of N must be a CopyToReg.
SDNode *N1 = N->getOperand(0).getNode();
15885+
if (N1->getOpcode() != ISD::CopyToReg)
15886+
return false;
15887+
// Step 2: operand 0 of that CopyToReg must be another CopyFromReg.
SDNode *N2 = N1->getOperand(0).getNode();
15888+
if (N2->getOpcode() != ISD::CopyFromReg)
15889+
return false;
15890+
// Step 3: operand 0 of that CopyFromReg must be the CALLSEQ_END of a call.
SDNode *N3 = N2->getOperand(0).getNode();
15891+
if (N3->getOpcode() != ISD::CALLSEQ_END)
15892+
return false;
15893+
return true;
15894+
}
15895+
1586615896
bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode *N,
1586715897
FunctionLoweringInfo *FLI,
1586815898
UniformityInfo *UA) const {
@@ -15880,7 +15910,8 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode *N,
1588015910
if (const Value *V = FLI->getValueFromVirtualReg(R->getReg()))
1588115911
return UA->isDivergent(V);
1588215912

15883-
assert(Reg == FLI->DemoteRegister || isCopyFromRegOfInlineAsm(N));
15913+
assert(Reg == FLI->DemoteRegister || isCopyFromRegOfInlineAsm(N) ||
15914+
isCopyFromRegForI1Return(N));
1588415915
return !TRI->isSGPRReg(MRI, Reg);
1588515916
}
1588615917
case ISD::LOAD: {

llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -689,14 +689,6 @@ bool Vreg1LoweringHelper::lowerCopiesToI1() {
689689
assert(!MI.getOperand(1).getSubReg());
690690

691691
if (!SrcReg.isVirtual() || (!isLaneMaskReg(SrcReg) && !isVreg1(SrcReg))) {
692-
if (!SrcReg.isVirtual() &&
693-
TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 64) {
694-
// When calling convention allocates SGPR for i1, for GPUs with
695-
// wavefront size 64, i1 return value is put in 64b SGPR.
696-
assert(ST->isWave64());
697-
continue;
698-
}
699-
700692
assert(TII->getRegisterInfo().getRegSizeInBits(SrcReg, *MRI) == 32);
701693
Register TmpReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
702694
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64), TmpReg)

0 commit comments

Comments
 (0)