Skip to content

Commit 3629ff8

Browse files
jayfoad authored and yuxuanchen1997 committed
[AMDGPU] Simplify selection of llvm.amdgcn.inverse.ballot. NFCI. (#99345)
Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251064
1 parent e827ce3 commit 3629ff8

File tree

6 files changed

15 additions and 46 deletions (+15 -46 lines changed)

6 files changed

15 additions and 46 deletions (+15 -46 lines changed)

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 0 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -2775,18 +2775,6 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
27752775
case Intrinsic::amdgcn_interp_p1_f16:
27762776
SelectInterpP1F16(N);
27772777
return;
2778-
case Intrinsic::amdgcn_inverse_ballot:
2779-
switch (N->getOperand(1).getValueSizeInBits()) {
2780-
case 32:
2781-
Opcode = AMDGPU::S_INVERSE_BALLOT_U32;
2782-
break;
2783-
case 64:
2784-
Opcode = AMDGPU::S_INVERSE_BALLOT_U64;
2785-
break;
2786-
default:
2787-
llvm_unreachable("Unsupported size for inverse ballot mask.");
2788-
}
2789-
break;
27902778
default:
27912779
SelectCode(N);
27922780
break;

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 0 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1055,8 +1055,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
10551055
return selectIntrinsicCmp(I);
10561056
case Intrinsic::amdgcn_ballot:
10571057
return selectBallot(I);
1058-
case Intrinsic::amdgcn_inverse_ballot:
1059-
return selectInverseBallot(I);
10601058
case Intrinsic::amdgcn_reloc_constant:
10611059
return selectRelocConstant(I);
10621060
case Intrinsic::amdgcn_groupstaticsize:
@@ -1449,17 +1447,6 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
14491447
return true;
14501448
}
14511449

1452-
bool AMDGPUInstructionSelector::selectInverseBallot(MachineInstr &I) const {
1453-
MachineBasicBlock *BB = I.getParent();
1454-
const DebugLoc &DL = I.getDebugLoc();
1455-
const Register DstReg = I.getOperand(0).getReg();
1456-
const Register MaskReg = I.getOperand(2).getReg();
1457-
1458-
BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(MaskReg);
1459-
I.eraseFromParent();
1460-
return true;
1461-
}
1462-
14631450
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
14641451
Register DstReg = I.getOperand(0).getReg();
14651452
const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
112112
bool selectDivScale(MachineInstr &MI) const;
113113
bool selectIntrinsicCmp(MachineInstr &MI) const;
114114
bool selectBallot(MachineInstr &I) const;
115-
bool selectInverseBallot(MachineInstr &I) const;
116115
bool selectRelocConstant(MachineInstr &I) const;
117116
bool selectGroupStaticSize(MachineInstr &I) const;
118117
bool selectReturnAddress(MachineInstr &I) const;

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -5480,24 +5480,11 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
54805480
return BB;
54815481
}
54825482
case AMDGPU::S_INVERSE_BALLOT_U32:
5483-
case AMDGPU::S_INVERSE_BALLOT_U64: {
5484-
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
5485-
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
5486-
const SIRegisterInfo *TRI = ST.getRegisterInfo();
5487-
const DebugLoc &DL = MI.getDebugLoc();
5488-
const Register DstReg = MI.getOperand(0).getReg();
5489-
Register MaskReg = MI.getOperand(1).getReg();
5490-
5491-
const bool IsVALU = TRI->isVectorRegister(MRI, MaskReg);
5492-
5493-
if (IsVALU) {
5494-
MaskReg = TII->readlaneVGPRToSGPR(MaskReg, MI, MRI);
5495-
}
5496-
5497-
BuildMI(*BB, &MI, DL, TII->get(AMDGPU::COPY), DstReg).addReg(MaskReg);
5498-
MI.eraseFromParent();
5483+
case AMDGPU::S_INVERSE_BALLOT_U64:
5484+
// These opcodes only exist to let SIFixSGPRCopies insert a readfirstlane if
5485+
// necessary. After that they are equivalent to a COPY.
5486+
MI.setDesc(TII->get(AMDGPU::COPY));
54995487
return BB;
5500-
}
55015488
case AMDGPU::ENDPGM_TRAP: {
55025489
const DebugLoc &DL = MI.getDebugLoc();
55035490
if (BB->succ_empty() && std::next(MI.getIterator()) == BB->end()) {

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6686,7 +6686,9 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
66866686
MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
66876687
MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
66886688
MI.getOpcode() == AMDGPU::S_WQM_B32 ||
6689-
MI.getOpcode() == AMDGPU::S_WQM_B64) {
6689+
MI.getOpcode() == AMDGPU::S_WQM_B64 ||
6690+
MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
6691+
MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
66906692
MachineOperand &Src = MI.getOperand(1);
66916693
if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
66926694
Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI));

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -212,9 +212,15 @@ def EXIT_STRICT_WQM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
212212
}
213213

214214
let usesCustomInserter = 1 in {
215-
def S_INVERSE_BALLOT_U32 : SPseudoInstSI <(outs SReg_32:$sdst), (ins SSrc_b32:$mask)>;
215+
def S_INVERSE_BALLOT_U32 : SPseudoInstSI<
216+
(outs SReg_32:$sdst), (ins SSrc_b32:$mask),
217+
[(set i1:$sdst, (int_amdgcn_inverse_ballot i32:$mask))]
218+
>;
216219

217-
def S_INVERSE_BALLOT_U64 : SPseudoInstSI <(outs SReg_64:$sdst), (ins SSrc_b64:$mask)>;
220+
def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
221+
(outs SReg_64:$sdst), (ins SSrc_b64:$mask),
222+
[(set i1:$sdst, (int_amdgcn_inverse_ballot i64:$mask))]
223+
>;
218224
} // End usesCustomInserter = 1
219225

220226
// Pseudo instructions used for @llvm.fptrunc.round upward

0 commit comments

Comments
 (0)