Skip to content

Commit 72e07d4

Browse files
committed
[AMDGPU] Simplify applySelectFCmpToFMinToFMaxLegacy. NFC.
1 parent cda8270 commit 72e07d4

File tree

1 file changed

+21
-58
lines changed

1 file changed

+21
-58
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp

Lines changed: 21 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,6 @@ class AMDGPUPostLegalizerCombinerImpl : public Combiner {
6666
struct FMinFMaxLegacyInfo {
6767
Register LHS;
6868
Register RHS;
69-
Register True;
70-
Register False;
7169
CmpInst::Predicate Pred;
7270
};
7371

@@ -172,76 +170,41 @@ bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
172170
m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
173171
return false;
174172

175-
Info.True = MI.getOperand(2).getReg();
176-
Info.False = MI.getOperand(3).getReg();
173+
Register True = MI.getOperand(2).getReg();
174+
Register False = MI.getOperand(3).getReg();
177175

178176
// TODO: Handle case where the selected value is an fneg and the compared
179177
// constant is the negation of the selected value.
180-
if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
181-
!(Info.LHS == Info.False && Info.RHS == Info.True))
178+
if ((Info.LHS != True || Info.RHS != False) &&
179+
(Info.LHS != False || Info.RHS != True))
182180
return false;
183181

184-
switch (Info.Pred) {
185-
case CmpInst::FCMP_FALSE:
186-
case CmpInst::FCMP_OEQ:
187-
case CmpInst::FCMP_ONE:
188-
case CmpInst::FCMP_ORD:
189-
case CmpInst::FCMP_UNO:
190-
case CmpInst::FCMP_UEQ:
191-
case CmpInst::FCMP_UNE:
192-
case CmpInst::FCMP_TRUE:
193-
return false;
194-
default:
195-
return true;
196-
}
182+
// Invert the predicate if necessary so that the apply function can assume
183+
// that the select operands are the same as the fcmp operands.
184+
// (select (fcmp P, L, R), R, L) -> (select (fcmp !P, L, R), L, R)
185+
if (Info.LHS != True)
186+
Info.Pred = CmpInst::getInversePredicate(Info.Pred);
187+
188+
// Only match </<=/>=/> not ==/!= etc.
189+
return Info.Pred != CmpInst::getSwappedPredicate(Info.Pred);
197190
}
198191

199192
void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinToFMaxLegacy(
200193
MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const {
201-
B.setInstrAndDebugLoc(MI);
202-
auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
203-
B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
204-
};
205-
206-
switch (Info.Pred) {
207-
case CmpInst::FCMP_ULT:
208-
case CmpInst::FCMP_ULE:
209-
if (Info.LHS == Info.True)
210-
buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
211-
else
212-
buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
213-
break;
214-
case CmpInst::FCMP_OLE:
215-
case CmpInst::FCMP_OLT: {
194+
unsigned Opc = (Info.Pred & CmpInst::FCMP_OGT) ? AMDGPU::G_AMDGPU_FMAX_LEGACY
195+
: AMDGPU::G_AMDGPU_FMIN_LEGACY;
196+
Register X = Info.LHS;
197+
Register Y = Info.RHS;
198+
if (Info.Pred == CmpInst::getUnorderedPredicate(Info.Pred)) {
216199
// We need to permute the operands to get the correct NaN behavior. The
217200
// selected operand is the second one based on the failing compare with NaN,
218201
// so permute it based on the compare type the hardware uses.
219-
if (Info.LHS == Info.True)
220-
buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
221-
else
222-
buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
223-
break;
224-
}
225-
case CmpInst::FCMP_UGE:
226-
case CmpInst::FCMP_UGT: {
227-
if (Info.LHS == Info.True)
228-
buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
229-
else
230-
buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
231-
break;
232-
}
233-
case CmpInst::FCMP_OGT:
234-
case CmpInst::FCMP_OGE: {
235-
if (Info.LHS == Info.True)
236-
buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
237-
else
238-
buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
239-
break;
240-
}
241-
default:
242-
llvm_unreachable("predicate should not have matched");
202+
std::swap(X, Y);
243203
}
244204

205+
B.setInstrAndDebugLoc(MI);
206+
B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
207+
245208
MI.eraseFromParent();
246209
}
247210

0 commit comments

Comments
 (0)