Skip to content

Commit 548e519

Browse files
committed
Modify performSelectCombine and foldFreeOpFromSelect to prevent the
performFNegCombine changes from being unwound. However, this still needs work, as the changes cause a mixture of codegen regressions and improvements in the fneg-* tests. Also update shl64-reduce.ll for the vector v2i32 `and` case.
1 parent 995e7fb commit 548e519

File tree

6 files changed

+188
-151
lines changed

6 files changed

+188
-151
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4654,8 +4654,27 @@ AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
46544654
if (!AMDGPUTargetLowering::allUsesHaveSourceMods(N.getNode()))
46554655
return SDValue();
46564656

4657-
return distributeOpThroughSelect(DCI, LHS.getOpcode(),
4658-
SDLoc(N), Cond, LHS, RHS);
4657+
// select c, (fneg (f32 bitcast i32 x)), (fneg (f32 bitcast i32 y)) can be
4658+
// lowered directly to a V_CNDMASK_. So prevent the fneg from being pulled
4659+
// out in this case. For now I've made the logic as specific to the case as
4660+
// possible, hopefully this can be relaxed in future.
4661+
if (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG) {
4662+
SDValue LHSB = LHS.getOperand(0);
4663+
SDValue RHSB = RHS.getOperand(0);
4664+
if (LHSB.getOpcode() == ISD::BITCAST &&
4665+
RHSB->getOpcode() == ISD::BITCAST) {
4666+
EVT LHSBOpTy = LHSB->getOperand(0).getValueType();
4667+
EVT RHSBOpTy = RHSB->getOperand(0).getValueType();
4668+
if (LHSB.getValueType() == MVT::f32 &&
4669+
RHSB.getValueType() == MVT::f32 && LHSBOpTy == MVT::i32 &&
4670+
RHSBOpTy == MVT::i32) {
4671+
return SDValue();
4672+
}
4673+
}
4674+
}
4675+
4676+
return distributeOpThroughSelect(DCI, LHS.getOpcode(), SDLoc(N), Cond, LHS,
4677+
RHS);
46594678
}
46604679

46614680
bool Inv = false;
@@ -4708,8 +4727,8 @@ AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
47084727
if (Inv)
47094728
std::swap(NewLHS, NewRHS);
47104729

4711-
SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
4712-
Cond, NewLHS, NewRHS);
4730+
SDValue NewSelect =
4731+
DAG.getNode(ISD::SELECT, SL, VT, Cond, NewLHS, NewRHS);
47134732
DCI.AddToWorklist(NewSelect.getNode());
47144733
return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
47154734
}
@@ -5047,8 +5066,20 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
50475066
}
50485067
case ISD::SELECT: {
50495068
// fneg (select c, a, b) -> select c, (fneg a), (fneg b)
5069+
// This combine became necessary recently to prevent a regression after v2i32 xor was made legal.
5070+
// When adding this combine a case was added to performFNEGCombine to prevent this combine from
5071+
// being undone under certain conditions.
50505072
// TODO: Invert conditions of foldFreeOpFromSelect
5051-
return SDValue();
5073+
SDValue Cond = N0.getOperand(0);
5074+
SDValue LHS = N0.getOperand(1);
5075+
SDValue RHS = N0.getOperand(2);
5076+
EVT LHVT = LHS.getValueType();
5077+
EVT RHVT = RHS.getValueType();
5078+
5079+
SDValue LFNeg = DAG.getNode(ISD::FNEG, SL, LHVT, LHS);
5080+
SDValue RFNeg = DAG.getNode(ISD::FNEG, SL, RHVT, RHS);
5081+
SDValue Op = DAG.getNode(Opc, SL, LHVT, Cond, LFNeg, RFNeg);
5082+
return Op;
50525083
}
50535084
case ISD::BITCAST: {
50545085
SDLoc SL(N);

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -366,9 +366,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
366366
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
367367
Type *Ty) const override;
368368

369-
// bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
370-
// EVT VT) const override;
371-
372369
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
373370
unsigned Index) const override;
374371
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2376,10 +2376,6 @@ def : AMDGPUPat <
23762376
$src1), sub1)
23772377
>;
23782378

2379-
def : AMDGPUPat <
2380-
(fneg (select i1:$src0, (f32 (bitconvert i32:$src1)), (f32 (bitconvert i32:$src2)))),
2381-
(V_CNDMASK_B32_e64 (i32 1), $src2, (i32 1), $src1, $src0)>;
2382-
23832379
let True16Predicate = NotHasTrue16BitInsts in {
23842380
def : ROTRPattern <V_ALIGNBIT_B32_e64>;
23852381

@@ -2395,11 +2391,6 @@ def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
23952391

23962392
let True16Predicate = UseRealTrue16Insts in {
23972393

2398-
// Prevents regression in fneg-modifier-casting.ll along with modifications to XorCombine() when v2i32 or is legal.
2399-
def : AMDGPUPat <
2400-
(fneg (select i1:$src0, (f32 (bitconvert i32:$src1)), (f32 (bitconvert i32:$src2)))),
2401-
(V_CNDMASK_B32_e64 (i32 1), $src2, (i32 1), $src1, $src0)>;
2402-
24032394
def : GCNPat <
24042395
(rotr i32:$src0, i32:$src1),
24052396
(V_ALIGNBIT_B32_t16_e64 /* src0_modifiers */ 0, $src0,

0 commit comments

Comments
 (0)