@@ -7029,6 +7029,81 @@ static unsigned getExtOpcodeForPromotedOp(SDValue Op) {
7029
7029
}
7030
7030
}
7031
7031
7032
+ SDValue SITargetLowering::combineAnd(SDValue Op,
7033
+ DAGCombinerInfo &DCI) const {
7034
+ const unsigned Opc = Op.getOpcode();
7035
+ assert(Opc == ISD::AND);
7036
+
7037
+ auto &DAG = DCI.DAG;
7038
+ SDLoc DL(Op);
7039
+
7040
+ if(hasAndNot(Op)) {
7041
+ SDValue LHS = Op->getOperand(0);
7042
+ SDValue RHS = Op->getOperand(1);
7043
+
7044
+ // (and LHS, (or Y, ~Z))
7045
+ if (RHS.getOpcode() == ISD::OR && RHS.hasOneUse()) {
7046
+ SDValue Y = RHS->getOperand(0);
7047
+ SDValue NotZ = RHS->getOperand(1);
7048
+
7049
+ if (NotZ.getOpcode() == ISD::XOR && isAllOnesConstant(NotZ->getOperand(1))) {
7050
+ SDValue Z = NotZ->getOperand(0);
7051
+
7052
+ if (!isa<ConstantSDNode>(Y)) {
7053
+ SDValue NotY = DAG.getNOT(DL, Y, Y.getValueType());
7054
+ SDValue AndNotYZ = DAG.getNode(ISD::AND, DL, Y.getValueType(), NotY, Z);
7055
+ SDValue NotAndNotYZ = DAG.getNOT(DL, AndNotYZ, AndNotYZ.getValueType());
7056
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, Op.getValueType(), LHS, NotAndNotYZ);
7057
+ return NewAnd;
7058
+ }
7059
+ }
7060
+ }
7061
+ }
7062
+
7063
+ EVT OpTy = (Opc != ISD::SETCC) ? Op.getValueType()
7064
+ : Op->getOperand(0).getValueType();
7065
+ auto ExtTy = OpTy.changeElementType(MVT::i32);
7066
+
7067
+ if (DCI.isBeforeLegalizeOps() ||
7068
+ isNarrowingProfitable(Op.getNode(), ExtTy, OpTy))
7069
+ return SDValue();
7070
+
7071
+ SDValue LHS;
7072
+ SDValue RHS;
7073
+ if (Opc == ISD::SELECT) {
7074
+ LHS = Op->getOperand(1);
7075
+ RHS = Op->getOperand(2);
7076
+ } else {
7077
+ LHS = Op->getOperand(0);
7078
+ RHS = Op->getOperand(1);
7079
+ }
7080
+
7081
+ const unsigned ExtOp = getExtOpcodeForPromotedOp(Op);
7082
+ LHS = DAG.getNode(ExtOp, DL, ExtTy, {LHS});
7083
+
7084
+ // Special case: for shifts, the RHS always needs a zext.
7085
+ if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
7086
+ RHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtTy, {RHS});
7087
+ else
7088
+ RHS = DAG.getNode(ExtOp, DL, ExtTy, {RHS});
7089
+
7090
+ // setcc always return i1/i1 vec so no need to truncate after.
7091
+ if (Opc == ISD::SETCC) {
7092
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7093
+ return DAG.getSetCC(DL, Op.getValueType(), LHS, RHS, CC);
7094
+ }
7095
+
7096
+ // For other ops, we extend the operation's return type as well so we need to
7097
+ // truncate back to the original type.
7098
+ SDValue NewVal;
7099
+ if (Opc == ISD::SELECT)
7100
+ NewVal = DAG.getNode(ISD::SELECT, DL, ExtTy, {Op->getOperand(0), LHS, RHS});
7101
+ else
7102
+ NewVal = DAG.getNode(Opc, DL, ExtTy, {LHS, RHS});
7103
+
7104
+ return DAG.getZExtOrTrunc(NewVal, DL, OpTy);
7105
+ }
7106
+
7032
7107
SDValue SITargetLowering::promoteUniformOpToI32(SDValue Op,
7033
7108
DAGCombinerInfo &DCI) const {
7034
7109
const unsigned Opc = Op.getOpcode();
@@ -15244,13 +15319,17 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
15244
15319
15245
15320
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
15246
15321
DAGCombinerInfo &DCI) const {
15322
+ SelectionDAG &DAG = DCI.DAG;
15247
15323
switch (N->getOpcode()) {
15324
+ case ISD::AND:
15325
+ if (auto Res = combineAnd(SDValue(N, 0), DCI))
15326
+ return Res;
15327
+ break;
15248
15328
case ISD::ADD:
15249
15329
case ISD::SUB:
15250
15330
case ISD::SHL:
15251
15331
case ISD::SRL:
15252
15332
case ISD::SRA:
15253
- case ISD::AND:
15254
15333
case ISD::OR:
15255
15334
case ISD::XOR:
15256
15335
case ISD::MUL:
@@ -15356,7 +15435,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
15356
15435
case AMDGPUISD::CLAMP:
15357
15436
return performClampCombine(N, DCI);
15358
15437
case ISD::SCALAR_TO_VECTOR: {
15359
- SelectionDAG &DAG = DCI.DAG;
15360
15438
EVT VT = N->getValueType(0);
15361
15439
15362
15440
// v2i16 (scalar_to_vector i16:x) -> v2i16 (bitcast (any_extend i16:x))
@@ -17527,8 +17605,8 @@ SITargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
17527
17605
}
17528
17606
17529
17607
bool SITargetLowering::hasAndNot(SDValue Op) const {
17530
- // Return false if the operation is divergent, as AND-NOT optimization
17531
- // requires uniform behavior across threads .
17608
+ // Return false if the operation is divergent, as AND-NOT is a scalar-only
17609
+ // instruction.
17532
17610
if (Op->isDivergent())
17533
17611
return false;
17534
17612
0 commit comments