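[AMDGPU] Remove combineAnd.

Delete SITargetLowering::combineAnd and its dedicated ISD::AND case in
PerformDAGCombine; ISD::AND now joins the shared case list alongside ADD,
SUB, SHL, SRL, SRA, OR, XOR and MUL. The DAG local in PerformDAGCombine is
moved into the SCALAR_TO_VECTOR case. hasAndNot now also returns false for
nodes that are not machine opcodes. Add scalar and-not / or-not tests with
inreg arguments to andorn2.ll.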

harrisonGPU · harrisonGPU · commit ea08a49b1ace · 2024-10-18T18:27:27.000+08:00
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6822,84 +6822,6 @@ static unsigned getExtOpcodeForPromotedOp(SDValue Op) {
   }
 }
 
-SDValue SITargetLowering::combineAnd(SDValue Op, DAGCombinerInfo &DCI) const {
-  const unsigned Opc = Op.getOpcode();
-  assert(Opc == ISD::AND);
-
-  auto &DAG = DCI.DAG;
-  SDLoc DL(Op);
-
-  if (hasAndNot(Op)) {
-    SDValue LHS = Op->getOperand(0);
-    SDValue RHS = Op->getOperand(1);
-
-    // (and LHS, (or Y, ~Z))
-    if (RHS.getOpcode() == ISD::OR && RHS.hasOneUse()) {
-      SDValue Y = RHS->getOperand(0);
-      SDValue NotZ = RHS->getOperand(1);
-
-      if (NotZ.getOpcode() == ISD::XOR &&
-          isAllOnesConstant(NotZ->getOperand(1))) {
-        SDValue Z = NotZ->getOperand(0);
-
-        if (!isa<ConstantSDNode>(Y)) {
-          SDValue NotY = DAG.getNOT(DL, Y, Y.getValueType());
-          SDValue AndNotYZ =
-              DAG.getNode(ISD::AND, DL, Y.getValueType(), NotY, Z);
-          SDValue NotAndNotYZ =
-              DAG.getNOT(DL, AndNotYZ, AndNotYZ.getValueType());
-          SDValue NewAnd =
-              DAG.getNode(ISD::AND, DL, Op.getValueType(), LHS, NotAndNotYZ);
-          return NewAnd;
-        }
-      }
-    }
-  }
-
-  EVT OpTy = (Opc != ISD::SETCC) ? Op.getValueType()
-                                 : Op->getOperand(0).getValueType();
-  auto ExtTy = OpTy.changeElementType(MVT::i32);
-
-  if (DCI.isBeforeLegalizeOps() ||
-      isNarrowingProfitable(Op.getNode(), ExtTy, OpTy))
-    return SDValue();
-
-  SDValue LHS;
-  SDValue RHS;
-  if (Opc == ISD::SELECT) {
-    LHS = Op->getOperand(1);
-    RHS = Op->getOperand(2);
-  } else {
-    LHS = Op->getOperand(0);
-    RHS = Op->getOperand(1);
-  }
-
-  const unsigned ExtOp = getExtOpcodeForPromotedOp(Op);
-  LHS = DAG.getNode(ExtOp, DL, ExtTy, {LHS});
-
-  // Special case: for shifts, the RHS always needs a zext.
-  if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
-    RHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtTy, {RHS});
-  else
-    RHS = DAG.getNode(ExtOp, DL, ExtTy, {RHS});
-
-  // setcc always return i1/i1 vec so no need to truncate after.
-  if (Opc == ISD::SETCC) {
-    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-    return DAG.getSetCC(DL, Op.getValueType(), LHS, RHS, CC);
-  }
-
-  // For other ops, we extend the operation's return type as well so we need to
-  // truncate back to the original type.
-  SDValue NewVal;
-  if (Opc == ISD::SELECT)
-    NewVal = DAG.getNode(ISD::SELECT, DL, ExtTy, {Op->getOperand(0), LHS, RHS});
-  else
-    NewVal = DAG.getNode(Opc, DL, ExtTy, {LHS, RHS});
-
-  return DAG.getZExtOrTrunc(NewVal, DL, OpTy);
-}
-
 SDValue SITargetLowering::promoteUniformOpToI32(SDValue Op,
                                                 DAGCombinerInfo &DCI) const {
   const unsigned Opc = Op.getOpcode();
@@ -14877,17 +14799,13 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
 
 SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
-  SelectionDAG &DAG = DCI.DAG;
   switch (N->getOpcode()) {
-  case ISD::AND:
-    if (auto Res = combineAnd(SDValue(N, 0), DCI))
-      return Res;
-    break;
   case ISD::ADD:
   case ISD::SUB:
   case ISD::SHL:
   case ISD::SRL:
   case ISD::SRA:
+  case ISD::AND:
   case ISD::OR:
   case ISD::XOR:
   case ISD::MUL:
@@ -14991,6 +14909,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
   case AMDGPUISD::CLAMP:
     return performClampCombine(N, DCI);
   case ISD::SCALAR_TO_VECTOR: {
+    SelectionDAG &DAG = DCI.DAG;
     EVT VT = N->getValueType(0);
 
     // v2i16 (scalar_to_vector i16:x) -> v2i16 (bitcast (any_extend i16:x))
@@ -16974,7 +16893,7 @@ SITargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
 bool SITargetLowering::hasAndNot(SDValue Op) const {
   // Return false if the operation is divergent, as AND-NOT is a scalar-only
   // instruction.
-  if (Op->isDivergent())
+  if (Op->isDivergent() || !Op->isMachineOpcode())
     return false;
 
   EVT VT = Op.getValueType();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -147,7 +147,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
-  SDValue combineAnd(SDValue Op, DAGCombinerInfo &DCI) const;
   SDValue promoteUniformOpToI32(SDValue Op, DAGCombinerInfo &DCI) const;
   SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AMDGPU/andorn2.ll b/llvm/test/CodeGen/AMDGPU/andorn2.ll
@@ -25,6 +25,28 @@ entry:
   ret void
 }
 
+; GCN-LABEL: {{^}}scalar_andn2_i32_one_sgpr
+; GCN: s_andn2_b32
+define amdgpu_kernel void @scalar_andn2_i32_one_sgpr(
+    ptr addrspace(1) %r0, i32 inreg %a, i32 inreg %b) {
+entry:  ; Computes %a & ~%b on two inreg i32 arguments and stores it to %r0.
+  %nb = xor i32 %b, -1  ; %nb = ~%b (xor with all-ones is bitwise NOT)
+  %r0.val = and i32 %a, %nb  ; %a & ~%b: the check line above expects s_andn2_b32
+  store i32 %r0.val, ptr addrspace(1) %r0  ; write the result out through %r0
+  ret void
+}
+
+; GCN-LABEL: {{^}}scalar_andn2_i64_one_sgpr
+; GCN: s_andn2_b64
+define amdgpu_kernel void @scalar_andn2_i64_one_sgpr(
+    ptr addrspace(1) %r0, i64 inreg %a, i64 inreg %b) {
+entry:  ; Computes %a & ~%b on two inreg i64 arguments and stores it to %r0.
+  %nb = xor i64 %b, -1  ; %nb = ~%b (xor with all-ones is bitwise NOT)
+  %r0.val = and i64 %a, %nb  ; %a & ~%b: the check line above expects s_andn2_b64
+  store i64 %r0.val, ptr addrspace(1) %r0  ; write the result out through %r0
+  ret void
+}
+
 ; GCN-LABEL: {{^}}scalar_orn2_i32_one_use
 ; GCN: s_orn2_b32
 define amdgpu_kernel void @scalar_orn2_i32_one_use(
@@ -47,6 +69,28 @@ entry:
   ret void
 }
 
+; GCN-LABEL: {{^}}scalar_orn2_i32_one_use_sgpr
+; GCN: s_orn2_b32
+define amdgpu_kernel void @scalar_orn2_i32_one_use_sgpr(
+    ptr addrspace(1) %r0, i32 inreg %a, i32 inreg %b) {
+entry:  ; Computes %a | ~%b on two inreg i32 arguments and stores it to %r0.
+  %nb = xor i32 %b, -1  ; %nb = ~%b (xor with all-ones is bitwise NOT)
+  %r0.val = or i32 %a, %nb  ; %a | ~%b: the check line above expects s_orn2_b32
+  store i32 %r0.val, ptr addrspace(1) %r0  ; write the result out through %r0
+  ret void
+}
+
+; GCN-LABEL: {{^}}scalar_orn2_i64_one_use_sgpr
+; GCN: s_orn2_b64
+define amdgpu_kernel void @scalar_orn2_i64_one_use_sgpr(
+    ptr addrspace(1) %r0, i64 inreg %a, i64 inreg %b) {
+entry:  ; Computes %a | ~%b on two inreg i64 arguments and stores it to %r0.
+  %nb = xor i64 %b, -1  ; %nb = ~%b (xor with all-ones is bitwise NOT)
+  %r0.val = or i64 %a, %nb  ; %a | ~%b: the check line above expects s_orn2_b64
+  store i64 %r0.val, ptr addrspace(1) %r0  ; write the result out through %r0
+  ret void
+}
+
 ; GCN-LABEL: {{^}}vector_andn2_i32_s_v_one_use
 ; GCN: v_not_b32
 ; GCN: v_and_b32
diff --git a/llvm/test/CodeGen/AMDGPU/andornot.ll b/llvm/test/CodeGen/AMDGPU/andornot.ll