Skip to content

Commit 548e519

Browse files
committed
Modify performSelectCombine and foldFreeOpFromSelect to prevent the
performFNegCombine changes from being unwound. However, this still needs work, as the changes cause a mixture of codegen regressions and improvements in the fneg-* tests. Also update shl64-reduce.ll for the vector v2i32 `and` case.
1 parent 995e7fb commit 548e519

File tree

6 files changed

+188
-151
lines changed

6 files changed

+188
-151
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4654,8 +4654,27 @@ AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
46544654
if (!AMDGPUTargetLowering::allUsesHaveSourceMods(N.getNode()))
46554655
return SDValue();
46564656

4657-
return distributeOpThroughSelect(DCI, LHS.getOpcode(),
4658-
SDLoc(N), Cond, LHS, RHS);
4657+
// select c, (fneg (f32 bitcast i32 x)), (fneg (f32 bitcast i32 y)) can be
4658+
// lowered directly to a V_CNDMASK_. So prevent the fneg from being pulled
4659+
// out in this case. For now I've made the logic as specific to the case as
4660+
// possible, hopefully this can be relaxed in future.
4661+
if (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG) {
4662+
SDValue LHSB = LHS.getOperand(0);
4663+
SDValue RHSB = RHS.getOperand(0);
4664+
if (LHSB.getOpcode() == ISD::BITCAST &&
4665+
RHSB->getOpcode() == ISD::BITCAST) {
4666+
EVT LHSBOpTy = LHSB->getOperand(0).getValueType();
4667+
EVT RHSBOpTy = RHSB->getOperand(0).getValueType();
4668+
if (LHSB.getValueType() == MVT::f32 &&
4669+
RHSB.getValueType() == MVT::f32 && LHSBOpTy == MVT::i32 &&
4670+
RHSBOpTy == MVT::i32) {
4671+
return SDValue();
4672+
}
4673+
}
4674+
}
4675+
4676+
return distributeOpThroughSelect(DCI, LHS.getOpcode(), SDLoc(N), Cond, LHS,
4677+
RHS);
46594678
}
46604679

46614680
bool Inv = false;
@@ -4708,8 +4727,8 @@ AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
47084727
if (Inv)
47094728
std::swap(NewLHS, NewRHS);
47104729

4711-
SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
4712-
Cond, NewLHS, NewRHS);
4730+
SDValue NewSelect =
4731+
DAG.getNode(ISD::SELECT, SL, VT, Cond, NewLHS, NewRHS);
47134732
DCI.AddToWorklist(NewSelect.getNode());
47144733
return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
47154734
}
@@ -5047,8 +5066,20 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
50475066
}
50485067
case ISD::SELECT: {
50495068
// fneg (select c, a, b) -> select c, (fneg a), (fneg b)
5069+
// This combine became necessary recently to prevent a regression after v2i32 xor was made legal.
5070+
// When adding this combine a case was added to performFNEGCombine to prevent this combine from
5071+
// being undone under certain conditions.
50505072
// TODO: Invert conditions of foldFreeOpFromSelect
5051-
return SDValue();
5073+
SDValue Cond = N0.getOperand(0);
5074+
SDValue LHS = N0.getOperand(1);
5075+
SDValue RHS = N0.getOperand(2);
5076+
EVT LHVT = LHS.getValueType();
5077+
EVT RHVT = RHS.getValueType();
5078+
5079+
SDValue LFNeg = DAG.getNode(ISD::FNEG, SL, LHVT, LHS);
5080+
SDValue RFNeg = DAG.getNode(ISD::FNEG, SL, RHVT, RHS);
5081+
SDValue Op = DAG.getNode(Opc, SL, LHVT, Cond, LFNeg, RFNeg);
5082+
return Op;
50525083
}
50535084
case ISD::BITCAST: {
50545085
SDLoc SL(N);

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -366,9 +366,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
366366
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
367367
Type *Ty) const override;
368368

369-
// bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
370-
// EVT VT) const override;
371-
372369
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
373370
unsigned Index) const override;
374371
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2376,10 +2376,6 @@ def : AMDGPUPat <
23762376
$src1), sub1)
23772377
>;
23782378

2379-
def : AMDGPUPat <
2380-
(fneg (select i1:$src0, (f32 (bitconvert i32:$src1)), (f32 (bitconvert i32:$src2)))),
2381-
(V_CNDMASK_B32_e64 (i32 1), $src2, (i32 1), $src1, $src0)>;
2382-
23832379
let True16Predicate = NotHasTrue16BitInsts in {
23842380
def : ROTRPattern <V_ALIGNBIT_B32_e64>;
23852381

@@ -2395,11 +2391,6 @@ def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
23952391

23962392
let True16Predicate = UseRealTrue16Insts in {
23972393

2398-
// Prevents regression in fneg-modifier-casting.ll along with modifications to XorCombine() when v2i32 or is legal.
2399-
def : AMDGPUPat <
2400-
(fneg (select i1:$src0, (f32 (bitconvert i32:$src1)), (f32 (bitconvert i32:$src2)))),
2401-
(V_CNDMASK_B32_e64 (i32 1), $src2, (i32 1), $src1, $src0)>;
2402-
24032394
def : GCNPat <
24042395
(rotr i32:$src0, i32:$src1),
24052396
(V_ALIGNBIT_B32_t16_e64 /* src0_modifiers */ 0, $src0,

0 commit comments

Comments
 (0)