[ValueTracking][X86] Compute KnownBits for phadd/phsub

mskamp · mskamp · commit 8aa2cec0af34 · 2024-05-16T18:35:25.000+02:00
Add KnownBits computations to ValueTracking and X86 DAG lowering. These instructions add/subtract adjacent vector elements in their operands. Example: phadd [X1, X2] [Y1, Y2] = [X1 + X2, Y1 + Y2]. This means that, in this example, we can compute the KnownBits of the operation by computing the KnownBits of [X1, X2] + [X1, X2] and [Y1, Y2] + [Y1, Y2] and intersecting the results. This approach also generalizes to all x86 vector types. There are also the operations phadd.sw and phsub.sw, which perform saturating addition/subtraction. Use sadd_sat and ssub_sat to compute the KnownBits of these operations. Fixes #82516.
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1725,6 +1725,54 @@ static void computeKnownBitsFromOperator(const Operator *I,
       case Intrinsic::x86_sse42_crc32_64_64:
         Known.Zero.setBitsFrom(32);
         break;
+      case Intrinsic::x86_ssse3_phadd_d:
+      case Intrinsic::x86_ssse3_phadd_w:
+      case Intrinsic::x86_ssse3_phadd_d_128:
+      case Intrinsic::x86_ssse3_phadd_w_128:
+      case Intrinsic::x86_avx2_phadd_d:
+      case Intrinsic::x86_avx2_phadd_w: {
+        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
+        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
+
+        Known = KnownBits::computeForAddSub(true, false, false, Known, Known)
+                    .intersectWith(KnownBits::computeForAddSub(
+                        true, false, false, Known2, Known2));
+        break;
+      }
+      case Intrinsic::x86_ssse3_phadd_sw:
+      case Intrinsic::x86_ssse3_phadd_sw_128:
+      case Intrinsic::x86_avx2_phadd_sw: {
+        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
+        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
+
+        Known = KnownBits::sadd_sat(Known, Known)
+                    .intersectWith(KnownBits::sadd_sat(Known2, Known2));
+        break;
+      }
+      case Intrinsic::x86_ssse3_phsub_d:
+      case Intrinsic::x86_ssse3_phsub_w:
+      case Intrinsic::x86_ssse3_phsub_d_128:
+      case Intrinsic::x86_ssse3_phsub_w_128:
+      case Intrinsic::x86_avx2_phsub_d:
+      case Intrinsic::x86_avx2_phsub_w: {
+        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
+        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
+
+        Known = KnownBits::computeForAddSub(false, false, false, Known, Known)
+                    .intersectWith(KnownBits::computeForAddSub(
+                        false, false, false, Known2, Known2));
+        break;
+      }
+      case Intrinsic::x86_ssse3_phsub_sw:
+      case Intrinsic::x86_ssse3_phsub_sw_128:
+      case Intrinsic::x86_avx2_phsub_sw: {
+        computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
+        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
+
+        Known = KnownBits::ssub_sat(Known, Known)
+                    .intersectWith(KnownBits::ssub_sat(Known2, Known2));
+        break;
+      }
       case Intrinsic::riscv_vsetvli:
       case Intrinsic::riscv_vsetvlimax: {
         bool HasAVL = II->getIntrinsicID() == Intrinsic::riscv_vsetvli;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37262,6 +37262,27 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     }
     break;
   }
+  case X86ISD::HADD: {
+    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    KnownBits Known2 =
+        DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+    Known = KnownBits::computeForAddSub(true, false, false, Known, Known)
+                .intersectWith(KnownBits::computeForAddSub(true, false, false,
+                                                           Known2, Known2));
+    break;
+  }
+  case X86ISD::HSUB: {
+    Known =
+        DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    KnownBits Known2 =
+        DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+    Known = KnownBits::computeForAddSub(false, false, false, Known, Known)
+                .intersectWith(KnownBits::computeForAddSub(false, false, false,
+                                                           Known2, Known2));
+    break;
+  }
   case ISD::INTRINSIC_WO_CHAIN: {
     switch (Op->getConstantOperandVal(0)) {
     case Intrinsic::x86_sse2_psad_bw:
@@ -37276,6 +37297,58 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
       computeKnownBitsForPSADBW(LHS, RHS, Known, DemandedElts, DAG, Depth);
       break;
     }
+    case Intrinsic::x86_ssse3_phadd_d:
+    case Intrinsic::x86_ssse3_phadd_w:
+    case Intrinsic::x86_ssse3_phadd_d_128:
+    case Intrinsic::x86_ssse3_phadd_w_128:
+    case Intrinsic::x86_avx2_phadd_d:
+    case Intrinsic::x86_avx2_phadd_w: {
+      Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+      KnownBits Known2 =
+          DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+
+      Known = KnownBits::computeForAddSub(true, false, false, Known, Known)
+                  .intersectWith(KnownBits::computeForAddSub(true, false, false,
+                                                             Known2, Known2));
+      break;
+    }
+    case Intrinsic::x86_ssse3_phadd_sw:
+    case Intrinsic::x86_ssse3_phadd_sw_128:
+    case Intrinsic::x86_avx2_phadd_sw: {
+      Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+      KnownBits Known2 =
+          DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+
+      Known = KnownBits::sadd_sat(Known, Known)
+                  .intersectWith(KnownBits::sadd_sat(Known2, Known2));
+      break;
+    }
+    case Intrinsic::x86_ssse3_phsub_d:
+    case Intrinsic::x86_ssse3_phsub_w:
+    case Intrinsic::x86_ssse3_phsub_d_128:
+    case Intrinsic::x86_ssse3_phsub_w_128:
+    case Intrinsic::x86_avx2_phsub_d:
+    case Intrinsic::x86_avx2_phsub_w: {
+      Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+      KnownBits Known2 =
+          DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+
+      Known = KnownBits::computeForAddSub(false, false, false, Known, Known)
+                  .intersectWith(KnownBits::computeForAddSub(
+                      false, false, false, Known2, Known2));
+      break;
+    }
+    case Intrinsic::x86_ssse3_phsub_sw:
+    case Intrinsic::x86_ssse3_phsub_sw_128:
+    case Intrinsic::x86_avx2_phsub_sw: {
+      Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+      KnownBits Known2 =
+          DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+
+      Known = KnownBits::ssub_sat(Known, Known)
+                  .intersectWith(KnownBits::ssub_sat(Known2, Known2));
+      break;
+    }
     }
     break;
   }
diff --git a/llvm/test/Analysis/ValueTracking/knownbits-hadd-hsub.ll b/llvm/test/Analysis/ValueTracking/knownbits-hadd-hsub.ll
@@ -5,12 +5,7 @@ define <4 x i1> @hadd_and_eq_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: define <4 x i1> @hadd_and_eq_v4i32(
 ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[X]], <i32 3, i32 3, i32 3, i32 3>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[Y]], <i32 3, i32 3, i32 3, i32 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i32> [[TMP2]], <i32 -8, i32 -8, i32 -8, i32 -8>
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq <4 x i32> [[TMP3]], <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT:    ret <4 x i1> [[RET]]
+; CHECK-NEXT:    ret <4 x i1> zeroinitializer
 ;
 entry:
   %0 = and <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
@@ -25,12 +20,7 @@ define <8 x i1> @hadd_and_eq_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i16(
 ; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = and <8 x i16> [[X]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i16> [[Y]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i16> [[TMP2]], <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq <8 x i16> [[TMP3]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
-; CHECK-NEXT:    ret <8 x i1> [[RET]]
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
 ;
 entry:
   %0 = and <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -45,12 +35,7 @@ define <8 x i1> @hadd_and_eq_v8i16_sat(<8 x i16> %x, <8 x i16> %y) {
 ; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i16_sat(
 ; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = and <8 x i16> [[X]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i16> [[Y]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i16> [[TMP2]], <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq <8 x i16> [[TMP3]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
-; CHECK-NEXT:    ret <8 x i1> [[RET]]
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
 ;
 entry:
   %0 = and <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -65,12 +50,7 @@ define <8 x i1> @hadd_and_eq_v8i32(<8 x i32> %x, <8 x i32> %y) {
 ; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i32(
 ; CHECK-SAME: <8 x i32> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = and <8 x i32> [[X]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i32> [[Y]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[TMP0]], <8 x i32> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = and <8 x i32> [[TMP2]], <i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8>
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq <8 x i32> [[TMP3]], <i32 3, i32 4, i32 5, i32 6, i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT:    ret <8 x i1> [[RET]]
+; CHECK-NEXT:    ret <8 x i1> zeroinitializer
 ;
 entry:
   %0 = and <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -85,12 +65,7 @@ define <16 x i1> @hadd_and_eq_v16i16(<16 x i16> %x, <16 x i16> %y) {
 ; CHECK-LABEL: define <16 x i1> @hadd_and_eq_v16i16(
 ; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = and <16 x i16> [[X]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <16 x i16> [[Y]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[TMP0]], <16 x i16> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = and <16 x i16> [[TMP2]], <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq <16 x i16> [[TMP3]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
-; CHECK-NEXT:    ret <16 x i1> [[RET]]
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
 ;
 entry:
   %0 = and <16 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -105,12 +80,7 @@ define <16 x i1> @hadd_and_eq_v16i16_sat(<16 x i16> %x, <16 x i16> %y) {
 ; CHECK-LABEL: define <16 x i1> @hadd_and_eq_v16i16_sat(
 ; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = and <16 x i16> [[X]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <16 x i16> [[Y]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[TMP0]], <16 x i16> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = and <16 x i16> [[TMP2]], <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq <16 x i16> [[TMP3]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
-; CHECK-NEXT:    ret <16 x i1> [[RET]]
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
 ;
 entry:
   %0 = and <16 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -125,12 +95,7 @@ define <4 x i1> @hsub_trunc_eq_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: define <4 x i1> @hsub_trunc_eq_v4i32(
 ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i32> [[X]], <i32 65535, i32 65535, i32 65535, i32 65535>
-; CHECK-NEXT:    [[TMP1:%.*]] = or <4 x i32> [[Y]], <i32 65535, i32 65535, i32 65535, i32 65535>
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
-; CHECK-NEXT:    [[CONV:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq <4 x i16> [[CONV]], <i16 3, i16 4, i16 5, i16 6>
-; CHECK-NEXT:    ret <4 x i1> [[RET]]
+; CHECK-NEXT:    ret <4 x i1> zeroinitializer
 ;
 entry:
   %0 = or <4 x i32> %x, <i32 65535, i32 65535, i32 65535, i32 65535>
@@ -145,12 +110,7 @@ define <8 x i1> @hsub_trunc_eq_v8i16(<8 x i16> %x, <8 x i16> %y) {
 ; CHECK-LABEL: define <8 x i1> @hsub_trunc_eq_v8i16(
 ; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = or <8 x i16> [[X]], <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-; CHECK-NEXT:    [[TMP1:%.*]] = or <8 x i16> [[Y]], <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
-; CHECK-NEXT:    [[CONV:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq <8 x i8> [[CONV]], <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0>
-; CHECK-NEXT:    ret <8 x i1> [[RET]]
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
 ;
 entry:
   %0 = or <8 x i16> %x, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
@@ -185,12 +145,7 @@ define <8 x i1> @hsub_trunc_eq_v8i32(<8 x i32> %x, <8 x i32> %y) {
 ; CHECK-LABEL: define <8 x i1> @hsub_trunc_eq_v8i32(
 ; CHECK-SAME: <8 x i32> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = or <8 x i32> [[X]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-; CHECK-NEXT:    [[TMP1:%.*]] = or <8 x i32> [[Y]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[TMP0]], <8 x i32> [[TMP1]])
-; CHECK-NEXT:    [[CONV:%.*]] = trunc <8 x i32> [[TMP2]] to <8 x i16>
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq <8 x i16> [[CONV]], <i16 3, i16 4, i16 5, i16 6, i16 3, i16 4, i16 5, i16 6>
-; CHECK-NEXT:    ret <8 x i1> [[RET]]
+; CHECK-NEXT:    ret <8 x i1> zeroinitializer
 ;
 entry:
   %0 = or <8 x i32> %x, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
@@ -205,12 +160,7 @@ define <16 x i1> @hsub_trunc_eq_v16i16(<16 x i16> %x, <16 x i16> %y) {
 ; CHECK-LABEL: define <16 x i1> @hsub_trunc_eq_v16i16(
 ; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = or <16 x i16> [[X]], <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-; CHECK-NEXT:    [[TMP1:%.*]] = or <16 x i16> [[Y]], <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> [[TMP0]], <16 x i16> [[TMP1]])
-; CHECK-NEXT:    [[CONV:%.*]] = trunc <16 x i16> [[TMP2]] to <16 x i8>
-; CHECK-NEXT:    [[RET:%.*]] = icmp eq <16 x i8> [[CONV]], <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0>
-; CHECK-NEXT:    ret <16 x i1> [[RET]]
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
 ;
 entry:
   %0 = or <16 x i16> %x, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
diff --git a/llvm/test/CodeGen/X86/knownbits-hadd-hsub.ll b/llvm/test/CodeGen/X86/knownbits-hadd-hsub.ll