Skip to content

Commit 8aa2cec

Browse files
committed
[ValueTracking][X86] Compute KnownBits for phadd/phsub
Add KnownBits computations for the x86 horizontal add/subtract operations (phadd/phsub) to ValueTracking and to X86 DAG lowering. These instructions add/subtract adjacent vector elements in their operands. Example: phadd [X1, X2] [Y1, Y2] = [X1 + X2, Y1 + Y2]. The KnownBits of a vector value are the bits common to all of its (demanded) elements, so, in this example, we can compute the KnownBits of the operation by computing the KnownBits of [X1, X2] + [X1, X2] and of [Y1, Y2] + [Y1, Y2] (each operand combined with itself) and intersecting the two results. This approach also generalizes to all x86 vector types. There are also the operations phadd.sw and phsub.sw, which perform saturating addition/subtraction; the KnownBits of these operations are computed with sadd_sat and ssub_sat. Fixes #82516.
1 parent b12b999 commit 8aa2cec

File tree

4 files changed

+140
-94
lines changed

4 files changed

+140
-94
lines changed

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1725,6 +1725,54 @@ static void computeKnownBitsFromOperator(const Operator *I,
17251725
case Intrinsic::x86_sse42_crc32_64_64:
17261726
Known.Zero.setBitsFrom(32);
17271727
break;
1728+
case Intrinsic::x86_ssse3_phadd_d:
1729+
case Intrinsic::x86_ssse3_phadd_w:
1730+
case Intrinsic::x86_ssse3_phadd_d_128:
1731+
case Intrinsic::x86_ssse3_phadd_w_128:
1732+
case Intrinsic::x86_avx2_phadd_d:
1733+
case Intrinsic::x86_avx2_phadd_w: {
1734+
computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1735+
computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1736+
1737+
Known = KnownBits::computeForAddSub(true, false, false, Known, Known)
1738+
.intersectWith(KnownBits::computeForAddSub(
1739+
true, false, false, Known2, Known2));
1740+
break;
1741+
}
1742+
case Intrinsic::x86_ssse3_phadd_sw:
1743+
case Intrinsic::x86_ssse3_phadd_sw_128:
1744+
case Intrinsic::x86_avx2_phadd_sw: {
1745+
computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1746+
computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1747+
1748+
Known = KnownBits::sadd_sat(Known, Known)
1749+
.intersectWith(KnownBits::sadd_sat(Known2, Known2));
1750+
break;
1751+
}
1752+
case Intrinsic::x86_ssse3_phsub_d:
1753+
case Intrinsic::x86_ssse3_phsub_w:
1754+
case Intrinsic::x86_ssse3_phsub_d_128:
1755+
case Intrinsic::x86_ssse3_phsub_w_128:
1756+
case Intrinsic::x86_avx2_phsub_d:
1757+
case Intrinsic::x86_avx2_phsub_w: {
1758+
computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1759+
computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1760+
1761+
Known = KnownBits::computeForAddSub(false, false, false, Known, Known)
1762+
.intersectWith(KnownBits::computeForAddSub(
1763+
false, false, false, Known2, Known2));
1764+
break;
1765+
}
1766+
case Intrinsic::x86_ssse3_phsub_sw:
1767+
case Intrinsic::x86_ssse3_phsub_sw_128:
1768+
case Intrinsic::x86_avx2_phsub_sw: {
1769+
computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1770+
computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1771+
1772+
Known = KnownBits::ssub_sat(Known, Known)
1773+
.intersectWith(KnownBits::ssub_sat(Known2, Known2));
1774+
break;
1775+
}
17281776
case Intrinsic::riscv_vsetvli:
17291777
case Intrinsic::riscv_vsetvlimax: {
17301778
bool HasAVL = II->getIntrinsicID() == Intrinsic::riscv_vsetvli;

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37262,6 +37262,27 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3726237262
}
3726337263
break;
3726437264
}
37265+
case X86ISD::HADD: {
37266+
Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
37267+
KnownBits Known2 =
37268+
DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
37269+
37270+
Known = KnownBits::computeForAddSub(true, false, false, Known, Known)
37271+
.intersectWith(KnownBits::computeForAddSub(true, false, false,
37272+
Known2, Known2));
37273+
break;
37274+
}
37275+
case X86ISD::HSUB: {
37276+
Known =
37277+
DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
37278+
KnownBits Known2 =
37279+
DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
37280+
37281+
Known = KnownBits::computeForAddSub(false, false, false, Known, Known)
37282+
.intersectWith(KnownBits::computeForAddSub(false, false, false,
37283+
Known2, Known2));
37284+
break;
37285+
}
3726537286
case ISD::INTRINSIC_WO_CHAIN: {
3726637287
switch (Op->getConstantOperandVal(0)) {
3726737288
case Intrinsic::x86_sse2_psad_bw:
@@ -37276,6 +37297,58 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3727637297
computeKnownBitsForPSADBW(LHS, RHS, Known, DemandedElts, DAG, Depth);
3727737298
break;
3727837299
}
37300+
case Intrinsic::x86_ssse3_phadd_d:
37301+
case Intrinsic::x86_ssse3_phadd_w:
37302+
case Intrinsic::x86_ssse3_phadd_d_128:
37303+
case Intrinsic::x86_ssse3_phadd_w_128:
37304+
case Intrinsic::x86_avx2_phadd_d:
37305+
case Intrinsic::x86_avx2_phadd_w: {
37306+
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
37307+
KnownBits Known2 =
37308+
DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
37309+
37310+
Known = KnownBits::computeForAddSub(true, false, false, Known, Known)
37311+
.intersectWith(KnownBits::computeForAddSub(true, false, false,
37312+
Known2, Known2));
37313+
break;
37314+
}
37315+
case Intrinsic::x86_ssse3_phadd_sw:
37316+
case Intrinsic::x86_ssse3_phadd_sw_128:
37317+
case Intrinsic::x86_avx2_phadd_sw: {
37318+
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
37319+
KnownBits Known2 =
37320+
DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
37321+
37322+
Known = KnownBits::sadd_sat(Known, Known)
37323+
.intersectWith(KnownBits::sadd_sat(Known2, Known2));
37324+
break;
37325+
}
37326+
case Intrinsic::x86_ssse3_phsub_d:
37327+
case Intrinsic::x86_ssse3_phsub_w:
37328+
case Intrinsic::x86_ssse3_phsub_d_128:
37329+
case Intrinsic::x86_ssse3_phsub_w_128:
37330+
case Intrinsic::x86_avx2_phsub_d:
37331+
case Intrinsic::x86_avx2_phsub_w: {
37332+
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
37333+
KnownBits Known2 =
37334+
DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
37335+
37336+
Known = KnownBits::computeForAddSub(false, false, false, Known, Known)
37337+
.intersectWith(KnownBits::computeForAddSub(
37338+
false, false, false, Known2, Known2));
37339+
break;
37340+
}
37341+
case Intrinsic::x86_ssse3_phsub_sw:
37342+
case Intrinsic::x86_ssse3_phsub_sw_128:
37343+
case Intrinsic::x86_avx2_phsub_sw: {
37344+
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
37345+
KnownBits Known2 =
37346+
DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
37347+
37348+
Known = KnownBits::ssub_sat(Known, Known)
37349+
.intersectWith(KnownBits::ssub_sat(Known2, Known2));
37350+
break;
37351+
}
3727937352
}
3728037353
break;
3728137354
}

llvm/test/Analysis/ValueTracking/knownbits-hadd-hsub.ll

Lines changed: 10 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,7 @@ define <4 x i1> @hadd_and_eq_v4i32(<4 x i32> %x, <4 x i32> %y) {
55
; CHECK-LABEL: define <4 x i1> @hadd_and_eq_v4i32(
66
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
77
; CHECK-NEXT: entry:
8-
; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[X]], <i32 3, i32 3, i32 3, i32 3>
9-
; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[Y]], <i32 3, i32 3, i32 3, i32 3>
10-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
11-
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[TMP2]], <i32 -8, i32 -8, i32 -8, i32 -8>
12-
; CHECK-NEXT: [[RET:%.*]] = icmp eq <4 x i32> [[TMP3]], <i32 3, i32 4, i32 5, i32 6>
13-
; CHECK-NEXT: ret <4 x i1> [[RET]]
8+
; CHECK-NEXT: ret <4 x i1> zeroinitializer
149
;
1510
entry:
1611
%0 = and <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
@@ -25,12 +20,7 @@ define <8 x i1> @hadd_and_eq_v8i16(<8 x i16> %x, <8 x i16> %y) {
2520
; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i16(
2621
; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
2722
; CHECK-NEXT: entry:
28-
; CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[X]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
29-
; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[Y]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
30-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
31-
; CHECK-NEXT: [[TMP3:%.*]] = and <8 x i16> [[TMP2]], <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
32-
; CHECK-NEXT: [[RET:%.*]] = icmp eq <8 x i16> [[TMP3]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
33-
; CHECK-NEXT: ret <8 x i1> [[RET]]
23+
; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
3424
;
3525
entry:
3626
%0 = and <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -45,12 +35,7 @@ define <8 x i1> @hadd_and_eq_v8i16_sat(<8 x i16> %x, <8 x i16> %y) {
4535
; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i16_sat(
4636
; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
4737
; CHECK-NEXT: entry:
48-
; CHECK-NEXT: [[TMP0:%.*]] = and <8 x i16> [[X]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
49-
; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[Y]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
50-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
51-
; CHECK-NEXT: [[TMP3:%.*]] = and <8 x i16> [[TMP2]], <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
52-
; CHECK-NEXT: [[RET:%.*]] = icmp eq <8 x i16> [[TMP3]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
53-
; CHECK-NEXT: ret <8 x i1> [[RET]]
38+
; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
5439
;
5540
entry:
5641
%0 = and <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -65,12 +50,7 @@ define <8 x i1> @hadd_and_eq_v8i32(<8 x i32> %x, <8 x i32> %y) {
6550
; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i32(
6651
; CHECK-SAME: <8 x i32> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
6752
; CHECK-NEXT: entry:
68-
; CHECK-NEXT: [[TMP0:%.*]] = and <8 x i32> [[X]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
69-
; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i32> [[Y]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
70-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[TMP0]], <8 x i32> [[TMP1]])
71-
; CHECK-NEXT: [[TMP3:%.*]] = and <8 x i32> [[TMP2]], <i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8>
72-
; CHECK-NEXT: [[RET:%.*]] = icmp eq <8 x i32> [[TMP3]], <i32 3, i32 4, i32 5, i32 6, i32 3, i32 4, i32 5, i32 6>
73-
; CHECK-NEXT: ret <8 x i1> [[RET]]
53+
; CHECK-NEXT: ret <8 x i1> zeroinitializer
7454
;
7555
entry:
7656
%0 = and <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -85,12 +65,7 @@ define <16 x i1> @hadd_and_eq_v16i16(<16 x i16> %x, <16 x i16> %y) {
8565
; CHECK-LABEL: define <16 x i1> @hadd_and_eq_v16i16(
8666
; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
8767
; CHECK-NEXT: entry:
88-
; CHECK-NEXT: [[TMP0:%.*]] = and <16 x i16> [[X]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
89-
; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i16> [[Y]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
90-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[TMP0]], <16 x i16> [[TMP1]])
91-
; CHECK-NEXT: [[TMP3:%.*]] = and <16 x i16> [[TMP2]], <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
92-
; CHECK-NEXT: [[RET:%.*]] = icmp eq <16 x i16> [[TMP3]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
93-
; CHECK-NEXT: ret <16 x i1> [[RET]]
68+
; CHECK-NEXT: ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
9469
;
9570
entry:
9671
%0 = and <16 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -105,12 +80,7 @@ define <16 x i1> @hadd_and_eq_v16i16_sat(<16 x i16> %x, <16 x i16> %y) {
10580
; CHECK-LABEL: define <16 x i1> @hadd_and_eq_v16i16_sat(
10681
; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
10782
; CHECK-NEXT: entry:
108-
; CHECK-NEXT: [[TMP0:%.*]] = and <16 x i16> [[X]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
109-
; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i16> [[Y]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
110-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[TMP0]], <16 x i16> [[TMP1]])
111-
; CHECK-NEXT: [[TMP3:%.*]] = and <16 x i16> [[TMP2]], <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
112-
; CHECK-NEXT: [[RET:%.*]] = icmp eq <16 x i16> [[TMP3]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
113-
; CHECK-NEXT: ret <16 x i1> [[RET]]
83+
; CHECK-NEXT: ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
11484
;
11585
entry:
11686
%0 = and <16 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -125,12 +95,7 @@ define <4 x i1> @hsub_trunc_eq_v4i32(<4 x i32> %x, <4 x i32> %y) {
12595
; CHECK-LABEL: define <4 x i1> @hsub_trunc_eq_v4i32(
12696
; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
12797
; CHECK-NEXT: entry:
128-
; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[X]], <i32 65535, i32 65535, i32 65535, i32 65535>
129-
; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[Y]], <i32 65535, i32 65535, i32 65535, i32 65535>
130-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
131-
; CHECK-NEXT: [[CONV:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
132-
; CHECK-NEXT: [[RET:%.*]] = icmp eq <4 x i16> [[CONV]], <i16 3, i16 4, i16 5, i16 6>
133-
; CHECK-NEXT: ret <4 x i1> [[RET]]
98+
; CHECK-NEXT: ret <4 x i1> zeroinitializer
13499
;
135100
entry:
136101
%0 = or <4 x i32> %x, <i32 65535, i32 65535, i32 65535, i32 65535>
@@ -145,12 +110,7 @@ define <8 x i1> @hsub_trunc_eq_v8i16(<8 x i16> %x, <8 x i16> %y) {
145110
; CHECK-LABEL: define <8 x i1> @hsub_trunc_eq_v8i16(
146111
; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
147112
; CHECK-NEXT: entry:
148-
; CHECK-NEXT: [[TMP0:%.*]] = or <8 x i16> [[X]], <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
149-
; CHECK-NEXT: [[TMP1:%.*]] = or <8 x i16> [[Y]], <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
150-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
151-
; CHECK-NEXT: [[CONV:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
152-
; CHECK-NEXT: [[RET:%.*]] = icmp eq <8 x i8> [[CONV]], <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0>
153-
; CHECK-NEXT: ret <8 x i1> [[RET]]
113+
; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
154114
;
155115
entry:
156116
%0 = or <8 x i16> %x, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
@@ -185,12 +145,7 @@ define <8 x i1> @hsub_trunc_eq_v8i32(<8 x i32> %x, <8 x i32> %y) {
185145
; CHECK-LABEL: define <8 x i1> @hsub_trunc_eq_v8i32(
186146
; CHECK-SAME: <8 x i32> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
187147
; CHECK-NEXT: entry:
188-
; CHECK-NEXT: [[TMP0:%.*]] = or <8 x i32> [[X]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
189-
; CHECK-NEXT: [[TMP1:%.*]] = or <8 x i32> [[Y]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
190-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[TMP0]], <8 x i32> [[TMP1]])
191-
; CHECK-NEXT: [[CONV:%.*]] = trunc <8 x i32> [[TMP2]] to <8 x i16>
192-
; CHECK-NEXT: [[RET:%.*]] = icmp eq <8 x i16> [[CONV]], <i16 3, i16 4, i16 5, i16 6, i16 3, i16 4, i16 5, i16 6>
193-
; CHECK-NEXT: ret <8 x i1> [[RET]]
148+
; CHECK-NEXT: ret <8 x i1> zeroinitializer
194149
;
195150
entry:
196151
%0 = or <8 x i32> %x, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
@@ -205,12 +160,7 @@ define <16 x i1> @hsub_trunc_eq_v16i16(<16 x i16> %x, <16 x i16> %y) {
205160
; CHECK-LABEL: define <16 x i1> @hsub_trunc_eq_v16i16(
206161
; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
207162
; CHECK-NEXT: entry:
208-
; CHECK-NEXT: [[TMP0:%.*]] = or <16 x i16> [[X]], <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
209-
; CHECK-NEXT: [[TMP1:%.*]] = or <16 x i16> [[Y]], <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
210-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> [[TMP0]], <16 x i16> [[TMP1]])
211-
; CHECK-NEXT: [[CONV:%.*]] = trunc <16 x i16> [[TMP2]] to <16 x i8>
212-
; CHECK-NEXT: [[RET:%.*]] = icmp eq <16 x i8> [[CONV]], <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0>
213-
; CHECK-NEXT: ret <16 x i1> [[RET]]
163+
; CHECK-NEXT: ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
214164
;
215165
entry:
216166
%0 = or <16 x i16> %x, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>

0 commit comments

Comments
 (0)