Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit d8ea264

Browse files
author
Uriel Korach
committed
[X86][AVX512] Improve lowering of AVX512 test intrinsics
Added TESTM and TESTNM to the list of instructions that already zero the unused upper bits and do not need the redundant shift-left and shift-right instructions afterwards. Added a pattern for TESTM and TESTNM in X86ISelLowering, so now icmp(neq,and(X,Y), 0) folds into TESTM and icmp(eq,and(X,Y), 0) folds into TESTNM. This commit is a preparation for lowering the test and testn X86 intrinsics to IR. Differential Revision: https://reviews.llvm.org/D38732 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317465 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 16b230f commit d8ea264

12 files changed

+97
-264
lines changed

lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -449,10 +449,10 @@ namespace {
449449
// Returns true if this masked compare can be implemented legally with this
450450
// type.
451451
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
452-
if (N->getOpcode() == X86ISD::PCMPEQM ||
453-
N->getOpcode() == X86ISD::PCMPGTM ||
454-
N->getOpcode() == X86ISD::CMPM ||
455-
N->getOpcode() == X86ISD::CMPMU) {
452+
unsigned Opcode = N->getOpcode();
453+
if (Opcode == X86ISD::PCMPEQM || Opcode == X86ISD::PCMPGTM ||
454+
Opcode == X86ISD::CMPM || Opcode == X86ISD::TESTM ||
455+
Opcode == X86ISD::TESTNM || Opcode == X86ISD::CMPMU) {
456456
// We can get 256-bit 8 element types here without VLX being enabled. When
457457
// this happens we will use 512-bit operations and the mask will not be
458458
// zero extended.

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4998,6 +4998,8 @@ static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
49984998
switch (Opcode) {
49994999
default:
50005000
return false;
5001+
case X86ISD::TESTM:
5002+
case X86ISD::TESTNM:
50015003
case X86ISD::PCMPEQM:
50025004
case X86ISD::PCMPGTM:
50035005
case X86ISD::CMPM:
@@ -17469,6 +17471,20 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
1746917471

1747017472
if (Swap)
1747117473
std::swap(Op0, Op1);
17474+
17475+
// See if it is the case of CMP(EQ|NEQ,AND(A,B),ZERO) and change it to TESTM|NM.
17476+
if ((!Opc && SSECC == 4) || Opc == X86ISD::PCMPEQM) {
17477+
SDValue A = peekThroughBitcasts(Op0);
17478+
if ((A.getOpcode() == ISD::AND || A.getOpcode() == X86ISD::FAND) &&
17479+
ISD::isBuildVectorAllZeros(Op1.getNode())) {
17480+
MVT VT0 = Op0.getSimpleValueType();
17481+
SDValue RHS = DAG.getBitcast(VT0, A.getOperand(0));
17482+
SDValue LHS = DAG.getBitcast(VT0, A.getOperand(1));
17483+
return DAG.getNode(Opc == X86ISD::PCMPEQM ? X86ISD::TESTNM : X86ISD::TESTM,
17484+
dl, VT, RHS, LHS);
17485+
}
17486+
}
17487+
1747217488
if (Opc)
1747317489
return DAG.getNode(Opc, dl, VT, Op0, Op1);
1747417490
Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;

test/CodeGen/X86/avx512-skx-insert-subvec.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,6 @@ define <8 x i1> @test3(<4 x i1> %a) {
4646
; CHECK: # BB#0:
4747
; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
4848
; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0
49-
; CHECK-NEXT: kshiftlb $4, %k0, %k0
50-
; CHECK-NEXT: kshiftrb $4, %k0, %k0
5149
; CHECK-NEXT: vpmovm2w %k0, %xmm0
5250
; CHECK-NEXT: retq
5351

test/CodeGen/X86/avx512bw-vec-test-testn.ll

Lines changed: 8 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@
55
define zeroext i32 @TEST_mm512_test_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
66
; CHECK-LABEL: TEST_mm512_test_epi16_mask:
77
; CHECK: # BB#0: # %entry
8-
; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
9-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
10-
; CHECK-NEXT: vpcmpneqw %zmm1, %zmm0, %k0
8+
; CHECK-NEXT: vptestmw %zmm0, %zmm1, %k0
119
; CHECK-NEXT: kmovd %k0, %eax
1210
; CHECK-NEXT: vzeroupper
1311
; CHECK-NEXT: retq
@@ -24,9 +22,7 @@ entry:
2422
define zeroext i64 @TEST_mm512_test_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
2523
; CHECK-LABEL: TEST_mm512_test_epi8_mask:
2624
; CHECK: # BB#0: # %entry
27-
; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
28-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
29-
; CHECK-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
25+
; CHECK-NEXT: vptestmb %zmm0, %zmm1, %k0
3026
; CHECK-NEXT: kmovq %k0, %rax
3127
; CHECK-NEXT: vzeroupper
3228
; CHECK-NEXT: retq
@@ -42,10 +38,8 @@ entry:
4238
define zeroext i32 @TEST_mm512_mask_test_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
4339
; CHECK-LABEL: TEST_mm512_mask_test_epi16_mask:
4440
; CHECK: # BB#0: # %entry
45-
; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
46-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
4741
; CHECK-NEXT: kmovd %edi, %k1
48-
; CHECK-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
42+
; CHECK-NEXT: vptestmw %zmm0, %zmm1, %k0 {%k1}
4943
; CHECK-NEXT: kmovd %k0, %eax
5044
; CHECK-NEXT: vzeroupper
5145
; CHECK-NEXT: retq
@@ -63,10 +57,8 @@ entry:
6357
define zeroext i64 @TEST_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
6458
; CHECK-LABEL: TEST_mm512_mask_test_epi8_mask:
6559
; CHECK: # BB#0: # %entry
66-
; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
67-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
6860
; CHECK-NEXT: kmovq %rdi, %k1
69-
; CHECK-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
61+
; CHECK-NEXT: vptestmb %zmm0, %zmm1, %k0 {%k1}
7062
; CHECK-NEXT: kmovq %k0, %rax
7163
; CHECK-NEXT: vzeroupper
7264
; CHECK-NEXT: retq
@@ -84,9 +76,7 @@ entry:
8476
define zeroext i32 @TEST_mm512_testn_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
8577
; CHECK-LABEL: TEST_mm512_testn_epi16_mask:
8678
; CHECK: # BB#0: # %entry
87-
; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
88-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
89-
; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
79+
; CHECK-NEXT: vptestnmw %zmm0, %zmm1, %k0
9080
; CHECK-NEXT: kmovd %k0, %eax
9181
; CHECK-NEXT: vzeroupper
9282
; CHECK-NEXT: retq
@@ -103,9 +93,7 @@ entry:
10393
define zeroext i64 @TEST_mm512_testn_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
10494
; CHECK-LABEL: TEST_mm512_testn_epi8_mask:
10595
; CHECK: # BB#0: # %entry
106-
; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
107-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
108-
; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
96+
; CHECK-NEXT: vptestnmb %zmm0, %zmm1, %k0
10997
; CHECK-NEXT: kmovq %k0, %rax
11098
; CHECK-NEXT: vzeroupper
11199
; CHECK-NEXT: retq
@@ -121,10 +109,8 @@ entry:
121109
define zeroext i32 @TEST_mm512_mask_testn_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
122110
; CHECK-LABEL: TEST_mm512_mask_testn_epi16_mask:
123111
; CHECK: # BB#0: # %entry
124-
; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
125-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
126112
; CHECK-NEXT: kmovd %edi, %k1
127-
; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
113+
; CHECK-NEXT: vptestnmw %zmm0, %zmm1, %k0 {%k1}
128114
; CHECK-NEXT: kmovd %k0, %eax
129115
; CHECK-NEXT: vzeroupper
130116
; CHECK-NEXT: retq
@@ -142,10 +128,8 @@ entry:
142128
define zeroext i64 @TEST_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
143129
; CHECK-LABEL: TEST_mm512_mask_testn_epi8_mask:
144130
; CHECK: # BB#0: # %entry
145-
; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
146-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
147131
; CHECK-NEXT: kmovq %rdi, %k1
148-
; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
132+
; CHECK-NEXT: vptestnmb %zmm0, %zmm1, %k0 {%k1}
149133
; CHECK-NEXT: kmovq %k0, %rax
150134
; CHECK-NEXT: vzeroupper
151135
; CHECK-NEXT: retq

test/CodeGen/X86/avx512bwvl-vec-test-testn.ll

Lines changed: 16 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@
55
define zeroext i16 @TEST_mm_test_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
66
; CHECK-LABEL: TEST_mm_test_epi8_mask:
77
; CHECK: # BB#0: # %entry
8-
; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
9-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
10-
; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k0
8+
; CHECK-NEXT: vptestmb %xmm0, %xmm1, %k0
119
; CHECK-NEXT: kmovd %k0, %eax
1210
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
1311
; CHECK-NEXT: retq
@@ -23,10 +21,8 @@ entry:
2321
define zeroext i16 @TEST_mm_mask_test_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
2422
; CHECK-LABEL: TEST_mm_mask_test_epi8_mask:
2523
; CHECK: # BB#0: # %entry
26-
; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
27-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
2824
; CHECK-NEXT: kmovd %edi, %k1
29-
; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k0 {%k1}
25+
; CHECK-NEXT: vptestmb %xmm0, %xmm1, %k0 {%k1}
3026
; CHECK-NEXT: kmovd %k0, %eax
3127
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
3228
; CHECK-NEXT: retq
@@ -44,9 +40,7 @@ entry:
4440
define zeroext i8 @TEST_mm_test_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
4541
; CHECK-LABEL: TEST_mm_test_epi16_mask:
4642
; CHECK: # BB#0: # %entry
47-
; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
48-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
49-
; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k0
43+
; CHECK-NEXT: vptestmw %xmm0, %xmm1, %k0
5044
; CHECK-NEXT: kmovd %k0, %eax
5145
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
5246
; CHECK-NEXT: retq
@@ -62,10 +56,8 @@ entry:
6256
define zeroext i8 @TEST_mm_mask_test_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
6357
; CHECK-LABEL: TEST_mm_mask_test_epi16_mask:
6458
; CHECK: # BB#0: # %entry
65-
; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
66-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
6759
; CHECK-NEXT: kmovd %edi, %k1
68-
; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k0 {%k1}
60+
; CHECK-NEXT: vptestmw %xmm0, %xmm1, %k0 {%k1}
6961
; CHECK-NEXT: kmovd %k0, %eax
7062
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
7163
; CHECK-NEXT: retq
@@ -83,9 +75,7 @@ entry:
8375
define zeroext i16 @TEST_mm_testn_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
8476
; CHECK-LABEL: TEST_mm_testn_epi8_mask:
8577
; CHECK: # BB#0: # %entry
86-
; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
87-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
88-
; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
78+
; CHECK-NEXT: vptestnmb %xmm0, %xmm1, %k0
8979
; CHECK-NEXT: kmovd %k0, %eax
9080
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
9181
; CHECK-NEXT: retq
@@ -101,10 +91,8 @@ entry:
10191
define zeroext i16 @TEST_mm_mask_testn_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
10292
; CHECK-LABEL: TEST_mm_mask_testn_epi8_mask:
10393
; CHECK: # BB#0: # %entry
104-
; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
105-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
10694
; CHECK-NEXT: kmovd %edi, %k1
107-
; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
95+
; CHECK-NEXT: vptestnmb %xmm0, %xmm1, %k0 {%k1}
10896
; CHECK-NEXT: kmovd %k0, %eax
10997
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
11098
; CHECK-NEXT: retq
@@ -122,9 +110,7 @@ entry:
122110
define zeroext i8 @TEST_mm_testn_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
123111
; CHECK-LABEL: TEST_mm_testn_epi16_mask:
124112
; CHECK: # BB#0: # %entry
125-
; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
126-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
127-
; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
113+
; CHECK-NEXT: vptestnmw %xmm0, %xmm1, %k0
128114
; CHECK-NEXT: kmovd %k0, %eax
129115
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
130116
; CHECK-NEXT: retq
@@ -140,10 +126,8 @@ entry:
140126
define zeroext i8 @TEST_mm_mask_testn_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
141127
; CHECK-LABEL: TEST_mm_mask_testn_epi16_mask:
142128
; CHECK: # BB#0: # %entry
143-
; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
144-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
145129
; CHECK-NEXT: kmovd %edi, %k1
146-
; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
130+
; CHECK-NEXT: vptestnmw %xmm0, %xmm1, %k0 {%k1}
147131
; CHECK-NEXT: kmovd %k0, %eax
148132
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
149133
; CHECK-NEXT: retq
@@ -161,9 +145,7 @@ entry:
161145
define i32 @TEST_mm256_test_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
162146
; CHECK-LABEL: TEST_mm256_test_epi8_mask:
163147
; CHECK: # BB#0: # %entry
164-
; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0
165-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
166-
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0
148+
; CHECK-NEXT: vptestmb %ymm0, %ymm1, %k0
167149
; CHECK-NEXT: kmovd %k0, %eax
168150
; CHECK-NEXT: vzeroupper
169151
; CHECK-NEXT: retq
@@ -179,10 +161,8 @@ entry:
179161
define i32 @TEST_mm256_mask_test_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
180162
; CHECK-LABEL: TEST_mm256_mask_test_epi8_mask:
181163
; CHECK: # BB#0: # %entry
182-
; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0
183-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
184164
; CHECK-NEXT: kmovd %edi, %k1
185-
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1}
165+
; CHECK-NEXT: vptestmb %ymm0, %ymm1, %k0 {%k1}
186166
; CHECK-NEXT: kmovd %k0, %eax
187167
; CHECK-NEXT: vzeroupper
188168
; CHECK-NEXT: retq
@@ -200,9 +180,7 @@ entry:
200180
define zeroext i16 @TEST_mm256_test_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
201181
; CHECK-LABEL: TEST_mm256_test_epi16_mask:
202182
; CHECK: # BB#0: # %entry
203-
; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0
204-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
205-
; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k0
183+
; CHECK-NEXT: vptestmw %ymm0, %ymm1, %k0
206184
; CHECK-NEXT: kmovd %k0, %eax
207185
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
208186
; CHECK-NEXT: vzeroupper
@@ -219,10 +197,8 @@ entry:
219197
define zeroext i16 @TEST_mm256_mask_test_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
220198
; CHECK-LABEL: TEST_mm256_mask_test_epi16_mask:
221199
; CHECK: # BB#0: # %entry
222-
; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0
223-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
224200
; CHECK-NEXT: kmovd %edi, %k1
225-
; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k0 {%k1}
201+
; CHECK-NEXT: vptestmw %ymm0, %ymm1, %k0 {%k1}
226202
; CHECK-NEXT: kmovd %k0, %eax
227203
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
228204
; CHECK-NEXT: vzeroupper
@@ -241,9 +217,7 @@ entry:
241217
define i32 @TEST_mm256_testn_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
242218
; CHECK-LABEL: TEST_mm256_testn_epi8_mask:
243219
; CHECK: # BB#0: # %entry
244-
; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0
245-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
246-
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
220+
; CHECK-NEXT: vptestnmb %ymm0, %ymm1, %k0
247221
; CHECK-NEXT: kmovd %k0, %eax
248222
; CHECK-NEXT: vzeroupper
249223
; CHECK-NEXT: retq
@@ -259,10 +233,8 @@ entry:
259233
define i32 @TEST_mm256_mask_testn_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
260234
; CHECK-LABEL: TEST_mm256_mask_testn_epi8_mask:
261235
; CHECK: # BB#0: # %entry
262-
; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0
263-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
264236
; CHECK-NEXT: kmovd %edi, %k1
265-
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1}
237+
; CHECK-NEXT: vptestnmb %ymm0, %ymm1, %k0 {%k1}
266238
; CHECK-NEXT: kmovd %k0, %eax
267239
; CHECK-NEXT: vzeroupper
268240
; CHECK-NEXT: retq
@@ -280,9 +252,7 @@ entry:
280252
define zeroext i16 @TEST_mm256_testn_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
281253
; CHECK-LABEL: TEST_mm256_testn_epi16_mask:
282254
; CHECK: # BB#0: # %entry
283-
; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0
284-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
285-
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
255+
; CHECK-NEXT: vptestnmw %ymm0, %ymm1, %k0
286256
; CHECK-NEXT: kmovd %k0, %eax
287257
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
288258
; CHECK-NEXT: vzeroupper
@@ -299,10 +269,8 @@ entry:
299269
define zeroext i16 @TEST_mm256_mask_testn_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
300270
; CHECK-LABEL: TEST_mm256_mask_testn_epi16_mask:
301271
; CHECK: # BB#0: # %entry
302-
; CHECK-NEXT: vpand %ymm0, %ymm1, %ymm0
303-
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
304272
; CHECK-NEXT: kmovd %edi, %k1
305-
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
273+
; CHECK-NEXT: vptestnmw %ymm0, %ymm1, %k0 {%k1}
306274
; CHECK-NEXT: kmovd %k0, %eax
307275
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
308276
; CHECK-NEXT: vzeroupper

0 commit comments

Comments
 (0)