
Commit f3f4afe

[X86] matchUnaryShuffle - add support for matching 512-bit extension patterns. (#127643)
Handles both BWI and non-BWI cases (skips PMOV*XBW patterns when BWI is unavailable). The VPTERNLOG diffs in vector-interleaved-store-i8-stride-8.ll are due to better value tracking now recognizing zero-extension patterns that were previously only treated as any-extensions.
1 parent 8900e41 commit f3f4afe
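
For context on the title: the matcher walks power-of-two scales and asks whether the shuffle mask reads consecutive source elements with only undef or known-zero lanes in between. Below is a minimal standalone sketch of that idea, not the LLVM implementation; the function name, the ExtKind enum, and the -1/-2 undef/zero sentinels are illustrative stand-ins for LLVM's SM_SentinelUndef/SM_SentinelZero handling, and the sign-extension case (which additionally consults ComputeNumSignBits) is omitted.

// Sketch only: does a unary shuffle mask look like an
// ANY/ZERO_EXTEND_VECTOR_INREG at a given power-of-two Scale?
// Convention assumed here: -1 marks an undef lane, -2 a lane known to be zero.
#include <cstddef>
#include <vector>

enum class ExtKind { None, Any, Zero };

ExtKind matchExtensionPattern(const std::vector<int> &Mask, unsigned Scale) {
  bool MatchAny = true;
  bool MatchZero = true;
  for (std::size_t I = 0, E = Mask.size(); I != E; ++I) {
    int M = Mask[I];
    if (I % Scale == 0) {
      // Every Scale-th lane must pull source elements 0, 1, 2, ... in order.
      if (M != static_cast<int>(I / Scale))
        return ExtKind::None;
    } else {
      // Filler lanes: all-undef keeps an any-extend viable; undef-or-zero
      // keeps a zero-extend viable. This is where stronger zero tracking
      // turns an any-extension into a zero-extension.
      MatchAny &= (M == -1);
      MatchZero &= (M == -1 || M == -2);
    }
  }
  return MatchZero ? ExtKind::Zero : (MatchAny ? ExtKind::Any : ExtKind::None);
}

With the 512-bit mask types now allowed through, the one combination that still needs guarding is Scale == 2 on a v64i8 mask: that corresponds to a 512-bit VPMOV*XBW, which requires AVX512BW, hence the useBWIRegs() check added inside the loop below.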

File tree

3 files changed: +170, -232 lines changed


llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 3 deletions
@@ -38927,13 +38927,17 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
   }
 
   // Match against a ANY/SIGN/ZERO_EXTEND_VECTOR_INREG instruction.
-  // TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
-  if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
-                         (MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
+  if (AllowIntDomain &&
+      ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
+       (MaskVT.is256BitVector() && Subtarget.hasInt256()) ||
+       (MaskVT.is512BitVector() && Subtarget.useAVX512Regs()))) {
     unsigned MaxScale = 64 / MaskEltSize;
     bool UseSign = V1.getScalarValueSizeInBits() == MaskEltSize &&
                    DAG.ComputeNumSignBits(V1) == MaskEltSize;
     for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
+      // Skip 512-bit VPMOV?XBW on non-AVX512BW targets.
+      if (Scale == 2 && MaskVT == MVT::v64i8 && !Subtarget.useBWIRegs())
+        continue;
       bool MatchAny = true;
       bool MatchZero = true;
       bool MatchSign = UseSign;
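
The VPTERNLOG differences in the test file below come down to a Boolean identity. The old immediate encoded a full bit-select, zmm22 = (zmm11 & zmm22) | (~zmm11 & zmm0); the new one simply ORs in the deselected bits of the other operand. The two agree exactly when the accumulator is already zero wherever the mask deselects it, which is what recognizing the VPMOVZXWQ results as zero-extensions (rather than any-extensions) establishes. A small self-contained check of that identity, with illustrative 64-bit values standing in for one qword lane of the 512-bit registers:

// Sketch: the bit-select form and the OR form of the VPTERNLOG agree whenever
// the blended-in value is zero wherever the mask deselects it (the guarantee a
// zero-extension provides). Constants below are illustrative, not from the test.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Mask  = 0xFFFFFFFF0000FFFFULL; // one qword of zmm11: words 65535, 0, 65535, 65535
  const uint64_t Zext  = 0x000000000000ABCDULL; // one qword of a vpmovzxwq result: upper words are zero
  const uint64_t Other = 0x1122334455667788ULL; // arbitrary qword of the other operand

  // Old form: select Zext where Mask is set, Other elsewhere.
  const uint64_t Select = Other ^ (Mask & (Zext ^ Other));
  // New form: OR the deselected bits of Other into Zext.
  const uint64_t OrForm = Zext | (Other & ~Mask);

  assert((Zext & ~Mask) == 0 && "the rewrite relies on this zero-extension fact");
  assert(Select == OrForm);
  return 0;
}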

llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll

Lines changed: 16 additions & 16 deletions
@@ -6905,7 +6905,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512-NEXT: # ymm11 = mem[2,1,3,3,6,5,7,7]
 ; AVX512-NEXT: vinserti64x4 $1, %ymm11, %zmm0, %zmm0
 ; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm11 = [65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535]
-; AVX512-NEXT: vpternlogq {{.*#+}} zmm22 = zmm0 ^ (zmm11 & (zmm22 ^ zmm0))
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm22 = zmm22 | (zmm0 & ~zmm11)
 ; AVX512-NEXT: vpshufd $96, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
 ; AVX512-NEXT: # ymm0 = mem[0,0,2,1,4,4,6,5]
 ; AVX512-NEXT: vpshufd $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm7 # 32-byte Folded Reload
@@ -6927,7 +6927,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512-NEXT: vpshufd $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm10 # 32-byte Folded Reload
 ; AVX512-NEXT: # ymm10 = mem[2,1,3,3,6,5,7,7]
 ; AVX512-NEXT: vinserti64x4 $1, %ymm10, %zmm0, %zmm0
-; AVX512-NEXT: vpternlogq {{.*#+}} zmm24 = zmm0 ^ (zmm11 & (zmm24 ^ zmm0))
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm24 = zmm24 | (zmm0 & ~zmm11)
 ; AVX512-NEXT: vpshufd $96, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
 ; AVX512-NEXT: # ymm0 = mem[0,0,2,1,4,4,6,5]
 ; AVX512-NEXT: vpshufd $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm10 # 32-byte Folded Reload
@@ -6944,7 +6944,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512-NEXT: vpshufd {{.*#+}} ymm10 = ymm29[2,1,3,3,6,5,7,7]
 ; AVX512-NEXT: vinserti64x4 $1, %ymm10, %zmm0, %zmm0
 ; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm10 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
-; AVX512-NEXT: vpternlogq {{.*#+}} zmm10 = zmm0 ^ (zmm11 & (zmm10 ^ zmm0))
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm10 = zmm10 | (zmm0 & ~zmm11)
 ; AVX512-NEXT: vpshufd {{.*#+}} ymm0 = ymm25[0,0,2,1,4,4,6,5]
 ; AVX512-NEXT: vpshufd {{.*#+}} ymm1 = ymm23[0,2,2,3,4,6,6,7]
 ; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
@@ -6968,7 +6968,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512-NEXT: vmovdqa 32(%rdi), %xmm0
 ; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm0[0],xmm14[0],xmm0[1],xmm14[1],xmm0[2],xmm14[2],xmm0[3],xmm14[3],xmm0[4],xmm14[4],xmm0[5],xmm14[5],xmm0[6],xmm14[6],xmm0[7],xmm14[7]
 ; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm4 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero
-; AVX512-NEXT: vpternlogq {{.*#+}} zmm4 = zmm1 ^ (zmm11 & (zmm4 ^ zmm1))
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm4 = zmm4 | (zmm1 & ~zmm11)
 ; AVX512-NEXT: vpshufd {{.*#+}} ymm1 = ymm20[0,0,2,1,4,4,6,5]
 ; AVX512-NEXT: vpshufd {{.*#+}} ymm3 = ymm18[0,2,2,3,4,6,6,7]
 ; AVX512-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
@@ -7035,7 +7035,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512-NEXT: # ymm25 = mem[2,1,3,3,6,5,7,7]
 ; AVX512-NEXT: vinserti64x4 $1, %ymm25, %zmm14, %zmm14
 ; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = zmm14 ^ (zmm11 & (zmm0 ^ zmm14))
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = zmm0 | (zmm14 & ~zmm11)
 ; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm8 = xmm8[0],zero,zero,zero,xmm8[1],zero,zero,zero,xmm8[2],zero,zero,zero,xmm8[3],zero,zero,zero,xmm8[4],zero,zero,zero,xmm8[5],zero,zero,zero,xmm8[6],zero,zero,zero,xmm8[7],zero,zero,zero
 ; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero,xmm6[4],zero,zero,zero,xmm6[5],zero,zero,zero,xmm6[6],zero,zero,zero,xmm6[7],zero,zero,zero
 ; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm12 = xmm12[0],zero,zero,zero,xmm12[1],zero,zero,zero,xmm12[2],zero,zero,zero,xmm12[3],zero,zero,zero,xmm12[4],zero,zero,zero,xmm12[5],zero,zero,zero,xmm12[6],zero,zero,zero,xmm12[7],zero,zero,zero
@@ -7057,7 +7057,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512-NEXT: vpshufd $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm25 # 32-byte Folded Reload
 ; AVX512-NEXT: # ymm25 = mem[2,1,3,3,6,5,7,7]
 ; AVX512-NEXT: vinserti64x4 $1, %ymm25, %zmm14, %zmm14
-; AVX512-NEXT: vpternlogq {{.*#+}} zmm8 = zmm14 ^ (zmm11 & (zmm8 ^ zmm14))
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm8 = zmm8 | (zmm14 & ~zmm11)
 ; AVX512-NEXT: vpshufd {{.*#+}} ymm14 = ymm30[0,0,2,1,4,4,6,5]
 ; AVX512-NEXT: vpshufd {{.*#+}} ymm25 = ymm31[0,2,2,3,4,6,6,7]
 ; AVX512-NEXT: vinserti64x4 $1, %ymm25, %zmm14, %zmm14
@@ -7070,7 +7070,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512-NEXT: vpshufd {{.*#+}} ymm14 = ymm19[0,1,1,3,4,5,5,7]
 ; AVX512-NEXT: vpshufd {{.*#+}} ymm17 = ymm20[2,1,3,3,6,5,7,7]
 ; AVX512-NEXT: vinserti64x4 $1, %ymm17, %zmm14, %zmm14
-; AVX512-NEXT: vpternlogq {{.*#+}} zmm6 = zmm14 ^ (zmm11 & (zmm6 ^ zmm14))
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm6 = zmm6 | (zmm14 & ~zmm11)
 ; AVX512-NEXT: vpshufd {{.*#+}} ymm14 = ymm23[0,0,2,1,4,4,6,5]
 ; AVX512-NEXT: vpshufd {{.*#+}} ymm9 = ymm9[0,2,2,3,4,6,6,7]
 ; AVX512-NEXT: vinserti64x4 $1, %ymm9, %zmm14, %zmm9
@@ -7083,7 +7083,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512-NEXT: vpshufd {{.*#+}} ymm2 = ymm13[0,1,1,3,4,5,5,7]
 ; AVX512-NEXT: vpshufd {{.*#+}} ymm3 = ymm15[2,1,3,3,6,5,7,7]
 ; AVX512-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
-; AVX512-NEXT: vpternlogq {{.*#+}} zmm12 = zmm2 ^ (zmm11 & (zmm12 ^ zmm2))
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm12 = zmm12 | (zmm2 & ~zmm11)
 ; AVX512-NEXT: vpshufd {{.*#+}} ymm2 = ymm16[0,0,2,1,4,4,6,5]
 ; AVX512-NEXT: vpshufd {{.*#+}} ymm3 = ymm18[0,2,2,3,4,6,6,7]
 ; AVX512-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
@@ -7589,7 +7589,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-NEXT: # ymm11 = mem[2,1,3,3,6,5,7,7]
 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm11, %zmm0, %zmm0
 ; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} zmm11 = [65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535,65535,0,65535,65535]
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm22 = zmm0 ^ (zmm11 & (zmm22 ^ zmm0))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm22 = zmm22 | (zmm0 & ~zmm11)
 ; AVX512DQ-NEXT: vpshufd $96, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
 ; AVX512DQ-NEXT: # ymm0 = mem[0,0,2,1,4,4,6,5]
 ; AVX512DQ-NEXT: vpshufd $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm7 # 32-byte Folded Reload
@@ -7611,7 +7611,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-NEXT: vpshufd $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm10 # 32-byte Folded Reload
 ; AVX512DQ-NEXT: # ymm10 = mem[2,1,3,3,6,5,7,7]
 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm10, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm24 = zmm0 ^ (zmm11 & (zmm24 ^ zmm0))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm24 = zmm24 | (zmm0 & ~zmm11)
 ; AVX512DQ-NEXT: vpshufd $96, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
 ; AVX512DQ-NEXT: # ymm0 = mem[0,0,2,1,4,4,6,5]
 ; AVX512DQ-NEXT: vpshufd $232, {{[-0-9]+}}(%r{{[sb]}}p), %ymm10 # 32-byte Folded Reload
@@ -7628,7 +7628,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-NEXT: vpshufd {{.*#+}} ymm10 = ymm29[2,1,3,3,6,5,7,7]
 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm10, %zmm0, %zmm0
 ; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} zmm10 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm10 = zmm0 ^ (zmm11 & (zmm10 ^ zmm0))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm10 = zmm10 | (zmm0 & ~zmm11)
 ; AVX512DQ-NEXT: vpshufd {{.*#+}} ymm0 = ymm25[0,0,2,1,4,4,6,5]
 ; AVX512DQ-NEXT: vpshufd {{.*#+}} ymm1 = ymm23[0,2,2,3,4,6,6,7]
 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
@@ -7652,7 +7652,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-NEXT: vmovdqa 32(%rdi), %xmm0
 ; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm0[0],xmm14[0],xmm0[1],xmm14[1],xmm0[2],xmm14[2],xmm0[3],xmm14[3],xmm0[4],xmm14[4],xmm0[5],xmm14[5],xmm0[6],xmm14[6],xmm0[7],xmm14[7]
 ; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} zmm4 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm4 = zmm1 ^ (zmm11 & (zmm4 ^ zmm1))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm4 = zmm4 | (zmm1 & ~zmm11)
 ; AVX512DQ-NEXT: vpshufd {{.*#+}} ymm1 = ymm20[0,0,2,1,4,4,6,5]
 ; AVX512DQ-NEXT: vpshufd {{.*#+}} ymm3 = ymm18[0,2,2,3,4,6,6,7]
 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
@@ -7719,7 +7719,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-NEXT: # ymm25 = mem[2,1,3,3,6,5,7,7]
 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm25, %zmm14, %zmm14
 ; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm0 = zmm14 ^ (zmm11 & (zmm0 ^ zmm14))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm0 = zmm0 | (zmm14 & ~zmm11)
 ; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} zmm8 = xmm8[0],zero,zero,zero,xmm8[1],zero,zero,zero,xmm8[2],zero,zero,zero,xmm8[3],zero,zero,zero,xmm8[4],zero,zero,zero,xmm8[5],zero,zero,zero,xmm8[6],zero,zero,zero,xmm8[7],zero,zero,zero
 ; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} zmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero,xmm6[4],zero,zero,zero,xmm6[5],zero,zero,zero,xmm6[6],zero,zero,zero,xmm6[7],zero,zero,zero
 ; AVX512DQ-NEXT: vpmovzxwq {{.*#+}} zmm12 = xmm12[0],zero,zero,zero,xmm12[1],zero,zero,zero,xmm12[2],zero,zero,zero,xmm12[3],zero,zero,zero,xmm12[4],zero,zero,zero,xmm12[5],zero,zero,zero,xmm12[6],zero,zero,zero,xmm12[7],zero,zero,zero
@@ -7741,7 +7741,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-NEXT: vpshufd $246, {{[-0-9]+}}(%r{{[sb]}}p), %ymm25 # 32-byte Folded Reload
 ; AVX512DQ-NEXT: # ymm25 = mem[2,1,3,3,6,5,7,7]
 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm25, %zmm14, %zmm14
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm8 = zmm14 ^ (zmm11 & (zmm8 ^ zmm14))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm8 = zmm8 | (zmm14 & ~zmm11)
 ; AVX512DQ-NEXT: vpshufd {{.*#+}} ymm14 = ymm30[0,0,2,1,4,4,6,5]
 ; AVX512DQ-NEXT: vpshufd {{.*#+}} ymm25 = ymm31[0,2,2,3,4,6,6,7]
 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm25, %zmm14, %zmm14
@@ -7754,7 +7754,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-NEXT: vpshufd {{.*#+}} ymm14 = ymm19[0,1,1,3,4,5,5,7]
 ; AVX512DQ-NEXT: vpshufd {{.*#+}} ymm17 = ymm20[2,1,3,3,6,5,7,7]
 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm17, %zmm14, %zmm14
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm6 = zmm14 ^ (zmm11 & (zmm6 ^ zmm14))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm6 = zmm6 | (zmm14 & ~zmm11)
 ; AVX512DQ-NEXT: vpshufd {{.*#+}} ymm14 = ymm23[0,0,2,1,4,4,6,5]
 ; AVX512DQ-NEXT: vpshufd {{.*#+}} ymm9 = ymm9[0,2,2,3,4,6,6,7]
 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm9, %zmm14, %zmm9
@@ -7767,7 +7767,7 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
 ; AVX512DQ-NEXT: vpshufd {{.*#+}} ymm2 = ymm13[0,1,1,3,4,5,5,7]
 ; AVX512DQ-NEXT: vpshufd {{.*#+}} ymm3 = ymm15[2,1,3,3,6,5,7,7]
 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
-; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm12 = zmm2 ^ (zmm11 & (zmm12 ^ zmm2))
+; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm12 = zmm12 | (zmm2 & ~zmm11)
 ; AVX512DQ-NEXT: vpshufd {{.*#+}} ymm2 = ymm16[0,0,2,1,4,4,6,5]
 ; AVX512DQ-NEXT: vpshufd {{.*#+}} ymm3 = ymm18[0,2,2,3,4,6,6,7]
 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
