
Commit 3c42372

[X86] combineLoad - improve constant pool matches by ignoring undef elements
When trying to share constant pool entries, we can ignore the undef elements of the entry that is being removed.
1 parent 3909310 commit 3c42372
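
The new check only compares the lanes that the dying constant actually defines. As a rough standalone sketch of that predicate (illustrative C++ only, not the LLVM code: matchingBits, ConstVec and the std::optional representation of undef lanes are made up here, whereas the patch works on an APInt undef mask plus ArrayRef<APInt> element bits):

#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

// One constant-pool entry, element by element; std::nullopt models an undef
// lane. (The real patch uses an APInt undef mask plus ArrayRef<APInt> bits.)
using ConstVec = std::vector<std::optional<uint64_t>>;

// Can the entry being removed reuse the entry being kept? Lanes that are
// undef in the removed entry are ignored; every lane the removed entry
// defines must be defined with the same value in the kept entry.
static bool matchingBits(const ConstVec &Removed, const ConstVec &Kept) {
  if (Removed.size() != Kept.size())
    return false;
  for (std::size_t I = 0, E = Removed.size(); I != E; ++I) {
    if (!Removed[I])
      continue; // undef lane in the removed entry: matches anything
    if (!Kept[I] || *Kept[I] != *Removed[I])
      return false; // kept entry must supply the same defined value
  }
  return true;
}

int main() {
  ConstVec Removed = {1, std::nullopt, 3, std::nullopt}; // {1, u, 3, u}
  ConstVec Kept = {1, 2, 3, 4};
  // Matches: the undef lanes are ignored and the defined lanes agree.
  return matchingBits(Removed, Kept) ? 0 : 1;
}

With this relaxation, a load whose constant has undef elements can be served by an existing constant pool entry that defines those lanes, which is what the reused shuffle-mask constants in the test updates below illustrate.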

17 files changed (+8709, -8700 lines)

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 14 additions & 2 deletions
@@ -49951,6 +49951,18 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
         Extract = DAG.getBitcast(RegVT, Extract);
         return DCI.CombineTo(N, Extract, SDValue(User, 1));
       }
+      auto MatchingBits = [](const APInt &Undefs, const APInt &UserUndefs,
+                             ArrayRef<APInt> Bits, ArrayRef<APInt> UserBits) {
+        if (!UserUndefs.isSubsetOf(Undefs))
+          return false;
+        for (unsigned I = 0, E = Undefs.getBitWidth(); I != E; ++I) {
+          if (Undefs[I])
+            continue;
+          if (Bits[I] != UserBits[I])
+            return false;
+        }
+        return true;
+      };
       if (User->getOpcode() == X86ISD::VBROADCAST_LOAD &&
           getTargetConstantFromBasePtr(Ptr)) {
         // See if we are loading a constant that has also been broadcast.
@@ -49961,7 +49973,7 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
                                           UserBits)) {
           UserUndefs = UserUndefs.trunc(Undefs.getBitWidth());
           UserBits.truncate(Bits.size());
-          if (Bits == UserBits && UserUndefs.isSubsetOf(Undefs)) {
+          if (MatchingBits(Undefs, UserUndefs, Bits, UserBits)) {
            SDValue Extract = extractSubVector(
                SDValue(User, 0), 0, DAG, SDLoc(N), RegVT.getSizeInBits());
            Extract = DAG.getBitcast(RegVT, Extract);
@@ -49985,7 +49997,7 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
                                           UserBits)) {
           UserUndefs = UserUndefs.trunc(Undefs.getBitWidth());
           UserBits.truncate(Bits.size());
-          if (Bits == UserBits && UserUndefs.isSubsetOf(Undefs)) {
+          if (MatchingBits(Undefs, UserUndefs, Bits, UserBits)) {
            SDValue Extract = extractSubVector(
                SDValue(User, 0), 0, DAG, SDLoc(N), RegVT.getSizeInBits());
            Extract = DAG.getBitcast(RegVT, Extract);

llvm/test/CodeGen/X86/splat-for-size.ll

Lines changed: 2 additions & 3 deletions
@@ -387,10 +387,9 @@ define <32 x i8> @splat_v32i8_pgso(<32 x i8> %x) !prof !14 {
 define <8 x i64> @pr23259() #1 {
 ; AVX-LABEL: pr23259:
 ; AVX: # %bb.0: # %entry
-; AVX-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1,1,1,1]
-; AVX-NEXT: vblendps {{.*#+}} xmm1 = mem[0,1],xmm0[2,3]
-; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = mem[0,1],xmm1[2,3]
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
 ; AVX-NEXT: retq
 ;
 ; AVX2-LABEL: pr23259:

llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-3.ll

Lines changed: 53 additions & 50 deletions
@@ -603,9 +603,10 @@ define void @load_i16_stride3_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
 ; AVX2-ONLY-NEXT: vpblendvb %ymm6, %ymm1, %ymm0, %ymm0
 ; AVX2-ONLY-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX2-ONLY-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3],ymm0[4,5],ymm1[6],ymm0[7],ymm1[8],ymm0[9,10],ymm1[11],ymm0[12,13],ymm1[14],ymm0[15]
-; AVX2-ONLY-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,10,11,0,1,6,7,12,13,2,3,8,9,14,15,20,21,26,27,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX2-ONLY-NEXT: vpblendw {{.*#+}} xmm1 = xmm4[0],xmm3[1],xmm4[2,3],xmm3[4],xmm4[5,6],xmm3[7]
-; AVX2-ONLY-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,0,1,6,7,12,13,2,3,8,9,14,15]
+; AVX2-ONLY-NEXT: vmovdqa {{.*#+}} ymm1 = <4,5,10,11,0,1,6,7,12,13,2,3,8,9,14,15,20,21,26,27,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX2-ONLY-NEXT: vpshufb %ymm1, %ymm0, %ymm0
+; AVX2-ONLY-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0],xmm3[1],xmm4[2,3],xmm3[4],xmm4[5,6],xmm3[7]
+; AVX2-ONLY-NEXT: vpshufb %xmm1, %xmm3, %xmm1
 ; AVX2-ONLY-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
 ; AVX2-ONLY-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7]
 ; AVX2-ONLY-NEXT: vmovdqa %ymm2, (%rsi)
@@ -646,9 +647,10 @@ define void @load_i16_stride3_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
 ; AVX512F-NEXT: vpternlogq $202, %ymm2, %ymm1, %ymm0
 ; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1
 ; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3],ymm0[4,5],ymm1[6],ymm0[7],ymm1[8],ymm0[9,10],ymm1[11],ymm0[12,13],ymm1[14],ymm0[15]
-; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,10,11,0,1,6,7,12,13,2,3,8,9,14,15,20,21,26,27,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm5[0],xmm4[1],xmm5[2,3],xmm4[4],xmm5[5,6],xmm4[7]
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,0,1,6,7,12,13,2,3,8,9,14,15]
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = <4,5,10,11,0,1,6,7,12,13,2,3,8,9,14,15,20,21,26,27,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX512F-NEXT: vpshufb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm2 = xmm5[0],xmm4[1],xmm5[2,3],xmm4[4],xmm5[5,6],xmm4[7]
+; AVX512F-NEXT: vpshufb %xmm1, %xmm2, %xmm1
 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7]
 ; AVX512F-NEXT: vmovdqa %ymm3, (%rsi)
@@ -1111,22 +1113,22 @@ define void @load_i16_stride3_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
 ;
 ; AVX512F-LABEL: load_i16_stride3_vf32:
 ; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm0 = [65535,65535,0,65535,65535,0,65535,65535,0,65535,65535,0,65535,65535,0,65535]
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [65535,65535,0,65535,65535,0,65535,65535,0,65535,65535,0,65535,65535,0,65535]
 ; AVX512F-NEXT: vmovdqa 128(%rdi), %ymm5
 ; AVX512F-NEXT: vmovdqa 160(%rdi), %ymm6
-; AVX512F-NEXT: vmovdqa %ymm0, %ymm1
-; AVX512F-NEXT: vpternlogq $202, %ymm5, %ymm6, %ymm1
-; AVX512F-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
-; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3],ymm2[4],ymm1[5,6],ymm2[7],ymm1[8],ymm2[9],ymm1[10,11],ymm2[12],ymm1[13,14],ymm2[15]
-; AVX512F-NEXT: vpshufb {{.*#+}} ymm3 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,4,5,10,11,16,17,22,23,28,29,18,19,24,25,30,31,20,21,26,27]
-; AVX512F-NEXT: vmovdqa 112(%rdi), %xmm1
+; AVX512F-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512F-NEXT: vpternlogq $202, %ymm5, %ymm6, %ymm0
+; AVX512F-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,0,1]
+; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2,3],ymm2[4],ymm0[5,6],ymm2[7],ymm0[8],ymm2[9],ymm0[10,11],ymm2[12],ymm0[13,14],ymm2[15]
+; AVX512F-NEXT: vpshufb {{.*#+}} ymm3 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,4,5,10,11,16,17,22,23,28,29,18,19,24,25,30,31,20,21,26,27]
+; AVX512F-NEXT: vmovdqa 112(%rdi), %xmm0
 ; AVX512F-NEXT: vmovdqa 96(%rdi), %xmm2
-; AVX512F-NEXT: vpblendw {{.*#+}} xmm4 = xmm2[0],xmm1[1],xmm2[2,3],xmm1[4],xmm2[5,6],xmm1[7]
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm4 = xmm2[0],xmm0[1],xmm2[2,3],xmm0[4],xmm2[5,6],xmm0[7]
 ; AVX512F-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[0,1,6,7,12,13,2,3,8,9,14,15,u,u,u,u]
 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm7 = ymm4[0,1,2],ymm3[3,4,5,6,7]
 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm8
 ; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm9
-; AVX512F-NEXT: vmovdqa %ymm0, %ymm3
+; AVX512F-NEXT: vmovdqa %ymm1, %ymm3
 ; AVX512F-NEXT: vpternlogq $202, %ymm9, %ymm8, %ymm3
 ; AVX512F-NEXT: vpermq {{.*#+}} ymm4 = ymm3[2,3,0,1]
 ; AVX512F-NEXT: vpblendw {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2,3],ymm4[4],ymm3[5,6],ymm4[7],ymm3[8],ymm4[9],ymm3[10,11],ymm4[12],ymm3[13,14],ymm4[15]
@@ -1140,48 +1142,49 @@ define void @load_i16_stride3_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, pt
 ; AVX512F-NEXT: vpshufhw {{.*#+}} xmm10 = xmm10[0,1,2,3,6,5,4,7]
 ; AVX512F-NEXT: vpblendd {{.*#+}} ymm10 = ymm10[0,1,2,3],ymm11[4,5,6,7]
 ; AVX512F-NEXT: vinserti64x4 $1, %ymm7, %zmm10, %zmm7
-; AVX512F-NEXT: vmovdqa %ymm0, %ymm10
+; AVX512F-NEXT: vmovdqa %ymm1, %ymm10
 ; AVX512F-NEXT: vpternlogq $202, %ymm6, %ymm5, %ymm10
 ; AVX512F-NEXT: vpermq {{.*#+}} ymm11 = ymm10[2,3,0,1]
 ; AVX512F-NEXT: vpblendw {{.*#+}} ymm10 = ymm10[0,1],ymm11[2],ymm10[3,4],ymm11[5],ymm10[6,7,8,9],ymm11[10],ymm10[11,12],ymm11[13],ymm10[14,15]
-; AVX512F-NEXT: vpshufb {{.*#+}} ymm10 = ymm10[2,3,8,9,14,15,4,5,10,11,0,1,6,7,12,13,18,19,24,25,30,31,20,21,26,27,16,17,22,23,28,29]
-; AVX512F-NEXT: vpblendw {{.*#+}} xmm11 = xmm2[0,1],xmm1[2],xmm2[3,4],xmm1[5],xmm2[6,7]
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm11 = xmm11[2,3,8,9,14,15,4,5,10,11,u,u,u,u,u,u]
-; AVX512F-NEXT: vpblendw {{.*#+}} xmm11 = xmm11[0,1,2,3,4],xmm10[5,6,7]
-; AVX512F-NEXT: vpblendd {{.*#+}} ymm10 = ymm11[0,1,2,3],ymm10[4,5,6,7]
-; AVX512F-NEXT: vmovdqa {{.*#+}} ymm11 = [65535,0,65535,65535,0,65535,65535,0,65535,65535,0,65535,65535,0,65535,65535]
-; AVX512F-NEXT: vmovdqa %ymm11, %ymm12
-; AVX512F-NEXT: vpternlogq $202, %ymm8, %ymm9, %ymm12
-; AVX512F-NEXT: vpermq {{.*#+}} ymm13 = ymm12[2,3,0,1]
-; AVX512F-NEXT: vpblendw {{.*#+}} ymm12 = ymm12[0,1],ymm13[2],ymm12[3,4],ymm13[5],ymm12[6,7,8,9],ymm13[10],ymm12[11,12],ymm13[13],ymm12[14,15]
-; AVX512F-NEXT: vpshufb {{.*#+}} ymm12 = ymm12[2,3,8,9,14,15,4,5,12,13,10,11,0,1,6,7,18,19,24,25,30,31,20,21,28,29,26,27,16,17,22,23]
-; AVX512F-NEXT: vpblendw {{.*#+}} xmm13 = xmm3[0,1],xmm4[2],xmm3[3,4],xmm4[5],xmm3[6,7]
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm13 = xmm13[u,u,u,u,u,u,4,5,10,11,0,1,6,7,12,13]
-; AVX512F-NEXT: vinserti128 $1, %xmm13, %ymm0, %ymm13
-; AVX512F-NEXT: vpblendw {{.*#+}} ymm13 = ymm12[0,1,2],ymm13[3,4,5,6,7],ymm12[8,9,10],ymm13[11,12,13,14,15]
-; AVX512F-NEXT: vpshufhw {{.*#+}} xmm12 = xmm12[0,1,2,3,5,6,7,4]
-; AVX512F-NEXT: vpblendd {{.*#+}} ymm12 = ymm12[0,1,2,3],ymm13[4,5,6,7]
-; AVX512F-NEXT: vinserti64x4 $1, %ymm10, %zmm12, %zmm10
-; AVX512F-NEXT: vpternlogq $202, %ymm5, %ymm6, %ymm11
-; AVX512F-NEXT: vpermq {{.*#+}} ymm5 = ymm11[2,3,0,1]
-; AVX512F-NEXT: vpblendw {{.*#+}} ymm5 = ymm5[0],ymm11[1,2],ymm5[3],ymm11[4,5],ymm5[6],ymm11[7],ymm5[8],ymm11[9,10],ymm5[11],ymm11[12,13],ymm5[14],ymm11[15]
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm11 = [2,3,8,9,14,15,4,5,10,11,0,1,6,7,12,13,18,19,24,25,30,31,20,21,26,27,16,17,22,23,28,29]
+; AVX512F-NEXT: vpshufb %ymm11, %ymm10, %ymm10
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm12 = xmm2[0,1],xmm0[2],xmm2[3,4],xmm0[5],xmm2[6,7]
+; AVX512F-NEXT: vpshufb %xmm11, %xmm12, %xmm12
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm12 = xmm12[0,1,2,3,4],xmm10[5,6,7]
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm10 = ymm12[0,1,2,3],ymm10[4,5,6,7]
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm12 = [65535,0,65535,65535,0,65535,65535,0,65535,65535,0,65535,65535,0,65535,65535]
+; AVX512F-NEXT: vmovdqa %ymm12, %ymm13
+; AVX512F-NEXT: vpternlogq $202, %ymm8, %ymm9, %ymm13
+; AVX512F-NEXT: vpermq {{.*#+}} ymm14 = ymm13[2,3,0,1]
+; AVX512F-NEXT: vpblendw {{.*#+}} ymm13 = ymm13[0,1],ymm14[2],ymm13[3,4],ymm14[5],ymm13[6,7,8,9],ymm14[10],ymm13[11,12],ymm14[13],ymm13[14,15]
+; AVX512F-NEXT: vpshufb {{.*#+}} ymm13 = ymm13[2,3,8,9,14,15,4,5,12,13,10,11,0,1,6,7,18,19,24,25,30,31,20,21,28,29,26,27,16,17,22,23]
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm14 = xmm3[0,1],xmm4[2],xmm3[3,4],xmm4[5],xmm3[6,7]
+; AVX512F-NEXT: vpshufb %xmm11, %xmm14, %xmm11
+; AVX512F-NEXT: vinserti128 $1, %xmm11, %ymm0, %ymm11
+; AVX512F-NEXT: vpblendw {{.*#+}} ymm11 = ymm13[0,1,2],ymm11[3,4,5,6,7],ymm13[8,9,10],ymm11[11,12,13,14,15]
+; AVX512F-NEXT: vpshufhw {{.*#+}} xmm13 = xmm13[0,1,2,3,5,6,7,4]
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm11 = ymm13[0,1,2,3],ymm11[4,5,6,7]
+; AVX512F-NEXT: vinserti64x4 $1, %ymm10, %zmm11, %zmm10
+; AVX512F-NEXT: vpternlogq $202, %ymm5, %ymm6, %ymm12
+; AVX512F-NEXT: vpermq {{.*#+}} ymm5 = ymm12[2,3,0,1]
+; AVX512F-NEXT: vpblendw {{.*#+}} ymm5 = ymm5[0],ymm12[1,2],ymm5[3],ymm12[4,5],ymm5[6],ymm12[7],ymm5[8],ymm12[9,10],ymm5[11],ymm12[12,13],ymm5[14],ymm12[15]
 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [4,5,10,11,0,1,6,7,12,13,2,3,8,9,14,15,20,21,26,27,16,17,22,23,28,29,18,19,24,25,30,31]
 ; AVX512F-NEXT: vpshufb %ymm6, %ymm5, %ymm5
-; AVX512F-NEXT: vpternlogq $202, %ymm8, %ymm9, %ymm0
-; AVX512F-NEXT: vpermq {{.*#+}} ymm8 = ymm0[2,3,0,1]
-; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm8[0],ymm0[1,2],ymm8[3],ymm0[4,5],ymm8[6],ymm0[7],ymm8[8],ymm0[9,10],ymm8[11],ymm0[12,13],ymm8[14],ymm0[15]
-; AVX512F-NEXT: vpshufb %ymm6, %ymm0, %ymm0
+; AVX512F-NEXT: vpternlogq $202, %ymm8, %ymm9, %ymm1
+; AVX512F-NEXT: vpermq {{.*#+}} ymm8 = ymm1[2,3,0,1]
+; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm8[0],ymm1[1,2],ymm8[3],ymm1[4,5],ymm8[6],ymm1[7],ymm8[8],ymm1[9,10],ymm8[11],ymm1[12,13],ymm8[14],ymm1[15]
+; AVX512F-NEXT: vpshufb %ymm6, %ymm1, %ymm1
 ; AVX512F-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0],xmm3[1],xmm4[2,3],xmm3[4],xmm4[5,6],xmm3[7]
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u,u,u,0,1,6,7,12,13,2,3,8,9,14,15]
+; AVX512F-NEXT: vpshufb %xmm6, %xmm3, %xmm3
 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
-; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm3[5,6,7]
-; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3,4],xmm2[5],xmm1[6,7]
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,10,11,0,1,6,7,12,13,14,15,0,1,2,3]
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
-; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm1
-; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4],xmm5[5,6,7]
-; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm5[4,5,6,7]
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm3[5,6,7]
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2],xmm0[3,4],xmm2[5],xmm0[6,7]
+; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,10,11,0,1,6,7,12,13,14,15,0,1,2,3]
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm0
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm5[5,6,7]
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm5[4,5,6,7]
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512F-NEXT: vmovdqa64 %zmm7, (%rsi)
 ; AVX512F-NEXT: vmovdqa64 %zmm10, (%rdx)
 ; AVX512F-NEXT: vmovdqa64 %zmm0, (%rcx)
