Skip to content

Commit bd034ab

Browse files
authored
[X86] combineX86ShuffleChain - always combine to a new VPERMV node if the root shuffle was a VPERMV node (#128183)
Similar to what we already do for VPERMV3 nodes: if we're trying to create a new unary variable shuffle and we started with a VPERMV node, then always create a new one if it reduces the shuffle chain depth.
1 parent a1163d8 commit bd034ab

6 files changed

+231
-233
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40090,8 +40090,9 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
4009040090
bool AllowBWIVPERMV3 =
4009140091
(Depth >= BWIVPERMV3ShuffleDepth || HasSlowVariableMask);
4009240092

40093-
// If root was a VPERMV3 node, always allow a variable shuffle.
40094-
if (Root.getOpcode() == X86ISD::VPERMV3)
40093+
// If root was a VPERMV/VPERMV3 node, always allow a variable shuffle.
40094+
if ((UnaryShuffle && Root.getOpcode() == X86ISD::VPERMV) ||
40095+
Root.getOpcode() == X86ISD::VPERMV3)
4009540096
AllowVariableCrossLaneMask = AllowVariablePerLaneMask = true;
4009640097

4009740098
bool MaskContainsZeros = isAnyZero(Mask);

llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll

Lines changed: 27 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -588,20 +588,19 @@ define void @store_i16_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
588588
; AVX2-FCP-NEXT: vmovq {{.*#+}} xmm5 = mem[0],zero
589589
; AVX2-FCP-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
590590
; AVX2-FCP-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[2],ymm5[2]
591-
; AVX2-FCP-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
592-
; AVX2-FCP-NEXT: vpmovsxbd {{.*#+}} ymm1 = [5,7,1,3,7,0,0,0]
593-
; AVX2-FCP-NEXT: vpermd %ymm4, %ymm1, %ymm1
594-
; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5],zero,zero,zero,zero,zero,zero,ymm1[10,11,14,15,2,3,18,19],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
595-
; AVX2-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [1,3,5,7,1,3,5,7]
596-
; AVX2-FCP-NEXT: # ymm5 = mem[0,1,0,1]
597-
; AVX2-FCP-NEXT: vpermd %ymm0, %ymm5, %ymm5
598-
; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm5 = zero,zero,zero,zero,ymm5[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm5[18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
599-
; AVX2-FCP-NEXT: vpor %ymm5, %ymm1, %ymm1
591+
; AVX2-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [1,5,3,7,1,5,3,7]
592+
; AVX2-FCP-NEXT: # ymm1 = mem[0,1,0,1]
593+
; AVX2-FCP-NEXT: vpermd %ymm0, %ymm1, %ymm1
594+
; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,ymm1[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
595+
; AVX2-FCP-NEXT: vpmovsxbd {{.*#+}} ymm5 = [5,7,1,3,7,0,0,0]
596+
; AVX2-FCP-NEXT: vpermd %ymm4, %ymm5, %ymm5
597+
; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm5 = ymm5[0,1,4,5],zero,zero,zero,zero,zero,zero,ymm5[10,11,14,15,2,3,18,19],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
598+
; AVX2-FCP-NEXT: vpor %ymm1, %ymm5, %ymm1
600599
; AVX2-FCP-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
601600
; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm2[0,1,8,9,4,5,6,7,4,5],zero,zero,ymm2[26,27],zero,zero,zero,zero,ymm2[24,25,20,21,22,23,20,21,28,29]
602601
; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm3 = ymm4[0,1,8,9],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm4[2,3],zero,zero,ymm4[18,19,26,27],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
603602
; AVX2-FCP-NEXT: vpor %ymm2, %ymm3, %ymm2
604-
; AVX2-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,2,4,6,0,2,4,6]
603+
; AVX2-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,4,2,6,0,4,2,6]
605604
; AVX2-FCP-NEXT: # ymm3 = mem[0,1,0,1]
606605
; AVX2-FCP-NEXT: vpermd %ymm0, %ymm3, %ymm0
607606
; AVX2-FCP-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,0,1,4,5,8,9,u,u,u,u,u,u,u,u,18,19,22,23,26,27,u,u,u,u]
@@ -670,17 +669,16 @@ define void @store_i16_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
670669
; AVX512-FCP-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm4
671670
; AVX512-FCP-NEXT: vmovq {{.*#+}} xmm5 = mem[0],zero
672671
; AVX512-FCP-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
673-
; AVX512-FCP-NEXT: vpmovsxbq {{.*#+}} ymm1 = [0,2,4,0]
674-
; AVX512-FCP-NEXT: vpermi2q %ymm5, %ymm0, %ymm1
675-
; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm0 = [0,2,4,6,0,2,4,6]
676-
; AVX512-FCP-NEXT: # ymm0 = mem[0,1,0,1]
677-
; AVX512-FCP-NEXT: vpermd %ymm1, %ymm0, %ymm0
678-
; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[18,19,22,23,26,27],zero,zero,zero,zero
679-
; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [1,3,5,7,1,3,5,7]
672+
; AVX512-FCP-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[2],ymm5[2]
673+
; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,4,2,6,0,4,2,6]
674+
; AVX512-FCP-NEXT: # ymm1 = mem[0,1,0,1]
675+
; AVX512-FCP-NEXT: vpermd %ymm0, %ymm1, %ymm1
676+
; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[18,19,22,23,26,27],zero,zero,zero,zero
677+
; AVX512-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [1,5,3,7,1,5,3,7]
680678
; AVX512-FCP-NEXT: # ymm5 = mem[0,1,0,1]
681-
; AVX512-FCP-NEXT: vpermd %ymm1, %ymm5, %ymm1
682-
; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,ymm1[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
683-
; AVX512-FCP-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
679+
; AVX512-FCP-NEXT: vpermd %ymm0, %ymm5, %ymm0
680+
; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
681+
; AVX512-FCP-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
684682
; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm1 = ymm4[0,1,8,9],zero,zero,zero,zero,ymm4[u,u,u,u,u,u,2,3],zero,zero,ymm4[18,19,26,27,u,u,u,u,u,u],zero,zero,zero,zero
685683
; AVX512-FCP-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
686684
; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm2[0,1,8,9,u,u,u,u,u,u],zero,zero,ymm2[26,27],zero,zero,zero,zero,ymm2[u,u,u,u,u,u,20,21,28,29]
@@ -753,17 +751,16 @@ define void @store_i16_stride7_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
753751
; AVX512DQ-FCP-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm4
754752
; AVX512DQ-FCP-NEXT: vmovq {{.*#+}} xmm5 = mem[0],zero
755753
; AVX512DQ-FCP-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
756-
; AVX512DQ-FCP-NEXT: vpmovsxbq {{.*#+}} ymm1 = [0,2,4,0]
757-
; AVX512DQ-FCP-NEXT: vpermi2q %ymm5, %ymm0, %ymm1
758-
; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm0 = [0,2,4,6,0,2,4,6]
759-
; AVX512DQ-FCP-NEXT: # ymm0 = mem[0,1,0,1]
760-
; AVX512DQ-FCP-NEXT: vpermd %ymm1, %ymm0, %ymm0
761-
; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[18,19,22,23,26,27],zero,zero,zero,zero
762-
; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [1,3,5,7,1,3,5,7]
754+
; AVX512DQ-FCP-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[2],ymm5[2]
755+
; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,4,2,6,0,4,2,6]
756+
; AVX512DQ-FCP-NEXT: # ymm1 = mem[0,1,0,1]
757+
; AVX512DQ-FCP-NEXT: vpermd %ymm0, %ymm1, %ymm1
758+
; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[18,19,22,23,26,27],zero,zero,zero,zero
759+
; AVX512DQ-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [1,5,3,7,1,5,3,7]
763760
; AVX512DQ-FCP-NEXT: # ymm5 = mem[0,1,0,1]
764-
; AVX512DQ-FCP-NEXT: vpermd %ymm1, %ymm5, %ymm1
765-
; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,ymm1[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm1[18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
766-
; AVX512DQ-FCP-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
761+
; AVX512DQ-FCP-NEXT: vpermd %ymm0, %ymm5, %ymm0
762+
; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0[0,1,4,5,8,9],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[18,19,22,23,26,27],zero,zero,zero,zero,zero,zero,zero,zero
763+
; AVX512DQ-FCP-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
767764
; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm1 = ymm4[0,1,8,9],zero,zero,zero,zero,ymm4[u,u,u,u,u,u,2,3],zero,zero,ymm4[18,19,26,27,u,u,u,u,u,u],zero,zero,zero,zero
768765
; AVX512DQ-FCP-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
769766
; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm2[0,1,8,9,u,u,u,u,u,u],zero,zero,ymm2[26,27],zero,zero,zero,zero,ymm2[u,u,u,u,u,u,20,21,28,29]

llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll

Lines changed: 30 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -95,17 +95,17 @@ define void @store_i32_stride7_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
9595
; AVX2-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
9696
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
9797
; AVX2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
98-
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
99-
; AVX2-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,2,2,1,4,6,6,5]
100-
; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,2,1]
101-
; AVX2-NEXT: vshufps {{.*#+}} ymm3 = ymm0[0,2,2,3,4,6,6,7]
102-
; AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,1,0,2]
103-
; AVX2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6],ymm1[7]
104-
; AVX2-NEXT: vmovaps {{.*#+}} xmm3 = [3,5,7,u]
105-
; AVX2-NEXT: vpermps %ymm2, %ymm3, %ymm2
106-
; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm3 = [3,5,0,1,3,5,0,1]
98+
; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,4,2,6,0,4,2,6]
99+
; AVX2-NEXT: # ymm1 = mem[0,1,0,1]
100+
; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm1
101+
; AVX2-NEXT: vshufps {{.*#+}} ymm3 = ymm2[0,2,2,1,4,6,6,5]
102+
; AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,2,2,1]
103+
; AVX2-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6],ymm3[7]
104+
; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm3 = [5,3,0,1,5,3,0,1]
107105
; AVX2-NEXT: # ymm3 = mem[0,1,0,1]
108106
; AVX2-NEXT: vpermps %ymm0, %ymm3, %ymm0
107+
; AVX2-NEXT: vmovaps {{.*#+}} xmm3 = [3,5,7,u]
108+
; AVX2-NEXT: vpermps %ymm2, %ymm3, %ymm2
109109
; AVX2-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0,1,2],xmm0[3]
110110
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
111111
; AVX2-NEXT: vmovlps %xmm0, 48(%rax)
@@ -130,17 +130,17 @@ define void @store_i32_stride7_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
130130
; AVX2-FP-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
131131
; AVX2-FP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
132132
; AVX2-FP-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
133-
; AVX2-FP-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
134-
; AVX2-FP-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,2,2,1,4,6,6,5]
135-
; AVX2-FP-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,2,1]
136-
; AVX2-FP-NEXT: vshufps {{.*#+}} ymm3 = ymm0[0,2,2,3,4,6,6,7]
137-
; AVX2-FP-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,1,0,2]
138-
; AVX2-FP-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6],ymm1[7]
139-
; AVX2-FP-NEXT: vmovaps {{.*#+}} xmm3 = [3,5,7,u]
140-
; AVX2-FP-NEXT: vpermps %ymm2, %ymm3, %ymm2
141-
; AVX2-FP-NEXT: vbroadcastf128 {{.*#+}} ymm3 = [3,5,0,1,3,5,0,1]
133+
; AVX2-FP-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,4,2,6,0,4,2,6]
134+
; AVX2-FP-NEXT: # ymm1 = mem[0,1,0,1]
135+
; AVX2-FP-NEXT: vpermps %ymm0, %ymm1, %ymm1
136+
; AVX2-FP-NEXT: vshufps {{.*#+}} ymm3 = ymm2[0,2,2,1,4,6,6,5]
137+
; AVX2-FP-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,2,2,1]
138+
; AVX2-FP-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6],ymm3[7]
139+
; AVX2-FP-NEXT: vbroadcastf128 {{.*#+}} ymm3 = [5,3,0,1,5,3,0,1]
142140
; AVX2-FP-NEXT: # ymm3 = mem[0,1,0,1]
143141
; AVX2-FP-NEXT: vpermps %ymm0, %ymm3, %ymm0
142+
; AVX2-FP-NEXT: vmovaps {{.*#+}} xmm3 = [3,5,7,u]
143+
; AVX2-FP-NEXT: vpermps %ymm2, %ymm3, %ymm2
144144
; AVX2-FP-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0,1,2],xmm0[3]
145145
; AVX2-FP-NEXT: vextractf128 $1, %ymm0, %xmm0
146146
; AVX2-FP-NEXT: vmovlps %xmm0, 48(%rax)
@@ -165,23 +165,22 @@ define void @store_i32_stride7_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
165165
; AVX2-FCP-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
166166
; AVX2-FCP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
167167
; AVX2-FCP-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2]
168-
; AVX2-FCP-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
169-
; AVX2-FCP-NEXT: vmovaps {{.*#+}} xmm1 = [3,5,7,u]
170-
; AVX2-FCP-NEXT: vpermps %ymm2, %ymm1, %ymm1
171-
; AVX2-FCP-NEXT: vbroadcastf128 {{.*#+}} ymm3 = [3,5,0,1,3,5,0,1]
172-
; AVX2-FCP-NEXT: # ymm3 = mem[0,1,0,1]
173-
; AVX2-FCP-NEXT: vpermps %ymm0, %ymm3, %ymm3
174-
; AVX2-FCP-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0,1,2],xmm3[3]
175-
; AVX2-FCP-NEXT: vmovaps {{.*#+}} ymm4 = [0,2,4,6,u,u,u,1]
176-
; AVX2-FCP-NEXT: vpermps %ymm2, %ymm4, %ymm2
177-
; AVX2-FCP-NEXT: vbroadcastf128 {{.*#+}} ymm4 = [0,2,4,0,0,2,4,0]
168+
; AVX2-FCP-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [5,3,0,1,5,3,0,1]
169+
; AVX2-FCP-NEXT: # ymm1 = mem[0,1,0,1]
170+
; AVX2-FCP-NEXT: vpermps %ymm0, %ymm1, %ymm1
171+
; AVX2-FCP-NEXT: vmovaps {{.*#+}} xmm3 = [3,5,7,u]
172+
; AVX2-FCP-NEXT: vpermps %ymm2, %ymm3, %ymm3
173+
; AVX2-FCP-NEXT: vblendps {{.*#+}} xmm3 = xmm3[0,1,2],xmm1[3]
174+
; AVX2-FCP-NEXT: vbroadcastf128 {{.*#+}} ymm4 = [0,4,2,0,0,4,2,0]
178175
; AVX2-FCP-NEXT: # ymm4 = mem[0,1,0,1]
179176
; AVX2-FCP-NEXT: vpermps %ymm0, %ymm4, %ymm0
177+
; AVX2-FCP-NEXT: vmovaps {{.*#+}} ymm4 = [0,2,4,6,u,u,u,1]
178+
; AVX2-FCP-NEXT: vpermps %ymm2, %ymm4, %ymm2
180179
; AVX2-FCP-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6],ymm2[7]
181-
; AVX2-FCP-NEXT: vextractf128 $1, %ymm3, %xmm2
182-
; AVX2-FCP-NEXT: vmovlps %xmm2, 48(%rax)
180+
; AVX2-FCP-NEXT: vextractf128 $1, %ymm1, %xmm1
181+
; AVX2-FCP-NEXT: vmovlps %xmm1, 48(%rax)
183182
; AVX2-FCP-NEXT: vmovaps %ymm0, (%rax)
184-
; AVX2-FCP-NEXT: vmovaps %xmm1, 32(%rax)
183+
; AVX2-FCP-NEXT: vmovaps %xmm3, 32(%rax)
185184
; AVX2-FCP-NEXT: vzeroupper
186185
; AVX2-FCP-NEXT: retq
187186
;

0 commit comments

Comments (0)