@@ -79,24 +79,24 @@ define void @store_i64_stride7_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-NEXT: vmovaps (%r8), %xmm2
; AVX2-NEXT: vmovaps (%r9), %xmm3
; AVX2-NEXT: vmovaps (%r10), %xmm4
- ; AVX2-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm0
+ ; AVX2-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm5
; AVX2-NEXT: vinsertf128 $1, (%rcx), %ymm1, %ymm1
- ; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm5
- ; AVX2-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1],ymm0[2,3],ymm5[4,5,6,7]
- ; AVX2-NEXT: vpermpd {{.*#+}} ymm5 = ymm5[0,2,2,1]
+ ; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm6
+ ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm6[0,1],ymm0[2,3],ymm6[4,5,6,7]
+ ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,1]
; AVX2-NEXT: vbroadcastsd %xmm4, %ymm6
- ; AVX2-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1,2,3],ymm6[4,5],ymm5[6,7]
- ; AVX2-NEXT: vunpckhpd {{.*#+}} ymm6 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+ ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm6[4,5],ymm0[6,7]
+ ; AVX2-NEXT: vunpckhpd {{.*#+}} ymm6 = ymm5[1],ymm1[1],ymm5[3],ymm1[3]
; AVX2-NEXT: vpermpd {{.*#+}} ymm6 = ymm6[2,1,3,3]
; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm2
; AVX2-NEXT: vblendps {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7]
- ; AVX2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
- ; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
- ; AVX2-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm3[1],xmm4[1]
- ; AVX2-NEXT: vmovaps %xmm1, 96(%rax)
- ; AVX2-NEXT: vmovaps %ymm0, (%rax)
+ ; AVX2-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm5[0],ymm1[0],ymm5[2],ymm1[2]
+ ; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,1,3]
+ ; AVX2-NEXT: vunpckhpd {{.*#+}} xmm3 = xmm3[1],xmm4[1]
+ ; AVX2-NEXT: vmovaps %xmm3, 96(%rax)
+ ; AVX2-NEXT: vmovaps %ymm1, (%rax)
; AVX2-NEXT: vmovaps %ymm2, 64(%rax)
- ; AVX2-NEXT: vmovaps %ymm5, 32(%rax)
+ ; AVX2-NEXT: vmovaps %ymm0, 32(%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@@ -109,24 +109,24 @@ define void @store_i64_stride7_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FP-NEXT: vmovaps (%r8), %xmm2
; AVX2-FP-NEXT: vmovaps (%r9), %xmm3
; AVX2-FP-NEXT: vmovaps (%r10), %xmm4
- ; AVX2-FP-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm0
+ ; AVX2-FP-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm5
; AVX2-FP-NEXT: vinsertf128 $1, (%rcx), %ymm1, %ymm1
- ; AVX2-FP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm5
- ; AVX2-FP-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1],ymm0[2,3],ymm5[4,5,6,7]
- ; AVX2-FP-NEXT: vpermpd {{.*#+}} ymm5 = ymm5[0,2,2,1]
+ ; AVX2-FP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm6
+ ; AVX2-FP-NEXT: vblendps {{.*#+}} ymm0 = ymm6[0,1],ymm0[2,3],ymm6[4,5,6,7]
+ ; AVX2-FP-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,1]
; AVX2-FP-NEXT: vbroadcastsd %xmm4, %ymm6
- ; AVX2-FP-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1,2,3],ymm6[4,5],ymm5[6,7]
- ; AVX2-FP-NEXT: vunpckhpd {{.*#+}} ymm6 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+ ; AVX2-FP-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm6[4,5],ymm0[6,7]
+ ; AVX2-FP-NEXT: vunpckhpd {{.*#+}} ymm6 = ymm5[1],ymm1[1],ymm5[3],ymm1[3]
; AVX2-FP-NEXT: vpermpd {{.*#+}} ymm6 = ymm6[2,1,3,3]
; AVX2-FP-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm2
; AVX2-FP-NEXT: vblendps {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7]
- ; AVX2-FP-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
- ; AVX2-FP-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
- ; AVX2-FP-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm3[1],xmm4[1]
- ; AVX2-FP-NEXT: vmovaps %xmm1, 96(%rax)
- ; AVX2-FP-NEXT: vmovaps %ymm0, (%rax)
+ ; AVX2-FP-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm5[0],ymm1[0],ymm5[2],ymm1[2]
+ ; AVX2-FP-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,1,3]
+ ; AVX2-FP-NEXT: vunpckhpd {{.*#+}} xmm3 = xmm3[1],xmm4[1]
+ ; AVX2-FP-NEXT: vmovaps %xmm3, 96(%rax)
+ ; AVX2-FP-NEXT: vmovaps %ymm1, (%rax)
; AVX2-FP-NEXT: vmovaps %ymm2, 64(%rax)
- ; AVX2-FP-NEXT: vmovaps %ymm5, 32(%rax)
+ ; AVX2-FP-NEXT: vmovaps %ymm0, 32(%rax)
; AVX2-FP-NEXT: vzeroupper
; AVX2-FP-NEXT: retq
;
@@ -139,24 +139,24 @@ define void @store_i64_stride7_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX2-FCP-NEXT: vmovaps (%r8), %xmm2
; AVX2-FCP-NEXT: vmovaps (%r9), %xmm3
; AVX2-FCP-NEXT: vmovaps (%r10), %xmm4
- ; AVX2-FCP-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm0
+ ; AVX2-FCP-NEXT: vinsertf128 $1, (%rsi), %ymm0, %ymm5
; AVX2-FCP-NEXT: vinsertf128 $1, (%rcx), %ymm1, %ymm1
- ; AVX2-FCP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm5
- ; AVX2-FCP-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1],ymm0[2,3],ymm5[4,5,6,7]
- ; AVX2-FCP-NEXT: vpermpd {{.*#+}} ymm5 = ymm5[0,2,2,1]
+ ; AVX2-FCP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm6
+ ; AVX2-FCP-NEXT: vblendps {{.*#+}} ymm0 = ymm6[0,1],ymm0[2,3],ymm6[4,5,6,7]
+ ; AVX2-FCP-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,1]
; AVX2-FCP-NEXT: vbroadcastsd %xmm4, %ymm6
- ; AVX2-FCP-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1,2,3],ymm6[4,5],ymm5[6,7]
- ; AVX2-FCP-NEXT: vunpckhpd {{.*#+}} ymm6 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+ ; AVX2-FCP-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm6[4,5],ymm0[6,7]
+ ; AVX2-FCP-NEXT: vunpckhpd {{.*#+}} ymm6 = ymm5[1],ymm1[1],ymm5[3],ymm1[3]
; AVX2-FCP-NEXT: vpermpd {{.*#+}} ymm6 = ymm6[2,1,3,3]
; AVX2-FCP-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm2
; AVX2-FCP-NEXT: vblendps {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7]
- ; AVX2-FCP-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
- ; AVX2-FCP-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
- ; AVX2-FCP-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm3[1],xmm4[1]
- ; AVX2-FCP-NEXT: vmovaps %xmm1, 96(%rax)
- ; AVX2-FCP-NEXT: vmovaps %ymm0, (%rax)
+ ; AVX2-FCP-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm5[0],ymm1[0],ymm5[2],ymm1[2]
+ ; AVX2-FCP-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,1,3]
+ ; AVX2-FCP-NEXT: vunpckhpd {{.*#+}} xmm3 = xmm3[1],xmm4[1]
+ ; AVX2-FCP-NEXT: vmovaps %xmm3, 96(%rax)
+ ; AVX2-FCP-NEXT: vmovaps %ymm1, (%rax)
; AVX2-FCP-NEXT: vmovaps %ymm2, 64(%rax)
- ; AVX2-FCP-NEXT: vmovaps %ymm5, 32(%rax)
+ ; AVX2-FCP-NEXT: vmovaps %ymm0, 32(%rax)
; AVX2-FCP-NEXT: vzeroupper
; AVX2-FCP-NEXT: retq
;
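; A minimal sketch of the kind of IR these CHECK prefixes are generated from,
; assuming the usual shape of LLVM's vector-interleaved-store tests. The
; argument names past the truncated hunk header (%in.vecptr2 through
; %in.vecptr6, %out.vec) and the exact shufflevector chain are assumptions,
; not copied from the test file.
define void @store_i64_stride7_vf2(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecptr2, ptr %in.vecptr3, ptr %in.vecptr4, ptr %in.vecptr5, ptr %in.vecptr6, ptr %out.vec) nounwind {
  ; Load one <2 x i64> slice from each of the seven input streams.
  %a = load <2 x i64>, ptr %in.vecptr0, align 64
  %b = load <2 x i64>, ptr %in.vecptr1, align 64
  %c = load <2 x i64>, ptr %in.vecptr2, align 64
  %d = load <2 x i64>, ptr %in.vecptr3, align 64
  %e = load <2 x i64>, ptr %in.vecptr4, align 64
  %f = load <2 x i64>, ptr %in.vecptr5, align 64
  %g = load <2 x i64>, ptr %in.vecptr6, align 64
  ; Concatenate pairwise into wider vectors.
  %ab = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cd = shufflevector <2 x i64> %c, <2 x i64> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ef = shufflevector <2 x i64> %e, <2 x i64> %f, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %abcd = shufflevector <4 x i64> %ab, <4 x i64> %cd, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %g4 = shufflevector <2 x i64> %g, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %efg = shufflevector <4 x i64> %ef, <4 x i64> %g4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
  ; %abcd = a0 a1 b0 b1 c0 c1 d0 d1 and %efg = e0 e1 f0 f1 g0 g1 x x, so the
  ; final mask produces the stride-7 order a0 b0 c0 d0 e0 f0 g0 a1 ... g1.
  %interleaved = shufflevector <8 x i64> %abcd, <8 x i64> %efg, <14 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13>
  store <14 x i64> %interleaved, ptr %out.vec, align 64
  ret void
}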