@@ -84,7 +84,7 @@ define void @load_i64_stride3_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
;
; AVX512-LABEL: load_i64_stride3_vf2:
; AVX512: # %bb.0:
- ; AVX512-NEXT: vpermpd {{.*#+}} zmm0 = mem[0,3,2,3,4,7,6,7]
+ ; AVX512-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,3,2,3]
; AVX512-NEXT: vmovdqa 32(%rdi), %xmm1
; AVX512-NEXT: vpalignr {{.*#+}} xmm2 = mem[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX512-NEXT: vpblendd {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
@@ -97,9 +97,8 @@ define void @load_i64_stride3_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512-FCP-LABEL: load_i64_stride3_vf2:
; AVX512-FCP: # %bb.0:
; AVX512-FCP-NEXT: vpmovsxbq {{.*#+}} xmm0 = [1,4]
- ; AVX512-FCP-NEXT: vmovaps (%rdi), %zmm1
- ; AVX512-FCP-NEXT: vpermpd %zmm1, %zmm0, %zmm0
- ; AVX512-FCP-NEXT: vpermpd {{.*#+}} zmm1 = zmm1[0,3,2,3,4,7,6,7]
+ ; AVX512-FCP-NEXT: vpermpd (%rdi), %zmm0, %zmm0
+ ; AVX512-FCP-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,3,2,3]
; AVX512-FCP-NEXT: vmovaps 16(%rdi), %xmm2
; AVX512-FCP-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0,1],mem[2,3]
; AVX512-FCP-NEXT: vmovaps %xmm1, (%rsi)
@@ -110,7 +109,7 @@ define void @load_i64_stride3_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
;
; AVX512DQ-LABEL: load_i64_stride3_vf2:
; AVX512DQ: # %bb.0:
- ; AVX512DQ-NEXT: vpermpd {{.*#+}} zmm0 = mem[0,3,2,3,4,7,6,7]
+ ; AVX512DQ-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,3,2,3]
; AVX512DQ-NEXT: vmovdqa 32(%rdi), %xmm1
; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm2 = mem[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX512DQ-NEXT: vpblendd {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
@@ -123,9 +122,8 @@ define void @load_i64_stride3_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512DQ-FCP-LABEL: load_i64_stride3_vf2:
; AVX512DQ-FCP: # %bb.0:
; AVX512DQ-FCP-NEXT: vpmovsxbq {{.*#+}} xmm0 = [1,4]
- ; AVX512DQ-FCP-NEXT: vmovaps (%rdi), %zmm1
- ; AVX512DQ-FCP-NEXT: vpermpd %zmm1, %zmm0, %zmm0
- ; AVX512DQ-FCP-NEXT: vpermpd {{.*#+}} zmm1 = zmm1[0,3,2,3,4,7,6,7]
+ ; AVX512DQ-FCP-NEXT: vpermpd (%rdi), %zmm0, %zmm0
+ ; AVX512DQ-FCP-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,3,2,3]
; AVX512DQ-FCP-NEXT: vmovaps 16(%rdi), %xmm2
; AVX512DQ-FCP-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0,1],mem[2,3]
; AVX512DQ-FCP-NEXT: vmovaps %xmm1, (%rsi)
@@ -136,7 +134,7 @@ define void @load_i64_stride3_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
;
; AVX512BW-LABEL: load_i64_stride3_vf2:
; AVX512BW: # %bb.0:
- ; AVX512BW-NEXT: vpermpd {{.*#+}} zmm0 = mem[0,3,2,3,4,7,6,7]
+ ; AVX512BW-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,3,2,3]
; AVX512BW-NEXT: vmovdqa 32(%rdi), %xmm1
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm2 = mem[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX512BW-NEXT: vpblendd {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
@@ -149,9 +147,8 @@ define void @load_i64_stride3_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512BW-FCP-LABEL: load_i64_stride3_vf2:
; AVX512BW-FCP: # %bb.0:
; AVX512BW-FCP-NEXT: vpmovsxbq {{.*#+}} xmm0 = [1,4]
- ; AVX512BW-FCP-NEXT: vmovaps (%rdi), %zmm1
- ; AVX512BW-FCP-NEXT: vpermpd %zmm1, %zmm0, %zmm0
- ; AVX512BW-FCP-NEXT: vpermpd {{.*#+}} zmm1 = zmm1[0,3,2,3,4,7,6,7]
+ ; AVX512BW-FCP-NEXT: vpermpd (%rdi), %zmm0, %zmm0
+ ; AVX512BW-FCP-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,3,2,3]
; AVX512BW-FCP-NEXT: vmovaps 16(%rdi), %xmm2
; AVX512BW-FCP-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0,1],mem[2,3]
; AVX512BW-FCP-NEXT: vmovaps %xmm1, (%rsi)
@@ -162,7 +159,7 @@ define void @load_i64_stride3_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
;
; AVX512DQ-BW-LABEL: load_i64_stride3_vf2:
; AVX512DQ-BW: # %bb.0:
- ; AVX512DQ-BW-NEXT: vpermpd {{.*#+}} zmm0 = mem[0,3,2,3,4,7,6,7]
+ ; AVX512DQ-BW-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,3,2,3]
; AVX512DQ-BW-NEXT: vmovdqa 32(%rdi), %xmm1
; AVX512DQ-BW-NEXT: vpalignr {{.*#+}} xmm2 = mem[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX512DQ-BW-NEXT: vpblendd {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
@@ -175,9 +172,8 @@ define void @load_i64_stride3_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX512DQ-BW-FCP-LABEL: load_i64_stride3_vf2:
; AVX512DQ-BW-FCP: # %bb.0:
; AVX512DQ-BW-FCP-NEXT: vpmovsxbq {{.*#+}} xmm0 = [1,4]
- ; AVX512DQ-BW-FCP-NEXT: vmovaps (%rdi), %zmm1
- ; AVX512DQ-BW-FCP-NEXT: vpermpd %zmm1, %zmm0, %zmm0
- ; AVX512DQ-BW-FCP-NEXT: vpermpd {{.*#+}} zmm1 = zmm1[0,3,2,3,4,7,6,7]
+ ; AVX512DQ-BW-FCP-NEXT: vpermpd (%rdi), %zmm0, %zmm0
+ ; AVX512DQ-BW-FCP-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,3,2,3]
; AVX512DQ-BW-FCP-NEXT: vmovaps 16(%rdi), %xmm2
; AVX512DQ-BW-FCP-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0,1],mem[2,3]
; AVX512DQ-BW-FCP-NEXT: vmovaps %xmm1, (%rsi)