@@ -1105,19 +1105,18 @@ define void @load_i16_stride2_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
1105
1105
; AVX512-VL-NEXT: vmovdqa64 64(%rdi), %zmm1
1106
1106
; AVX512-VL-NEXT: vmovdqa64 128(%rdi), %zmm2
1107
1107
; AVX512-VL-NEXT: vmovdqa64 192(%rdi), %zmm3
1108
- ; AVX512-VL-NEXT: vpmovdw %zmm1, %ymm4
1109
- ; AVX512-VL-NEXT: vpsrld $16, %zmm1, %zmm1
1110
- ; AVX512-VL-NEXT: vpsrld $16, %zmm0, %zmm5
1111
- ; AVX512-VL-NEXT: vpsrld $16, %zmm3, %zmm6
1112
- ; AVX512-VL-NEXT: vpsrld $16, %zmm2, %zmm7
1108
+ ; AVX512-VL-NEXT: vpsrld $16, %zmm0, %zmm4
1109
+ ; AVX512-VL-NEXT: vpsrld $16, %zmm1, %zmm5
1110
+ ; AVX512-VL-NEXT: vpsrld $16, %zmm2, %zmm6
1111
+ ; AVX512-VL-NEXT: vpsrld $16, %zmm3, %zmm7
1112
+ ; AVX512-VL-NEXT: vpmovdw %zmm1, 32(%rsi)
1113
1113
; AVX512-VL-NEXT: vpmovdw %zmm0, (%rsi)
1114
- ; AVX512-VL-NEXT: vmovdqa %ymm4, 32(%rsi)
1115
- ; AVX512-VL-NEXT: vpmovdw %zmm2, 64(%rsi)
1116
1114
; AVX512-VL-NEXT: vpmovdw %zmm3, 96(%rsi)
1117
- ; AVX512-VL-NEXT: vpmovdw %zmm7, 64(%rdx)
1118
- ; AVX512-VL-NEXT: vpmovdw %zmm6, 96(%rdx)
1119
- ; AVX512-VL-NEXT: vpmovdw %zmm5, (%rdx)
1120
- ; AVX512-VL-NEXT: vpmovdw %zmm1, 32(%rdx)
1115
+ ; AVX512-VL-NEXT: vpmovdw %zmm2, 64(%rsi)
1116
+ ; AVX512-VL-NEXT: vpmovdw %zmm7, 96(%rdx)
1117
+ ; AVX512-VL-NEXT: vpmovdw %zmm6, 64(%rdx)
1118
+ ; AVX512-VL-NEXT: vpmovdw %zmm5, 32(%rdx)
1119
+ ; AVX512-VL-NEXT: vpmovdw %zmm4, (%rdx)
1121
1120
; AVX512-VL-NEXT: vzeroupper
1122
1121
; AVX512-VL-NEXT: retq
1123
1122
;
@@ -1127,19 +1126,18 @@ define void @load_i16_stride2_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
1127
1126
; AVX512-FCP-NEXT: vmovdqa64 64(%rdi), %zmm1
1128
1127
; AVX512-FCP-NEXT: vmovdqa64 128(%rdi), %zmm2
1129
1128
; AVX512-FCP-NEXT: vmovdqa64 192(%rdi), %zmm3
1130
- ; AVX512-FCP-NEXT: vpmovdw %zmm1, %ymm4
1131
- ; AVX512-FCP-NEXT: vpsrld $16, %zmm1, %zmm1
1132
- ; AVX512-FCP-NEXT: vpsrld $16, %zmm0, %zmm5
1133
- ; AVX512-FCP-NEXT: vpsrld $16, %zmm3, %zmm6
1134
- ; AVX512-FCP-NEXT: vpsrld $16, %zmm2, %zmm7
1129
+ ; AVX512-FCP-NEXT: vpsrld $16, %zmm0, %zmm4
1130
+ ; AVX512-FCP-NEXT: vpsrld $16, %zmm1, %zmm5
1131
+ ; AVX512-FCP-NEXT: vpsrld $16, %zmm2, %zmm6
1132
+ ; AVX512-FCP-NEXT: vpsrld $16, %zmm3, %zmm7
1133
+ ; AVX512-FCP-NEXT: vpmovdw %zmm1, 32(%rsi)
1135
1134
; AVX512-FCP-NEXT: vpmovdw %zmm0, (%rsi)
1136
- ; AVX512-FCP-NEXT: vmovdqa %ymm4, 32(%rsi)
1137
- ; AVX512-FCP-NEXT: vpmovdw %zmm2, 64(%rsi)
1138
1135
; AVX512-FCP-NEXT: vpmovdw %zmm3, 96(%rsi)
1139
- ; AVX512-FCP-NEXT: vpmovdw %zmm7, 64(%rdx)
1140
- ; AVX512-FCP-NEXT: vpmovdw %zmm6, 96(%rdx)
1141
- ; AVX512-FCP-NEXT: vpmovdw %zmm5, (%rdx)
1142
- ; AVX512-FCP-NEXT: vpmovdw %zmm1, 32(%rdx)
1136
+ ; AVX512-FCP-NEXT: vpmovdw %zmm2, 64(%rsi)
1137
+ ; AVX512-FCP-NEXT: vpmovdw %zmm7, 96(%rdx)
1138
+ ; AVX512-FCP-NEXT: vpmovdw %zmm6, 64(%rdx)
1139
+ ; AVX512-FCP-NEXT: vpmovdw %zmm5, 32(%rdx)
1140
+ ; AVX512-FCP-NEXT: vpmovdw %zmm4, (%rdx)
1143
1141
; AVX512-FCP-NEXT: vzeroupper
1144
1142
; AVX512-FCP-NEXT: retq
1145
1143
;
@@ -1149,19 +1147,18 @@ define void @load_i16_stride2_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
1149
1147
; AVX512DQ-NEXT: vmovdqa64 64(%rdi), %zmm1
1150
1148
; AVX512DQ-NEXT: vmovdqa64 128(%rdi), %zmm2
1151
1149
; AVX512DQ-NEXT: vmovdqa64 192(%rdi), %zmm3
1152
- ; AVX512DQ-NEXT: vpmovdw %zmm1, %ymm4
1153
- ; AVX512DQ-NEXT: vpsrld $16, %zmm1, %zmm1
1154
- ; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm5
1155
- ; AVX512DQ-NEXT: vpsrld $16, %zmm3, %zmm6
1156
- ; AVX512DQ-NEXT: vpsrld $16, %zmm2, %zmm7
1150
+ ; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm4
1151
+ ; AVX512DQ-NEXT: vpsrld $16, %zmm1, %zmm5
1152
+ ; AVX512DQ-NEXT: vpsrld $16, %zmm2, %zmm6
1153
+ ; AVX512DQ-NEXT: vpsrld $16, %zmm3, %zmm7
1154
+ ; AVX512DQ-NEXT: vpmovdw %zmm1, 32(%rsi)
1157
1155
; AVX512DQ-NEXT: vpmovdw %zmm0, (%rsi)
1158
- ; AVX512DQ-NEXT: vmovdqa %ymm4, 32(%rsi)
1159
- ; AVX512DQ-NEXT: vpmovdw %zmm2, 64(%rsi)
1160
1156
; AVX512DQ-NEXT: vpmovdw %zmm3, 96(%rsi)
1161
- ; AVX512DQ-NEXT: vpmovdw %zmm7, 64(%rdx)
1162
- ; AVX512DQ-NEXT: vpmovdw %zmm6, 96(%rdx)
1163
- ; AVX512DQ-NEXT: vpmovdw %zmm5, (%rdx)
1164
- ; AVX512DQ-NEXT: vpmovdw %zmm1, 32(%rdx)
1157
+ ; AVX512DQ-NEXT: vpmovdw %zmm2, 64(%rsi)
1158
+ ; AVX512DQ-NEXT: vpmovdw %zmm7, 96(%rdx)
1159
+ ; AVX512DQ-NEXT: vpmovdw %zmm6, 64(%rdx)
1160
+ ; AVX512DQ-NEXT: vpmovdw %zmm5, 32(%rdx)
1161
+ ; AVX512DQ-NEXT: vpmovdw %zmm4, (%rdx)
1165
1162
; AVX512DQ-NEXT: vzeroupper
1166
1163
; AVX512DQ-NEXT: retq
1167
1164
;
@@ -1171,19 +1168,18 @@ define void @load_i16_stride2_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
1171
1168
; AVX512DQ-FCP-NEXT: vmovdqa64 64(%rdi), %zmm1
1172
1169
; AVX512DQ-FCP-NEXT: vmovdqa64 128(%rdi), %zmm2
1173
1170
; AVX512DQ-FCP-NEXT: vmovdqa64 192(%rdi), %zmm3
1174
- ; AVX512DQ-FCP-NEXT: vpmovdw %zmm1, %ymm4
1175
- ; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm1, %zmm1
1176
- ; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm0, %zmm5
1177
- ; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm3, %zmm6
1178
- ; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm2, %zmm7
1171
+ ; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm0, %zmm4
1172
+ ; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm1, %zmm5
1173
+ ; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm2, %zmm6
1174
+ ; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm3, %zmm7
1175
+ ; AVX512DQ-FCP-NEXT: vpmovdw %zmm1, 32(%rsi)
1179
1176
; AVX512DQ-FCP-NEXT: vpmovdw %zmm0, (%rsi)
1180
- ; AVX512DQ-FCP-NEXT: vmovdqa %ymm4, 32(%rsi)
1181
- ; AVX512DQ-FCP-NEXT: vpmovdw %zmm2, 64(%rsi)
1182
1177
; AVX512DQ-FCP-NEXT: vpmovdw %zmm3, 96(%rsi)
1183
- ; AVX512DQ-FCP-NEXT: vpmovdw %zmm7, 64(%rdx)
1184
- ; AVX512DQ-FCP-NEXT: vpmovdw %zmm6, 96(%rdx)
1185
- ; AVX512DQ-FCP-NEXT: vpmovdw %zmm5, (%rdx)
1186
- ; AVX512DQ-FCP-NEXT: vpmovdw %zmm1, 32(%rdx)
1178
+ ; AVX512DQ-FCP-NEXT: vpmovdw %zmm2, 64(%rsi)
1179
+ ; AVX512DQ-FCP-NEXT: vpmovdw %zmm7, 96(%rdx)
1180
+ ; AVX512DQ-FCP-NEXT: vpmovdw %zmm6, 64(%rdx)
1181
+ ; AVX512DQ-FCP-NEXT: vpmovdw %zmm5, 32(%rdx)
1182
+ ; AVX512DQ-FCP-NEXT: vpmovdw %zmm4, (%rdx)
1187
1183
; AVX512DQ-FCP-NEXT: vzeroupper
1188
1184
; AVX512DQ-FCP-NEXT: retq
1189
1185
;
0 commit comments