@@ -1101,3 +1101,125 @@ define <16 x i8> @var_shuffle_v16i8_from_v32i8_v16i8(<32 x i8> %v, <16 x i8> %in
1101
1101
%ret15 = insertelement <16 x i8 > %ret14 , i8 %v15 , i32 15
1102
1102
ret <16 x i8 > %ret15
1103
1103
}
1104
; Reconstructed from a corrupted diff paste: removed the '+' diff prefixes and
; interleaved diff-view line numbers, and restored normal LLVM IR token spacing.
; Test: converting <2 x i32> variable indices (masked to 0..7 via 'and') into
; extractelement indices on a <4 x i64> load must not miscompile; the CHECK
; lines below are in update_llc_test_checks.py format for each target prefix.
define void @indices_convert() {
; SSE3-LABEL: indices_convert:
; SSE3:       # %bb.0: # %bb
; SSE3-NEXT:    movdqa (%rax), %xmm0
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE3-NEXT:    movd %xmm1, %eax
; SSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE3-NEXT:    andl $3, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
; SSE3-NEXT:    movd %xmm1, %ecx
; SSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE3-NEXT:    andl $3, %ecx
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE3-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE3-NEXT:    movups %xmm1, (%rax)
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: indices_convert:
; SSSE3:       # %bb.0: # %bb
; SSSE3-NEXT:    movdqa (%rax), %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSSE3-NEXT:    movd %xmm1, %eax
; SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT:    andl $3, %eax
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
; SSSE3-NEXT:    movd %xmm1, %ecx
; SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT:    andl $3, %ecx
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSSE3-NEXT:    movups %xmm1, (%rax)
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: indices_convert:
; SSE41:       # %bb.0: # %bb
; SSE41-NEXT:    movaps (%rax), %xmm0
; SSE41-NEXT:    extractps $2, %xmm0, %eax
; SSE41-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE41-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE41-NEXT:    andl $3, %eax
; SSE41-NEXT:    extractps $3, %xmm0, %ecx
; SSE41-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE41-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE41-NEXT:    andl $3, %ecx
; SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE41-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE41-NEXT:    movups %xmm1, (%rax)
; SSE41-NEXT:    retq
;
; XOP-LABEL: indices_convert:
; XOP:       # %bb.0: # %bb
; XOP-NEXT:    vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
; XOP-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    vmovapd (%rax), %xmm1
; XOP-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
; XOP-NEXT:    vpermil2pd $0, %xmm0, %xmm1, %xmm1, %xmm0
; XOP-NEXT:    vmovupd %xmm0, (%rax)
; XOP-NEXT:    retq
;
; AVX1-LABEL: indices_convert:
; AVX1:       # %bb.0: # %bb
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vmovapd (%rax), %xmm1
; AVX1-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vpermilpd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vmovupd %xmm0, (%rax)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: indices_convert:
; AVX2:       # %bb.0: # %bb
; AVX2-NEXT:    vpbroadcastq (%rax), %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vmovapd (%rax), %xmm1
; AVX2-NEXT:    vpermilpd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovupd %xmm0, (%rax)
; AVX2-NEXT:    retq
;
; AVX512-LABEL: indices_convert:
; AVX512:       # %bb.0: # %bb
; AVX512-NEXT:    vmovaps (%rax), %ymm0
; AVX512-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX512-NEXT:    vbroadcastss {{.*#+}} xmm2 = [7,7,7,7]
; AVX512-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX512-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    vmovups %xmm0, (%rax)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; AVX512VL-LABEL: indices_convert:
; AVX512VL:       # %bb.0: # %bb
; AVX512VL-NEXT:    vpbroadcastq (%rax), %xmm0
; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT:    vpermq (%rax), %ymm0, %ymm0
; AVX512VL-NEXT:    vmovdqu %xmm0, (%rax)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
bb:
  ; Load the source vector; all pointers are undef (test only exercises codegen).
  %0 = load <4 x i64>, <4 x i64>* undef, align 32
  %1 = bitcast <4 x i64> %0 to <8 x i32>
  ; Pick two i32 lanes (index 12 is from the undef second operand).
  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <2 x i32> <i32 2, i32 12>
  ; Clamp indices into range before using them as extract indices.
  %3 = and <2 x i32> %2, <i32 7, i32 7>
  %4 = extractelement <2 x i32> %3, i32 0
  %vecext.i8.1 = extractelement <4 x i64> %0, i32 %4
  %5 = extractelement <2 x i32> %3, i32 1
  %vecext.i8.2 = extractelement <4 x i64> %0, i32 %5
  ; Rebuild a <2 x i64> from the variably-indexed extracts.
  %6 = insertelement <2 x i64> poison, i64 %vecext.i8.1, i32 0
  %7 = insertelement <2 x i64> %6, i64 %vecext.i8.2, i32 1
  %8 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> %7
  store <2 x i64> %8, <2 x i64>* undef, align 8
  ret void
}
0 commit comments