@@ -1228,13 +1228,14 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
1228
1228
;
1229
1229
; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
1230
1230
; AVX512VBMI2: # %bb.0:
1231
- ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,2,4,6,8,10,12,14,64,66,68,70,72,74,76,78,16,18,20,22,24,26,28,30,80,82,84,86,88,90,92,94]
1232
- ; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
1233
- ; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1234
- ; AVX512VBMI2-NEXT: vpsrlw %xmm2, %ymm4, %ymm4
1235
- ; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
1236
- ; AVX512VBMI2-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
1237
- ; AVX512VBMI2-NEXT: vpermt2b %zmm4, %zmm3, %zmm0
1231
+ ; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1232
+ ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1233
+ ; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87,8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95]
1234
+ ; AVX512VBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm3
1235
+ ; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
1236
+ ; AVX512VBMI2-NEXT: vpsrlw %xmm0, %zmm3, %zmm0
1237
+ ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
1238
+ ; AVX512VBMI2-NEXT: vpermb %zmm0, %zmm1, %zmm0
1238
1239
; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1239
1240
; AVX512VBMI2-NEXT: retq
1240
1241
;
@@ -1251,16 +1252,29 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
1251
1252
; AVX512VLBW-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
1252
1253
; AVX512VLBW-NEXT: retq
1253
1254
;
1254
- ; AVX10-LABEL: splatvar_funnnel_v32i8:
1255
- ; AVX10: # %bb.0:
1256
- ; AVX10-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
1257
- ; AVX10-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1258
- ; AVX10-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
1259
- ; AVX10-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
1260
- ; AVX10-NEXT: vpsrlw %xmm2, %ymm0, %ymm1
1261
- ; AVX10-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
1262
- ; AVX10-NEXT: vpermi2b %ymm3, %ymm1, %ymm0
1263
- ; AVX10-NEXT: retq
1255
+ ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
1256
+ ; AVX512VLVBMI2: # %bb.0:
1257
+ ; AVX512VLVBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1258
+ ; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1259
+ ; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95,0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87]
1260
+ ; AVX512VLVBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm3
1261
+ ; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
1262
+ ; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %zmm3, %zmm0
1263
+ ; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [32,34,36,38,40,42,44,46,0,2,4,6,8,10,12,14,48,50,52,54,56,58,60,62,16,18,20,22,24,26,28,30]
1264
+ ; AVX512VLVBMI2-NEXT: vpermb %zmm0, %zmm1, %zmm0
1265
+ ; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1266
+ ; AVX512VLVBMI2-NEXT: retq
1267
+ ;
1268
+ ; AVX10_256-LABEL: splatvar_funnnel_v32i8:
1269
+ ; AVX10_256: # %bb.0:
1270
+ ; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
1271
+ ; AVX10_256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1272
+ ; AVX10_256-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
1273
+ ; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
1274
+ ; AVX10_256-NEXT: vpsrlw %xmm2, %ymm0, %ymm1
1275
+ ; AVX10_256-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
1276
+ ; AVX10_256-NEXT: vpermi2b %ymm3, %ymm1, %ymm0
1277
+ ; AVX10_256-NEXT: retq
1264
1278
;
1265
1279
; XOPAVX1-LABEL: splatvar_funnnel_v32i8:
1266
1280
; XOPAVX1: # %bb.0:
0 commit comments