@@ -1101,6 +1101,121 @@ define <64 x i8> @shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_
1101
1101
ret <64 x i8 > %r
1102
1102
}
1103
1103
1104
; Selects 64 consecutive bytes starting at index 61 of the concatenation
; %a1:%a0 (the shufflevector operands are swapped relative to the argument
; order), i.e. the last 3 bytes of %a1 followed by the first 61 bytes of %a0.
; Expected codegen below: the AVX512F and AVX512DQ runs split the shuffle into
; two 256-bit vperm2i128 + vpalignr pairs and rejoin them with vinserti64x4,
; while the AVX512BW and AVX512VBMI runs use one 512-bit valignq + vpalignr pair.
+ define <64 x i8 > @shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124 (<64 x i8 > %a0 , <64 x i8 > %a1 ) {
1105
+ ; AVX512F-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124:
1106
+ ; AVX512F: # %bb.0:
1107
+ ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
1108
+ ; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
1109
+ ; AVX512F-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[13,14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm3[29,30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28]
1110
+ ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
1111
+ ; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
1112
+ ; AVX512F-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm1[29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28]
1113
+ ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
1114
+ ; AVX512F-NEXT: retq
1115
+ ;
1116
+ ; AVX512BW-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124:
1117
+ ; AVX512BW: # %bb.0:
1118
+ ; AVX512BW-NEXT: valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5]
1119
+ ; AVX512BW-NEXT: vpalignr {{.*#+}} zmm0 = zmm1[13,14,15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zmm1[29,30,31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28],zmm1[45,46,47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44],zmm1[61,62,63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60]
1120
+ ; AVX512BW-NEXT: retq
1121
+ ;
1122
+ ; AVX512DQ-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124:
1123
+ ; AVX512DQ: # %bb.0:
1124
+ ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
1125
+ ; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
1126
+ ; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[13,14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm3[29,30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28]
1127
+ ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
1128
+ ; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
1129
+ ; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm1[29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28]
1130
+ ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
1131
+ ; AVX512DQ-NEXT: retq
1132
+ ;
1133
+ ; AVX512VBMI-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124:
1134
+ ; AVX512VBMI: # %bb.0:
1135
+ ; AVX512VBMI-NEXT: valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5]
1136
+ ; AVX512VBMI-NEXT: vpalignr {{.*#+}} zmm0 = zmm1[13,14,15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zmm1[29,30,31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28],zmm1[45,46,47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44],zmm1[61,62,63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60]
1137
+ ; AVX512VBMI-NEXT: retq
1138
+ %r = shufflevector <64 x i8 > %a1 , <64 x i8 > %a0 , <64 x i32 > <i32 61 , i32 62 , i32 63 , i32 64 , i32 65 , i32 66 , i32 67 , i32 68 , i32 69 , i32 70 , i32 71 , i32 72 , i32 73 , i32 74 , i32 75 , i32 76 , i32 77 , i32 78 , i32 79 , i32 80 , i32 81 , i32 82 , i32 83 , i32 84 , i32 85 , i32 86 , i32 87 , i32 88 , i32 89 , i32 90 , i32 91 , i32 92 , i32 93 , i32 94 , i32 95 , i32 96 , i32 97 , i32 98 , i32 99 , i32 100 , i32 101 , i32 102 , i32 103 , i32 104 , i32 105 , i32 106 , i32 107 , i32 108 , i32 109 , i32 110 , i32 111 , i32 112 , i32 113 , i32 114 , i32 115 , i32 116 , i32 117 , i32 118 , i32 119 , i32 120 , i32 121 , i32 122 , i32 123 , i32 124 >
1139
+ ret <64 x i8 > %r
1140
+ }
1141
+
1142
+ ; PR79799
1143
; Selects 64 consecutive bytes starting at index 62 of the concatenation
; %a1:%a0 (the shufflevector operands are swapped relative to the argument
; order), i.e. the last 2 bytes of %a1 followed by the first 62 bytes of %a0.
; Expected codegen below: the AVX512F and AVX512DQ runs split the shuffle into
; two 256-bit vperm2i128 + vpalignr pairs and rejoin them with vinserti64x4,
; while the AVX512BW and AVX512VBMI runs build a word index vector
; [63,0,1,...,30] with vpmovsxbw and feed it to a two-source vpermt2w.
+ define <64 x i8 > @shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125 (<64 x i8 > %a0 , <64 x i8 > %a1 ) {
1144
+ ; AVX512F-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125:
1145
+ ; AVX512F: # %bb.0:
1146
+ ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
1147
+ ; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
1148
+ ; AVX512F-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm3[30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
1149
+ ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
1150
+ ; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
1151
+ ; AVX512F-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
1152
+ ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
1153
+ ; AVX512F-NEXT: retq
1154
+ ;
1155
+ ; AVX512BW-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125:
1156
+ ; AVX512BW: # %bb.0:
1157
+ ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm2 = [63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
1158
+ ; AVX512BW-NEXT: vpermt2w %zmm1, %zmm2, %zmm0
1159
+ ; AVX512BW-NEXT: retq
1160
+ ;
1161
+ ; AVX512DQ-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125:
1162
+ ; AVX512DQ: # %bb.0:
1163
+ ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
1164
+ ; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
1165
+ ; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm3[30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
1166
+ ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
1167
+ ; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
1168
+ ; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
1169
+ ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
1170
+ ; AVX512DQ-NEXT: retq
1171
+ ;
1172
+ ; AVX512VBMI-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125:
1173
+ ; AVX512VBMI: # %bb.0:
1174
+ ; AVX512VBMI-NEXT: vpmovsxbw {{.*#+}} zmm2 = [63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
1175
+ ; AVX512VBMI-NEXT: vpermt2w %zmm1, %zmm2, %zmm0
1176
+ ; AVX512VBMI-NEXT: retq
1177
+ %r = shufflevector <64 x i8 > %a1 , <64 x i8 > %a0 , <64 x i32 > <i32 62 , i32 63 , i32 64 , i32 65 , i32 66 , i32 67 , i32 68 , i32 69 , i32 70 , i32 71 , i32 72 , i32 73 , i32 74 , i32 75 , i32 76 , i32 77 , i32 78 , i32 79 , i32 80 , i32 81 , i32 82 , i32 83 , i32 84 , i32 85 , i32 86 , i32 87 , i32 88 , i32 89 , i32 90 , i32 91 , i32 92 , i32 93 , i32 94 , i32 95 , i32 96 , i32 97 , i32 98 , i32 99 , i32 100 , i32 101 , i32 102 , i32 103 , i32 104 , i32 105 , i32 106 , i32 107 , i32 108 , i32 109 , i32 110 , i32 111 , i32 112 , i32 113 , i32 114 , i32 115 , i32 116 , i32 117 , i32 118 , i32 119 , i32 120 , i32 121 , i32 122 , i32 123 , i32 124 , i32 125 >
1178
+ ret <64 x i8 > %r
1179
+ }
1180
+
1181
; Selects 64 consecutive bytes starting at index 63 of the concatenation
; %a1:%a0 (the shufflevector operands are swapped relative to the argument
; order), i.e. the last byte of %a1 followed by the first 63 bytes of %a0.
; Expected codegen below: the AVX512F and AVX512DQ runs split the shuffle into
; two 256-bit vperm2i128 + vpalignr pairs and rejoin them with vinserti64x4,
; while the AVX512BW and AVX512VBMI runs use one 512-bit valignq + vpalignr pair.
+ define <64 x i8 > @shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126 (<64 x i8 > %a0 , <64 x i8 > %a1 ) {
1182
+ ; AVX512F-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126:
1183
+ ; AVX512F: # %bb.0:
1184
+ ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
1185
+ ; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
1186
+ ; AVX512F-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm3[31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
1187
+ ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
1188
+ ; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
1189
+ ; AVX512F-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
1190
+ ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
1191
+ ; AVX512F-NEXT: retq
1192
+ ;
1193
+ ; AVX512BW-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126:
1194
+ ; AVX512BW: # %bb.0:
1195
+ ; AVX512BW-NEXT: valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5]
1196
+ ; AVX512BW-NEXT: vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
1197
+ ; AVX512BW-NEXT: retq
1198
+ ;
1199
+ ; AVX512DQ-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126:
1200
+ ; AVX512DQ: # %bb.0:
1201
+ ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm2
1202
+ ; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
1203
+ ; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm3[31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
1204
+ ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
1205
+ ; AVX512DQ-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
1206
+ ; AVX512DQ-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
1207
+ ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
1208
+ ; AVX512DQ-NEXT: retq
1209
+ ;
1210
+ ; AVX512VBMI-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126:
1211
+ ; AVX512VBMI: # %bb.0:
1212
+ ; AVX512VBMI-NEXT: valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5]
1213
+ ; AVX512VBMI-NEXT: vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
1214
+ ; AVX512VBMI-NEXT: retq
1215
+ %r = shufflevector <64 x i8 > %a1 , <64 x i8 > %a0 , <64 x i32 > <i32 63 , i32 64 , i32 65 , i32 66 , i32 67 , i32 68 , i32 69 , i32 70 , i32 71 , i32 72 , i32 73 , i32 74 , i32 75 , i32 76 , i32 77 , i32 78 , i32 79 , i32 80 , i32 81 , i32 82 , i32 83 , i32 84 , i32 85 , i32 86 , i32 87 , i32 88 , i32 89 , i32 90 , i32 91 , i32 92 , i32 93 , i32 94 , i32 95 , i32 96 , i32 97 , i32 98 , i32 99 , i32 100 , i32 101 , i32 102 , i32 103 , i32 104 , i32 105 , i32 106 , i32 107 , i32 108 , i32 109 , i32 110 , i32 111 , i32 112 , i32 113 , i32 114 , i32 115 , i32 116 , i32 117 , i32 118 , i32 119 , i32 120 , i32 121 , i32 122 , i32 123 , i32 124 , i32 125 , i32 126 >
1216
+ ret <64 x i8 > %r
1217
+ }
1218
+
1104
1219
define <64 x i8 > @shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_66_68_72_74_78_80_84_86_90_92_96_98_102_104_108_110_114_116_120_122_126 (<64 x i8 > %a0 , <64 x i8 > %a1 ) {
1105
1220
; AVX512F-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_66_68_72_74_78_80_84_86_90_92_96_98_102_104_108_110_114_116_120_122_126:
1106
1221
; AVX512F: # %bb.0:
0 commit comments