@@ -43,73 +43,39 @@ define protected amdgpu_kernel void @vectorizePHI(ptr addrspace(3) %inptr0, ptr
43
43
; GFX7-LABEL: @vectorizePHI(
44
44
; GFX7-NEXT: entry:
45
45
; GFX7-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0:%.*]], i32 0
46
- ; GFX7-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8
47
- ; GFX7-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1
48
- ; GFX7-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1
49
- ; GFX7-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2
50
- ; GFX7-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2
51
- ; GFX7-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 3
52
- ; GFX7-NEXT: [[ELE3:%.*]] = load i8, ptr addrspace(3) [[GEP3]], align 1
46
+ ; GFX7-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
53
47
; GFX7-NEXT: br label [[DO_BODY:%.*]]
54
48
; GFX7: do.body:
55
- ; GFX7-NEXT: [[PHI0:%.*]] = phi i8 [ [[ELE3]], [[ENTRY:%.*]] ], [ [[OTHERELE3:%.*]], [[DO_BODY]] ]
56
- ; GFX7-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], [[ENTRY]] ], [ [[OTHERELE2:%.*]], [[DO_BODY]] ]
57
- ; GFX7-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], [[ENTRY]] ], [ [[OTHERELE1:%.*]], [[DO_BODY]] ]
58
- ; GFX7-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], [[ENTRY]] ], [ [[OTHERELE0:%.*]], [[DO_BODY]] ]
59
- ; GFX7-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8
60
- ; GFX7-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1
61
- ; GFX7-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2
62
- ; GFX7-NEXT: [[OTHERELE3]] = load i8, ptr addrspace(3) [[GEP3]], align 1
63
- ; GFX7-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8
64
- ; GFX7-NEXT: [[VEC01:%.*]] = insertelement <16 x i8> [[VEC00]], i8 [[OTHERELE1]], i64 9
65
- ; GFX7-NEXT: [[VEC02:%.*]] = insertelement <16 x i8> [[VEC01]], i8 [[OTHERELE2]], i64 10
66
- ; GFX7-NEXT: [[VEC03:%.*]] = insertelement <16 x i8> [[VEC02]], i8 [[OTHERELE3]], i64 11
67
- ; GFX7-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8
68
- ; GFX7-NEXT: [[VEC11:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9
69
- ; GFX7-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC11]], i8 [[PHI1]], i64 10
70
- ; GFX7-NEXT: [[VEC13:%.*]] = insertelement <16 x i8> [[VEC12]], i8 [[PHI0]], i64 11
71
- ; GFX7-NEXT: store <16 x i8> [[VEC13]], ptr addrspace(3) [[INPTR1:%.*]], align 2
49
+ ; GFX7-NEXT: [[TMP1:%.*]] = phi <4 x i8> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[DO_BODY]] ]
50
+ ; GFX7-NEXT: [[TMP2]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
51
+ ; GFX7-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
52
+ ; GFX7-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
53
+ ; GFX7-NEXT: [[VEC131:%.*]] = shufflevector <16 x i8> [[TMP4]], <16 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
54
+ ; GFX7-NEXT: store <16 x i8> [[VEC131]], ptr addrspace(3) [[INPTR1:%.*]], align 2
72
55
; GFX7-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG:%.*]], 0
73
56
; GFX7-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[DO_BODY]]
74
57
; GFX7: exit:
75
- ; GFX7-NEXT: store <16 x i8> [[VEC13 ]], ptr [[OUT:%.*]], align 16
76
- ; GFX7-NEXT: store <16 x i8> [[VEC03 ]], ptr [[OUT1:%.*]], align 16
58
+ ; GFX7-NEXT: store <16 x i8> [[VEC131 ]], ptr [[OUT:%.*]], align 16
59
+ ; GFX7-NEXT: store <16 x i8> [[TMP3 ]], ptr [[OUT1:%.*]], align 16
77
60
; GFX7-NEXT: ret void
78
61
;
79
62
; GFX8PLUS-LABEL: @vectorizePHI(
80
63
; GFX8PLUS-NEXT: entry:
81
64
; GFX8PLUS-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0:%.*]], i32 0
82
- ; GFX8PLUS-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8
83
- ; GFX8PLUS-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1
84
- ; GFX8PLUS-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1
85
- ; GFX8PLUS-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2
86
- ; GFX8PLUS-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2
87
- ; GFX8PLUS-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 3
88
- ; GFX8PLUS-NEXT: [[ELE3:%.*]] = load i8, ptr addrspace(3) [[GEP3]], align 1
65
+ ; GFX8PLUS-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
89
66
; GFX8PLUS-NEXT: br label [[DO_BODY:%.*]]
90
67
; GFX8PLUS: do.body:
91
- ; GFX8PLUS-NEXT: [[PHI0:%.*]] = phi i8 [ [[ELE3]], [[ENTRY:%.*]] ], [ [[OTHERELE3:%.*]], [[DO_BODY]] ]
92
- ; GFX8PLUS-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], [[ENTRY]] ], [ [[OTHERELE2:%.*]], [[DO_BODY]] ]
93
- ; GFX8PLUS-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], [[ENTRY]] ], [ [[OTHERELE1:%.*]], [[DO_BODY]] ]
94
- ; GFX8PLUS-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], [[ENTRY]] ], [ [[OTHERELE0:%.*]], [[DO_BODY]] ]
95
- ; GFX8PLUS-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8
96
- ; GFX8PLUS-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1
97
- ; GFX8PLUS-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2
98
- ; GFX8PLUS-NEXT: [[OTHERELE3]] = load i8, ptr addrspace(3) [[GEP3]], align 1
99
- ; GFX8PLUS-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8
100
- ; GFX8PLUS-NEXT: [[VEC01:%.*]] = insertelement <16 x i8> [[VEC00]], i8 [[OTHERELE1]], i64 9
101
- ; GFX8PLUS-NEXT: [[VEC02:%.*]] = insertelement <16 x i8> [[VEC01]], i8 [[OTHERELE2]], i64 10
102
- ; GFX8PLUS-NEXT: [[VEC03:%.*]] = insertelement <16 x i8> [[VEC02]], i8 [[OTHERELE3]], i64 11
103
- ; GFX8PLUS-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8
104
- ; GFX8PLUS-NEXT: [[VEC11:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9
105
- ; GFX8PLUS-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC11]], i8 [[PHI1]], i64 10
106
- ; GFX8PLUS-NEXT: [[VEC13:%.*]] = insertelement <16 x i8> [[VEC12]], i8 [[PHI0]], i64 11
107
- ; GFX8PLUS-NEXT: store <16 x i8> [[VEC13]], ptr addrspace(3) [[INPTR1:%.*]], align 2
68
+ ; GFX8PLUS-NEXT: [[TMP1:%.*]] = phi <4 x i8> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[DO_BODY]] ]
69
+ ; GFX8PLUS-NEXT: [[TMP2]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
70
+ ; GFX8PLUS-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
71
+ ; GFX8PLUS-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
72
+ ; GFX8PLUS-NEXT: [[VEC131:%.*]] = shufflevector <16 x i8> [[TMP4]], <16 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
73
+ ; GFX8PLUS-NEXT: store <16 x i8> [[VEC131]], ptr addrspace(3) [[INPTR1:%.*]], align 2
108
74
; GFX8PLUS-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG:%.*]], 0
109
75
; GFX8PLUS-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[DO_BODY]]
110
76
; GFX8PLUS: exit:
111
- ; GFX8PLUS-NEXT: store <16 x i8> [[VEC13 ]], ptr [[OUT:%.*]], align 16
112
- ; GFX8PLUS-NEXT: store <16 x i8> [[VEC03 ]], ptr [[OUT1:%.*]], align 16
77
+ ; GFX8PLUS-NEXT: store <16 x i8> [[VEC131 ]], ptr [[OUT:%.*]], align 16
78
+ ; GFX8PLUS-NEXT: store <16 x i8> [[TMP3 ]], ptr [[OUT1:%.*]], align 16
113
79
; GFX8PLUS-NEXT: ret void
114
80
;
115
81
entry:
@@ -293,71 +259,26 @@ exit:
293
259
define protected amdgpu_kernel void @vectorizeShuffle (<16 x i8 > %invec , ptr %out , i32 %flag ) {
294
260
; GFX7-LABEL: @vectorizeShuffle(
295
261
; GFX7-NEXT: entry:
296
- ; GFX7-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC:%.*]], i64 0
297
- ; GFX7-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
298
- ; GFX7-NEXT: [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2
299
- ; GFX7-NEXT: [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3
300
- ; GFX7-NEXT: [[EL4:%.*]] = extractelement <16 x i8> [[INVEC]], i64 4
301
- ; GFX7-NEXT: [[EL5:%.*]] = extractelement <16 x i8> [[INVEC]], i64 5
302
- ; GFX7-NEXT: [[EL6:%.*]] = extractelement <16 x i8> [[INVEC]], i64 6
303
- ; GFX7-NEXT: [[EL7:%.*]] = extractelement <16 x i8> [[INVEC]], i64 7
304
- ; GFX7-NEXT: [[EL8:%.*]] = extractelement <16 x i8> [[INVEC]], i64 8
305
- ; GFX7-NEXT: [[EL9:%.*]] = extractelement <16 x i8> [[INVEC]], i64 9
306
- ; GFX7-NEXT: [[EL10:%.*]] = extractelement <16 x i8> [[INVEC]], i64 10
307
- ; GFX7-NEXT: [[EL11:%.*]] = extractelement <16 x i8> [[INVEC]], i64 11
308
- ; GFX7-NEXT: [[EL12:%.*]] = extractelement <16 x i8> [[INVEC]], i64 12
309
- ; GFX7-NEXT: [[EL13:%.*]] = extractelement <16 x i8> [[INVEC]], i64 13
310
- ; GFX7-NEXT: [[EL14:%.*]] = extractelement <16 x i8> [[INVEC]], i64 14
311
- ; GFX7-NEXT: [[EL15:%.*]] = extractelement <16 x i8> [[INVEC]], i64 15
312
- ; GFX7-NEXT: [[MUL0:%.*]] = mul i8 [[EL0]], 1
313
- ; GFX7-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1
314
- ; GFX7-NEXT: [[MUL2:%.*]] = mul i8 [[EL2]], 1
315
- ; GFX7-NEXT: [[MUL3:%.*]] = mul i8 [[EL3]], 1
316
- ; GFX7-NEXT: [[MUL4:%.*]] = mul i8 [[EL4]], 1
317
- ; GFX7-NEXT: [[MUL5:%.*]] = mul i8 [[EL5]], 1
318
- ; GFX7-NEXT: [[MUL6:%.*]] = mul i8 [[EL6]], 1
319
- ; GFX7-NEXT: [[MUL7:%.*]] = mul i8 [[EL7]], 1
320
- ; GFX7-NEXT: [[MUL8:%.*]] = mul i8 [[EL8]], 1
321
- ; GFX7-NEXT: [[MUL9:%.*]] = mul i8 [[EL9]], 1
322
- ; GFX7-NEXT: [[MUL10:%.*]] = mul i8 [[EL10]], 1
323
- ; GFX7-NEXT: [[MUL11:%.*]] = mul i8 [[EL11]], 1
324
- ; GFX7-NEXT: [[MUL12:%.*]] = mul i8 [[EL12]], 1
325
- ; GFX7-NEXT: [[MUL13:%.*]] = mul i8 [[EL13]], 1
326
- ; GFX7-NEXT: [[MUL14:%.*]] = mul i8 [[EL14]], 1
327
- ; GFX7-NEXT: [[MUL15:%.*]] = mul i8 [[EL15]], 1
328
- ; GFX7-NEXT: [[ADD0:%.*]] = add i8 [[MUL0]], 1
329
- ; GFX7-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1
330
- ; GFX7-NEXT: [[ADD2:%.*]] = add i8 [[MUL2]], 1
331
- ; GFX7-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1
332
- ; GFX7-NEXT: [[ADD4:%.*]] = add i8 [[MUL4]], 1
333
- ; GFX7-NEXT: [[ADD5:%.*]] = add i8 [[MUL5]], 1
334
- ; GFX7-NEXT: [[ADD6:%.*]] = add i8 [[MUL6]], 1
335
- ; GFX7-NEXT: [[ADD7:%.*]] = add i8 [[MUL7]], 1
336
- ; GFX7-NEXT: [[ADD8:%.*]] = add i8 [[MUL8]], 1
337
- ; GFX7-NEXT: [[ADD9:%.*]] = add i8 [[MUL9]], 1
338
- ; GFX7-NEXT: [[ADD10:%.*]] = add i8 [[MUL10]], 1
339
- ; GFX7-NEXT: [[ADD11:%.*]] = add i8 [[MUL11]], 1
340
- ; GFX7-NEXT: [[ADD12:%.*]] = add i8 [[MUL12]], 1
341
- ; GFX7-NEXT: [[ADD13:%.*]] = add i8 [[MUL13]], 1
342
- ; GFX7-NEXT: [[ADD14:%.*]] = add i8 [[MUL14]], 1
343
- ; GFX7-NEXT: [[ADD15:%.*]] = add i8 [[MUL15]], 1
344
- ; GFX7-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
345
- ; GFX7-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
346
- ; GFX7-NEXT: [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2
347
- ; GFX7-NEXT: [[VECINS3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3
348
- ; GFX7-NEXT: [[VECINS4:%.*]] = insertelement <16 x i8> [[VECINS3]], i8 [[ADD4]], i64 4
349
- ; GFX7-NEXT: [[VECINS5:%.*]] = insertelement <16 x i8> [[VECINS4]], i8 [[ADD5]], i64 5
350
- ; GFX7-NEXT: [[VECINS6:%.*]] = insertelement <16 x i8> [[VECINS5]], i8 [[ADD6]], i64 6
351
- ; GFX7-NEXT: [[VECINS7:%.*]] = insertelement <16 x i8> [[VECINS6]], i8 [[ADD7]], i64 7
352
- ; GFX7-NEXT: [[VECINS8:%.*]] = insertelement <16 x i8> [[VECINS7]], i8 [[ADD8]], i64 8
353
- ; GFX7-NEXT: [[VECINS9:%.*]] = insertelement <16 x i8> [[VECINS8]], i8 [[ADD9]], i64 9
354
- ; GFX7-NEXT: [[VECINS10:%.*]] = insertelement <16 x i8> [[VECINS9]], i8 [[ADD10]], i64 10
355
- ; GFX7-NEXT: [[VECINS11:%.*]] = insertelement <16 x i8> [[VECINS10]], i8 [[ADD11]], i64 11
356
- ; GFX7-NEXT: [[VECINS12:%.*]] = insertelement <16 x i8> [[VECINS11]], i8 [[ADD12]], i64 12
357
- ; GFX7-NEXT: [[VECINS13:%.*]] = insertelement <16 x i8> [[VECINS12]], i8 [[ADD13]], i64 13
358
- ; GFX7-NEXT: [[VECINS14:%.*]] = insertelement <16 x i8> [[VECINS13]], i8 [[ADD14]], i64 14
359
- ; GFX7-NEXT: [[VECINS15:%.*]] = insertelement <16 x i8> [[VECINS14]], i8 [[ADD15]], i64 15
360
- ; GFX7-NEXT: store <16 x i8> [[VECINS15]], ptr [[OUT:%.*]], align 16
262
+ ; GFX7-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
263
+ ; GFX7-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[TMP0]], <i8 1, i8 1, i8 1, i8 1>
264
+ ; GFX7-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1>
265
+ ; GFX7-NEXT: [[TMP3:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
266
+ ; GFX7-NEXT: [[TMP4:%.*]] = mul <4 x i8> [[TMP3]], <i8 1, i8 1, i8 1, i8 1>
267
+ ; GFX7-NEXT: [[TMP5:%.*]] = add <4 x i8> [[TMP4]], <i8 1, i8 1, i8 1, i8 1>
268
+ ; GFX7-NEXT: [[TMP6:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
269
+ ; GFX7-NEXT: [[TMP7:%.*]] = mul <4 x i8> [[TMP6]], <i8 1, i8 1, i8 1, i8 1>
270
+ ; GFX7-NEXT: [[TMP8:%.*]] = add <4 x i8> [[TMP7]], <i8 1, i8 1, i8 1, i8 1>
271
+ ; GFX7-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
272
+ ; GFX7-NEXT: [[TMP10:%.*]] = mul <4 x i8> [[TMP9]], <i8 1, i8 1, i8 1, i8 1>
273
+ ; GFX7-NEXT: [[TMP11:%.*]] = add <4 x i8> [[TMP10]], <i8 1, i8 1, i8 1, i8 1>
274
+ ; GFX7-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
275
+ ; GFX7-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
276
+ ; GFX7-NEXT: [[VECINS71:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> [[TMP13]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
277
+ ; GFX7-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
278
+ ; GFX7-NEXT: [[VECINS112:%.*]] = shufflevector <16 x i8> [[VECINS71]], <16 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
279
+ ; GFX7-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
280
+ ; GFX7-NEXT: [[VECINS153:%.*]] = shufflevector <16 x i8> [[VECINS112]], <16 x i8> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
281
+ ; GFX7-NEXT: store <16 x i8> [[VECINS153]], ptr [[OUT:%.*]], align 16
361
282
; GFX7-NEXT: ret void
362
283
;
363
284
; GFX8PLUS-LABEL: @vectorizeShuffle(
0 commit comments