@@ -362,71 +362,26 @@ define protected amdgpu_kernel void @vectorizeShuffle(<16 x i8> %invec, ptr %out
362
362
;
363
363
; GFX8PLUS-LABEL: @vectorizeShuffle(
364
364
; GFX8PLUS-NEXT: entry:
365
- ; GFX8PLUS-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC:%.*]], i64 0
366
- ; GFX8PLUS-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
367
- ; GFX8PLUS-NEXT: [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2
368
- ; GFX8PLUS-NEXT: [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3
369
- ; GFX8PLUS-NEXT: [[EL4:%.*]] = extractelement <16 x i8> [[INVEC]], i64 4
370
- ; GFX8PLUS-NEXT: [[EL5:%.*]] = extractelement <16 x i8> [[INVEC]], i64 5
371
- ; GFX8PLUS-NEXT: [[EL6:%.*]] = extractelement <16 x i8> [[INVEC]], i64 6
372
- ; GFX8PLUS-NEXT: [[EL7:%.*]] = extractelement <16 x i8> [[INVEC]], i64 7
373
- ; GFX8PLUS-NEXT: [[EL8:%.*]] = extractelement <16 x i8> [[INVEC]], i64 8
374
- ; GFX8PLUS-NEXT: [[EL9:%.*]] = extractelement <16 x i8> [[INVEC]], i64 9
375
- ; GFX8PLUS-NEXT: [[EL10:%.*]] = extractelement <16 x i8> [[INVEC]], i64 10
376
- ; GFX8PLUS-NEXT: [[EL11:%.*]] = extractelement <16 x i8> [[INVEC]], i64 11
377
- ; GFX8PLUS-NEXT: [[EL12:%.*]] = extractelement <16 x i8> [[INVEC]], i64 12
378
- ; GFX8PLUS-NEXT: [[EL13:%.*]] = extractelement <16 x i8> [[INVEC]], i64 13
379
- ; GFX8PLUS-NEXT: [[EL14:%.*]] = extractelement <16 x i8> [[INVEC]], i64 14
380
- ; GFX8PLUS-NEXT: [[EL15:%.*]] = extractelement <16 x i8> [[INVEC]], i64 15
381
- ; GFX8PLUS-NEXT: [[MUL0:%.*]] = mul i8 [[EL0]], 1
382
- ; GFX8PLUS-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1
383
- ; GFX8PLUS-NEXT: [[MUL2:%.*]] = mul i8 [[EL2]], 1
384
- ; GFX8PLUS-NEXT: [[MUL3:%.*]] = mul i8 [[EL3]], 1
385
- ; GFX8PLUS-NEXT: [[MUL4:%.*]] = mul i8 [[EL4]], 1
386
- ; GFX8PLUS-NEXT: [[MUL5:%.*]] = mul i8 [[EL5]], 1
387
- ; GFX8PLUS-NEXT: [[MUL6:%.*]] = mul i8 [[EL6]], 1
388
- ; GFX8PLUS-NEXT: [[MUL7:%.*]] = mul i8 [[EL7]], 1
389
- ; GFX8PLUS-NEXT: [[MUL8:%.*]] = mul i8 [[EL8]], 1
390
- ; GFX8PLUS-NEXT: [[MUL9:%.*]] = mul i8 [[EL9]], 1
391
- ; GFX8PLUS-NEXT: [[MUL10:%.*]] = mul i8 [[EL10]], 1
392
- ; GFX8PLUS-NEXT: [[MUL11:%.*]] = mul i8 [[EL11]], 1
393
- ; GFX8PLUS-NEXT: [[MUL12:%.*]] = mul i8 [[EL12]], 1
394
- ; GFX8PLUS-NEXT: [[MUL13:%.*]] = mul i8 [[EL13]], 1
395
- ; GFX8PLUS-NEXT: [[MUL14:%.*]] = mul i8 [[EL14]], 1
396
- ; GFX8PLUS-NEXT: [[MUL15:%.*]] = mul i8 [[EL15]], 1
397
- ; GFX8PLUS-NEXT: [[ADD0:%.*]] = add i8 [[MUL0]], 1
398
- ; GFX8PLUS-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1
399
- ; GFX8PLUS-NEXT: [[ADD2:%.*]] = add i8 [[MUL2]], 1
400
- ; GFX8PLUS-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1
401
- ; GFX8PLUS-NEXT: [[ADD4:%.*]] = add i8 [[MUL4]], 1
402
- ; GFX8PLUS-NEXT: [[ADD5:%.*]] = add i8 [[MUL5]], 1
403
- ; GFX8PLUS-NEXT: [[ADD6:%.*]] = add i8 [[MUL6]], 1
404
- ; GFX8PLUS-NEXT: [[ADD7:%.*]] = add i8 [[MUL7]], 1
405
- ; GFX8PLUS-NEXT: [[ADD8:%.*]] = add i8 [[MUL8]], 1
406
- ; GFX8PLUS-NEXT: [[ADD9:%.*]] = add i8 [[MUL9]], 1
407
- ; GFX8PLUS-NEXT: [[ADD10:%.*]] = add i8 [[MUL10]], 1
408
- ; GFX8PLUS-NEXT: [[ADD11:%.*]] = add i8 [[MUL11]], 1
409
- ; GFX8PLUS-NEXT: [[ADD12:%.*]] = add i8 [[MUL12]], 1
410
- ; GFX8PLUS-NEXT: [[ADD13:%.*]] = add i8 [[MUL13]], 1
411
- ; GFX8PLUS-NEXT: [[ADD14:%.*]] = add i8 [[MUL14]], 1
412
- ; GFX8PLUS-NEXT: [[ADD15:%.*]] = add i8 [[MUL15]], 1
413
- ; GFX8PLUS-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
414
- ; GFX8PLUS-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
415
- ; GFX8PLUS-NEXT: [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2
416
- ; GFX8PLUS-NEXT: [[VECINS3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3
417
- ; GFX8PLUS-NEXT: [[VECINS4:%.*]] = insertelement <16 x i8> [[VECINS3]], i8 [[ADD4]], i64 4
418
- ; GFX8PLUS-NEXT: [[VECINS5:%.*]] = insertelement <16 x i8> [[VECINS4]], i8 [[ADD5]], i64 5
419
- ; GFX8PLUS-NEXT: [[VECINS6:%.*]] = insertelement <16 x i8> [[VECINS5]], i8 [[ADD6]], i64 6
420
- ; GFX8PLUS-NEXT: [[VECINS7:%.*]] = insertelement <16 x i8> [[VECINS6]], i8 [[ADD7]], i64 7
421
- ; GFX8PLUS-NEXT: [[VECINS8:%.*]] = insertelement <16 x i8> [[VECINS7]], i8 [[ADD8]], i64 8
422
- ; GFX8PLUS-NEXT: [[VECINS9:%.*]] = insertelement <16 x i8> [[VECINS8]], i8 [[ADD9]], i64 9
423
- ; GFX8PLUS-NEXT: [[VECINS10:%.*]] = insertelement <16 x i8> [[VECINS9]], i8 [[ADD10]], i64 10
424
- ; GFX8PLUS-NEXT: [[VECINS11:%.*]] = insertelement <16 x i8> [[VECINS10]], i8 [[ADD11]], i64 11
425
- ; GFX8PLUS-NEXT: [[VECINS12:%.*]] = insertelement <16 x i8> [[VECINS11]], i8 [[ADD12]], i64 12
426
- ; GFX8PLUS-NEXT: [[VECINS13:%.*]] = insertelement <16 x i8> [[VECINS12]], i8 [[ADD13]], i64 13
427
- ; GFX8PLUS-NEXT: [[VECINS14:%.*]] = insertelement <16 x i8> [[VECINS13]], i8 [[ADD14]], i64 14
428
- ; GFX8PLUS-NEXT: [[VECINS15:%.*]] = insertelement <16 x i8> [[VECINS14]], i8 [[ADD15]], i64 15
429
- ; GFX8PLUS-NEXT: store <16 x i8> [[VECINS15]], ptr [[OUT:%.*]], align 16
365
+ ; GFX8PLUS-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
366
+ ; GFX8PLUS-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[TMP0]], <i8 1, i8 1, i8 1, i8 1>
367
+ ; GFX8PLUS-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1>
368
+ ; GFX8PLUS-NEXT: [[TMP3:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
369
+ ; GFX8PLUS-NEXT: [[TMP4:%.*]] = mul <4 x i8> [[TMP3]], <i8 1, i8 1, i8 1, i8 1>
370
+ ; GFX8PLUS-NEXT: [[TMP5:%.*]] = add <4 x i8> [[TMP4]], <i8 1, i8 1, i8 1, i8 1>
371
+ ; GFX8PLUS-NEXT: [[TMP6:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
372
+ ; GFX8PLUS-NEXT: [[TMP7:%.*]] = mul <4 x i8> [[TMP6]], <i8 1, i8 1, i8 1, i8 1>
373
+ ; GFX8PLUS-NEXT: [[TMP8:%.*]] = add <4 x i8> [[TMP7]], <i8 1, i8 1, i8 1, i8 1>
374
+ ; GFX8PLUS-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
375
+ ; GFX8PLUS-NEXT: [[TMP10:%.*]] = mul <4 x i8> [[TMP9]], <i8 1, i8 1, i8 1, i8 1>
376
+ ; GFX8PLUS-NEXT: [[TMP11:%.*]] = add <4 x i8> [[TMP10]], <i8 1, i8 1, i8 1, i8 1>
377
+ ; GFX8PLUS-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
378
+ ; GFX8PLUS-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
379
+ ; GFX8PLUS-NEXT: [[VECINS71:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> [[TMP13]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
380
+ ; GFX8PLUS-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
381
+ ; GFX8PLUS-NEXT: [[VECINS112:%.*]] = shufflevector <16 x i8> [[VECINS71]], <16 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
382
+ ; GFX8PLUS-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
383
+ ; GFX8PLUS-NEXT: [[VECINS153:%.*]] = shufflevector <16 x i8> [[VECINS112]], <16 x i8> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
384
+ ; GFX8PLUS-NEXT: store <16 x i8> [[VECINS153]], ptr [[OUT:%.*]], align 16
430
385
; GFX8PLUS-NEXT: ret void
431
386
;
432
387
entry:
0 commit comments