Skip to content

Commit 85abef2

Browse files
committed
[AMDGPU] Prefer vectorized i8s for PHIs
Change-Id: I45fac02309871c61ab9affd21e803568d0b38d47
1 parent a5a9c4b commit 85abef2

File tree

2 files changed

+47
-118
lines changed

2 files changed

+47
-118
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -308,7 +308,7 @@ bool GCNTTIImpl::hasBranchDivergence(const Function *F) const {
308308

309309
unsigned GCNTTIImpl::getNumberOfParts(Type *Tp) const {
310310
if (auto VTy = dyn_cast<FixedVectorType>(Tp)) {
311-
if (VTy->getScalarSizeInBits() == 8) {
311+
if (DL.getTypeSizeInBits(VTy->getElementType()) == 8) {
312312
auto ElCount = VTy->getElementCount().getFixedValue();
313313
return ElCount / 4;
314314
}
@@ -1139,6 +1139,14 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
11391139

11401140
InstructionCost GCNTTIImpl::getPHIScalarizationOverhead(Type *ScalarTy,
11411141
VectorType *VTy) {
1142+
if (DL.getTypeSizeInBits(ScalarTy) != 8)
1143+
return 0;
1144+
1145+
if (auto FVTy = dyn_cast<FixedVectorType>(VTy)) {
1146+
unsigned NumElts = FVTy->getElementCount().getFixedValue();
1147+
return alignDown(NumElts, 4);
1148+
}
1149+
11421150
return 0;
11431151
}
11441152

llvm/test/Transforms/SLPVectorizer/AMDGPU/vectorize-i8.ll

Lines changed: 38 additions & 117 deletions
Original file line number | Diff line number | Diff line change
@@ -43,73 +43,39 @@ define protected amdgpu_kernel void @vectorizePHI(ptr addrspace(3) %inptr0, ptr
4343
; GFX7-LABEL: @vectorizePHI(
4444
; GFX7-NEXT: entry:
4545
; GFX7-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0:%.*]], i32 0
46-
; GFX7-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8
47-
; GFX7-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1
48-
; GFX7-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1
49-
; GFX7-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2
50-
; GFX7-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2
51-
; GFX7-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 3
52-
; GFX7-NEXT: [[ELE3:%.*]] = load i8, ptr addrspace(3) [[GEP3]], align 1
46+
; GFX7-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
5347
; GFX7-NEXT: br label [[DO_BODY:%.*]]
5448
; GFX7: do.body:
55-
; GFX7-NEXT: [[PHI0:%.*]] = phi i8 [ [[ELE3]], [[ENTRY:%.*]] ], [ [[OTHERELE3:%.*]], [[DO_BODY]] ]
56-
; GFX7-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], [[ENTRY]] ], [ [[OTHERELE2:%.*]], [[DO_BODY]] ]
57-
; GFX7-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], [[ENTRY]] ], [ [[OTHERELE1:%.*]], [[DO_BODY]] ]
58-
; GFX7-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], [[ENTRY]] ], [ [[OTHERELE0:%.*]], [[DO_BODY]] ]
59-
; GFX7-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8
60-
; GFX7-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1
61-
; GFX7-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2
62-
; GFX7-NEXT: [[OTHERELE3]] = load i8, ptr addrspace(3) [[GEP3]], align 1
63-
; GFX7-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8
64-
; GFX7-NEXT: [[VEC01:%.*]] = insertelement <16 x i8> [[VEC00]], i8 [[OTHERELE1]], i64 9
65-
; GFX7-NEXT: [[VEC02:%.*]] = insertelement <16 x i8> [[VEC01]], i8 [[OTHERELE2]], i64 10
66-
; GFX7-NEXT: [[VEC03:%.*]] = insertelement <16 x i8> [[VEC02]], i8 [[OTHERELE3]], i64 11
67-
; GFX7-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8
68-
; GFX7-NEXT: [[VEC11:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9
69-
; GFX7-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC11]], i8 [[PHI1]], i64 10
70-
; GFX7-NEXT: [[VEC13:%.*]] = insertelement <16 x i8> [[VEC12]], i8 [[PHI0]], i64 11
71-
; GFX7-NEXT: store <16 x i8> [[VEC13]], ptr addrspace(3) [[INPTR1:%.*]], align 2
49+
; GFX7-NEXT: [[TMP1:%.*]] = phi <4 x i8> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[DO_BODY]] ]
50+
; GFX7-NEXT: [[TMP2]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
51+
; GFX7-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
52+
; GFX7-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
53+
; GFX7-NEXT: [[VEC131:%.*]] = shufflevector <16 x i8> [[TMP4]], <16 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
54+
; GFX7-NEXT: store <16 x i8> [[VEC131]], ptr addrspace(3) [[INPTR1:%.*]], align 2
7255
; GFX7-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG:%.*]], 0
7356
; GFX7-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[DO_BODY]]
7457
; GFX7: exit:
75-
; GFX7-NEXT: store <16 x i8> [[VEC13]], ptr [[OUT:%.*]], align 16
76-
; GFX7-NEXT: store <16 x i8> [[VEC03]], ptr [[OUT1:%.*]], align 16
58+
; GFX7-NEXT: store <16 x i8> [[VEC131]], ptr [[OUT:%.*]], align 16
59+
; GFX7-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1:%.*]], align 16
7760
; GFX7-NEXT: ret void
7861
;
7962
; GFX8PLUS-LABEL: @vectorizePHI(
8063
; GFX8PLUS-NEXT: entry:
8164
; GFX8PLUS-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0:%.*]], i32 0
82-
; GFX8PLUS-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8
83-
; GFX8PLUS-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1
84-
; GFX8PLUS-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1
85-
; GFX8PLUS-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2
86-
; GFX8PLUS-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2
87-
; GFX8PLUS-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 3
88-
; GFX8PLUS-NEXT: [[ELE3:%.*]] = load i8, ptr addrspace(3) [[GEP3]], align 1
65+
; GFX8PLUS-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
8966
; GFX8PLUS-NEXT: br label [[DO_BODY:%.*]]
9067
; GFX8PLUS: do.body:
91-
; GFX8PLUS-NEXT: [[PHI0:%.*]] = phi i8 [ [[ELE3]], [[ENTRY:%.*]] ], [ [[OTHERELE3:%.*]], [[DO_BODY]] ]
92-
; GFX8PLUS-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], [[ENTRY]] ], [ [[OTHERELE2:%.*]], [[DO_BODY]] ]
93-
; GFX8PLUS-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], [[ENTRY]] ], [ [[OTHERELE1:%.*]], [[DO_BODY]] ]
94-
; GFX8PLUS-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], [[ENTRY]] ], [ [[OTHERELE0:%.*]], [[DO_BODY]] ]
95-
; GFX8PLUS-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8
96-
; GFX8PLUS-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1
97-
; GFX8PLUS-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2
98-
; GFX8PLUS-NEXT: [[OTHERELE3]] = load i8, ptr addrspace(3) [[GEP3]], align 1
99-
; GFX8PLUS-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8
100-
; GFX8PLUS-NEXT: [[VEC01:%.*]] = insertelement <16 x i8> [[VEC00]], i8 [[OTHERELE1]], i64 9
101-
; GFX8PLUS-NEXT: [[VEC02:%.*]] = insertelement <16 x i8> [[VEC01]], i8 [[OTHERELE2]], i64 10
102-
; GFX8PLUS-NEXT: [[VEC03:%.*]] = insertelement <16 x i8> [[VEC02]], i8 [[OTHERELE3]], i64 11
103-
; GFX8PLUS-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8
104-
; GFX8PLUS-NEXT: [[VEC11:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9
105-
; GFX8PLUS-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC11]], i8 [[PHI1]], i64 10
106-
; GFX8PLUS-NEXT: [[VEC13:%.*]] = insertelement <16 x i8> [[VEC12]], i8 [[PHI0]], i64 11
107-
; GFX8PLUS-NEXT: store <16 x i8> [[VEC13]], ptr addrspace(3) [[INPTR1:%.*]], align 2
68+
; GFX8PLUS-NEXT: [[TMP1:%.*]] = phi <4 x i8> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[DO_BODY]] ]
69+
; GFX8PLUS-NEXT: [[TMP2]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
70+
; GFX8PLUS-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
71+
; GFX8PLUS-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
72+
; GFX8PLUS-NEXT: [[VEC131:%.*]] = shufflevector <16 x i8> [[TMP4]], <16 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
73+
; GFX8PLUS-NEXT: store <16 x i8> [[VEC131]], ptr addrspace(3) [[INPTR1:%.*]], align 2
10874
; GFX8PLUS-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG:%.*]], 0
10975
; GFX8PLUS-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[DO_BODY]]
11076
; GFX8PLUS: exit:
111-
; GFX8PLUS-NEXT: store <16 x i8> [[VEC13]], ptr [[OUT:%.*]], align 16
112-
; GFX8PLUS-NEXT: store <16 x i8> [[VEC03]], ptr [[OUT1:%.*]], align 16
77+
; GFX8PLUS-NEXT: store <16 x i8> [[VEC131]], ptr [[OUT:%.*]], align 16
78+
; GFX8PLUS-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1:%.*]], align 16
11379
; GFX8PLUS-NEXT: ret void
11480
;
11581
entry:
@@ -293,71 +259,26 @@ exit:
293259
define protected amdgpu_kernel void @vectorizeShuffle(<16 x i8> %invec, ptr %out, i32 %flag) {
294260
; GFX7-LABEL: @vectorizeShuffle(
295261
; GFX7-NEXT: entry:
296-
; GFX7-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC:%.*]], i64 0
297-
; GFX7-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
298-
; GFX7-NEXT: [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2
299-
; GFX7-NEXT: [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3
300-
; GFX7-NEXT: [[EL4:%.*]] = extractelement <16 x i8> [[INVEC]], i64 4
301-
; GFX7-NEXT: [[EL5:%.*]] = extractelement <16 x i8> [[INVEC]], i64 5
302-
; GFX7-NEXT: [[EL6:%.*]] = extractelement <16 x i8> [[INVEC]], i64 6
303-
; GFX7-NEXT: [[EL7:%.*]] = extractelement <16 x i8> [[INVEC]], i64 7
304-
; GFX7-NEXT: [[EL8:%.*]] = extractelement <16 x i8> [[INVEC]], i64 8
305-
; GFX7-NEXT: [[EL9:%.*]] = extractelement <16 x i8> [[INVEC]], i64 9
306-
; GFX7-NEXT: [[EL10:%.*]] = extractelement <16 x i8> [[INVEC]], i64 10
307-
; GFX7-NEXT: [[EL11:%.*]] = extractelement <16 x i8> [[INVEC]], i64 11
308-
; GFX7-NEXT: [[EL12:%.*]] = extractelement <16 x i8> [[INVEC]], i64 12
309-
; GFX7-NEXT: [[EL13:%.*]] = extractelement <16 x i8> [[INVEC]], i64 13
310-
; GFX7-NEXT: [[EL14:%.*]] = extractelement <16 x i8> [[INVEC]], i64 14
311-
; GFX7-NEXT: [[EL15:%.*]] = extractelement <16 x i8> [[INVEC]], i64 15
312-
; GFX7-NEXT: [[MUL0:%.*]] = mul i8 [[EL0]], 1
313-
; GFX7-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1
314-
; GFX7-NEXT: [[MUL2:%.*]] = mul i8 [[EL2]], 1
315-
; GFX7-NEXT: [[MUL3:%.*]] = mul i8 [[EL3]], 1
316-
; GFX7-NEXT: [[MUL4:%.*]] = mul i8 [[EL4]], 1
317-
; GFX7-NEXT: [[MUL5:%.*]] = mul i8 [[EL5]], 1
318-
; GFX7-NEXT: [[MUL6:%.*]] = mul i8 [[EL6]], 1
319-
; GFX7-NEXT: [[MUL7:%.*]] = mul i8 [[EL7]], 1
320-
; GFX7-NEXT: [[MUL8:%.*]] = mul i8 [[EL8]], 1
321-
; GFX7-NEXT: [[MUL9:%.*]] = mul i8 [[EL9]], 1
322-
; GFX7-NEXT: [[MUL10:%.*]] = mul i8 [[EL10]], 1
323-
; GFX7-NEXT: [[MUL11:%.*]] = mul i8 [[EL11]], 1
324-
; GFX7-NEXT: [[MUL12:%.*]] = mul i8 [[EL12]], 1
325-
; GFX7-NEXT: [[MUL13:%.*]] = mul i8 [[EL13]], 1
326-
; GFX7-NEXT: [[MUL14:%.*]] = mul i8 [[EL14]], 1
327-
; GFX7-NEXT: [[MUL15:%.*]] = mul i8 [[EL15]], 1
328-
; GFX7-NEXT: [[ADD0:%.*]] = add i8 [[MUL0]], 1
329-
; GFX7-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1
330-
; GFX7-NEXT: [[ADD2:%.*]] = add i8 [[MUL2]], 1
331-
; GFX7-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1
332-
; GFX7-NEXT: [[ADD4:%.*]] = add i8 [[MUL4]], 1
333-
; GFX7-NEXT: [[ADD5:%.*]] = add i8 [[MUL5]], 1
334-
; GFX7-NEXT: [[ADD6:%.*]] = add i8 [[MUL6]], 1
335-
; GFX7-NEXT: [[ADD7:%.*]] = add i8 [[MUL7]], 1
336-
; GFX7-NEXT: [[ADD8:%.*]] = add i8 [[MUL8]], 1
337-
; GFX7-NEXT: [[ADD9:%.*]] = add i8 [[MUL9]], 1
338-
; GFX7-NEXT: [[ADD10:%.*]] = add i8 [[MUL10]], 1
339-
; GFX7-NEXT: [[ADD11:%.*]] = add i8 [[MUL11]], 1
340-
; GFX7-NEXT: [[ADD12:%.*]] = add i8 [[MUL12]], 1
341-
; GFX7-NEXT: [[ADD13:%.*]] = add i8 [[MUL13]], 1
342-
; GFX7-NEXT: [[ADD14:%.*]] = add i8 [[MUL14]], 1
343-
; GFX7-NEXT: [[ADD15:%.*]] = add i8 [[MUL15]], 1
344-
; GFX7-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
345-
; GFX7-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
346-
; GFX7-NEXT: [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2
347-
; GFX7-NEXT: [[VECINS3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3
348-
; GFX7-NEXT: [[VECINS4:%.*]] = insertelement <16 x i8> [[VECINS3]], i8 [[ADD4]], i64 4
349-
; GFX7-NEXT: [[VECINS5:%.*]] = insertelement <16 x i8> [[VECINS4]], i8 [[ADD5]], i64 5
350-
; GFX7-NEXT: [[VECINS6:%.*]] = insertelement <16 x i8> [[VECINS5]], i8 [[ADD6]], i64 6
351-
; GFX7-NEXT: [[VECINS7:%.*]] = insertelement <16 x i8> [[VECINS6]], i8 [[ADD7]], i64 7
352-
; GFX7-NEXT: [[VECINS8:%.*]] = insertelement <16 x i8> [[VECINS7]], i8 [[ADD8]], i64 8
353-
; GFX7-NEXT: [[VECINS9:%.*]] = insertelement <16 x i8> [[VECINS8]], i8 [[ADD9]], i64 9
354-
; GFX7-NEXT: [[VECINS10:%.*]] = insertelement <16 x i8> [[VECINS9]], i8 [[ADD10]], i64 10
355-
; GFX7-NEXT: [[VECINS11:%.*]] = insertelement <16 x i8> [[VECINS10]], i8 [[ADD11]], i64 11
356-
; GFX7-NEXT: [[VECINS12:%.*]] = insertelement <16 x i8> [[VECINS11]], i8 [[ADD12]], i64 12
357-
; GFX7-NEXT: [[VECINS13:%.*]] = insertelement <16 x i8> [[VECINS12]], i8 [[ADD13]], i64 13
358-
; GFX7-NEXT: [[VECINS14:%.*]] = insertelement <16 x i8> [[VECINS13]], i8 [[ADD14]], i64 14
359-
; GFX7-NEXT: [[VECINS15:%.*]] = insertelement <16 x i8> [[VECINS14]], i8 [[ADD15]], i64 15
360-
; GFX7-NEXT: store <16 x i8> [[VECINS15]], ptr [[OUT:%.*]], align 16
262+
; GFX7-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
263+
; GFX7-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[TMP0]], <i8 1, i8 1, i8 1, i8 1>
264+
; GFX7-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1>
265+
; GFX7-NEXT: [[TMP3:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
266+
; GFX7-NEXT: [[TMP4:%.*]] = mul <4 x i8> [[TMP3]], <i8 1, i8 1, i8 1, i8 1>
267+
; GFX7-NEXT: [[TMP5:%.*]] = add <4 x i8> [[TMP4]], <i8 1, i8 1, i8 1, i8 1>
268+
; GFX7-NEXT: [[TMP6:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
269+
; GFX7-NEXT: [[TMP7:%.*]] = mul <4 x i8> [[TMP6]], <i8 1, i8 1, i8 1, i8 1>
270+
; GFX7-NEXT: [[TMP8:%.*]] = add <4 x i8> [[TMP7]], <i8 1, i8 1, i8 1, i8 1>
271+
; GFX7-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
272+
; GFX7-NEXT: [[TMP10:%.*]] = mul <4 x i8> [[TMP9]], <i8 1, i8 1, i8 1, i8 1>
273+
; GFX7-NEXT: [[TMP11:%.*]] = add <4 x i8> [[TMP10]], <i8 1, i8 1, i8 1, i8 1>
274+
; GFX7-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
275+
; GFX7-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
276+
; GFX7-NEXT: [[VECINS71:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> [[TMP13]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
277+
; GFX7-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
278+
; GFX7-NEXT: [[VECINS112:%.*]] = shufflevector <16 x i8> [[VECINS71]], <16 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
279+
; GFX7-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
280+
; GFX7-NEXT: [[VECINS153:%.*]] = shufflevector <16 x i8> [[VECINS112]], <16 x i8> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
281+
; GFX7-NEXT: store <16 x i8> [[VECINS153]], ptr [[OUT:%.*]], align 16
361282
; GFX7-NEXT: ret void
362283
;
363284
; GFX8PLUS-LABEL: @vectorizeShuffle(

0 commit comments

Comments
 (0)