Skip to content

Commit 0210138

Browse files
committed
Review comments + Rebase on top of #91016
Change-Id: If5ac53d5235ee8c65c53454b209c9f155c17edc4
1 parent b245a49 commit 0210138

File tree

8 files changed

+69
-95
lines changed

8 files changed

+69
-95
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -893,7 +893,7 @@ class TargetTransformInfo {
893893

894894
/// Whether or not there is any target-specific condition that imposes an
895895
/// overhead for scalarization
896-
bool hasScalarizationOverhead(ArrayRef<Value *> VL,
896+
bool hasScalarizationOverhead(ArrayRef<Value *> VL, FixedVectorType *VTy,
897897
std::pair<bool, bool> &ScalarizationKind) const;
898898

899899
/// Estimate the overhead of scalarizing an instruction's unique
@@ -1928,7 +1928,7 @@ class TargetTransformInfo::Concept {
19281928
TargetCostKind CostKind) = 0;
19291929

19301930
virtual bool
1931-
hasScalarizationOverhead(ArrayRef<Value *> VL,
1931+
hasScalarizationOverhead(ArrayRef<Value *> VL, FixedVectorType *VTy,
19321932
std::pair<bool, bool> &ScalarizationKind) = 0;
19331933
virtual bool supportsEfficientVectorElementLoadStore() = 0;
19341934
virtual bool supportsTailCalls() = 0;
@@ -2467,9 +2467,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
24672467
}
24682468

24692469
bool
2470-
hasScalarizationOverhead(ArrayRef<Value *> VL,
2470+
hasScalarizationOverhead(ArrayRef<Value *> VL, FixedVectorType *VTy,
24712471
std::pair<bool, bool> &ScalarizationKind) override {
2472-
return Impl.hasScalarizationOverhead(VL, ScalarizationKind);
2472+
return Impl.hasScalarizationOverhead(VL, VTy, ScalarizationKind);
24732473
}
24742474

24752475
InstructionCost

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ class TargetTransformInfoImplBase {
371371
return 0;
372372
}
373373

374-
bool hasScalarizationOverhead(ArrayRef<Value *> VL,
374+
bool hasScalarizationOverhead(ArrayRef<Value *> VL, FixedVectorType *VTy,
375375
std::pair<bool, bool> &ScalarizationKind) {
376376
return false;
377377
}

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -807,7 +807,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
807807
CostKind);
808808
}
809809

810-
bool hasScalarizationOverhead(ArrayRef<Value *> VL,
810+
bool hasScalarizationOverhead(ArrayRef<Value *> VL, FixedVectorType *VT,
811811
std::pair<bool, bool> &ScalarizationKind) {
812812
return false;
813813
}

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -595,8 +595,9 @@ InstructionCost TargetTransformInfo::getScalarizationOverhead(
595595
}
596596

597597
bool TargetTransformInfo::hasScalarizationOverhead(
598-
ArrayRef<Value *> VL, std::pair<bool, bool> &ScalarizeKind) const {
599-
return TTIImpl->hasScalarizationOverhead(VLm ScalarizeKind);
598+
ArrayRef<Value *> VL, FixedVectorType *VTy,
599+
std::pair<bool, bool> &ScalarizeKind) const {
600+
return TTIImpl->hasScalarizationOverhead(VL, VTy, ScalarizeKind);
600601
}
601602

602603
InstructionCost TargetTransformInfo::getOperandsScalarizationOverhead(

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1379,6 +1379,32 @@ int GCNTTIImpl::get64BitInstrCost(TTI::TargetCostKind CostKind) const {
13791379
: getQuarterRateInstrCost(CostKind);
13801380
}
13811381

1382+
/// Reports whether the scalars in \p VL carry a target-specific scalarization
/// overhead.
///
/// Only vector types whose element type is exactly 8 bits wide are
/// considered; for any other element size the answer is always false. The
/// query succeeds as soon as one instruction in \p VL has a user that is a
/// PHI node or that lives in a different basic block than the instruction
/// itself.
///
/// \param VL the scalar values to inspect; entries that are not instructions
///        are skipped.
/// \param VTy the vector type whose element size gates the check.
/// \param ScalarizationKind set to {true, true} when the function returns
///        true; left untouched otherwise.
/// \returns true if such a PHI or cross-block user was found.
bool GCNTTIImpl::hasScalarizationOverhead(
    ArrayRef<Value *> VL, FixedVectorType *VTy,
    std::pair<bool, bool> &ScalarizationKind) const {
  if (DL.getTypeSizeInBits(VTy->getElementType()) != 8)
    return false;

  for (Value *V : VL) {
    // dyn_cast yields null for values that are not instructions. The guard
    // must test the cast result (not V), otherwise the users() call below
    // would dereference a null pointer.
    Instruction *Inst = dyn_cast<Instruction>(V);
    if (!Inst)
      continue;

    for (User *IU : Inst->users()) {
      Instruction *UseInst = cast<Instruction>(IU);
      if (UseInst->getOpcode() == Instruction::PHI ||
          UseInst->getParent() != Inst->getParent()) {
        ScalarizationKind = {true, true};
        return true;
      }
    }
  }

  return false;
}
1407+
13821408
std::pair<InstructionCost, MVT>
13831409
GCNTTIImpl::getTypeLegalizationCost(Type *Ty) const {
13841410
std::pair<InstructionCost, MVT> Cost = BaseT::getTypeLegalizationCost(Ty);

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,9 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
257257
FastMathFlags FMF,
258258
TTI::TargetCostKind CostKind);
259259

260+
bool hasScalarizationOverhead(ArrayRef<Value *> VL, FixedVectorType *VTy,
261+
std::pair<bool, bool> &ScalarizationKind) const;
262+
260263
/// Data cache line size for LoopDataPrefetch pass. Has no use before GFX12.
261264
unsigned getCacheLineSize() const override { return 128; }
262265

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9085,7 +9085,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
90859085
}
90869086
InstructionCost CommonCost = 0;
90879087
std::pair<bool, bool> ScalarizationKind(false, false);
9088-
if (TTI->hasScalarizationOverhead(VL, ScalarizationKind)) {
9088+
if (TTI->hasScalarizationOverhead(VL, FinalVecTy, ScalarizationKind)) {
90899089
APInt DemandedElts = APInt::getAllOnes(VL.size());
90909090
CommonCost -= TTI->getScalarizationOverhead(
90919091
VecTy, DemandedElts,

llvm/test/Transforms/SLPVectorizer/AMDGPU/i8.ll

Lines changed: 30 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -43,73 +43,37 @@ define protected amdgpu_kernel void @phi(ptr addrspace(3) %inptr0, ptr addrspace
4343
; GFX7-LABEL: @phi(
4444
; GFX7-NEXT: entry:
4545
; GFX7-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0:%.*]], i32 0
46-
; GFX7-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8
47-
; GFX7-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1
48-
; GFX7-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1
49-
; GFX7-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2
50-
; GFX7-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2
51-
; GFX7-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 3
52-
; GFX7-NEXT: [[ELE3:%.*]] = load i8, ptr addrspace(3) [[GEP3]], align 1
46+
; GFX7-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
5347
; GFX7-NEXT: br label [[DO_BODY:%.*]]
5448
; GFX7: do.body:
55-
; GFX7-NEXT: [[PHI0:%.*]] = phi i8 [ [[ELE3]], [[ENTRY:%.*]] ], [ [[OTHERELE3:%.*]], [[DO_BODY]] ]
56-
; GFX7-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], [[ENTRY]] ], [ [[OTHERELE2:%.*]], [[DO_BODY]] ]
57-
; GFX7-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], [[ENTRY]] ], [ [[OTHERELE1:%.*]], [[DO_BODY]] ]
58-
; GFX7-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], [[ENTRY]] ], [ [[OTHERELE0:%.*]], [[DO_BODY]] ]
59-
; GFX7-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8
60-
; GFX7-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1
61-
; GFX7-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2
62-
; GFX7-NEXT: [[OTHERELE3]] = load i8, ptr addrspace(3) [[GEP3]], align 1
63-
; GFX7-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8
64-
; GFX7-NEXT: [[VEC01:%.*]] = insertelement <16 x i8> [[VEC00]], i8 [[OTHERELE1]], i64 9
65-
; GFX7-NEXT: [[VEC02:%.*]] = insertelement <16 x i8> [[VEC01]], i8 [[OTHERELE2]], i64 10
66-
; GFX7-NEXT: [[VEC03:%.*]] = insertelement <16 x i8> [[VEC02]], i8 [[OTHERELE3]], i64 11
67-
; GFX7-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8
68-
; GFX7-NEXT: [[VEC11:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9
69-
; GFX7-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC11]], i8 [[PHI1]], i64 10
70-
; GFX7-NEXT: [[VEC13:%.*]] = insertelement <16 x i8> [[VEC12]], i8 [[PHI0]], i64 11
71-
; GFX7-NEXT: store <16 x i8> [[VEC13]], ptr addrspace(3) [[INPTR1:%.*]], align 2
49+
; GFX7-NEXT: [[TMP1:%.*]] = phi <4 x i8> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[DO_BODY]] ]
50+
; GFX7-NEXT: [[TMP2]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
51+
; GFX7-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
52+
; GFX7-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
53+
; GFX7-NEXT: store <16 x i8> [[TMP4]], ptr addrspace(3) [[INPTR1:%.*]], align 2
7254
; GFX7-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG:%.*]], 0
7355
; GFX7-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[DO_BODY]]
7456
; GFX7: exit:
75-
; GFX7-NEXT: store <16 x i8> [[VEC13]], ptr [[OUT:%.*]], align 16
76-
; GFX7-NEXT: store <16 x i8> [[VEC03]], ptr [[OUT1:%.*]], align 16
57+
; GFX7-NEXT: store <16 x i8> [[TMP4]], ptr [[OUT:%.*]], align 16
58+
; GFX7-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1:%.*]], align 16
7759
; GFX7-NEXT: ret void
7860
;
7961
; GFX8PLUS-LABEL: @phi(
8062
; GFX8PLUS-NEXT: entry:
8163
; GFX8PLUS-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0:%.*]], i32 0
82-
; GFX8PLUS-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8
83-
; GFX8PLUS-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1
84-
; GFX8PLUS-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1
85-
; GFX8PLUS-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2
86-
; GFX8PLUS-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2
87-
; GFX8PLUS-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 3
88-
; GFX8PLUS-NEXT: [[ELE3:%.*]] = load i8, ptr addrspace(3) [[GEP3]], align 1
64+
; GFX8PLUS-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
8965
; GFX8PLUS-NEXT: br label [[DO_BODY:%.*]]
9066
; GFX8PLUS: do.body:
91-
; GFX8PLUS-NEXT: [[PHI0:%.*]] = phi i8 [ [[ELE3]], [[ENTRY:%.*]] ], [ [[OTHERELE3:%.*]], [[DO_BODY]] ]
92-
; GFX8PLUS-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], [[ENTRY]] ], [ [[OTHERELE2:%.*]], [[DO_BODY]] ]
93-
; GFX8PLUS-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], [[ENTRY]] ], [ [[OTHERELE1:%.*]], [[DO_BODY]] ]
94-
; GFX8PLUS-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], [[ENTRY]] ], [ [[OTHERELE0:%.*]], [[DO_BODY]] ]
95-
; GFX8PLUS-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8
96-
; GFX8PLUS-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1
97-
; GFX8PLUS-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2
98-
; GFX8PLUS-NEXT: [[OTHERELE3]] = load i8, ptr addrspace(3) [[GEP3]], align 1
99-
; GFX8PLUS-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8
100-
; GFX8PLUS-NEXT: [[VEC01:%.*]] = insertelement <16 x i8> [[VEC00]], i8 [[OTHERELE1]], i64 9
101-
; GFX8PLUS-NEXT: [[VEC02:%.*]] = insertelement <16 x i8> [[VEC01]], i8 [[OTHERELE2]], i64 10
102-
; GFX8PLUS-NEXT: [[VEC03:%.*]] = insertelement <16 x i8> [[VEC02]], i8 [[OTHERELE3]], i64 11
103-
; GFX8PLUS-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8
104-
; GFX8PLUS-NEXT: [[VEC11:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9
105-
; GFX8PLUS-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC11]], i8 [[PHI1]], i64 10
106-
; GFX8PLUS-NEXT: [[VEC13:%.*]] = insertelement <16 x i8> [[VEC12]], i8 [[PHI0]], i64 11
107-
; GFX8PLUS-NEXT: store <16 x i8> [[VEC13]], ptr addrspace(3) [[INPTR1:%.*]], align 2
67+
; GFX8PLUS-NEXT: [[TMP1:%.*]] = phi <4 x i8> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[DO_BODY]] ]
68+
; GFX8PLUS-NEXT: [[TMP2]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
69+
; GFX8PLUS-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
70+
; GFX8PLUS-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
71+
; GFX8PLUS-NEXT: store <16 x i8> [[TMP4]], ptr addrspace(3) [[INPTR1:%.*]], align 2
10872
; GFX8PLUS-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG:%.*]], 0
10973
; GFX8PLUS-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[DO_BODY]]
11074
; GFX8PLUS: exit:
111-
; GFX8PLUS-NEXT: store <16 x i8> [[VEC13]], ptr [[OUT:%.*]], align 16
112-
; GFX8PLUS-NEXT: store <16 x i8> [[VEC03]], ptr [[OUT1:%.*]], align 16
75+
; GFX8PLUS-NEXT: store <16 x i8> [[TMP4]], ptr [[OUT:%.*]], align 16
76+
; GFX8PLUS-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1:%.*]], align 16
11377
; GFX8PLUS-NEXT: ret void
11478
;
11579
entry:
@@ -189,69 +153,49 @@ define protected amdgpu_kernel void @arith_phi(ptr addrspace(3) %inptr0, ptr %ou
189153
; GFX7-LABEL: @arith_phi(
190154
; GFX7-NEXT: entry:
191155
; GFX7-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0:%.*]], i32 0
192-
; GFX7-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8
193156
; GFX7-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1
194-
; GFX7-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1
195157
; GFX7-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2
196-
; GFX7-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2
197158
; GFX7-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 3
198-
; GFX7-NEXT: [[ELE3:%.*]] = load i8, ptr addrspace(3) [[GEP3]], align 1
159+
; GFX7-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
199160
; GFX7-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG:%.*]], 0
200161
; GFX7-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[BB_1:%.*]]
201162
; GFX7: bb.1:
202-
; GFX7-NEXT: [[ADD0:%.*]] = add i8 [[ELE0]], 1
203-
; GFX7-NEXT: [[ADD1:%.*]] = add i8 [[ELE1]], 1
204-
; GFX7-NEXT: [[ADD2:%.*]] = add i8 [[ELE2]], 1
205-
; GFX7-NEXT: [[ADD3:%.*]] = add i8 [[ELE3]], 1
163+
; GFX7-NEXT: [[TMP1:%.*]] = add <4 x i8> [[TMP0]], <i8 1, i8 1, i8 1, i8 1>
164+
; GFX7-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
206165
; GFX7-NEXT: br label [[EXIT]]
207166
; GFX7: exit:
208-
; GFX7-NEXT: [[PHI0:%.*]] = phi i8 [ [[ELE3]], [[ENTRY:%.*]] ], [ [[ADD0]], [[BB_1]] ]
209-
; GFX7-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], [[ENTRY]] ], [ [[ADD1]], [[BB_1]] ]
210-
; GFX7-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], [[ENTRY]] ], [ [[ADD2]], [[BB_1]] ]
211-
; GFX7-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], [[ENTRY]] ], [ [[ADD3]], [[BB_1]] ]
167+
; GFX7-NEXT: [[TMP3:%.*]] = phi <4 x i8> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB_1]] ]
212168
; GFX7-NEXT: [[OTHERELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8
213169
; GFX7-NEXT: [[OTHERELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1
214170
; GFX7-NEXT: [[OTHERELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2
215171
; GFX7-NEXT: [[OTHERELE3:%.*]] = load i8, ptr addrspace(3) [[GEP3]], align 1
216-
; GFX7-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8
217-
; GFX7-NEXT: [[VEC11:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9
218-
; GFX7-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC11]], i8 [[PHI1]], i64 10
219-
; GFX7-NEXT: [[VEC13:%.*]] = insertelement <16 x i8> [[VEC12]], i8 [[PHI0]], i64 11
220-
; GFX7-NEXT: store <16 x i8> [[VEC13]], ptr [[OUT:%.*]], align 2
172+
; GFX7-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
173+
; GFX7-NEXT: [[VEC131:%.*]] = shufflevector <16 x i8> [[TMP4]], <16 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
174+
; GFX7-NEXT: store <16 x i8> [[VEC131]], ptr [[OUT:%.*]], align 2
221175
; GFX7-NEXT: ret void
222176
;
223177
; GFX8PLUS-LABEL: @arith_phi(
224178
; GFX8PLUS-NEXT: entry:
225179
; GFX8PLUS-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0:%.*]], i32 0
226-
; GFX8PLUS-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8
227180
; GFX8PLUS-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1
228-
; GFX8PLUS-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1
229181
; GFX8PLUS-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2
230-
; GFX8PLUS-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2
231182
; GFX8PLUS-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 3
232-
; GFX8PLUS-NEXT: [[ELE3:%.*]] = load i8, ptr addrspace(3) [[GEP3]], align 1
183+
; GFX8PLUS-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
233184
; GFX8PLUS-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG:%.*]], 0
234185
; GFX8PLUS-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[BB_1:%.*]]
235186
; GFX8PLUS: bb.1:
236-
; GFX8PLUS-NEXT: [[ADD0:%.*]] = add i8 [[ELE0]], 1
237-
; GFX8PLUS-NEXT: [[ADD1:%.*]] = add i8 [[ELE1]], 1
238-
; GFX8PLUS-NEXT: [[ADD2:%.*]] = add i8 [[ELE2]], 1
239-
; GFX8PLUS-NEXT: [[ADD3:%.*]] = add i8 [[ELE3]], 1
187+
; GFX8PLUS-NEXT: [[TMP1:%.*]] = add <4 x i8> [[TMP0]], <i8 1, i8 1, i8 1, i8 1>
188+
; GFX8PLUS-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
240189
; GFX8PLUS-NEXT: br label [[EXIT]]
241190
; GFX8PLUS: exit:
242-
; GFX8PLUS-NEXT: [[PHI0:%.*]] = phi i8 [ [[ELE3]], [[ENTRY:%.*]] ], [ [[ADD0]], [[BB_1]] ]
243-
; GFX8PLUS-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], [[ENTRY]] ], [ [[ADD1]], [[BB_1]] ]
244-
; GFX8PLUS-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], [[ENTRY]] ], [ [[ADD2]], [[BB_1]] ]
245-
; GFX8PLUS-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], [[ENTRY]] ], [ [[ADD3]], [[BB_1]] ]
191+
; GFX8PLUS-NEXT: [[TMP3:%.*]] = phi <4 x i8> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB_1]] ]
246192
; GFX8PLUS-NEXT: [[OTHERELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8
247193
; GFX8PLUS-NEXT: [[OTHERELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1
248194
; GFX8PLUS-NEXT: [[OTHERELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2
249195
; GFX8PLUS-NEXT: [[OTHERELE3:%.*]] = load i8, ptr addrspace(3) [[GEP3]], align 1
250-
; GFX8PLUS-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8
251-
; GFX8PLUS-NEXT: [[VEC11:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9
252-
; GFX8PLUS-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC11]], i8 [[PHI1]], i64 10
253-
; GFX8PLUS-NEXT: [[VEC13:%.*]] = insertelement <16 x i8> [[VEC12]], i8 [[PHI0]], i64 11
254-
; GFX8PLUS-NEXT: store <16 x i8> [[VEC13]], ptr [[OUT:%.*]], align 2
196+
; GFX8PLUS-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
197+
; GFX8PLUS-NEXT: [[VEC131:%.*]] = shufflevector <16 x i8> [[TMP4]], <16 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
198+
; GFX8PLUS-NEXT: store <16 x i8> [[VEC131]], ptr [[OUT:%.*]], align 2
255199
; GFX8PLUS-NEXT: ret void
256200
;
257201
entry:

0 commit comments

Comments
 (0)