Skip to content

Commit 5151440

Browse files
author
git apple-llvm automerger
committed
Merge commit '87d7757bbe14' from llvm.org/master into apple/main
2 parents 724e173 + 87d7757 commit 5151440

File tree

9 files changed

+95
-37
lines changed

9 files changed

+95
-37
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -941,6 +941,11 @@ class TargetTransformInfo {
941941
/// applies when shouldMaximizeVectorBandwidth returns true.
942942
unsigned getMinimumVF(unsigned ElemWidth) const;
943943

944+
/// \return The maximum vectorization factor for types of given element
945+
/// bit width and opcode, or 0 if there is no maximum VF.
946+
/// Currently only used by the SLP vectorizer.
947+
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
948+
944949
/// \return True if it should be considered for address type promotion.
945950
/// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
946951
/// profitable without finding other extensions fed by the same input.
@@ -1498,6 +1503,7 @@ class TargetTransformInfo::Concept {
14981503
virtual unsigned getMinVectorRegisterBitWidth() = 0;
14991504
virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
15001505
virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
1506+
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
15011507
virtual bool shouldConsiderAddressTypePromotion(
15021508
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
15031509
virtual unsigned getCacheLineSize() const = 0;
@@ -1917,6 +1923,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
19171923
/// Forwards the minimum-VF query for \p ElemWidth to the wrapped
/// implementation \c Impl and returns its answer unchanged.
unsigned getMinimumVF(unsigned ElemWidth) const override {
19181924
return Impl.getMinimumVF(ElemWidth);
19191925
}
1926+
/// Forwards the maximum-VF query for \p ElemWidth and \p Opcode to the
/// wrapped implementation \c Impl and returns its answer unchanged.
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
1927+
return Impl.getMaximumVF(ElemWidth, Opcode);
1928+
}
19201929
bool shouldConsiderAddressTypePromotion(
19211930
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
19221931
return Impl.shouldConsiderAddressTypePromotion(

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,8 @@ class TargetTransformInfoImplBase {
356356

357357
/// Base implementation: always returns 0, whatever \p ElemWidth is.
unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }
358358

359+
/// Base implementation: always returns 0, which the TargetTransformInfo
/// interface documents as "there is no maximum VF".
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
360+
359361
bool
360362
shouldConsiderAddressTypePromotion(const Instruction &I,
361363
bool &AllowPromotionWithoutCommonHeader) {

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,6 +635,11 @@ unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
635635
return TTIImpl->getMinimumVF(ElemWidth);
636636
}
637637

638+
/// Public entry point for the maximum-VF query: delegates to \c TTIImpl with
/// the same \p ElemWidth and \p Opcode and returns its result unchanged.
unsigned TargetTransformInfo::getMaximumVF(unsigned ElemWidth,
639+
unsigned Opcode) const {
640+
return TTIImpl->getMaximumVF(ElemWidth, Opcode);
641+
}
642+
638643
bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
639644
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
640645
return TTIImpl->shouldConsiderAddressTypePromotion(

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,12 @@ unsigned GCNTTIImpl::getMinVectorRegisterBitWidth() const {
288288
return 32;
289289
}
290290

291+
unsigned GCNTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
292+
if (Opcode == Instruction::Load || Opcode == Instruction::Store)
293+
return 32 * 4 / ElemWidth;
294+
return (ElemWidth == 16 && ST->has16BitInsts()) ? 2 : 1;
295+
}
296+
291297
unsigned GCNTTIImpl::getLoadVectorFactor(unsigned VF, unsigned LoadSize,
292298
unsigned ChainSizeInBytes,
293299
VectorType *VecTy) const {

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
170170
unsigned getNumberOfRegisters(unsigned RCID) const;
171171
unsigned getRegisterBitWidth(bool Vector) const;
172172
unsigned getMinVectorRegisterBitWidth() const;
173+
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
173174
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
174175
unsigned ChainSizeInBytes,
175176
VectorType *VecTy) const;

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,10 @@ static cl::opt<int>
126126
MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
127127
cl::desc("Attempt to vectorize for this register size in bits"));
128128

129+
// Hidden command-line cap on the SLP vectorization factor. Defaults to 0,
// which the option description documents as "unlimited".
static cl::opt<unsigned>
130+
MaxVFOption("slp-max-vf", cl::init(0), cl::Hidden,
131+
cl::desc("Maximum SLP vectorization factor (0=unlimited)"));
132+
129133
// Hidden command-line option, default 32. Per its description it bounds how
// deep the lookup for consecutive stores may go.
static cl::opt<int>
130134
MaxStoreLookup("slp-max-store-lookup", cl::init(32), cl::Hidden,
131135
cl::desc("Maximum depth of the lookup for consecutive stores."));
@@ -741,6 +745,12 @@ class BoUpSLP {
741745
return MinVecRegSize;
742746
}
743747

748+
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
749+
unsigned MaxVF = MaxVFOption.getNumOccurrences() ?
750+
MaxVFOption : TTI->getMaximumVF(ElemWidth, Opcode);
751+
return MaxVF ? MaxVF : UINT_MAX;
752+
}
753+
744754
/// Check if homogeneous aggregate is isomorphic to some VectorType.
745755
/// Accepts homogeneous multidimensional aggregate of scalars/vectors like
746756
/// {[4 x i16], [4 x i16]}, { <2 x float>, <2 x float> },
@@ -6191,6 +6201,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
61916201
unsigned Sz = R.getVectorElementSize(I0);
61926202
unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
61936203
unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
6204+
MaxVF = std::min(R.getMaximumVF(Sz, S.getOpcode()), MaxVF);
61946205
if (MaxVF < 2) {
61956206
R.getORE()->emit([&]() {
61966207
return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
@@ -7633,7 +7644,6 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
76337644
bool Changed = false;
76347645
SmallVector<Value *, 4> Incoming;
76357646
SmallPtrSet<Value *, 16> VisitedInstrs;
7636-
unsigned MaxVecRegSize = R.getMaxVecRegSize();
76377647

76387648
bool HaveVectorizedPhiNodes = true;
76397649
while (HaveVectorizedPhiNodes) {
@@ -7660,27 +7670,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
76607670

76617671
// Look for the next elements with the same type.
76627672
SmallVector<Value *, 4>::iterator SameTypeIt = IncIt;
7663-
Type *EltTy = (*IncIt)->getType();
7664-
7665-
assert(EltTy->isSized() &&
7666-
"Instructions should all be sized at this point");
7667-
TypeSize EltTS = DL->getTypeSizeInBits(EltTy);
7668-
if (EltTS.isScalable()) {
7669-
// For now, just ignore vectorizing scalable types.
7670-
++IncIt;
7671-
continue;
7672-
}
7673-
7674-
unsigned EltSize = EltTS.getFixedSize();
7675-
unsigned MaxNumElts = MaxVecRegSize / EltSize;
7676-
if (MaxNumElts < 2) {
7677-
++IncIt;
7678-
continue;
7679-
}
7680-
76817673
while (SameTypeIt != E &&
7682-
(*SameTypeIt)->getType() == EltTy &&
7683-
static_cast<unsigned>(SameTypeIt - IncIt) < MaxNumElts) {
7674+
(*SameTypeIt)->getType() == (*IncIt)->getType()) {
76847675
VisitedInstrs.insert(*SameTypeIt);
76857676
++SameTypeIt;
76867677
}

llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -123,12 +123,18 @@ bb:
123123
ret <2 x i16> %ins.1
124124
}
125125

126-
; FIXME: Should not vectorize
127126
define <2 x i32> @uadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
128127
; GCN-LABEL: @uadd_sat_v2i32(
129128
; GCN-NEXT: bb:
130-
; GCN-NEXT: [[TMP0:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[ARG0:%.*]], <2 x i32> [[ARG1:%.*]])
131-
; GCN-NEXT: ret <2 x i32> [[TMP0]]
129+
; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
130+
; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
131+
; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
132+
; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
133+
; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
134+
; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
135+
; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> undef, i32 [[ADD_0]], i64 0
136+
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
137+
; GCN-NEXT: ret <2 x i32> [[INS_1]]
132138
;
133139
bb:
134140
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
@@ -145,8 +151,15 @@ bb:
145151
define <2 x i32> @usub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
146152
; GCN-LABEL: @usub_sat_v2i32(
147153
; GCN-NEXT: bb:
148-
; GCN-NEXT: [[TMP0:%.*]] = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> [[ARG0:%.*]], <2 x i32> [[ARG1:%.*]])
149-
; GCN-NEXT: ret <2 x i32> [[TMP0]]
154+
; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
155+
; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
156+
; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
157+
; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
158+
; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
159+
; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
160+
; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> undef, i32 [[ADD_0]], i64 0
161+
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
162+
; GCN-NEXT: ret <2 x i32> [[INS_1]]
150163
;
151164
bb:
152165
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
@@ -163,8 +176,15 @@ bb:
163176
define <2 x i32> @sadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
164177
; GCN-LABEL: @sadd_sat_v2i32(
165178
; GCN-NEXT: bb:
166-
; GCN-NEXT: [[TMP0:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[ARG0:%.*]], <2 x i32> [[ARG1:%.*]])
167-
; GCN-NEXT: ret <2 x i32> [[TMP0]]
179+
; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
180+
; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
181+
; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
182+
; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
183+
; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
184+
; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
185+
; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> undef, i32 [[ADD_0]], i64 0
186+
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
187+
; GCN-NEXT: ret <2 x i32> [[INS_1]]
168188
;
169189
bb:
170190
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
@@ -181,8 +201,15 @@ bb:
181201
define <2 x i32> @ssub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
182202
; GCN-LABEL: @ssub_sat_v2i32(
183203
; GCN-NEXT: bb:
184-
; GCN-NEXT: [[TMP0:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> [[ARG0:%.*]], <2 x i32> [[ARG1:%.*]])
185-
; GCN-NEXT: ret <2 x i32> [[TMP0]]
204+
; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
205+
; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
206+
; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
207+
; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
208+
; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
209+
; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
210+
; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> undef, i32 [[ADD_0]], i64 0
211+
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
212+
; GCN-NEXT: ret <2 x i32> [[INS_1]]
186213
;
187214
bb:
188215
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
@@ -267,8 +294,14 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
267294
;
268295
; GFX8-LABEL: @uadd_sat_v4i16(
269296
; GFX8-NEXT: bb:
270-
; GFX8-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> [[ARG0:%.*]], <4 x i16> [[ARG1:%.*]])
271-
; GFX8-NEXT: ret <4 x i16> [[TMP0]]
297+
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> undef, <2 x i32> <i32 0, i32 1>
298+
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> undef, <2 x i32> <i32 0, i32 1>
299+
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
300+
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> undef, <2 x i32> <i32 2, i32 3>
301+
; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> undef, <2 x i32> <i32 2, i32 3>
302+
; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
303+
; GFX8-NEXT: [[INS_3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
304+
; GFX8-NEXT: ret <4 x i16> [[INS_3]]
272305
;
273306
bb:
274307
%arg0.0 = extractelement <4 x i16> %arg0, i64 0

llvm/test/Transforms/SLPVectorizer/AMDGPU/round.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ bb:
1818
ret <2 x half> %tmp5
1919
}
2020

21-
; TODO: Should probably not really be vectorizing this
2221
; GCN-LABEL: @round_v2f32(
23-
; GCN: call <2 x float> @llvm.round.v2f32
22+
; GCN: call float @llvm.round.f32(
23+
; GCN: call float @llvm.round.f32(
2424
define <2 x float> @round_v2f32(<2 x float> %arg) {
2525
bb:
2626
%tmp = extractelement <2 x float> %arg, i64 0

llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -slp-vectorizer -S -slp-max-reg-size=32 < %s | FileCheck -check-prefix=MAX32 %s
3-
; RUN: opt -slp-vectorizer -S -slp-max-reg-size=256 < %s | FileCheck -check-prefix=MAX256 %s
4-
; RUN: opt -slp-vectorizer -S -slp-max-reg-size=1024 < %s | FileCheck -check-prefix=MAX1024 %s
2+
; RUN: opt -slp-vectorizer -S -slp-max-vf=1 < %s | FileCheck -check-prefix=MAX32 %s
3+
; RUN: opt -slp-vectorizer -S -slp-max-vf=8 < %s | FileCheck -check-prefix=MAX256 %s
4+
; RUN: opt -slp-vectorizer -S -slp-max-vf=32 < %s | FileCheck -check-prefix=MAX1024 %s
5+
; RUN: opt -slp-vectorizer -S < %s | FileCheck -check-prefix=MAX1024 %s
6+
7+
; Make sure we do not vectorize to create a PHI wider than requested.
8+
; On the AMDGPU target, wider vectorization will result in higher register pressure,
9+
; spilling, or even inability to allocate registers.
510

611
define void @phi_float32(half %hval, float %fval) {
712
; MAX32-LABEL: @phi_float32(
@@ -120,6 +125,7 @@ define void @phi_float32(half %hval, float %fval) {
120125
; MAX32-NEXT: [[PHI30:%.*]] = phi float [ [[I63]], [[BB3]] ], [ [[FVAL]], [[BB4]] ], [ [[FVAL]], [[BB5]] ], [ [[FVAL]], [[BB1]] ]
121126
; MAX32-NEXT: [[PHI31:%.*]] = phi float [ [[I65]], [[BB3]] ], [ [[FVAL]], [[BB4]] ], [ [[I65]], [[BB5]] ], [ [[I65]], [[BB1]] ]
122127
; MAX32-NEXT: [[PHI32:%.*]] = phi float [ [[I67]], [[BB3]] ], [ [[I67]], [[BB4]] ], [ [[FVAL]], [[BB5]] ], [ [[I67]], [[BB1]] ]
128+
; MAX32-NEXT: store float [[PHI31]], float* undef, align 4
123129
; MAX32-NEXT: ret void
124130
;
125131
; MAX256-LABEL: @phi_float32(
@@ -296,6 +302,8 @@ define void @phi_float32(half %hval, float %fval) {
296302
; MAX256-NEXT: [[TMP154:%.*]] = phi <8 x float> [ [[TMP28]], [[BB3]] ], [ [[TMP93]], [[BB4]] ], [ [[TMP132]], [[BB5]] ], [ [[TMP54]], [[BB1]] ]
297303
; MAX256-NEXT: [[TMP155:%.*]] = phi <8 x float> [ [[TMP30]], [[BB3]] ], [ [[TMP103]], [[BB4]] ], [ [[TMP142]], [[BB5]] ], [ [[TMP64]], [[BB1]] ]
298304
; MAX256-NEXT: [[TMP156:%.*]] = phi <8 x float> [ [[TMP32]], [[BB3]] ], [ [[TMP113]], [[BB4]] ], [ [[TMP152]], [[BB5]] ], [ [[TMP74]], [[BB1]] ]
305+
; MAX256-NEXT: [[TMP157:%.*]] = extractelement <8 x float> [[TMP156]], i32 6
306+
; MAX256-NEXT: store float [[TMP157]], float* undef, align 4
299307
; MAX256-NEXT: ret void
300308
;
301309
; MAX1024-LABEL: @phi_float32(
@@ -481,6 +489,8 @@ define void @phi_float32(half %hval, float %fval) {
481489
; MAX1024-NEXT: br label [[BB2]]
482490
; MAX1024: bb2:
483491
; MAX1024-NEXT: [[TMP165:%.*]] = phi <32 x float> [ [[TMP38]], [[BB3]] ], [ [[TMP125]], [[BB4]] ], [ [[TMP164]], [[BB5]] ], [ [[TMP86]], [[BB1]] ]
492+
; MAX1024-NEXT: [[TMP166:%.*]] = extractelement <32 x float> [[TMP165]], i32 30
493+
; MAX1024-NEXT: store float [[TMP166]], float* undef, align 4
484494
; MAX1024-NEXT: ret void
485495
;
486496
bb:
@@ -603,5 +613,6 @@ bb2:
603613
%phi30 = phi float [ %i63, %bb3 ], [ %fval, %bb4 ], [ %fval, %bb5 ], [ %fval, %bb1 ]
604614
%phi31 = phi float [ %i65, %bb3 ], [ %fval, %bb4 ], [ %i65, %bb5 ], [ %i65, %bb1 ]
605615
%phi32 = phi float [ %i67, %bb3 ], [ %i67, %bb4 ], [ %fval, %bb5 ], [ %i67, %bb1 ]
616+
store float %phi31, float* undef
606617
ret void
607618
}

0 commit comments

Comments
 (0)