Skip to content

Commit c7aac38

Browse files
[SLP]Correctly detect minnum/maxnum patterns for select/cmp operations on floats.
The patch enables detection of minnum/maxnum patterns for float point instruction, represented as select/cmp. Also, enables better cost estimation for integer min/max patterns since the compiler starts to estimate the scalars separately. Reviewers: nikic, RKSimon Reviewed By: RKSimon Pull Request: #98570
1 parent a6d2da8 commit c7aac38

File tree

5 files changed

+133
-86
lines changed

5 files changed

+133
-86
lines changed

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8644,10 +8644,7 @@ llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
86448644
if (all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) {
86458645
Value *LHS, *RHS;
86468646
auto CurrentPattern = matchSelectPattern(I, LHS, RHS);
8647-
if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor) ||
8648-
CurrentPattern.Flavor == SPF_FMINNUM ||
8649-
CurrentPattern.Flavor == SPF_FMAXNUM ||
8650-
!I->getType()->isIntOrIntVectorTy())
8647+
if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor))
86518648
return false;
86528649
if (SelectPattern.Flavor != SPF_UNKNOWN &&
86538650
SelectPattern.Flavor != CurrentPattern.Flavor)
@@ -8666,6 +8663,10 @@ llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
86668663
return {Intrinsic::smax, AllCmpSingleUse};
86678664
case SPF_UMAX:
86688665
return {Intrinsic::umax, AllCmpSingleUse};
8666+
case SPF_FMAXNUM:
8667+
return {Intrinsic::maxnum, AllCmpSingleUse};
8668+
case SPF_FMINNUM:
8669+
return {Intrinsic::minnum, AllCmpSingleUse};
86698670
default:
86708671
llvm_unreachable("unexpected select pattern flavor");
86718672
}

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9316,7 +9316,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
93169316
function_ref<InstructionCost(InstructionCost)> VectorCost) {
93179317
// Calculate the cost of this instruction.
93189318
InstructionCost ScalarCost = 0;
9319-
if (isa<CastInst, CmpInst, SelectInst, CallInst>(VL0)) {
9319+
if (isa<CastInst, CallInst>(VL0)) {
93209320
// For some of the instructions no need to calculate cost for each
93219321
// particular instruction, we can use the cost of the single
93229322
// instruction x total number of scalar instructions.
@@ -9637,9 +9637,27 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
96379637
? CmpInst::BAD_FCMP_PREDICATE
96389638
: CmpInst::BAD_ICMP_PREDICATE;
96399639

9640-
return TTI->getCmpSelInstrCost(E->getOpcode(), OrigScalarTy,
9641-
Builder.getInt1Ty(), CurrentPred, CostKind,
9642-
VI);
9640+
InstructionCost ScalarCost = TTI->getCmpSelInstrCost(
9641+
E->getOpcode(), OrigScalarTy, Builder.getInt1Ty(), CurrentPred,
9642+
CostKind, VI);
9643+
auto [MinMaxID, SelectOnly] = canConvertToMinOrMaxIntrinsic(VI);
9644+
if (MinMaxID != Intrinsic::not_intrinsic) {
9645+
IntrinsicCostAttributes CostAttrs(MinMaxID, OrigScalarTy,
9646+
{OrigScalarTy, OrigScalarTy});
9647+
InstructionCost IntrinsicCost =
9648+
TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
9649+
// If the selects are the only uses of the compares, they will be
9650+
// dead and we can adjust the cost by removing their cost.
9651+
if (SelectOnly) {
9652+
auto *CI = cast<CmpInst>(VI->getOperand(0));
9653+
IntrinsicCost -= TTI->getCmpSelInstrCost(
9654+
CI->getOpcode(), OrigScalarTy, Builder.getInt1Ty(),
9655+
CI->getPredicate(), CostKind, CI);
9656+
}
9657+
ScalarCost = std::min(ScalarCost, IntrinsicCost);
9658+
}
9659+
9660+
return ScalarCost;
96439661
};
96449662
auto GetVectorCost = [&](InstructionCost CommonCost) {
96459663
auto *MaskTy = getWidenedType(Builder.getInt1Ty(), VL.size());
@@ -9649,17 +9667,19 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
96499667
// Check if it is possible and profitable to use min/max for selects
96509668
// in VL.
96519669
//
9652-
auto IntrinsicAndUse = canConvertToMinOrMaxIntrinsic(VL);
9653-
if (IntrinsicAndUse.first != Intrinsic::not_intrinsic) {
9654-
IntrinsicCostAttributes CostAttrs(IntrinsicAndUse.first, VecTy,
9655-
{VecTy, VecTy});
9670+
auto [MinMaxID, SelectOnly] = canConvertToMinOrMaxIntrinsic(VL);
9671+
if (MinMaxID != Intrinsic::not_intrinsic) {
9672+
IntrinsicCostAttributes CostAttrs(MinMaxID, VecTy, {VecTy, VecTy});
96569673
InstructionCost IntrinsicCost =
96579674
TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
96589675
// If the selects are the only uses of the compares, they will be
96599676
// dead and we can adjust the cost by removing their cost.
9660-
if (IntrinsicAndUse.second)
9661-
IntrinsicCost -= TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy,
9677+
if (SelectOnly) {
9678+
auto *CI =
9679+
cast<CmpInst>(cast<Instruction>(VL.front())->getOperand(0));
9680+
IntrinsicCost -= TTI->getCmpSelInstrCost(CI->getOpcode(), VecTy,
96629681
MaskTy, VecPred, CostKind);
9682+
}
96639683
VecCost = std::min(VecCost, IntrinsicCost);
96649684
}
96659685
return VecCost + CommonCost;

llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll

Lines changed: 72 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -75,48 +75,64 @@ define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef
7575
; CHECK-NEXT: [[TMP4TT_0_LCSSA:%.*]] = phi <2 x i64> [ zeroinitializer, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_END_LOOPEXIT]] ]
7676
; CHECK-NEXT: [[PB_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PB]], [[ENTRY]] ], [ [[SCEVGEP311]], [[WHILE_END_LOOPEXIT]] ]
7777
; CHECK-NEXT: [[PA_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PA]], [[ENTRY]] ], [ [[SCEVGEP]], [[WHILE_END_LOOPEXIT]] ]
78-
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP4FT_0_LCSSA]], <2 x i64> [[TMP4TF_0_LCSSA]], <2 x i32> <i32 0, i32 2>
79-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP4FF_0_LCSSA]], <2 x i64> [[TMP4TT_0_LCSSA]], <2 x i32> <i32 0, i32 2>
80-
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
81-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP4FT_0_LCSSA]], <2 x i64> [[TMP4TF_0_LCSSA]], <2 x i32> <i32 1, i32 3>
82-
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[TMP4FF_0_LCSSA]], <2 x i64> [[TMP4TT_0_LCSSA]], <2 x i32> <i32 1, i32 3>
83-
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
84-
; CHECK-NEXT: [[TMP16:%.*]] = trunc <4 x i64> [[TMP12]] to <4 x i32>
85-
; CHECK-NEXT: [[TMP57:%.*]] = trunc <4 x i64> [[TMP15]] to <4 x i32>
86-
; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP16]], [[TMP57]]
78+
; CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP4TT_0_LCSSA]], i64 0
79+
; CHECK-NEXT: [[VGETQ_LANE45:%.*]] = extractelement <2 x i64> [[TMP4TT_0_LCSSA]], i64 1
80+
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[VGETQ_LANE]], [[VGETQ_LANE45]]
81+
; CHECK-NEXT: [[CONV48:%.*]] = trunc i64 [[ADD]] to i32
82+
; CHECK-NEXT: [[VGETQ_LANE51:%.*]] = extractelement <2 x i64> [[TMP4FF_0_LCSSA]], i64 0
83+
; CHECK-NEXT: [[VGETQ_LANE55:%.*]] = extractelement <2 x i64> [[TMP4FF_0_LCSSA]], i64 1
84+
; CHECK-NEXT: [[ADD57:%.*]] = add i64 [[VGETQ_LANE51]], [[VGETQ_LANE55]]
85+
; CHECK-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD57]] to i32
86+
; CHECK-NEXT: [[VGETQ_LANE63:%.*]] = extractelement <2 x i64> [[TMP4TF_0_LCSSA]], i64 0
87+
; CHECK-NEXT: [[VGETQ_LANE67:%.*]] = extractelement <2 x i64> [[TMP4TF_0_LCSSA]], i64 1
88+
; CHECK-NEXT: [[ADD69:%.*]] = add i64 [[VGETQ_LANE63]], [[VGETQ_LANE67]]
89+
; CHECK-NEXT: [[CONV72:%.*]] = trunc i64 [[ADD69]] to i32
90+
; CHECK-NEXT: [[VGETQ_LANE75:%.*]] = extractelement <2 x i64> [[TMP4FT_0_LCSSA]], i64 0
91+
; CHECK-NEXT: [[VGETQ_LANE79:%.*]] = extractelement <2 x i64> [[TMP4FT_0_LCSSA]], i64 1
92+
; CHECK-NEXT: [[ADD81:%.*]] = add i64 [[VGETQ_LANE75]], [[VGETQ_LANE79]]
93+
; CHECK-NEXT: [[CONV84:%.*]] = trunc i64 [[ADD81]] to i32
8794
; CHECK-NEXT: [[AND:%.*]] = and i32 [[NUMBEROFBOOLS]], 127
8895
; CHECK-NEXT: [[CMP86284:%.*]] = icmp ugt i32 [[AND]], 31
8996
; CHECK-NEXT: br i1 [[CMP86284]], label [[WHILE_BODY88:%.*]], label [[WHILE_END122:%.*]]
9097
; CHECK: while.body88:
9198
; CHECK-NEXT: [[PA_ADDR_1291:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_END121:%.*]] ], [ [[PA_ADDR_0_LCSSA]], [[WHILE_END]] ]
9299
; CHECK-NEXT: [[PB_ADDR_1290:%.*]] = phi ptr [ [[INCDEC_PTR89:%.*]], [[WHILE_END121]] ], [ [[PB_ADDR_0_LCSSA]], [[WHILE_END]] ]
100+
; CHECK-NEXT: [[_CTT_0289:%.*]] = phi i32 [ [[ADD99:%.*]], [[WHILE_END121]] ], [ [[CONV48]], [[WHILE_END]] ]
101+
; CHECK-NEXT: [[_CFF_0288:%.*]] = phi i32 [ [[ADD106:%.*]], [[WHILE_END121]] ], [ [[CONV60]], [[WHILE_END]] ]
102+
; CHECK-NEXT: [[_CTF_0287:%.*]] = phi i32 [ [[ADD113:%.*]], [[WHILE_END121]] ], [ [[CONV72]], [[WHILE_END]] ]
103+
; CHECK-NEXT: [[_CFT_0286:%.*]] = phi i32 [ [[ADD120:%.*]], [[WHILE_END121]] ], [ [[CONV84]], [[WHILE_END]] ]
93104
; CHECK-NEXT: [[NBBOOLBLOCK_1285:%.*]] = phi i32 [ [[SUB:%.*]], [[WHILE_END121]] ], [ [[AND]], [[WHILE_END]] ]
94-
; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP34:%.*]], [[WHILE_END121]] ], [ [[TMP17]], [[WHILE_END]] ]
95-
; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[PA_ADDR_1291]], align 4
96-
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[PB_ADDR_1290]], align 4
105+
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[PA_ADDR_1291]], align 4
106+
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[PB_ADDR_1290]], align 4
97107
; CHECK-NEXT: br label [[WHILE_BODY93:%.*]]
98108
; CHECK: while.body93:
99-
; CHECK-NEXT: [[A_0279:%.*]] = phi i32 [ [[TMP19]], [[WHILE_BODY88]] ], [ [[SHR96:%.*]], [[WHILE_BODY93]] ]
100-
; CHECK-NEXT: [[B_0278:%.*]] = phi i32 [ [[TMP20]], [[WHILE_BODY88]] ], [ [[SHR97:%.*]], [[WHILE_BODY93]] ]
109+
; CHECK-NEXT: [[_CTT_1283:%.*]] = phi i32 [ [[_CTT_0289]], [[WHILE_BODY88]] ], [ [[ADD99]], [[WHILE_BODY93]] ]
110+
; CHECK-NEXT: [[_CFF_1282:%.*]] = phi i32 [ [[_CFF_0288]], [[WHILE_BODY88]] ], [ [[ADD106]], [[WHILE_BODY93]] ]
111+
; CHECK-NEXT: [[_CTF_1281:%.*]] = phi i32 [ [[_CTF_0287]], [[WHILE_BODY88]] ], [ [[ADD113]], [[WHILE_BODY93]] ]
112+
; CHECK-NEXT: [[_CFT_1280:%.*]] = phi i32 [ [[_CFT_0286]], [[WHILE_BODY88]] ], [ [[ADD120]], [[WHILE_BODY93]] ]
113+
; CHECK-NEXT: [[A_0279:%.*]] = phi i32 [ [[TMP10]], [[WHILE_BODY88]] ], [ [[SHR96:%.*]], [[WHILE_BODY93]] ]
114+
; CHECK-NEXT: [[B_0278:%.*]] = phi i32 [ [[TMP11]], [[WHILE_BODY88]] ], [ [[SHR97:%.*]], [[WHILE_BODY93]] ]
101115
; CHECK-NEXT: [[SHIFT_0277:%.*]] = phi i32 [ 0, [[WHILE_BODY88]] ], [ [[INC:%.*]], [[WHILE_BODY93]] ]
102-
; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i32> [ [[TMP18]], [[WHILE_BODY88]] ], [ [[TMP34]], [[WHILE_BODY93]] ]
103116
; CHECK-NEXT: [[AND94:%.*]] = and i32 [[A_0279]], 1
104117
; CHECK-NEXT: [[AND95:%.*]] = and i32 [[B_0278]], 1
105118
; CHECK-NEXT: [[SHR96]] = lshr i32 [[A_0279]], 1
106119
; CHECK-NEXT: [[SHR97]] = lshr i32 [[B_0278]], 1
107120
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND94]], 0
108121
; CHECK-NEXT: [[TOBOOL98:%.*]] = icmp ne i32 [[AND95]], 0
122+
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TOBOOL]], i1 [[TOBOOL98]], i1 false
123+
; CHECK-NEXT: [[LAND_EXT:%.*]] = zext i1 [[TMP12]] to i32
124+
; CHECK-NEXT: [[ADD99]] = add i32 [[_CTT_1283]], [[LAND_EXT]]
109125
; CHECK-NEXT: [[TOBOOL100:%.*]] = icmp eq i32 [[AND94]], 0
110126
; CHECK-NEXT: [[TOBOOL103:%.*]] = icmp eq i32 [[AND95]], 0
111-
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL100]], i32 0
112-
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i1> [[TMP22]], i1 [[TOBOOL]], i32 1
113-
; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i1> [[TMP23]], <4 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
114-
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL98]], i32 0
115-
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i1> [[TMP25]], i1 [[TOBOOL103]], i32 1
116-
; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i1> [[TMP27]], <4 x i1> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
117-
; CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP26]], <4 x i1> [[TMP31]], <4 x i1> zeroinitializer
118-
; CHECK-NEXT: [[TMP33:%.*]] = zext <4 x i1> [[TMP32]] to <4 x i32>
119-
; CHECK-NEXT: [[TMP34]] = add <4 x i32> [[TMP21]], [[TMP33]]
127+
; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TOBOOL100]], i1 [[TOBOOL103]], i1 false
128+
; CHECK-NEXT: [[LAND_EXT105:%.*]] = zext i1 [[TMP13]] to i32
129+
; CHECK-NEXT: [[ADD106]] = add i32 [[_CFF_1282]], [[LAND_EXT105]]
130+
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TOBOOL]], i1 [[TOBOOL103]], i1 false
131+
; CHECK-NEXT: [[LAND_EXT112:%.*]] = zext i1 [[TMP14]] to i32
132+
; CHECK-NEXT: [[ADD113]] = add i32 [[_CTF_1281]], [[LAND_EXT112]]
133+
; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TOBOOL100]], i1 [[TOBOOL98]], i1 false
134+
; CHECK-NEXT: [[LAND_EXT119:%.*]] = zext i1 [[TMP15]] to i32
135+
; CHECK-NEXT: [[ADD120]] = add i32 [[_CFT_1280]], [[LAND_EXT119]]
120136
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[SHIFT_0277]], 1
121137
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], 32
122138
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[WHILE_END121]], label [[WHILE_BODY93]]
@@ -128,53 +144,61 @@ define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef
128144
; CHECK-NEXT: br i1 [[CMP86]], label [[WHILE_BODY88]], label [[WHILE_END122]]
129145
; CHECK: while.end122:
130146
; CHECK-NEXT: [[NBBOOLBLOCK_1_LCSSA:%.*]] = phi i32 [ [[AND]], [[WHILE_END]] ], [ [[SUB]], [[WHILE_END121]] ]
147+
; CHECK-NEXT: [[_CFT_0_LCSSA:%.*]] = phi i32 [ [[CONV84]], [[WHILE_END]] ], [ [[ADD120]], [[WHILE_END121]] ]
148+
; CHECK-NEXT: [[_CTF_0_LCSSA:%.*]] = phi i32 [ [[CONV72]], [[WHILE_END]] ], [ [[ADD113]], [[WHILE_END121]] ]
149+
; CHECK-NEXT: [[_CFF_0_LCSSA:%.*]] = phi i32 [ [[CONV60]], [[WHILE_END]] ], [ [[ADD106]], [[WHILE_END121]] ]
150+
; CHECK-NEXT: [[_CTT_0_LCSSA:%.*]] = phi i32 [ [[CONV48]], [[WHILE_END]] ], [ [[ADD99]], [[WHILE_END121]] ]
131151
; CHECK-NEXT: [[PB_ADDR_1_LCSSA:%.*]] = phi ptr [ [[PB_ADDR_0_LCSSA]], [[WHILE_END]] ], [ [[INCDEC_PTR89]], [[WHILE_END121]] ]
132152
; CHECK-NEXT: [[PA_ADDR_1_LCSSA:%.*]] = phi ptr [ [[PA_ADDR_0_LCSSA]], [[WHILE_END]] ], [ [[INCDEC_PTR]], [[WHILE_END121]] ]
133-
; CHECK-NEXT: [[TMP35:%.*]] = phi <4 x i32> [ [[TMP17]], [[WHILE_END]] ], [ [[TMP34]], [[WHILE_END121]] ]
134153
; CHECK-NEXT: [[CMP130_NOT299:%.*]] = icmp eq i32 [[NBBOOLBLOCK_1_LCSSA]], 0
135154
; CHECK-NEXT: br i1 [[CMP130_NOT299]], label [[WHILE_END166:%.*]], label [[WHILE_BODY132_PREHEADER:%.*]]
136155
; CHECK: while.body132.preheader:
137-
; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[PB_ADDR_1_LCSSA]], align 4
156+
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[PB_ADDR_1_LCSSA]], align 4
138157
; CHECK-NEXT: [[SUB125:%.*]] = sub nuw nsw i32 32, [[NBBOOLBLOCK_1_LCSSA]]
139-
; CHECK-NEXT: [[SHR128:%.*]] = lshr i32 [[TMP36]], [[SUB125]]
140-
; CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[PA_ADDR_1_LCSSA]], align 4
141-
; CHECK-NEXT: [[SHR126:%.*]] = lshr i32 [[TMP37]], [[SUB125]]
158+
; CHECK-NEXT: [[SHR128:%.*]] = lshr i32 [[TMP16]], [[SUB125]]
159+
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[PA_ADDR_1_LCSSA]], align 4
160+
; CHECK-NEXT: [[SHR126:%.*]] = lshr i32 [[TMP17]], [[SUB125]]
142161
; CHECK-NEXT: br label [[WHILE_BODY132:%.*]]
143162
; CHECK: while.body132:
163+
; CHECK-NEXT: [[_CTT_2306:%.*]] = phi i32 [ [[ADD142:%.*]], [[WHILE_BODY132]] ], [ [[_CTT_0_LCSSA]], [[WHILE_BODY132_PREHEADER]] ]
164+
; CHECK-NEXT: [[_CFF_2305:%.*]] = phi i32 [ [[ADD150:%.*]], [[WHILE_BODY132]] ], [ [[_CFF_0_LCSSA]], [[WHILE_BODY132_PREHEADER]] ]
165+
; CHECK-NEXT: [[_CTF_2304:%.*]] = phi i32 [ [[ADD157:%.*]], [[WHILE_BODY132]] ], [ [[_CTF_0_LCSSA]], [[WHILE_BODY132_PREHEADER]] ]
166+
; CHECK-NEXT: [[_CFT_2303:%.*]] = phi i32 [ [[ADD164:%.*]], [[WHILE_BODY132]] ], [ [[_CFT_0_LCSSA]], [[WHILE_BODY132_PREHEADER]] ]
144167
; CHECK-NEXT: [[NBBOOLBLOCK_2302:%.*]] = phi i32 [ [[DEC165:%.*]], [[WHILE_BODY132]] ], [ [[NBBOOLBLOCK_1_LCSSA]], [[WHILE_BODY132_PREHEADER]] ]
145168
; CHECK-NEXT: [[A_1301:%.*]] = phi i32 [ [[SHR135:%.*]], [[WHILE_BODY132]] ], [ [[SHR126]], [[WHILE_BODY132_PREHEADER]] ]
146169
; CHECK-NEXT: [[B_1300:%.*]] = phi i32 [ [[SHR136:%.*]], [[WHILE_BODY132]] ], [ [[SHR128]], [[WHILE_BODY132_PREHEADER]] ]
147-
; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP51:%.*]], [[WHILE_BODY132]] ], [ [[TMP35]], [[WHILE_BODY132_PREHEADER]] ]
148170
; CHECK-NEXT: [[AND133:%.*]] = and i32 [[A_1301]], 1
149171
; CHECK-NEXT: [[AND134:%.*]] = and i32 [[B_1300]], 1
150172
; CHECK-NEXT: [[SHR135]] = lshr i32 [[A_1301]], 1
151173
; CHECK-NEXT: [[SHR136]] = lshr i32 [[B_1300]], 1
152174
; CHECK-NEXT: [[TOBOOL137:%.*]] = icmp ne i32 [[AND133]], 0
153175
; CHECK-NEXT: [[TOBOOL139:%.*]] = icmp ne i32 [[AND134]], 0
176+
; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TOBOOL137]], i1 [[TOBOOL139]], i1 false
177+
; CHECK-NEXT: [[LAND_EXT141:%.*]] = zext i1 [[TMP18]] to i32
178+
; CHECK-NEXT: [[ADD142]] = add i32 [[_CTT_2306]], [[LAND_EXT141]]
154179
; CHECK-NEXT: [[TOBOOL144:%.*]] = icmp eq i32 [[AND133]], 0
155180
; CHECK-NEXT: [[TOBOOL147:%.*]] = icmp eq i32 [[AND134]], 0
156-
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL144]], i32 0
157-
; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x i1> [[TMP40]], i1 [[TOBOOL137]], i32 1
158-
; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <4 x i1> [[TMP41]], <4 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
159-
; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL139]], i32 0
160-
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP42]], i1 [[TOBOOL147]], i32 1
161-
; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <4 x i1> [[TMP39]], <4 x i1> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
162-
; CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP43]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer
163-
; CHECK-NEXT: [[TMP50:%.*]] = zext <4 x i1> [[TMP49]] to <4 x i32>
164-
; CHECK-NEXT: [[TMP51]] = add <4 x i32> [[TMP38]], [[TMP50]]
181+
; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TOBOOL144]], i1 [[TOBOOL147]], i1 false
182+
; CHECK-NEXT: [[LAND_EXT149:%.*]] = zext i1 [[TMP19]] to i32
183+
; CHECK-NEXT: [[ADD150]] = add i32 [[_CFF_2305]], [[LAND_EXT149]]
184+
; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TOBOOL137]], i1 [[TOBOOL147]], i1 false
185+
; CHECK-NEXT: [[LAND_EXT156:%.*]] = zext i1 [[TMP20]] to i32
186+
; CHECK-NEXT: [[ADD157]] = add i32 [[_CTF_2304]], [[LAND_EXT156]]
187+
; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TOBOOL144]], i1 [[TOBOOL139]], i1 false
188+
; CHECK-NEXT: [[LAND_EXT163:%.*]] = zext i1 [[TMP21]] to i32
189+
; CHECK-NEXT: [[ADD164]] = add i32 [[_CFT_2303]], [[LAND_EXT163]]
165190
; CHECK-NEXT: [[DEC165]] = add nsw i32 [[NBBOOLBLOCK_2302]], -1
166191
; CHECK-NEXT: [[CMP130_NOT:%.*]] = icmp eq i32 [[DEC165]], 0
167192
; CHECK-NEXT: br i1 [[CMP130_NOT]], label [[WHILE_END166]], label [[WHILE_BODY132]]
168193
; CHECK: while.end166:
169-
; CHECK-NEXT: [[TMP52:%.*]] = phi <4 x i32> [ [[TMP35]], [[WHILE_END122]] ], [ [[TMP51]], [[WHILE_BODY132]] ]
170-
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i32> [[TMP52]], i32 3
171-
; CHECK-NEXT: store i32 [[TMP53]], ptr [[CTT:%.*]], align 4
172-
; CHECK-NEXT: [[TMP54:%.*]] = extractelement <4 x i32> [[TMP52]], i32 2
173-
; CHECK-NEXT: store i32 [[TMP54]], ptr [[CFF:%.*]], align 4
174-
; CHECK-NEXT: [[TMP55:%.*]] = extractelement <4 x i32> [[TMP52]], i32 1
175-
; CHECK-NEXT: store i32 [[TMP55]], ptr [[CTF:%.*]], align 4
176-
; CHECK-NEXT: [[TMP56:%.*]] = extractelement <4 x i32> [[TMP52]], i32 0
177-
; CHECK-NEXT: store i32 [[TMP56]], ptr [[CFT:%.*]], align 4
194+
; CHECK-NEXT: [[_CFT_2_LCSSA:%.*]] = phi i32 [ [[_CFT_0_LCSSA]], [[WHILE_END122]] ], [ [[ADD164]], [[WHILE_BODY132]] ]
195+
; CHECK-NEXT: [[_CTF_2_LCSSA:%.*]] = phi i32 [ [[_CTF_0_LCSSA]], [[WHILE_END122]] ], [ [[ADD157]], [[WHILE_BODY132]] ]
196+
; CHECK-NEXT: [[_CFF_2_LCSSA:%.*]] = phi i32 [ [[_CFF_0_LCSSA]], [[WHILE_END122]] ], [ [[ADD150]], [[WHILE_BODY132]] ]
197+
; CHECK-NEXT: [[_CTT_2_LCSSA:%.*]] = phi i32 [ [[_CTT_0_LCSSA]], [[WHILE_END122]] ], [ [[ADD142]], [[WHILE_BODY132]] ]
198+
; CHECK-NEXT: store i32 [[_CTT_2_LCSSA]], ptr [[CTT:%.*]], align 4
199+
; CHECK-NEXT: store i32 [[_CFF_2_LCSSA]], ptr [[CFF:%.*]], align 4
200+
; CHECK-NEXT: store i32 [[_CTF_2_LCSSA]], ptr [[CTF:%.*]], align 4
201+
; CHECK-NEXT: store i32 [[_CFT_2_LCSSA]], ptr [[CFT:%.*]], align 4
178202
; CHECK-NEXT: ret void
179203
;
180204
entry:

0 commit comments

Comments
 (0)